common.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. import re
  2. from urllib.parse import urlparse
  3. from django.conf import settings
  4. from django.core.exceptions import PermissionDenied
  5. from django.core.mail import mail_managers
  6. from django.http import HttpResponsePermanentRedirect
  7. from django.urls import is_valid_path
  8. from django.utils.deprecation import MiddlewareMixin
  9. from django.utils.http import escape_leading_slashes
  10. class CommonMiddleware(MiddlewareMixin):
  11. """
  12. "Common" middleware for taking care of some basic operations:
  13. - Forbid access to User-Agents in settings.DISALLOWED_USER_AGENTS
  14. - URL rewriting: Based on the APPEND_SLASH and PREPEND_WWW settings,
  15. append missing slashes and/or prepends missing "www."s.
  16. - If APPEND_SLASH is set and the initial URL doesn't end with a
  17. slash, and it is not found in urlpatterns, form a new URL by
  18. appending a slash at the end. If this new URL is found in
  19. urlpatterns, return an HTTP redirect to this new URL; otherwise
  20. process the initial URL as usual.
  21. This behavior can be customized by subclassing CommonMiddleware and
  22. overriding the response_redirect_class attribute.
  23. """
  24. response_redirect_class = HttpResponsePermanentRedirect
  25. def process_request(self, request):
  26. """
  27. Check for denied User-Agents and rewrite the URL based on
  28. settings.APPEND_SLASH and settings.PREPEND_WWW
  29. """
  30. # Check for denied User-Agents
  31. user_agent = request.META.get("HTTP_USER_AGENT")
  32. if user_agent is not None:
  33. for user_agent_regex in settings.DISALLOWED_USER_AGENTS:
  34. if user_agent_regex.search(user_agent):
  35. raise PermissionDenied("Forbidden user agent")
  36. # Check for a redirect based on settings.PREPEND_WWW
  37. host = request.get_host()
  38. if settings.PREPEND_WWW and host and not host.startswith("www."):
  39. # Check if we also need to append a slash so we can do it all
  40. # with a single redirect. (This check may be somewhat expensive,
  41. # so we only do it if we already know we're sending a redirect,
  42. # or in process_response if we get a 404.)
  43. if self.should_redirect_with_slash(request):
  44. path = self.get_full_path_with_slash(request)
  45. else:
  46. path = request.get_full_path()
  47. return self.response_redirect_class(f"{request.scheme}://www.{host}{path}")
  48. def should_redirect_with_slash(self, request):
  49. """
  50. Return True if settings.APPEND_SLASH is True and appending a slash to
  51. the request path turns an invalid path into a valid one.
  52. """
  53. if settings.APPEND_SLASH and not request.path_info.endswith("/"):
  54. urlconf = getattr(request, "urlconf", None)
  55. if not is_valid_path(request.path_info, urlconf):
  56. match = is_valid_path("%s/" % request.path_info, urlconf)
  57. if match:
  58. view = match.func
  59. return getattr(view, "should_append_slash", True)
  60. return False
  61. def get_full_path_with_slash(self, request):
  62. """
  63. Return the full path of the request with a trailing slash appended.
  64. Raise a RuntimeError if settings.DEBUG is True and request.method is
  65. DELETE, POST, PUT, or PATCH.
  66. """
  67. new_path = request.get_full_path(force_append_slash=True)
  68. # Prevent construction of scheme relative urls.
  69. new_path = escape_leading_slashes(new_path)
  70. if settings.DEBUG and request.method in ("DELETE", "POST", "PUT", "PATCH"):
  71. raise RuntimeError(
  72. "You called this URL via %(method)s, but the URL doesn't end "
  73. "in a slash and you have APPEND_SLASH set. Django can't "
  74. "redirect to the slash URL while maintaining %(method)s data. "
  75. "Change your form to point to %(url)s (note the trailing "
  76. "slash), or set APPEND_SLASH=False in your Django settings."
  77. % {
  78. "method": request.method,
  79. "url": request.get_host() + new_path,
  80. }
  81. )
  82. return new_path
  83. def process_response(self, request, response):
  84. """
  85. When the status code of the response is 404, it may redirect to a path
  86. with an appended slash if should_redirect_with_slash() returns True.
  87. """
  88. # If the given URL is "Not Found", then check if we should redirect to
  89. # a path with a slash appended.
  90. if response.status_code == 404 and self.should_redirect_with_slash(request):
  91. return self.response_redirect_class(self.get_full_path_with_slash(request))
  92. # Add the Content-Length header to non-streaming responses if not
  93. # already set.
  94. if not response.streaming and not response.has_header("Content-Length"):
  95. response.headers["Content-Length"] = str(len(response.content))
  96. return response
  97. class BrokenLinkEmailsMiddleware(MiddlewareMixin):
  98. def process_response(self, request, response):
  99. """Send broken link emails for relevant 404 NOT FOUND responses."""
  100. if response.status_code == 404 and not settings.DEBUG:
  101. domain = request.get_host()
  102. path = request.get_full_path()
  103. referer = request.META.get("HTTP_REFERER", "")
  104. if not self.is_ignorable_request(request, path, domain, referer):
  105. ua = request.META.get("HTTP_USER_AGENT", "<none>")
  106. ip = request.META.get("REMOTE_ADDR", "<none>")
  107. mail_managers(
  108. "Broken %slink on %s"
  109. % (
  110. (
  111. "INTERNAL "
  112. if self.is_internal_request(domain, referer)
  113. else ""
  114. ),
  115. domain,
  116. ),
  117. "Referrer: %s\nRequested URL: %s\nUser agent: %s\n"
  118. "IP address: %s\n" % (referer, path, ua, ip),
  119. fail_silently=True,
  120. )
  121. return response
  122. def is_internal_request(self, domain, referer):
  123. """
  124. Return True if the referring URL is the same domain as the current
  125. request.
  126. """
  127. # Different subdomains are treated as different domains.
  128. return bool(re.match("^https?://%s/" % re.escape(domain), referer))
  129. def is_ignorable_request(self, request, uri, domain, referer):
  130. """
  131. Return True if the given request *shouldn't* notify the site managers
  132. according to project settings or in situations outlined by the inline
  133. comments.
  134. """
  135. # The referer is empty.
  136. if not referer:
  137. return True
  138. # APPEND_SLASH is enabled and the referer is equal to the current URL
  139. # without a trailing slash indicating an internal redirect.
  140. if settings.APPEND_SLASH and uri.endswith("/") and referer == uri[:-1]:
  141. return True
  142. # A '?' in referer is identified as a search engine source.
  143. if not self.is_internal_request(domain, referer) and "?" in referer:
  144. return True
  145. # The referer is equal to the current URL, ignoring the scheme (assumed
  146. # to be a poorly implemented bot).
  147. parsed_referer = urlparse(referer)
  148. if parsed_referer.netloc in ["", domain] and parsed_referer.path == uri:
  149. return True
  150. return any(pattern.search(uri) for pattern in settings.IGNORABLE_404_URLS)