Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Addons + Proxito: return X-RTD-Resolver-Filename and inject via CF #11100

Merged
merged 12 commits into from
Feb 27, 2024
Merged
14 changes: 9 additions & 5 deletions dockerfiles/force-readthedocs-addons.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ async function handleRequest(request) {
// get project/version slug from headers inject by El Proxito
const projectSlug = originalResponse.headers.get("x-rtd-project") || "";
const versionSlug = originalResponse.headers.get("x-rtd-version") || "";
const resolverFilename = originalResponse.headers.get("x-rtd-resolver-filename") || "";

// check to decide whether or not inject the new beta addons:
//
Expand Down Expand Up @@ -103,7 +104,7 @@ async function handleRequest(request) {
// .on(readthedocsDataParse, new removeElement())
// .on(readthedocsData, new removeElement())
.on("head", new addPreloads())
.on("head", new addProjectVersionSlug(projectSlug, versionSlug))
.on("head", new addMetaTags(projectSlug, versionSlug, resolverFilename))
.transform(originalResponse)
);
}
Expand All @@ -116,7 +117,7 @@ async function handleRequest(request) {
if (forceAddons === "false" && injectHostingIntegrations === "true") {
return new HTMLRewriter()
.on("head", new addPreloads())
.on("head", new addProjectVersionSlug(projectSlug, versionSlug))
.on("head", new addMetaTags(projectSlug, versionSlug, resolverFilename))
.transform(originalResponse);
}
}
Expand Down Expand Up @@ -155,22 +156,25 @@ class addPreloads {
}
}

class addProjectVersionSlug {
constructor(projectSlug, versionSlug) {
class addMetaTags {
constructor(projectSlug, versionSlug, resolverFilename) {
this.projectSlug = projectSlug;
this.versionSlug = versionSlug;
this.resolverFilename = resolverFilename;
}

element(element) {
console.log(
`addProjectVersionSlug. projectSlug=${this.projectSlug} versionSlug=${this.versionSlug}`,
`addMetaTags. projectSlug=${this.projectSlug} versionSlug=${this.versionSlug} resolverFilename=${this.resolverFilename}`,
);
if (this.projectSlug && this.versionSlug) {
const metaProject = `<meta name="readthedocs-project-slug" content="${this.projectSlug}" />`;
const metaVersion = `<meta name="readthedocs-version-slug" content="${this.versionSlug}" />`;
const metaResolverFilename = `<meta name="readthedocs-resolver-filename" content="${this.resolverFilename}" />`;

element.append(metaProject, { html: true });
element.append(metaVersion, { html: true });
element.append(metaResolverFilename, { html: true });
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions dockerfiles/nginx/proxito.conf.template
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ server {
add_header X-RTD-Project-Method $rtd_project_method always;
set $rtd_redirect $upstream_http_x_rtd_redirect;
add_header X-RTD-Redirect $rtd_redirect always;
set $rtd_resolver_filename $upstream_http_x_rtd_resolver_filename;
add_header X-RTD-Resolver-Filename $rtd_resolver_filename always;
set $cdn_cache_control $upstream_http_cdn_cache_control;
add_header CDN-Cache-Control $cdn_cache_control always;
set $cache_tag $upstream_http_cache_tag;
Expand Down
24 changes: 24 additions & 0 deletions readthedocs/proxito/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
)
from django.conf import settings
from django.core.exceptions import SuspiciousOperation
from django.http.response import BadHeaderError, ResponseHeaders
from django.shortcuts import redirect
from django.urls import reverse
from django.utils.deprecation import MiddlewareMixin
from django.utils.html import escape

from readthedocs.builds.models import Version
from readthedocs.core.unresolver import (
Expand Down Expand Up @@ -367,11 +369,33 @@ def _get_https_redirect(self, request):

return None

def add_resolver_headers(self, request, response):
if hasattr(request, "unresolved_url") and request.unresolved_url is not None:
humitos marked this conversation as resolved.
Show resolved Hide resolved
# TODO: add more ``X-RTD-Resolver-*`` headers
header_value = escape(request.unresolved_url.filename)
try:
# Use Django internals to validate the header's value before injecting it.
ResponseHeaders({})._convert_to_charset(
header_value,
"latin-1",
mime_encode=True,
)
humitos marked this conversation as resolved.
Show resolved Hide resolved

response["X-RTD-Resolver-Filename"] = header_value
except BadHeaderError:
# Skip adding the header because it fails validation
log.info(
"Skip adding X-RTD-Resolver-Filename header due to invalid value.",
filename=request.unresolved_url.filename,
value=header_value,
)

def process_response(self, request, response): # noqa
self.add_proxito_headers(request, response)
self.add_cache_headers(request, response)
self.add_hsts_headers(request, response)
self.add_user_headers(request, response)
self.add_hosting_integrations_headers(request, response)
self.add_resolver_headers(request, response)
self.add_cors_headers(request, response)
return response
1 change: 1 addition & 0 deletions readthedocs/proxito/tests/test_headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def test_serve_headers(self):
self.assertEqual(r["X-RTD-Project-Method"], "public_domain")
self.assertEqual(r["X-RTD-Version"], "latest")
self.assertEqual(r["X-RTD-version-Method"], "path")
self.assertEqual(r["X-RTD-Resolver-Filename"], "/")
humitos marked this conversation as resolved.
Show resolved Hide resolved
self.assertEqual(
r["X-RTD-Path"], "/proxito/media/html/project/latest/index.html"
)
Expand Down
1 change: 1 addition & 0 deletions readthedocs/proxito/tests/test_old_redirects.py
Original file line number Diff line number Diff line change
Expand Up @@ -1352,6 +1352,7 @@ def test_redirect_exact_with_wildcard_crossdomain(self):
r = self.client.get(url, headers={"host": "project.dev.readthedocs.io"})
self.assertEqual(r.status_code, 302, url)
self.assertEqual(r["Location"], expected_location, url)
self.assertNotIn("X-RTD-Resolver-Filename", r.headers)

def test_redirect_html_to_clean_url_crossdomain(self):
"""
Expand Down
94 changes: 53 additions & 41 deletions readthedocs/proxito/views/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,10 @@ def serve_path(self, request, path):
version = unresolved.version
filename = unresolved.filename

# Inject the UnresolvedURL into the HttpRequest so we can access from the middleware.
# We could resolve it again from the middleware, but we would duplicating DB queries.
request.unresolved_url = unresolved
Comment on lines +269 to +270
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should also be in the 404 view.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍🏼 -- I put it there.

I found there could be cases where the unresolve_path could raise and exception making the request.unresolved_url=None but there would be a filename anyways, since it's got from exc.path.

I think we could eventually refactor the proxito/unresolver code to support UnresolvedURL without all the components. This is out of the scope of this PR, tho and probably also related to #10456


# Check if the old language code format was used, and redirect to the new one.
# NOTE: we may have some false positives here, for example for an URL like:
# /pt-br/latest/pt_BR/index.html, but our protection for infinite redirects
Expand Down Expand Up @@ -391,7 +395,7 @@ def get(self, request, proxito_path):
the Docs default page (Maze Found) is rendered by Django and served.
"""
log.bind(proxito_path=proxito_path)
log.debug('Executing 404 handler.')
log.debug("Executing 404 handler.")
unresolved_domain = request.unresolved_domain
# We force all storage calls to use the external versions storage,
# since we are serving an external version.
Expand Down Expand Up @@ -708,11 +712,12 @@ def get(self, request):
# Verify if the project is marked as spam and return a custom robots.txt
if "readthedocsext.spamfighting" in settings.INSTALLED_APPS:
from readthedocsext.spamfighting.utils import is_robotstxt_denied # noqa

if is_robotstxt_denied(project):
return render(
request,
'robots.spam.txt',
content_type='text/plain',
"robots.spam.txt",
content_type="text/plain",
)

# Use the ``robots.txt`` file from the default version configured
Expand Down Expand Up @@ -747,33 +752,30 @@ def get(self, request):
filename="robots.txt",
check_if_exists=True,
)
log.info('Serving custom robots.txt file.')
log.info("Serving custom robots.txt file.")
return response
except StorageFileNotFound:
pass

# Serve default robots.txt
sitemap_url = '{scheme}://{domain}/sitemap.xml'.format(
scheme='https',
sitemap_url = "{scheme}://{domain}/sitemap.xml".format(
scheme="https",
domain=project.subdomain(),
)
context = {
'sitemap_url': sitemap_url,
'hidden_paths': self._get_hidden_paths(project),
"sitemap_url": sitemap_url,
"hidden_paths": self._get_hidden_paths(project),
}
return render(
request,
'robots.txt',
"robots.txt",
context,
content_type='text/plain',
content_type="text/plain",
)

def _get_hidden_paths(self, project):
"""Get the absolute paths of the public hidden versions of `project`."""
hidden_versions = (
Version.internal.public(project=project)
.filter(hidden=True)
)
hidden_versions = Version.internal.public(project=project).filter(hidden=True)
resolver = Resolver()
hidden_paths = [
resolver.resolve_path(project, version_slug=version.slug)
Expand Down Expand Up @@ -846,8 +848,8 @@ def hreflang_formatter(lang):
Use hyphen instead of underscore in language and country value.
ref: https://en.wikipedia.org/wiki/Hreflang#Common_Mistakes
"""
if '_' in lang:
return lang.replace('_', '-')
if "_" in lang:
return lang.replace("_", "-")
return lang

def changefreqs_generator():
Expand All @@ -861,8 +863,8 @@ def changefreqs_generator():
aggressive. If the tag is removed and a branch is created with the same
name, we will want bots to revisit this.
"""
changefreqs = ['weekly', 'daily']
yield from itertools.chain(changefreqs, itertools.repeat('monthly'))
changefreqs = ["weekly", "daily"]
yield from itertools.chain(changefreqs, itertools.repeat("monthly"))

project = request.unresolved_domain.project
public_versions = Version.internal.public(
Expand All @@ -879,28 +881,34 @@ def changefreqs_generator():
# We want stable with priority=1 and changefreq='weekly' and
# latest with priority=0.9 and changefreq='daily'
# More details on this: https://github.com/rtfd/readthedocs.org/issues/5447
if (len(sorted_versions) >= 2 and sorted_versions[0].slug == LATEST and
sorted_versions[1].slug == STABLE):
sorted_versions[0], sorted_versions[1] = sorted_versions[1], sorted_versions[0]
if (
len(sorted_versions) >= 2
and sorted_versions[0].slug == LATEST
and sorted_versions[1].slug == STABLE
):
sorted_versions[0], sorted_versions[1] = (
sorted_versions[1],
sorted_versions[0],
)

versions = []
for version, priority, changefreq in zip(
sorted_versions,
priorities_generator(),
changefreqs_generator(),
sorted_versions,
priorities_generator(),
changefreqs_generator(),
):
element = {
'loc': version.get_subdomain_url(),
'priority': priority,
'changefreq': changefreq,
'languages': [],
"loc": version.get_subdomain_url(),
"priority": priority,
"changefreq": changefreq,
"languages": [],
}

# Version can be enabled, but not ``built`` yet. We want to show the
# link without a ``lastmod`` attribute
last_build = version.builds.order_by('-date').first()
last_build = version.builds.order_by("-date").first()
if last_build:
element['lastmod'] = last_build.date.isoformat()
element["lastmod"] = last_build.date.isoformat()

resolver = Resolver()
if project.translations.exists():
Expand All @@ -915,27 +923,31 @@ def changefreqs_generator():
project=translation,
version=translated_version,
)
element['languages'].append({
'hreflang': hreflang_formatter(translation.language),
'href': href,
})
element["languages"].append(
{
"hreflang": hreflang_formatter(translation.language),
"href": href,
}
)

# Add itself also as protocol requires
element['languages'].append({
'hreflang': project.language,
'href': element['loc'],
})
element["languages"].append(
{
"hreflang": project.language,
"href": element["loc"],
}
)

versions.append(element)

context = {
'versions': versions,
"versions": versions,
}
return render(
request,
'sitemap.xml',
"sitemap.xml",
context,
content_type='application/xml',
content_type="application/xml",
)

def _get_project(self):
Expand Down