From c93ca023a7c7893aca6575576278aad6152696d5 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Sun, 12 Apr 2020 16:18:47 +0100 Subject: [PATCH 1/6] Allow specifying the value of Accept-Language header for URL preview --- docs/sample_config.yaml | 25 +++++++++++++++++ synapse/config/repository.py | 27 +++++++++++++++++++ synapse/rest/media/v1/preview_url_resource.py | 8 ++++-- 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 3417813750be..81dccbd99738 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -859,6 +859,31 @@ media_store_path: "DATADIR/media_store" # #max_spider_size: 10M +# A list of values for the Accept-Language HTTP header used when +# downloading webpages during URL preview generation. This allows +# Synapse to specify the preferred languages that URL previews should +# be in when communicating with remote servers. +# +# Each value is an IETF language tag; a 2-3 letter identifier for a +# language, optionally followed by subtags separated by '-', specifying +# a country or region variant. +# +# Multiple values can be provided, and a weight can be added to each by +# using quality value syntax (;q=). '*' translates to any language. +# +# Defaults to "en". +# +# Example: +# +# url_preview_accept_language: +# - en-UK +# - en-US;q=0.9 +# - fr;q=0.8 +# - '*;q=0.7' +# +url_preview_accept_language: +# - en + ## Captcha ## # See docs/CAPTCHA_SETUP for full details of configuring this. 
diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 7d2dd27fd0a1..0454c609d6c5 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -192,6 +192,8 @@ def read_config(self, config, **kwargs): self.url_preview_url_blacklist = config.get("url_preview_url_blacklist", ()) + self.url_preview_accept_language = config.get("url_preview_accept_language") or ["en"] + def generate_config_section(self, data_dir_path, **kwargs): media_store = os.path.join(data_dir_path, "media_store") uploads_path = os.path.join(data_dir_path, "uploads") @@ -329,6 +331,31 @@ def generate_config_section(self, data_dir_path, **kwargs): # The largest allowed URL preview spidering size in bytes # #max_spider_size: 10M + + # A list of values for the Accept-Language HTTP header used when + # downloading webpages during URL preview generation. This allows + # Synapse to specify the preferred languages that URL previews should + # be in when communicating with remote servers. + # + # Each value is an IETF language tag; a 2-3 letter identifier for a + # language, optionally followed by subtags separated by '-', specifying + # a country or region variant. + # + # Multiple values can be provided, and a weight can be added to each by + # using quality value syntax (;q=). '*' translates to any language. + # + # Defaults to "en". 
+ # + # Example: + # + # url_preview_accept_language: + # - en-UK + # - en-US;q=0.9 + # - fr;q=0.8 + # - '*;q=0.7' + # + url_preview_accept_language: + # - en """ % locals() ) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index c46676f8fcc8..e72cc1670c19 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -86,6 +86,7 @@ def __init__(self, hs, media_repo, media_storage): self.media_storage = media_storage self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist + self.url_preview_accept_language = hs.config.url_preview_url_blacklist # memory cache mapping urls to an ObservableDeferred returning # JSON-encoded OG metadata @@ -315,9 +316,12 @@ async def _download_url(self, url, user): with self.media_storage.store_into_file(file_info) as (f, fname, finish): try: - logger.debug("Trying to get url '%s'", url) + logger.debug("Trying to get preview for url '%s'", url) length, headers, uri, code = await self.client.get_file( - url, output_stream=f, max_size=self.max_spider_size + url, + output_stream=f, + max_size=self.max_spider_size, + headers={"Accept Language": self.url_preview_accept_language}, ) except SynapseError: # Pass SynapseErrors through directly, so that the servlet From afc6c8f4a55e6f1f16e8e4dadf137cb3af6b0b6b Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Sun, 12 Apr 2020 16:27:59 +0100 Subject: [PATCH 2/6] Add changelog --- changelog.d/7265.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/7265.feature diff --git a/changelog.d/7265.feature b/changelog.d/7265.feature new file mode 100644 index 000000000000..345b63e0b78b --- /dev/null +++ b/changelog.d/7265.feature @@ -0,0 +1 @@ +Add a config option for specifying the value of the Accept-Language HTTP header when generating URL previews. 
\ No newline at end of file From 818bd013df95be22b73a4b6a1d60c0be605496b7 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Sun, 12 Apr 2020 16:28:41 +0100 Subject: [PATCH 3/6] lint --- synapse/config/repository.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 0454c609d6c5..7193ea11140d 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -192,7 +192,9 @@ def read_config(self, config, **kwargs): self.url_preview_url_blacklist = config.get("url_preview_url_blacklist", ()) - self.url_preview_accept_language = config.get("url_preview_accept_language") or ["en"] + self.url_preview_accept_language = config.get( + "url_preview_accept_language" + ) or ["en"] def generate_config_section(self, data_dir_path, **kwargs): media_store = os.path.join(data_dir_path, "media_store") From f184e7754bd2aabd5bef954dff0b562cdf8b67c4 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Sun, 12 Apr 2020 16:55:36 +0100 Subject: [PATCH 4/6] Add test --- synapse/rest/media/v1/preview_url_resource.py | 2 +- tests/rest/media/v1/test_url_preview.py | 50 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index e72cc1670c19..af11a6b69ec5 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -86,7 +86,7 @@ def __init__(self, hs, media_repo, media_storage): self.media_storage = media_storage self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist - self.url_preview_accept_language = hs.config.url_preview_url_blacklist + self.url_preview_accept_language = hs.config.url_preview_accept_language # memory cache mapping urls to an ObservableDeferred returning # JSON-encoded OG metadata diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 
852b8ab11ccd..1163259b0561 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -74,6 +74,7 @@ def make_homeserver(self, reactor, clock): ) config["url_preview_ip_range_whitelist"] = ("1.1.1.1",) config["url_preview_url_blacklist"] = [] + config["url_preview_accept_language"] = ["en-UK", "en-US;q=0.9", "fr;q=0.8", "*;q=0.7"] self.storage_path = self.mktemp() self.media_store_path = self.mktemp() @@ -507,3 +508,52 @@ def test_OPTIONS(self): self.pump() self.assertEqual(channel.code, 200) self.assertEqual(channel.json_body, {}) + + def test_accept_language_config_option(self): + """ + Accept-Language header is sent to the remote server + """ + self.lookups["example.com"] = [(IPv4Address, "8.8.8.8")] + + # Build and make a request to the server + request, channel = self.make_request( + "GET", "url_preview?url=http://example.com", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + # Extract Synapse's tcp client + client = self.reactor.tcpClients[0][2].buildProtocol(None) + + # Build a fake remote server to reply with + server = AccumulatingProtocol() + + # Connect the two together + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + + # Tell Synapse that it has received some data from the remote server + client.dataReceived( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\nContent-Type: text/html\r\n\r\n" + % (len(self.end_content),) + + self.end_content + ) + + # Move the reactor along until we get a response on our original channel + self.pump() + self.assertEqual(channel.code, 200) + self.assertEqual( + channel.json_body, {"og:title": "~matrix~", "og:description": "hi"} + ) + + # Check that the server received the Accept-Language header as part + # of the request from Synapse + self.assertIn( + ( + b"Accept language: en-UK\r\n" + b"Accept language: en-US;q=0.9\r\n" + b"Accept language: fr;q=0.8\r\n" + b"Accept language: *;q=0.7" + 
), + server.data + ) From 037557fbf5381b4df11fe7ebde3075f310d63ba6 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Sun, 12 Apr 2020 16:56:32 +0100 Subject: [PATCH 5/6] lint --- tests/rest/media/v1/test_url_preview.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 1163259b0561..bebc94ecabbb 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -74,7 +74,12 @@ def make_homeserver(self, reactor, clock): ) config["url_preview_ip_range_whitelist"] = ("1.1.1.1",) config["url_preview_url_blacklist"] = [] - config["url_preview_accept_language"] = ["en-UK", "en-US;q=0.9", "fr;q=0.8", "*;q=0.7"] + config["url_preview_accept_language"] = [ + "en-UK", + "en-US;q=0.9", + "fr;q=0.8", + "*;q=0.7", + ] self.storage_path = self.mktemp() self.media_store_path = self.mktemp() @@ -555,5 +560,5 @@ def test_accept_language_config_option(self): b"Accept language: fr;q=0.8\r\n" b"Accept language: *;q=0.7" ), - server.data + server.data, ) From 0268d4fc8ef89b26bd0e9867dd2c979fb6f4e0b2 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 14 Apr 2020 18:17:30 +0100 Subject: [PATCH 6/6] space -> hyphen in Accept-Language, capitalise in test --- synapse/rest/media/v1/preview_url_resource.py | 2 +- tests/rest/media/v1/test_url_preview.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index af11a6b69ec5..f68e18ea8af5 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -321,7 +321,7 @@ async def _download_url(self, url, user): url, output_stream=f, max_size=self.max_spider_size, - headers={"Accept Language": self.url_preview_accept_language}, + headers={"Accept-Language": self.url_preview_accept_language}, ) except SynapseError: # Pass SynapseErrors 
through directly, so that the servlet diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index bebc94ecabbb..2826211f3213 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -555,10 +555,10 @@ def test_accept_language_config_option(self): # of the request from Synapse self.assertIn( ( - b"Accept language: en-UK\r\n" - b"Accept language: en-US;q=0.9\r\n" - b"Accept language: fr;q=0.8\r\n" - b"Accept language: *;q=0.7" + b"Accept-Language: en-UK\r\n" + b"Accept-Language: en-US;q=0.9\r\n" + b"Accept-Language: fr;q=0.8\r\n" + b"Accept-Language: *;q=0.7" ), server.data, )