From bcdb9099b9077e7e0664bc40f016bcca803b519b Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 10 Feb 2021 15:55:25 +0000 Subject: [PATCH 1/7] Add a config option for prioritising local users in user dir queries --- docs/sample_config.yaml | 5 +++++ synapse/config/user_directory.py | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index fbbf71edd9fc..2a31e414a28f 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2542,9 +2542,14 @@ spam_checker: # rebuild the user_directory search indexes, see # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md # +# 'prioritise_local_users' defines whether to prioritise local users in +# search query results. If True, local users are more likely to appear above +# remote users when searching the user directory. Defaults to false. +# #user_directory: # enabled: true # search_all_users: false +# prioritise_local_users: false # User Consent configuration diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index c8d19c5d6b4e..e33a660d0980 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -26,6 +26,7 @@ class UserDirectoryConfig(Config): def read_config(self, config, **kwargs): self.user_directory_search_enabled = True self.user_directory_search_all_users = False + self.user_directory_search_prioritise_local_users = False user_directory_config = config.get("user_directory", None) if user_directory_config: self.user_directory_search_enabled = user_directory_config.get( @@ -34,6 +35,9 @@ def read_config(self, config, **kwargs): self.user_directory_search_all_users = user_directory_config.get( "search_all_users", False ) + self.user_directory_search_prioritise_local_users = user_directory_config.get( + "prioritise_local_users", False + ) def generate_config_section(self, config_dir_path, server_name, **kwargs): return """ @@ -49,7 +53,12 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): # rebuild the user_directory search indexes, see # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md # + # 'prioritise_local_users' defines whether to prioritise local users in + # search query results. If True, local users are more likely to appear above + # remote users when searching the user directory. Defaults to false. + # #user_directory: # enabled: true # search_all_users: false + # prioritise_local_users: false """ From e93f7d80e1d00b5c3eb813cb5481e654de47405c Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 10 Feb 2021 16:56:04 +0000 Subject: [PATCH 2/7] If option is true, modify user dir query to favour local users --- .../storage/databases/main/user_directory.py | 55 ++++++++++++++++--- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 7b9729da0958..6068a7280b33 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -558,6 +558,11 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) + self._prioritise_local_users_in_search = ( + hs.config.user_directory_search_prioritise_local_users + ) + self._server_name = hs.config.server_name + async def remove_from_user_dir(self, user_id: str) -> None: def _remove_from_user_dir_txn(txn): self.db_pool.simple_delete_txn( @@ -750,6 +755,29 @@ async def search_user_dir(self, user_id, search_term, limit): ) """ + # We allow manipulating the ranking algorithm by injecting statements + # based on config options. + additional_ordering_statements = [] + ordering_arguments = () + + # If enabled, this config option will rank local users higher than those on + # remote instances. + if self._prioritise_local_users_in_search: + # The statement checks whether a given user's user ID contains a domain name + # that matches the local server + if isinstance(self.database_engine, PostgresEngine): + statement = "* (CASE WHEN user_id LIKE '%:' || ? THEN 2.0 ELSE 1.0 END)" + elif isinstance(self.database_engine, Sqlite3Engine): + # Note that we need to include a comma at the end for valid SQL + statement = "user_id LIKE '%:' || ? DESC," + else: + # This should be unreachable. + raise Exception("Unrecognized database engine") + additional_ordering_statements.append(statement) + + # Append the local server name as an argument to the final query + ordering_arguments += (self._server_name,) + if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) @@ -763,7 +791,7 @@ async def search_user_dir(self, user_id, search_term, limit): FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) WHERE - %s + %(where_clause)s AND vector @@ to_tsquery('simple', ?) ORDER BY (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END) @@ -783,14 +811,21 @@ async def search_user_dir(self, user_id, search_term, limit): 8 ) ) + %(order_case_statements)s DESC, display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( - where_clause, + """ % { + "where_clause": where_clause, + "order_case_statements": " ".join(additional_ordering_statements), + } + args = ( + join_args + + (full_query, exact_query, prefix_query) + + ordering_arguments + + (limit + 1,) ) - args = join_args + (full_query, exact_query, prefix_query, limit + 1) elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) @@ -799,17 +834,19 @@ async def search_user_dir(self, user_id, search_term, limit): FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) WHERE - %s + %(where_clause)s AND value MATCH ? ORDER BY rank(matchinfo(user_directory_search)) DESC, + %(order_statements)s display_name IS NULL, avatar_url IS NULL LIMIT ? - """ % ( - where_clause, - ) - args = join_args + (search_query, limit + 1) + """ % { + "where_clause": where_clause, + "order_statements": " ".join(additional_ordering_statements), + } + args = join_args + (search_query,) + ordering_arguments + (limit + 1,) else: # This should be unreachable. raise Exception("Unrecognized database engine") From 00bf0b977429d59b7e2760006f1cd3c86bb0589f Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 11 Feb 2021 12:59:37 +0000 Subject: [PATCH 3/7] Add a test Adds a test that creates a room, some local and some remote users. We then search the user directory and check that the local users appear first. --- tests/handlers/test_user_directory.py | 94 +++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 9c886d671a1b..a2321903ecb9 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -18,6 +18,7 @@ import synapse.rest.admin from synapse.api.constants import EventTypes, RoomEncryptionAlgorithms, UserTypes +from synapse.api.room_versions import RoomVersion, RoomVersions from synapse.rest.client.v1 import login, room from synapse.rest.client.v2_alpha import user_directory from synapse.storage.roommember import ProfileInfo @@ -46,6 +47,8 @@ def make_homeserver(self, reactor, clock): def prepare(self, reactor, clock, hs): self.store = hs.get_datastore() self.handler = hs.get_user_directory_handler() + self.event_builder_factory = self.hs.get_event_builder_factory() + self.event_creation_handler = self.hs.get_event_creation_handler() def test_handle_local_profile_change_with_support_user(self): support_user_id = "@support:test" @@ -541,6 +544,97 @@ def test_initial_share_all_users(self): s = self.get_success(self.handler.search_users(u1, u4, 10)) self.assertEqual(len(s["results"]), 1) + @override_config( + { + "user_directory": { + "enabled": True, + "search_all_users": True, + "prioritise_local_users": True, + } + } + ) + def test_prioritise_local_users(self): + """Tests that local users are shown higher in search results when + user_directory.prioritise_local_users is True. + """ + # Create a room and few users to test the directory with + searching_user = self.register_user("searcher", "password") + searching_user_tok = self.login("searcher", "password") + + room_id = self.helper.create_room_as( + searching_user, + room_version=RoomVersions.V1.identifier, + tok=searching_user_tok, + ) + + # Create a few local users and join them to the room + local_user_1 = self.register_user("user_xxxxx", "password") + local_user_2 = self.register_user("user_bbbbb", "password") + local_user_3 = self.register_user("user_zzzzz", "password") + + self._add_user_to_room(room_id, RoomVersions.V1, local_user_1) + self._add_user_to_room(room_id, RoomVersions.V1, local_user_2) + self._add_user_to_room(room_id, RoomVersions.V1, local_user_3) + + # Create a few "remote" users and join them to the room + remote_user_1 = "@user_aaaaa:remote_server" + remote_user_2 = "@user_yyyyy:remote_server" + remote_user_3 = "@user_ccccc:remote_server" + self._add_user_to_room(room_id, RoomVersions.V1, remote_user_1) + self._add_user_to_room(room_id, RoomVersions.V1, remote_user_2) + self._add_user_to_room(room_id, RoomVersions.V1, remote_user_3) + + local_users = [local_user_1, local_user_2, local_user_3] + remote_users = [remote_user_1, remote_user_2, remote_user_3] + + # Populate the user directory via background update + self._add_background_updates() + while not self.get_success( + self.store.db_pool.updates.has_completed_background_updates() + ): + self.get_success( + self.store.db_pool.updates.do_next_background_update(100), by=0.1 + ) + + # The local searching user searches for the term "user", which other users have + # in their user id + results = self.get_success( + self.handler.search_users(searching_user, "user", 20) + )["results"] + received_user_id_ordering = [result["user_id"] for result in results] + + # Typically we'd expect Synapse to return users in lexicographical order, + # assuming they have similar User IDs/display names, and profile information. + + # Check that the order of returned results using our module is as we expect, + # i.e our local users show up first, despite all users having lexographically mixed + # user IDs. + [self.assertIn(user, local_users) for user in received_user_id_ordering[:3]] + [self.assertIn(user, remote_users) for user in received_user_id_ordering[3:]] + + def _add_user_to_room( + self, room_id: str, room_version: RoomVersion, user_id: str, + ): + # Add a user to the room. + builder = self.event_builder_factory.for_room_version( + room_version, + { + "type": "m.room.member", + "sender": user_id, + "state_key": user_id, + "room_id": room_id, + "content": {"membership": "join"}, + }, + ) + + event, context = self.get_success( + self.event_creation_handler.create_new_client_event(builder) + ) + + self.get_success( + self.hs.get_storage().persistence.persist_event(event, context) + ) + class TestUserDirSearchDisabled(unittest.HomeserverTestCase): user_id = "@test:test" From 451106e2f6601f0f948a1058170d42768ff0ea9f Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 11 Feb 2021 13:17:44 +0000 Subject: [PATCH 4/7] Prefer not adding british english-isms to config options To avoid: "I put prioritize_local_users and it's not working!!11" --- docs/sample_config.yaml | 4 ++-- synapse/config/user_directory.py | 10 +++++----- synapse/storage/databases/main/user_directory.py | 6 +++--- tests/handlers/test_user_directory.py | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 2a31e414a28f..c5a5bec4b226 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2542,14 +2542,14 @@ spam_checker: # rebuild the user_directory search indexes, see # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md # -# 'prioritise_local_users' defines whether to prioritise local users in +# 'prefer_local_users' defines whether to prioritise local users in # search query results. If True, local users are more likely to appear above # remote users when searching the user directory. Defaults to false. # #user_directory: # enabled: true # search_all_users: false -# prioritise_local_users: false +# prefer_local_users: false # User Consent configuration diff --git a/synapse/config/user_directory.py b/synapse/config/user_directory.py index e33a660d0980..89dbebd1480b 100644 --- a/synapse/config/user_directory.py +++ b/synapse/config/user_directory.py @@ -26,7 +26,7 @@ class UserDirectoryConfig(Config): def read_config(self, config, **kwargs): self.user_directory_search_enabled = True self.user_directory_search_all_users = False - self.user_directory_search_prioritise_local_users = False + self.user_directory_search_prefer_local_users = False user_directory_config = config.get("user_directory", None) if user_directory_config: self.user_directory_search_enabled = user_directory_config.get( @@ -35,8 +35,8 @@ def read_config(self, config, **kwargs): self.user_directory_search_all_users = user_directory_config.get( "search_all_users", False ) - self.user_directory_search_prioritise_local_users = user_directory_config.get( - "prioritise_local_users", False + self.user_directory_search_prefer_local_users = user_directory_config.get( + "prefer_local_users", False ) def generate_config_section(self, config_dir_path, server_name, **kwargs): @@ -53,12 +53,12 @@ def generate_config_section(self, config_dir_path, server_name, **kwargs): # rebuild the user_directory search indexes, see # https://github.com/matrix-org/synapse/blob/master/docs/user_directory.md # - # 'prioritise_local_users' defines whether to prioritise local users in + # 'prefer_local_users' defines whether to prioritise local users in # search query results. If True, local users are more likely to appear above # remote users when searching the user directory. Defaults to false. # #user_directory: # enabled: true # search_all_users: false - # prioritise_local_users: false + # prefer_local_users: false """ diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 6068a7280b33..e5600a9c8e56 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -558,8 +558,8 @@ class UserDirectoryStore(UserDirectoryBackgroundUpdateStore): def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) - self._prioritise_local_users_in_search = ( - hs.config.user_directory_search_prioritise_local_users + self._prefer_local_users_in_search = ( + hs.config.user_directory_search_prefer_local_users ) self._server_name = hs.config.server_name @@ -762,7 +762,7 @@ async def search_user_dir(self, user_id, search_term, limit): # If enabled, this config option will rank local users higher than those on # remote instances. - if self._prioritise_local_users_in_search: + if self._prefer_local_users_in_search: # The statement checks whether a given user's user ID contains a domain name # that matches the local server if isinstance(self.database_engine, PostgresEngine): diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index a2321903ecb9..ef5e9cd3afc3 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -549,13 +549,13 @@ def test_initial_share_all_users(self): "user_directory": { "enabled": True, "search_all_users": True, - "prioritise_local_users": True, + "prefer_local_users": True, } } ) - def test_prioritise_local_users(self): + def test_prefer_local_users(self): """Tests that local users are shown higher in search results when - user_directory.prioritise_local_users is True. + user_directory.prefer_local_users is True. """ # Create a room and few users to test the directory with searching_user = self.register_user("searcher", "password") From d6956e6a0fe848dfe8604f881fd32f0262d09eec Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 11 Feb 2021 13:23:52 +0000 Subject: [PATCH 5/7] Changelog --- changelog.d/9383.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/9383.feature diff --git a/changelog.d/9383.feature b/changelog.d/9383.feature new file mode 100644 index 000000000000..8957c9cc5e4c --- /dev/null +++ b/changelog.d/9383.feature @@ -0,0 +1 @@ +Add a configuration option, `user_directory.prefer_local_users`, which when enabled will make it more likely for users on the same server as you to appear above other users. \ No newline at end of file From 18645c5be3ccb844f7e3dca6bd9b1e214bb553ad Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 11 Feb 2021 15:32:15 +0000 Subject: [PATCH 6/7] Append % in python, rather than in query Synapse translates %'s under the hood for each database engine. Including them in the query string instead of in the args can break things. --- synapse/storage/databases/main/user_directory.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index e5600a9c8e56..2ae1696ec5a2 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -766,17 +766,17 @@ async def search_user_dir(self, user_id, search_term, limit): # The statement checks whether a given user's user ID contains a domain name # that matches the local server if isinstance(self.database_engine, PostgresEngine): - statement = "* (CASE WHEN user_id LIKE '%:' || ? THEN 2.0 ELSE 1.0 END)" + statement = "* (CASE WHEN user_id LIKE ? THEN 2.0 ELSE 1.0 END)" elif isinstance(self.database_engine, Sqlite3Engine): # Note that we need to include a comma at the end for valid SQL - statement = "user_id LIKE '%:' || ? DESC," + statement = "user_id LIKE ? DESC," else: # This should be unreachable. raise Exception("Unrecognized database engine") additional_ordering_statements.append(statement) # Append the local server name as an argument to the final query - ordering_arguments += (self._server_name,) + ordering_arguments += ("%:" + self._server_name,) if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) From e87885d81180081b39d1e8b5b2e130e2edb97d09 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Wed, 17 Feb 2021 15:28:02 +0000 Subject: [PATCH 7/7] Merge statement building into each engines if block; lint --- .../storage/databases/main/user_directory.py | 46 ++++++++++--------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index 2ae1696ec5a2..467738285ff5 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -336,8 +336,7 @@ def _get_next_batch(txn): return len(users_to_work_on) async def is_room_world_readable_or_publicly_joinable(self, room_id): - """Check if the room is either world_readable or publically joinable - """ + """Check if the room is either world_readable or publically joinable""" # Create a state filter that only queries join and history state event types_to_filter = ( @@ -516,8 +515,7 @@ async def add_users_in_public_rooms( ) async def delete_all_from_user_dir(self) -> None: - """Delete the entire user directory - """ + """Delete the entire user directory""" def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") @@ -760,27 +758,19 @@ async def search_user_dir(self, user_id, search_term, limit): additional_ordering_statements = [] ordering_arguments = () - # If enabled, this config option will rank local users higher than those on - # remote instances. - if self._prefer_local_users_in_search: - # The statement checks whether a given user's user ID contains a domain name - # that matches the local server - if isinstance(self.database_engine, PostgresEngine): - statement = "* (CASE WHEN user_id LIKE ? THEN 2.0 ELSE 1.0 END)" - elif isinstance(self.database_engine, Sqlite3Engine): - # Note that we need to include a comma at the end for valid SQL - statement = "user_id LIKE ? DESC," - else: - # This should be unreachable. - raise Exception("Unrecognized database engine") - additional_ordering_statements.append(statement) - - # Append the local server name as an argument to the final query - ordering_arguments += ("%:" + self._server_name,) - if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) + # If enabled, this config option will rank local users higher than those on + # remote instances. + if self._prefer_local_users_in_search: + # This statement checks whether a given user's user ID contains a server name + # that matches the local server + statement = "* (CASE WHEN user_id LIKE ? THEN 2.0 ELSE 1.0 END)" + additional_ordering_statements.append(statement) + + ordering_arguments += ("%:" + self._server_name,) + # We order by rank and then if they have profile info # The ranking algorithm is hand tweaked for "best" results. Broadly # the idea is we give a higher weight to exact matches. @@ -829,6 +819,18 @@ async def search_user_dir(self, user_id, search_term, limit): elif isinstance(self.database_engine, Sqlite3Engine): search_query = _parse_query_sqlite(search_term) + # If enabled, this config option will rank local users higher than those on + # remote instances. + if self._prefer_local_users_in_search: + # This statement checks whether a given user's user ID contains a server name + # that matches the local server + # + # Note that we need to include a comma at the end for valid SQL + statement = "user_id LIKE ? DESC," + additional_ordering_statements.append(statement) + + ordering_arguments += ("%:" + self._server_name,) + sql = """ SELECT d.user_id AS user_id, display_name, avatar_url FROM user_directory_search as t