Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Drop support for Postgres 10 in full text search code. (#14397)
Browse files Browse the repository at this point in the history
  • Loading branch information
clokep authored Nov 9, 2022
1 parent 21447c9 commit e9a4343
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 95 deletions.
1 change: 1 addition & 0 deletions changelog.d/14397.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove support for PostgreSQL 10.
50 changes: 23 additions & 27 deletions synapse/storage/databases/main/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,18 +463,17 @@ async def search_msgs(

if isinstance(self.database_engine, PostgresEngine):
search_query = search_term
tsquery_func = self.database_engine.tsquery_func
sql = f"""
SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) AS rank,
sql = """
SELECT ts_rank_cd(vector, websearch_to_tsquery('english', ?)) AS rank,
room_id, event_id
FROM event_search
WHERE vector @@ {tsquery_func}('english', ?)
WHERE vector @@ websearch_to_tsquery('english', ?)
"""
args = [search_query, search_query] + args

count_sql = f"""
count_sql = """
SELECT room_id, count(*) as count FROM event_search
WHERE vector @@ {tsquery_func}('english', ?)
WHERE vector @@ websearch_to_tsquery('english', ?)
"""
count_args = [search_query] + count_args
elif isinstance(self.database_engine, Sqlite3Engine):
Expand Down Expand Up @@ -523,9 +522,7 @@ async def search_msgs(

highlights = None
if isinstance(self.database_engine, PostgresEngine):
highlights = await self._find_highlights_in_postgres(
search_query, events, tsquery_func
)
highlights = await self._find_highlights_in_postgres(search_query, events)

count_sql += " GROUP BY room_id"

Expand Down Expand Up @@ -604,18 +601,17 @@ async def search_rooms(

if isinstance(self.database_engine, PostgresEngine):
search_query = search_term
tsquery_func = self.database_engine.tsquery_func
sql = f"""
SELECT ts_rank_cd(vector, {tsquery_func}('english', ?)) as rank,
sql = """
SELECT ts_rank_cd(vector, websearch_to_tsquery('english', ?)) as rank,
origin_server_ts, stream_ordering, room_id, event_id
FROM event_search
WHERE vector @@ {tsquery_func}('english', ?) AND
WHERE vector @@ websearch_to_tsquery('english', ?) AND
"""
args = [search_query, search_query] + args

count_sql = f"""
count_sql = """
SELECT room_id, count(*) as count FROM event_search
WHERE vector @@ {tsquery_func}('english', ?) AND
WHERE vector @@ websearch_to_tsquery('english', ?) AND
"""
count_args = [search_query] + count_args
elif isinstance(self.database_engine, Sqlite3Engine):
Expand Down Expand Up @@ -686,9 +682,7 @@ async def search_rooms(

highlights = None
if isinstance(self.database_engine, PostgresEngine):
highlights = await self._find_highlights_in_postgres(
search_query, events, tsquery_func
)
highlights = await self._find_highlights_in_postgres(search_query, events)

count_sql += " GROUP BY room_id"

Expand All @@ -714,7 +708,7 @@ async def search_rooms(
}

async def _find_highlights_in_postgres(
self, search_query: str, events: List[EventBase], tsquery_func: str
self, search_query: str, events: List[EventBase]
) -> Set[str]:
"""Given a list of events and a search term, return a list of words
that match from the content of the event.
Expand All @@ -725,7 +719,6 @@ async def _find_highlights_in_postgres(
Args:
search_query
events: A list of events
tsquery_func: The tsquery_* function to use when making queries
Returns:
A set of strings.
Expand Down Expand Up @@ -758,13 +751,16 @@ def f(txn: LoggingTransaction) -> Set[str]:
while stop_sel in value:
stop_sel += ">"

query = f"SELECT ts_headline(?, {tsquery_func}('english', ?), %s)" % (
_to_postgres_options(
{
"StartSel": start_sel,
"StopSel": stop_sel,
"MaxFragments": "50",
}
query = (
"SELECT ts_headline(?, websearch_to_tsquery('english', ?), %s)"
% (
_to_postgres_options(
{
"StartSel": start_sel,
"StopSel": stop_sel,
"MaxFragments": "50",
}
)
)
)
txn.execute(query, (value, search_query))
Expand Down
16 changes: 0 additions & 16 deletions synapse/storage/engines/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,22 +170,6 @@ def supports_returning(self) -> bool:
"""Do we support the `RETURNING` clause in insert/update/delete?"""
return True

@property
def tsquery_func(self) -> str:
"""
Selects a tsquery_* func to use.
Ref: https://www.postgresql.org/docs/current/textsearch-controls.html
Returns:
The function name.
"""
# Postgres 11 added support for websearch_to_tsquery.
assert self._version is not None
if self._version >= 110000:
return "websearch_to_tsquery"
return "plainto_tsquery"

def is_deadlock(self, error: Exception) -> bool:
if isinstance(error, psycopg2.DatabaseError):
# https://www.postgresql.org/docs/current/static/errcodes-appendix.html
Expand Down
69 changes: 17 additions & 52 deletions tests/storage/test_room_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Tuple, Union
from typing import List, Tuple
from unittest.case import SkipTest
from unittest.mock import PropertyMock, patch

from twisted.test.proto_helpers import MemoryReactor

Expand Down Expand Up @@ -220,24 +219,22 @@ class MessageSearchTest(HomeserverTestCase):

PHRASE = "the quick brown fox jumps over the lazy dog"

# Each entry is a search query, followed by either a boolean of whether it is
# in the phrase OR a tuple of booleans: whether it matches using websearch
# and using plain search.
COMMON_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
# Each entry is a search query, followed by a boolean of whether it is in the phrase.
COMMON_CASES = [
("nope", False),
("brown", True),
("quick brown", True),
("brown quick", True),
("quick \t brown", True),
("jump", True),
("brown nope", False),
('"brown quick"', (False, True)),
('"brown quick"', False),
('"jumps over"', True),
('"quick fox"', (False, True)),
('"quick fox"', False),
("nope OR doublenope", False),
("furphy OR fox", (True, False)),
("fox -nope", (True, False)),
("fox -brown", (False, True)),
("furphy OR fox", True),
("fox -nope", True),
("fox -brown", False),
('"fox" quick', True),
('"quick brown', True),
('" quick "', True),
Expand All @@ -246,11 +243,11 @@ class MessageSearchTest(HomeserverTestCase):
# TODO Test non-ASCII cases.

# Case that fail on SQLite.
POSTGRES_CASES: List[Tuple[str, Union[bool, Tuple[bool, bool]]]] = [
POSTGRES_CASES = [
# SQLite treats NOT as a binary operator.
("- fox", (False, True)),
("- nope", (True, False)),
('"-fox quick', (False, True)),
("- fox", False),
("- nope", True),
('"-fox quick', False),
# PostgreSQL skips stop words.
('"the quick brown"', True),
('"over lazy"', True),
Expand All @@ -275,7 +272,7 @@ def prepare(
if isinstance(main_store.database_engine, PostgresEngine):
assert main_store.database_engine._version is not None
found = main_store.database_engine._version < 140000
self.COMMON_CASES.append(('"fox quick', (found, True)))
self.COMMON_CASES.append(('"fox quick', found))

def test_tokenize_query(self) -> None:
"""Test the custom logic to tokenize a user's query."""
Expand Down Expand Up @@ -315,16 +312,10 @@ def test_tokenize_query(self) -> None:
)

def _check_test_cases(
self,
store: DataStore,
cases: List[Tuple[str, Union[bool, Tuple[bool, bool]]]],
index=0,
self, store: DataStore, cases: List[Tuple[str, bool]]
) -> None:
# Run all the test cases versus search_msgs
for query, expect_to_contain in cases:
if isinstance(expect_to_contain, tuple):
expect_to_contain = expect_to_contain[index]

result = self.get_success(
store.search_msgs([self.room_id], query, ["content.body"])
)
Expand All @@ -343,9 +334,6 @@ def _check_test_cases(

# Run them again versus search_rooms
for query, expect_to_contain in cases:
if isinstance(expect_to_contain, tuple):
expect_to_contain = expect_to_contain[index]

result = self.get_success(
store.search_rooms([self.room_id], query, ["content.body"], 10)
)
Expand All @@ -366,38 +354,15 @@ def test_postgres_web_search_for_phrase(self):
"""
Test searching for phrases using typical web search syntax, as per postgres' websearch_to_tsquery.
This test is skipped unless the postgres instance supports websearch_to_tsquery.
"""

store = self.hs.get_datastores().main
if not isinstance(store.database_engine, PostgresEngine):
raise SkipTest("Test only applies when postgres is used as the database")

if store.database_engine.tsquery_func != "websearch_to_tsquery":
raise SkipTest(
"Test only applies when postgres supporting websearch_to_tsquery is used as the database"
)
self._check_test_cases(store, self.COMMON_CASES + self.POSTGRES_CASES, index=0)

def test_postgres_non_web_search_for_phrase(self):
"""
Test postgres searching for phrases without using web search, which is used when websearch_to_tsquery isn't
supported by the current postgres version.
See https://www.postgresql.org/docs/current/textsearch-controls.html
"""

store = self.hs.get_datastores().main
if not isinstance(store.database_engine, PostgresEngine):
raise SkipTest("Test only applies when postgres is used as the database")

# Patch supports_websearch_to_tsquery to always return False to ensure we're testing the plainto_tsquery path.
with patch(
"synapse.storage.engines.postgres.PostgresEngine.tsquery_func",
new_callable=PropertyMock,
) as supports_websearch_to_tsquery:
supports_websearch_to_tsquery.return_value = "plainto_tsquery"
self._check_test_cases(
store, self.COMMON_CASES + self.POSTGRES_CASES, index=1
)
self._check_test_cases(store, self.COMMON_CASES + self.POSTGRES_CASES)

def test_sqlite_search(self):
"""
Expand All @@ -407,4 +372,4 @@ def test_sqlite_search(self):
if not isinstance(store.database_engine, Sqlite3Engine):
raise SkipTest("Test only applies when sqlite is used as the database")

self._check_test_cases(store, self.COMMON_CASES, index=0)
self._check_test_cases(store, self.COMMON_CASES)

0 comments on commit e9a4343

Please sign in to comment.