Skip to content

Commit

Permalink
Delete not useful weaviate_search_references
Browse files Browse the repository at this point in the history
  • Loading branch information
davidgxue committed Feb 23, 2024
1 parent 6566fc4 commit eb98196
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 41 deletions.
5 changes: 0 additions & 5 deletions airflow/dags/monitor/test_retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@ def generate_test_answers(test_question_template_path: Path, ts_nodash=None, **c
"test_number",
"question",
"expected_references",
"weaviate_search_references",
"askastro_answer",
"askastro_references",
"langsmith_link",
Expand All @@ -196,10 +195,6 @@ def generate_test_answers(test_question_template_path: Path, ts_nodash=None, **c
if question_number_subset:
questions_df = questions_df[questions_df.test_number.isin(question_number_subset)]

questions_df["weaviate_search_references"] = questions_df.question.apply(
lambda x: weaviate_search(weaviate_client=weaviate_client, question=x, class_name=WEAVIATE_CLASS)
)

questions_df[["askastro_answer", "askastro_references", "langsmith_link"]] = questions_df.question.apply(
lambda x: pd.Series(
generate_answer(
Expand Down
36 changes: 0 additions & 36 deletions airflow/include/tasks/extract/utils/retrieval_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,42 +90,6 @@ def generate_answer(
return (answer, references, langsmith_link)


def weaviate_search(weaviate_client: WeaviateClient, question: str, class_name: str, limit: int = 8) -> str:
    """
    Run a one-shot near-text similarity search in Weaviate and format the hits.

    This function uses Weaviate's
    [Similarity Search](https://weaviate.io/developers/weaviate/search/similarity)
    and returns the reference documents as a single newline-separated string, one line
    per hit in the form ``<docLink> [<certainty rounded to 3 decimals>]``. This is a
    one-shot retrieval unlike Ask Astro frontend which uses LangChain's MultiQueryRetrieval.

    :param weaviate_client: An instantiated weaviate client to use for the search.
    :param question: A question.
    :param class_name: The name of the class to search.
    :param limit: Maximum number of results to request from Weaviate (defaults to 8).
    :return: The formatted references, or an empty string if the search or the
        formatting of its results fails.
    """

    try:
        results = (
            weaviate_client.query.get(class_name=class_name, properties=["docLink"])
            .with_near_text({"concepts": question})
            .with_limit(limit)
            .with_additional(["id", "certainty"])
            .do()["data"]["Get"][class_name]
        )

        references = "\n".join(
            f"{result['docLink']} [{round(result['_additional']['certainty'], 3)}]" for result in results
        )
    except Exception:
        # Deliberately best-effort: a failed lookup must not abort the caller, but it is
        # logged with its traceback so the failure is visible. Return an empty string
        # (not a list) so the result always matches the declared ``str`` return type.
        logger.exception("Weaviate similarity search failed for class %s", class_name)
        references = ""

    return references


def get_or_create_drive_folder(gd_hook: GoogleDriveHook, folder_name: str, parent_id: str | None) -> str:
"""
Creates a google drive folder if it does not exist.
Expand Down

0 comments on commit eb98196

Please sign in to comment.