Skip to content

Commit

Permalink
got responder working
Browse files Browse the repository at this point in the history
  • Loading branch information
yujonglee committed Sep 17, 2024
1 parent 1e5bd5e commit e7b75cb
Show file tree
Hide file tree
Showing 11 changed files with 152 additions and 177 deletions.
15 changes: 1 addition & 14 deletions core/lib/canary/index/index.ex
Original file line number Diff line number Diff line change
@@ -1,16 +1,3 @@
defmodule Canary.Index.Hit do
  @moduledoc """
  Struct describing a single index hit.

  Carries the `id`, `document_id`, `source_id`, `url`, `title`, `excerpt` and
  `tags` of the hit. Every field defaults to `nil`. The struct derives
  `Jason.Encoder`, so it can be serialized to JSON directly.
  """

  @derive Jason.Encoder
  defstruct ~w(id document_id source_id url title excerpt tags)a
end

defmodule Canary.Index do
alias Canary.Sources.Source
alias Canary.Sources.Webpage
Expand Down Expand Up @@ -157,7 +144,7 @@ defmodule Canary.Index do
end

defp transform_hit(hit) do
%Canary.Index.Hit{
%{
id: hit["document"]["id"],
document_id: hit["document"]["document_id"],
source_id: hit["document"]["source_id"],
Expand Down
70 changes: 30 additions & 40 deletions core/lib/canary/interactions/responder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ defmodule Canary.Interactions.Responder do
alias Canary.Interactions.Responder

@callback run(
sources :: list(any()),
query :: String.t(),
pattern :: String.t() | nil,
client :: any(),
handle_delta :: function()
handle_delta :: function(),
opts :: keyword()
) :: {:ok, any()} | {:error, any()}

def run(query, pattern, client, handle_delta \\ nil) do
impl().run(query, pattern, client, handle_delta)
def run(sources, query, handle_delta, opts \\ []) do
impl().run(sources, query, handle_delta, opts)
end

defp impl, do: Application.get_env(:canary, :responder, Responder.Default)
Expand All @@ -19,25 +19,22 @@ defmodule Canary.Interactions.Responder.Default do
@behaviour Canary.Interactions.Responder
require Ash.Query

def run(query, pattern, %{account: account, sources: sources}, handle_delta) do
model = Application.fetch_env!(:canary, :CHAT_COMPLETION_MODEL)
source = sources |> Enum.at(0)
{:ok, %{search: docs}} = Canary.Searcher.run(source, query)
def run(sources, query, handle_delta, opts) do
{:ok, results} = Canary.Searcher.run(sources, query, cache: opts[:cache])

docs =
if is_nil(pattern) do
docs
else
docs
|> Enum.filter(fn doc -> Canary.Native.glob_match(pattern, URI.parse(doc.url).path) end)
end
results
|> search_results_to_docs()
|> then(
&Canary.Reranker.run!(query, &1, threshold: 0.05, renderer: fn doc -> doc.content end)
)

{:ok, pid} = Agent.start_link(fn -> "" end)

{:ok, completion} =
Canary.AI.chat(
%{
model: model,
model: Application.fetch_env!(:canary, :chat_completion_model),
messages: [
%{
role: "system",
Expand Down Expand Up @@ -67,36 +64,29 @@ defmodule Canary.Interactions.Responder.Default do
)

completion = if completion == "", do: Agent.get(pid, & &1), else: completion

references =
completion
|> parse_footnotes()
|> Enum.map(fn i -> Enum.at(docs, i - 1) end)

# TODO: there's a great chance this is invalid, and it will cause problems on the client side.
safe(handle_delta, %{type: :references, items: references})
safe(handle_delta, %{type: :complete, content: completion})

Task.Supervisor.start_child(Canary.TaskSupervisor, fn ->
Canary.Accounts.Billing.increment_ask(account.billing)
end)
{:ok, %{response: completion, references: []}}
end

{:ok, %{response: completion, references: references}}
# Turns searcher results into the flat list of docs handed to the reranker.
#
# Steps:
#   1. Collect the `document_id` of every sub-result of every hit across all
#      `Canary.Searcher.Result` structs, de-duplicated.
#   2. Read the matching `Canary.Sources.Document` records (raises on failure,
#      since `Ash.read!/1` is used).
#   3. Flatten the documents' chunks into `%{title: ..., content: ...}` maps.
#
# Raises `FunctionClauseError` if any element of `results` is not a
# `%Canary.Searcher.Result{}`.
#
# NOTE(review): assumes `chunks` is populated on the records returned by
# `Ash.read!/1` and that each chunk exposes `value.title` / `value.content` —
# confirm against the `Canary.Sources.Document` resource.
defp search_results_to_docs(results) do
  doc_ids =
    results
    |> Enum.flat_map(fn %Canary.Searcher.Result{} = result ->
      Enum.flat_map(result.hits, fn hit ->
        Enum.map(hit.sub_results, & &1.document_id)
      end)
    end)
    |> Enum.uniq()

  # The pipeline below is the function's return value; it is intentionally not
  # bound to a variable (an unused binding only produces a compiler warning).
  Canary.Sources.Document
  |> Ash.Query.filter(id in ^doc_ids)
  |> Ash.read!()
  |> Enum.flat_map(fn %{chunks: chunks} -> chunks end)
  |> Enum.map(fn chunk -> %{title: chunk.value.title, content: chunk.value.content} end)
end

# Invokes `func` with `arg` when `func` is a one-arity function and returns its
# result; for anything else (e.g. `nil`) nothing is called and `:noop` is returned.
defp safe(func, arg) when is_function(func, 1), do: func.(arg)
defp safe(_func, _arg), do: :noop

# Extracts footnote definitions of the form `[^N]: M` (one per line) from
# `text` and returns the referenced indexes `M` as integers, ordered by the
# footnote number `N`. Returns `[]` when no definition is present.
def parse_footnotes(text) do
  ~r/\[\^(\d+)\]:\s*(\d+)\s*$/m
  |> Regex.scan(text, capture: :all_but_first)
  |> Enum.map(fn [footnote_number, index] ->
    {String.to_integer(footnote_number), String.to_integer(index)}
  end)
  # Stable sort on the footnote number, so equal numbers keep scan order.
  |> List.keysort(0)
  |> Enum.map(fn {_footnote_number, index} -> index end)
end
end
1 change: 1 addition & 0 deletions core/lib/canary/prompts/responder_assistant.eex
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<%= @response %>
60 changes: 30 additions & 30 deletions core/lib/canary/prompts/responder_system.eex
Original file line number Diff line number Diff line change
@@ -1,39 +1,39 @@
You are a world class technical support engineer.
I will provide user's question and retrieved relevant documents, and you should answer it. Detailed guideline will be also provided.
## Instruction

In any case, you must respond in markdown format. Header, Link, Inline Code, Block Code, Bold, Italic and Footnotes are supported.
You are a world-class technical support engineer. Your job is to answer user's question based on the given documents.

Notes about tags:
## Request Format

- Header:
If the response is simple, you don't need to use header. But for most case, it is essential to use headers to structure the response.
Be careful not to make the response too long or over-complicated.
<retrieved_documents>
..
</retrieved_documents>
<user_question>
..
</user_question>

- Bold:
This can boost the readability. Use it for important points, or sentence that actually answer the user's question.
## Response Format

- Inline Code:
Also for readability gain. Should be used for domain-specific terms, pronouns, and code-related things.
In any case, you must respond in markdown format. List, Link, Inline Code, Block Code, and Bold are supported.
In addition, you can use `<canary-reference>` tag to reference the given documents.

- Code Block:
Always add language tag after the triple backticks. For example:
You should always start with **<IMMEDIATE_ANSWER>**, and then add more details.

```markup
<div class="container">
<h1>Hello World</h1>
</div>
```
<IMMEDIATE_ANSWER> can be something like:
- Yes
- No
- Not sure.
- Probably not, but not sure.
- I don't know.
- Not sure what you mean.
- etc

- Footnotes:
Use it to reference the related document with the sentence, like this[^1]. (no duplicate footnotes)
Only single number footnote is allowed, no range, no multiple numbers.
At the end of the response, include the footnotes which strictly follow the format below:
## Guidelines

[^1]: 2
[^2]: 6
[^3]: 4

This means the first footnote is referencing the document at index 2, the second is referencing the document at index 6, and so on.
When writing footnotes, do not add heading or other formatting around <notes> tag.

You should add enough footnotes as possible for transparency and accuracy. At least one footnote is required.
- Always stick to the question asked, and do not add any extra information.
- You can use your existing knowledge to understand the user's query and given documents, but you should NOT directly use it for answering the question.
- Every sentence should be backed by the given documents.
- This is not a multi-turn conversation. Do not say something like "Please let me know if you have any other questions."
- Also, avoid mentioning yourself, and do not use emoji.
For example:
Don't do: "I am not sure"
Instead, do: "It is not clear"
31 changes: 25 additions & 6 deletions core/lib/canary/prompts/responder_user.eex
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,29 @@ Go straight to the point, give answer first, then go through the details. Each s

If user is asking for nonsense, or the retrieved documents are not relevant, just transparently say it.

Don't forget to include footnotes like below:
```
[^1]: 2
[^2]: 6
[^3]: 4
```
##
You should talk less, and reference/point more in your response.
If answer is already in the document, you should just point to it rather than rephrase it.

When doing so, follow this format:

<canary-reference title="<TITLE>" excerpt="<QUOTED_CONTENT>"></canary-reference>

Note that above is HTML, but HTML is valid in Markdown.

##
If you reference something, do not say like "you can refer to the sections on seamless integrations and specific framework support.".
Referencing should only be done with the `<canary-reference>` tag, because otherwise the user will not be able to see the content.

</instruction>

<notes>
This answer is really bad.

Q: Can you tell me about 'Vitepress'?
A: """
Yes.

VitePress is a static site generator powered by Vite and Vue. It uses Minisearch as its default local search. You can integrate Canary with VitePress using the canary-provider-vitepress-minisearch. To set it up, you need to install @getcanary/web and configure your VitePress project accordingly. Additionally, Canary Cloud is in active development and not ready for production use yet.
"""
</notes>
2 changes: 1 addition & 1 deletion core/lib/canary/prompts/understander_assistant.eex
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<query><%= Enum.join(@queries, ",") %></query>
<keywords><%= Enum.join(@keywords, ",") %></keywords>
40 changes: 27 additions & 13 deletions core/lib/canary/prompts/understander_system.eex
Original file line number Diff line number Diff line change
@@ -1,20 +1,34 @@
You are a world class technical support engineer.
Your job is to analyze the user's query and return a structured response like below:
## Instruction

<query>KEYWORD_1,KEYWORD_2,KEYWORD_3</query>
You are a world-class technical support engineer. Your job is to analyze the request and return a response that strictly follows the format.

- <query></query> should contain comma separated list of keywords. MAX 5 keywords are allowed.
- Each "keyword" must be a single word. It will be used to run keyword based search.
## Request Format

Do not include any other text, just respond with the XML-like format that I provided.
<keywords>
..
</keywords>
<user_query>
..
</user_query>

## Response Format

<keywords>KEYWORD_1,KEYWORD_2,KEYWORD_3</keywords>

- <keywords></keywords> should contain a comma-separated list of keywords. MAX 5 keywords are allowed.
- Each "keyword" must be a single word without any punctuation or special characters.

## Guidelines

These are very important guidelines to follow:
Your response will be used for keyword-based search, and the result will fulfill the user's request. It's crucial to predict the right set of keywords.

Follow these guidelines:

- Your output must be same or some kind of variant of the given keywords.
- Use your existing knowledge to guess the correct word from the given keywords and user's query.
- Given keywords can be a bit noisy, so you should try to correct typos, split words like "groupchatmanager" to "group chat manager", etc.
- Infer complement word if user asked for it. For example, if user ask "is cloud the only option?", you should search for "local" or "self" etc based on the given keywords.
- Infer generic words if the user asks for a list or enumeration. For example, if the user asks "any fframeworks are supported", you should search for "framework" or "integration".
- If user's query is totally nonsense, just return <analysis></analysis>.
- The MOST important Rule: Your response should be highly influenced by the given <keywords>...</keywords>. Because if not, even if your prediction is correct, it can be useless for the keyword based search.
- But don't just "pick" from the given keywords(it can be noisy). Correct typos, split compound words (e.g., "groupchatmanager" to "group chat manager"), etc.
- Use your knowledge and experience as a technical support engineer. Consider user intent when selecting keywords.
Example 1: If user asks "is cloud the only option?", include keywords like "local" or "self" based on given keywords (complementary words).
Example 2: If user asks "astro,vitepress,astro, what else?", include "framework" or "integration" to get a list of frameworks (enumeration, generic words).
- If the user's query is nonsensical, incomplete, or adversarial, just return an empty <keywords></keywords>.
- When dealing with comparison queries, include keywords for both items being compared.
- If the user's query involves troubleshooting, include keywords related to common solutions or diagnostic steps.
10 changes: 6 additions & 4 deletions core/lib/canary/prompts/understander_user.eex
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
## Keywords
<%= @keywords |> Enum.join(", ") %>
<keywords>
<%= @keywords |> Enum.join(",") %>
</keywords>

Based on the above information, come up with plausible keywords to fulfill the user's query below.
Query: "<%= @query %>"
<user_query>
<%= @query %>
</user_query>
2 changes: 1 addition & 1 deletion core/lib/canary/query/understander.ex
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ defmodule Canary.Query.Understander.LLM do
end

defp parse(completion) do
case Regex.run(~r/<query>(.*?)<\/query>/s, completion) do
case Regex.run(~r/<keywords>(.*?)<\/keywords>/s, completion) do
[_, match] ->
match
|> String.split(",")
Expand Down
16 changes: 14 additions & 2 deletions core/lib/canary/searcher.ex
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
defmodule Canary.Searcher.Result do
  @moduledoc """
  A named, typed group of search hits.

  All fields default to `nil`. The struct derives `Jason.Encoder`, so it can be
  serialized to JSON directly.
  """

  @derive Jason.Encoder
  defstruct name: nil, type: nil, hits: nil

  @type t :: %__MODULE__{
          name: String.t(),
          type: String.t(),
          hits: list()
        }
end

defmodule Canary.Searcher do
@callback run(list(any()), String.t()) :: {:ok, list()} | {:error, any()}
@callback run(list(any()), String.t()) ::
{:ok, list(Canary.Searcher.Result.t())} | {:error, any()}

def run(sources, query, opts \\ []) do
if opts[:cache] do
Expand Down Expand Up @@ -90,7 +102,7 @@ defmodule Canary.Searcher.Default do
}
end)

%{name: name, type: type, hits: hits}
%Canary.Searcher.Result{name: name, type: type, hits: hits}
end)
end
end
Loading

0 comments on commit e7b75cb

Please sign in to comment.