Add assistant with internet access (#13)
* Update dependencies

* Add the `tools` module

* Add `CONTEXTQA_AGENT_TEMPLATE`

* Add extra setting to enable internet access for the assistant

* Update `searcher` tool

* Added exception handling
* It searches 5 different sites sequentially and chooses the first one that
meets the criterion of containing more than 100 words
* Added logger

* Update memory configuration based on the `enable_internet_access` flag

* Add `CONTEXTQA_AGENT_TEMPLATE` and a custom prefix for the agent

* Add `get_llm_assistant` function

This function returns an assistant with or without internet access

* Update `ChatCard`

Now URLs in markdown format are rendered properly

* Add switch to enable internet access

* Update memory chat

Now some of the "generated" parameters depend on
the `internet_access` flag

* Update `qa_service` function

* The `qa` endpoint now expects the optional `internet_access` flag
* Added chat memory usage depending on that flag

* Add `internetEnabled` state

* Update Dialog messages

* Fix bug

Added `v-if` usage so the internet switch is only available
in conversations with no context

* Update home's welcome text
zaldivards authored Jul 10, 2023
1 parent 8034c8f commit ffb65d0
Showing 12 changed files with 397 additions and 172 deletions.
4 changes: 2 additions & 2 deletions api/Pipfile
@@ -6,8 +6,6 @@ name = "pypi"
[packages]
langchain = "*"
openai = "*"
google-search-results = "*"
tweepy = "*"
scikit-learn = "*"
tiktoken = "*"
fastapi = "*"
@@ -19,6 +17,8 @@ pandas = "*"
pyarrow = "*"
redis = "*"
async-timeout = "*"
beautifulsoup4 = "*"
googlesearch-python = "*"

[dev-packages]
black = "*"
260 changes: 126 additions & 134 deletions api/Pipfile.lock

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions api/contextqa/agents/tools.py
@@ -0,0 +1,50 @@
import requests
from bs4 import BeautifulSoup
from googlesearch import search
from langchain.agents import Tool

from contextqa import get_logger


LOGGER = get_logger()


def _get_content(url: str, timeout: int = 5) -> bytes:
    resp = requests.get(url=url, timeout=timeout)
    resp.raise_for_status()
    return resp.content


def _js_disable_message(text: str) -> bool:
    return "JavaScript is disabled in this browser" in text


def _searcher(search_term: str) -> str:
    """Search Google for the provided term when the assistant cannot answer from its own knowledge"""
    # fallback in case none of the five results yields enough usable text
    text = "I could not find a relevant result"
    results = search(search_term, num_results=5)
    for url in results:
        try:
            html_content = BeautifulSoup(_get_content(url), "html.parser")
        except requests.exceptions.HTTPError as ex:
            LOGGER.warning("Got HTTP error when requesting %s. Error %s", url, ex)
            continue
        else:
            html_text = html_content.text
            if _js_disable_message(html_text):
                LOGGER.warning("%s detected JavaScript not available", url)
                continue
            words = html_text.split()  # split on any whitespace, including newlines
            if len(words) > 100:
                LOGGER.info("Chosen url: %s", url)
                text = "I got the response: " + " ".join(words[:500])
                break
    return text


searcher = Tool(
    name="Crawl google for external knowledge",
    func=_searcher,
    description=(
        "useful for when the assistant does not know the answer to the human input and needs external knowledge"
    ),
)
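
A quick way to exercise the new tool in isolation is sketched below. This is illustrative only, not part of the diff; it assumes the dependencies added to the Pipfile are installed and that live network access is available.

from contextqa.agents.tools import searcher

# Tool.run delegates to _searcher: it walks up to five Google results and
# returns text from the first page whose visible content exceeds 100 words
result = searcher.run("LangChain conversational agents")  # example query
print(result)  # "I got the response: ..." or the fallback message
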
1 change: 1 addition & 0 deletions api/contextqa/parsers/models.py
@@ -35,6 +35,7 @@ class LLMContextQueryRequest(BaseModel):

class LLMQueryRequest(BaseModel):
    message: str
    internet_access: bool = False


class LLMQueryRequestBody(LLMRequestBodyBase):
52 changes: 42 additions & 10 deletions api/contextqa/services/chat.py
@@ -1,6 +1,7 @@
from langchain.agents import initialize_agent, AgentType, Agent
from langchain.chat_models import ChatOpenAI
from langchain import ConversationChain
from langchain.chains.conversation.prompt import DEFAULT_TEMPLATE
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import (
    AIMessagePromptTemplate,
    ChatPromptTemplate,
@@ -9,12 +10,13 @@
)

from contextqa import models, settings
from contextqa.utils import memory
from contextqa.utils import memory, prompts
from contextqa.agents.tools import searcher


_MESSAGES = [
    SystemMessagePromptTemplate.from_template(
        """You are helpful assistant called ContextQA that answer user inputs. You emphasize your name in every greeting.
        """You are a helpful assistant called ContextQA that answers user inputs. You emphasize your name in every greeting.
Example: Hello, I am ContextQA, how can I help you?
@@ -26,20 +28,50 @@
]


def qa_service(message: str) -> models.LLMResult:
def get_llm_assistant(internet_access: bool) -> ConversationChain | Agent:
    """Return the appropriate LLM assistant based on the system configuration

    Parameters
    ----------
    internet_access : bool
        flag indicating whether an assistant with internet access was requested

    Returns
    -------
    ConversationChain | Agent
    """
    llm = ChatOpenAI(temperature=0)

    if internet_access:
        return initialize_agent(
            [searcher],
            llm=llm,
            agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
            memory=memory.Redis("default", internet_access=True),
            verbose=settings().debug,
            agent_kwargs={
                # "output_parser": CustomOP(),
                # "format_instructions": prompts.CONTEXTQA_AGENT_TEMPLATE,
                "prefix": prompts.PREFIX,
            },
            handle_parsing_errors=True,
        )
    prompt = ChatPromptTemplate.from_messages(_MESSAGES)
    return ConversationChain(llm=llm, prompt=prompt, memory=memory.Redis("default"), verbose=settings().debug)


def qa_service(params: models.LLMQueryRequest) -> models.LLMResult:
    """Chat with the llm

    Parameters
    ----------
    message : str
        User message
    params : models.LLMQueryRequest
        request body parameters

    Returns
    -------
    models.LLMResult
        LLM response
    """
    llm = ChatOpenAI(temperature=0)
    prompt = ChatPromptTemplate.from_messages(_MESSAGES)
    chain = ConversationChain(llm=llm, prompt=prompt, memory=memory.Redis("default"), verbose=settings().debug)
    return models.LLMResult(response=chain.run(input=message))
    assistant = get_llm_assistant(params.internet_access)
    return models.LLMResult(response=assistant.run(input=params.message))
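
A minimal sketch of how the two assistant flavors are obtained (illustrative only; it assumes a reachable Redis instance and an OPENAI_API_KEY in the environment):

from contextqa import models
from contextqa.services.chat import get_llm_assistant, qa_service

# Plain conversation chain backed by windowed Redis memory
chat_assistant = get_llm_assistant(internet_access=False)
# Conversational ReAct agent armed with the `searcher` tool
web_assistant = get_llm_assistant(internet_access=True)

# The endpoint-facing helper picks the assistant from the request flag
result = qa_service(models.LLMQueryRequest(message="Hi, who are you?", internet_access=True))
print(result.response)
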
27 changes: 15 additions & 12 deletions api/contextqa/utils/memory.py
@@ -4,47 +4,50 @@
from langchain.memory import (
    ConversationBufferWindowMemory,
    ConversationSummaryBufferMemory,
    ConversationSummaryMemory,
    RedisChatMessageHistory,
)
from langchain.schema import BaseMemory

from contextqa import settings

envs = settings()

_PROMPT_KEYS = {
    "default": {"input_key": "input", "memory_key": "history"},
    "defaultv2": {"input_key": "input", "memory_key": "chat_history"},
    "context": {"input_key": "question", "memory_key": "chat_history"},
}


def _requires_raw(session: str) -> bool:
    return session != "default"
def _prompt_keys(kind: str, internet_access: bool = False) -> dict[str, str]:
    if internet_access:
        return _PROMPT_KEYS["defaultv2"]
    return _PROMPT_KEYS[kind]


def _requires_raw(session: str, internet_access: bool) -> bool:
    return session != "default" or internet_access


def _redis(session: Literal["default", "context"] = "default") -> BaseMemory:
def _redis(session: Literal["default", "context"] = "default", internet_access: bool = False) -> BaseMemory:
    history_db = RedisChatMessageHistory(session_id=session, url=envs.redis_url)
    return ConversationBufferWindowMemory(
        chat_memory=history_db,
        max_token_limit=1000,
        k=5,
        return_messages=_requires_raw(session),
        **_PROMPT_KEYS[session]
        return_messages=_requires_raw(session, internet_access),
        **_prompt_keys(session, internet_access)
    )


def _summary_memory() -> BaseMemory:
    return ConversationSummaryMemory(llm=OpenAI(temperature=0), input_key="question")


def _redis_with_summary(session: Literal["default", "context"] = "default") -> BaseMemory:
    history_db = RedisChatMessageHistory(session_id=session, url=envs.redis_url)
    memory = ConversationSummaryBufferMemory(
        llm=OpenAI(temperature=0),
        chat_memory=history_db,
        return_messages=_requires_raw(session),
        return_messages=_requires_raw(session, False),
        max_token_limit=1000,
        **_PROMPT_KEYS[session]
        **_prompt_keys(session)
    )
    return memory
69 changes: 69 additions & 0 deletions api/contextqa/utils/prompts.py
@@ -1,5 +1,7 @@
"""Custom Prompt templates"""

from langchain.agents.conversational_chat.prompt import PREFIX as PREFIX_
from langchain.agents.conversational.prompt import FORMAT_INSTRUCTIONS
from langchain.prompts import PromptTemplate

# part of this template was taken from langchain.chains.conversational_retrieval.prompts
@@ -23,5 +25,72 @@
Follow Up Input: {question}
Standalone question:"""

PREFIX = """
You are ContextQA. If you can't find the answer neither using the provided tools nor got an incomplete response, answer 'I am unable to find the answer'.
You emphasize your name in every greeting or question about who you are:
```
Example 1:
Human: Hi
AI: AI: Hi I am ContextQA, how may I help you?
Example 2:
Human: Hi, who are you?
AI: AI: Hi I am ContextQA, how may I help you?
```
{}
You must use the tools only once, that MUST be the final result of the answer.
""".format(
"\n".join(PREFIX_.split("\n")[1:])
)

_COMMON_TEMPLATE_SEGMENT = """
You always need to use the first observation as the final answer:
```
Example 1:
Thought: Do I need to use a tool? Yes
Action: Crawl google for external knowledge
Action Input: Langchain
Observation: This is the result, Langchain is a great framework for LLMs...
{ai_prefix}: [Last observation as the answer]
Example 2:
Thought: Do I need to use a tool? Yes
Action: Crawl google for external knowledge
Action Input: Weather
Observation: This is the weather
{ai_prefix}: [The weather that was found]
```
The Thought/Action/Action Input/Observation sequence can repeat only ONCE; otherwise answer that you don't know:
```
Example 1:
Thought: I now know the final answer
{ai_prefix}: the final answer to the original input question, rephrased as an understandable summary
Example 2:
Thought: I don't know the answer
{ai_prefix}: I couldn't find the answer
```
After getting the answer from the tool, your thought MUST be "I got the answer"
When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:
```
Thought: Do I need to use a tool? No
{ai_prefix}: Your final answer
```"""


_INSTRUCTIONS_SUFFIX = """
You must use the tools if and only if you are unable to answer with your own training knowledge; otherwise the answer will be incorrect.
The first observation AFTER using a tool is your final answer. Use the tool only ONE time:
Observation: I got the response: [the response]
Thought: Do I need to use a tool? No
{ai_prefix}: [The last observation (the response)]
"""

CONTEXTQA_RETRIEVAL_PROMPT = PromptTemplate.from_template(_template)
CONTEXTQA_AGENT_TEMPLATE = FORMAT_INSTRUCTIONS + _INSTRUCTIONS_SUFFIX
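
Since LangChain's conversational FORMAT_INSTRUCTIONS carries {tool_names} and {ai_prefix} placeholders, the combined template can be previewed by formatting it. The concrete values below are made-up examples, not values taken from this commit:

from contextqa.utils.prompts import CONTEXTQA_AGENT_TEMPLATE

preview = CONTEXTQA_AGENT_TEMPLATE.format(
    tool_names="Crawl google for external knowledge",  # the tool defined in agents/tools.py
    ai_prefix="AI",  # assumed prefix; the agent may configure a different one
)
print(preview)
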
2 changes: 1 addition & 1 deletion api/main.py
@@ -32,7 +32,7 @@ def llm_qa(params: models.LLMQueryRequest):
    Provide a message and receive a response from the LLM
    """
    try:
        return chat.qa_service(params.message)
        return chat.qa_service(params)
    except Exception as ex:
        raise HTTPException(status_code=424, detail={"message": "Something went wrong", "cause": str(ex)}) from ex
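
For reference, a client call might look like the sketch below; the /qa path and local port are assumptions, since the route decorator sits outside the shown hunk:

import requests

resp = requests.post(
    "http://localhost:8000/qa",  # hypothetical URL
    json={"message": "What is LangChain?", "internet_access": True},
    timeout=60,
)
print(resp.json())  # {"response": "..."} per models.LLMResult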
