From 79049b10dff4c545f0d39034efe76f28f9866b50 Mon Sep 17 00:00:00 2001
From: Lance Martin
Date: Fri, 21 Jul 2023 16:47:20 -0700
Subject: [PATCH 01/11] Web research retriever

---
 .../retrievers/how_to/web_research.ipynb      | 383 ++++++++++++++++++
 .../langchain/retrievers/__init__.py          |   2 +
 .../langchain/retrievers/web_research.py      | 169 ++++++++
 3 files changed, 554 insertions(+)
 create mode 100644 docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb
 create mode 100644 libs/langchain/langchain/retrievers/web_research.py

diff --git a/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb
new file mode 100644
index 0000000000000..3812cca4a1d71
--- /dev/null
+++ b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb
@@ -0,0 +1,383 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "9c0ffe42",
+   "metadata": {},
+   "source": [
+    "# WebResearchRetriever\n",
+    "\n",
+    "Given a query, this retriever will: \n",
+    "\n",
+    "* Formulate a set of related Google searches\n",
+    "* Search for each query \n",
+    "* Load all the resulting URLs\n",
+    "* Then embed and perform similarity search with the query on the consolidated page content"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "4abea0a0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.callbacks.manager import CallbackManager\n",
+    "from langchain.retrievers.web_research import WebResearchRetriever\n",
+    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
+    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0c0e57bb",
+   "metadata": {},
+   "source": [
+    "## Run\n",
+    "\n",
+    "Pass the desired model and vectorstore."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "3d84ea47",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "ggml_metal_free: deallocating\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.vectorstores import Chroma\n",
+    "from langchain.embeddings import OpenAIEmbeddings\n",
+    "from langchain.chat_models.openai import ChatOpenAI\n",
+    "# Set input\n",
+    "llm = ChatOpenAI(temperature=0)\n",
+    "vectorstore = Chroma(embedding_function=OpenAIEmbeddings())\n",
+    "GOOGLE_CSE_ID = \"xxx\"\n",
+    "GOOGLE_API_KEY = \"xxx\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "f135e81d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize\n",
+    "web_research_retriever = WebResearchRetriever(\n",
+    "    vectorstore=vectorstore, \n",
+    "    llm=llm, \n",
+    "    GOOGLE_CSE_ID=GOOGLE_CSE_ID, \n",
+    "    GOOGLE_API_KEY=GOOGLE_API_KEY\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "c958adc6",
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n",
+      "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is the definition of LLM Powered Autonomous Agents?', '2. What are the key features of LLM Powered Autonomous Agents?', '3. How do LLM Powered Autonomous Agents differ from traditional autonomous agents?', '4. What are the applications of LLM Powered Autonomous Agents?', '5. Are there any case studies or examples of successful implementations of LLM Powered Autonomous Agents?'])}\n",
+      "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is the definition of LLM Powered Autonomous Agents?', '2. What are the key features of LLM Powered Autonomous Agents?', '3. How do LLM Powered Autonomous Agents differ from traditional autonomous agents?', '4. What are the applications of LLM Powered Autonomous Agents?', '5. Are there any case studies or examples of successful implementations of LLM Powered Autonomous Agents?']\n",
+      "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n",
+      "INFO:langchain.retrievers.web_research:URLs to load: {'https://towardsdatascience.com/autonomous-agents-and-multi-agent-systems-101-agents-and-deception-f4da3401f92a', 'https://lilianweng.github.io/posts/2023-06-23-agent/'}\n",
+      "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n",
+      "Fetching pages: 100%|##############################################################################################################################################| 2/2 [00:02<00:00,  1.04s/it]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Run\n",
+    "import logging\n",
+    "logging.basicConfig()\n",
+    "logging.getLogger(\"langchain.retrievers.web_research\").setLevel(logging.INFO)\n",
+    "user_input = \"How do LLM Powered Autonomous Agents work?\"\n",
+    "docs = web_research_retriever.get_relevant_documents(user_input)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "52c07edd",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "6"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(docs)"
+   ]
+  },
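The four bullets in the notebook's intro map one-to-one onto `_get_relevant_documents` in `libs/langchain/langchain/retrievers/web_research.py`, added later in this patch. A condensed sketch of that flow (illustrative only: the `research` helper is hypothetical, Google credentials are assumed to be set in the environment, and the retriever's logging and per-document split caps are elided):

```python
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.document_loaders import AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter


def research(questions, vectorstore):
    """Hypothetical distillation of WebResearchRetriever's pipeline."""
    search = GoogleSearchAPIWrapper()
    # 1. Run each LLM-generated query through Google and collect the result links
    urls = {res["link"] for q in questions for res in search.results(q, 1) if "link" in res}
    # 2. Load the pages and strip the HTML down to plain text
    pages = Html2TextTransformer().transform_documents(AsyncHtmlLoader(list(urls)).load())
    # 3. Split and embed the consolidated page content
    splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=50)
    vectorstore.add_documents(splitter.split_documents(pages))
    # 4. Similarity search the index with each generated question
    return [doc for q in questions for doc in vectorstore.similarity_search(q)]
```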
+  {
+   "cell_type": "markdown",
+   "id": "3663b2ba",
+   "metadata": {},
+   "source": [
+    "`Local LLM`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "e62e36e9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "llama.cpp: loading model from /Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\n",
+      "llama_model_load_internal: format     = ggjt v3 (latest)\n",
+      "llama_model_load_internal: n_vocab    = 32000\n",
+      "llama_model_load_internal: n_ctx      = 4096\n",
+      "llama_model_load_internal: n_embd     = 5120\n",
+      "llama_model_load_internal: n_mult     = 256\n",
+      "llama_model_load_internal: n_head     = 40\n",
+      "llama_model_load_internal: n_layer    = 40\n",
+      "llama_model_load_internal: n_rot      = 128\n",
+      "llama_model_load_internal: freq_base  = 10000.0\n",
+      "llama_model_load_internal: freq_scale = 1\n",
+      "llama_model_load_internal: ftype      = 2 (mostly Q4_0)\n",
+      "llama_model_load_internal: n_ff       = 13824\n",
+      "llama_model_load_internal: model size = 13B\n",
+      "llama_model_load_internal: ggml ctx size =    0.09 MB\n",
+      "llama_model_load_internal: mem required  = 9132.71 MB (+ 1608.00 MB per state)\n",
+      "llama_new_context_with_model: kv self size  = 3200.00 MB\n",
+      "ggml_metal_init: allocating\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found model file at  /Users/rlm/.cache/gpt4all/ggml-all-MiniLM-L6-v2-f16.bin\n",
+      "llama_new_context_with_model: max tensor size =    87.89 MB\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "ggml_metal_init: using MPS\n",
+      "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n",
+      "ggml_metal_init: loaded kernel_add                            
0x295facb60\n", + "ggml_metal_init: loaded kernel_mul 0x295fadef0\n", + "ggml_metal_init: loaded kernel_mul_row 0x295faf260\n", + "ggml_metal_init: loaded kernel_scale 0x295fae150\n", + "ggml_metal_init: loaded kernel_silu 0x295fae3b0\n", + "ggml_metal_init: loaded kernel_relu 0x295fb00c0\n", + "ggml_metal_init: loaded kernel_gelu 0x295fb0e50\n", + "ggml_metal_init: loaded kernel_soft_max 0x295fb1330\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x295fb1d20\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x295fb23c0\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x295fb1f80\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x295fb2ba0\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x295fb3360\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x295fb3a90\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x295fb41b0\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x295fb48d0\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x295fb4ff0\n", + "ggml_metal_init: loaded kernel_rms_norm 0x295fb5710\n", + "ggml_metal_init: loaded kernel_norm 0x295fb65c0\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x295fb6e70\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x295fb7620\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x295fb7dc0\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x295fb8580\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x295fb8ee0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x295fb9620\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x295fb9de0\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x295fba5f0\n", + "ggml_metal_init: loaded kernel_rope 0x295fbad00\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x295fbb6f0\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x295fbc310\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x295fbcc80\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x295fbd5f0\n", + "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", + "ggml_metal_init: hasUnifiedMemory = true\n", + "ggml_metal_init: maxTransferRate = built-in GPU\n", + "ggml_metal_add_buffer: allocated 'data ' buffer, size = 6984.06 MB, ( 6984.52 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1040.00 MB, ( 8024.52 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 3202.00 MB, (11226.52 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 597.00 MB, (11823.52 / 21845.34)\n", + "ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (12335.52 / 21845.34)\n", + "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", + "objc[77641]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2964a8208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x29acf8208). One of the two will be used. Which one is undefined.\n", + "objc[77641]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2964a8208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x29b124208). One of the two will be used. 
Which one is undefined.\n" + ] + } + ], + "source": [ + "from langchain.llms import LlamaCpp\n", + "from langchain.vectorstores import Chroma\n", + "from langchain.embeddings import GPT4AllEmbeddings\n", + "\n", + "n_gpu_layers = 1 # Metal set to 1 is enough.\n", + "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n", + "callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])\n", + "llama = LlamaCpp(\n", + " model_path=\"/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n", + " n_gpu_layers=n_gpu_layers,\n", + " n_batch=n_batch,\n", + " n_ctx=4096, # Context window\n", + " max_tokens=1000, # Max tokens to generate\n", + " f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n", + " callback_manager=callback_manager,\n", + " verbose=True,\n", + ")\n", + "vectorstore_llama = Chroma(embedding_function=GPT4AllEmbeddings())\n", + "GOOGLE_CSE_ID = \"b5e84267513eb4dcf\"\n", + "GOOGLE_API_KEY = \"AIzaSyDUKwJCpdU6nNwANyA7NC2cXnMfvXD6YcM\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "39ff3d75", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize WebResearchRetriever\n", + "web_research_retriever = WebResearchRetriever(\n", + " vectorstore=vectorstore_llama, \n", + " llm=llama, \n", + " GOOGLE_CSE_ID=GOOGLE_CSE_ID, \n", + " GOOGLE_API_KEY=GOOGLE_API_KEY\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0898e34c", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Sure! Based on the user input search query \"How do LLM Powered Autonomous Agents work?\", here are five search queries that could help answer the question:\n", + "\n", + "1. What is an LLM (LLM Powered Autonomous Agents)?\n", + "2. How do LLMs differ from traditional AI systems?\n", + "3. What are some real-world applications of LLM Powered Autonomous Agents?\n", + "4. How do LLMs enable Autonomous Agents to make decisions in real-time?\n", + "5. What are the current challenges and limitations facing LLM Powered Autonomous Agents?\n", + "\n", + "These search queries could lead to a wealth of information about LLM Powered Autonomous Agents, such as their definition, capabilities, applications, and potential drawbacks. This information can help users better understand how these systems work and their potential uses in various industries." + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 5144.38 ms\n", + "llama_print_timings: sample time = 139.14 ms / 198 runs ( 0.70 ms per token, 1423.05 tokens per second)\n", + "llama_print_timings: prompt eval time = 5143.89 ms / 99 tokens ( 51.96 ms per token, 19.25 tokens per second)\n", + "llama_print_timings: eval time = 6624.20 ms / 197 runs ( 33.63 ms per token, 29.74 tokens per second)\n", + "llama_print_timings: total time = 12190.60 ms\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is an LLM (LLM Powered Autonomous Agents)?\\n', '2. How do LLMs differ from traditional AI systems?\\n', '3. What are some real-world applications of LLM Powered Autonomous Agents?\\n', '4. 
How do LLMs enable Autonomous Agents to make decisions in real-time?\\n', '5. What are the current challenges and limitations facing LLM Powered Autonomous Agents?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is an LLM (LLM Powered Autonomous Agents)?\\n', '2. How do LLMs differ from traditional AI systems?\\n', '3. What are some real-world applications of LLM Powered Autonomous Agents?\\n', '4. How do LLMs enable Autonomous Agents to make decisions in real-time?\\n', '5. What are the current challenges and limitations facing LLM Powered Autonomous Agents?\\n']\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT,\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'What is Generative AI? Everything You Need to Know', 'link': 'https://www.techtarget.com/searchenterpriseai/definition/generative-AI', 'snippet': 'Generative AI is a type of artificial intelligence technology that can ... of Bard built on its most advanced LLM, PaLM 2, which allows Bard to be more\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... It plays a crucial role in real-world tasks where trial and error are\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Wireless Multi-Agent Generative AI: From Connected Intelligence to ...', 'link': 'https://arxiv.org/pdf/2307.02757', 'snippet': 'Jul 6, 2023 ... intelligent decision-making happens right at the edge. This article ... scene for realizing on-device LLMs, where multi-agent LLMs are.'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... 
Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:URLs to load: {'https://lilianweng.github.io/posts/2023-06-23-agent/', 'https://www.techtarget.com/searchenterpriseai/definition/generative-AI', 'https://arxiv.org/pdf/2307.02757'}\n", + "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", + "Fetching pages: 100%|##############################################################################################################################################| 3/3 [00:00<00:00, 4.07it/s]\n" + ] + } + ], + "source": [ + "import logging\n", + "logging.basicConfig()\n", + "logging.getLogger(\"langchain.retrievers.web_research\").setLevel(logging.INFO)\n", + "user_input = \"How do LLM Powered Autonomous Agents work?\"\n", + "docs = web_research_retriever.get_relevant_documents(user_input)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7e06adad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "7" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(docs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain/langchain/retrievers/__init__.py b/libs/langchain/langchain/retrievers/__init__.py index 360a111e3825f..a933823b7d25b 100644 --- a/libs/langchain/langchain/retrievers/__init__.py +++ b/libs/langchain/langchain/retrievers/__init__.py @@ -30,6 +30,7 @@ ) from langchain.retrievers.vespa_retriever import VespaRetriever from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever +from langchain.retrievers.web_research import WebResearchRetriever from langchain.retrievers.wikipedia import WikipediaRetriever from langchain.retrievers.zep import ZepRetriever from langchain.retrievers.zilliz import ZillizRetriever @@ -64,4 +65,5 @@ "ZepRetriever", "ZillizRetriever", "DocArrayRetriever", + "WebResearchRetriever", ] diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py new file mode 100644 index 0000000000000..836b178f6bf2a --- /dev/null +++ b/libs/langchain/langchain/retrievers/web_research.py @@ -0,0 +1,169 @@ +import logging +import os +import re +from typing import List, Union + +from pydantic import BaseModel, Field + +from langchain.callbacks.manager import ( + AsyncCallbackManagerForRetrieverRun, + CallbackManagerForRetrieverRun, +) +from langchain.chains import LLMChain +from langchain.chat_models.openai import ChatOpenAI +from langchain.document_loaders import AsyncHtmlLoader +from langchain.document_transformers import Html2TextTransformer +from langchain.llms.base import BaseLLM +from langchain.output_parsers.pydantic import PydanticOutputParser +from langchain.prompts import PromptTemplate +from langchain.schema import BaseRetriever, Document +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.utilities import GoogleSearchAPIWrapper +from langchain.vectorstores.base import 
VectorStore
+
+logger = logging.getLogger(__name__)
+
+
+class SearchQueries(BaseModel):
+    """Search queries to run to research for the user's goal."""
+
+    queries: List[str] = Field(
+        ..., description="List of search queries to look up on Google"
+    )
+
+
+DEFAULT_SEARCH_PROMPT = PromptTemplate(
+    input_variables=["question"],
+    template="""<<SYS>> \n You are a web research assistant to help users
+    answer questions. Answer using a numeric list. Do not include any extra
+    text. \n <</SYS>> \n\n [INST] Given a user input search query,
+    generate a numbered list of five search queries to run to help answer their
+    question: \n\n {question} [/INST]""",
+)
+
+
+class LineList(BaseModel):
+    """List of questions."""
+
+    lines: List[str] = Field(description="Questions")
+
+
+class QuestionListOutputParser(PydanticOutputParser):
+    """Output parser for a list of numbered questions."""
+
+    def __init__(self) -> None:
+        super().__init__(pydantic_object=LineList)
+
+    def parse(self, text: str) -> LineList:
+        lines = re.findall(r"\d+\..*?\n", text)
+        return LineList(lines=lines)
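One behavior of `QuestionListOutputParser` worth noting: the regex only captures numbered lines that end with a newline, so a trailing question that the model emits without a final `\n` is silently dropped. A minimal check (illustrative, not part of the patch):

```python
import re

# Mirrors QuestionListOutputParser.parse on a typical LLM response
text = "1. What is an LLM agent?\n2. How do LLM agents plan?\n3. Last question, no newline"
print(re.findall(r"\d+\..*?\n", text))
# ['1. What is an LLM agent?\n', '2. How do LLM agents plan?\n']  <- question 3 is lost
```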
+ """ + + # Get search questions + logger.info("Generating questions for Google Search ...") + llm_chain = LLMChain( + llm=self.llm, + prompt=self.search_prompt, + output_parser=QuestionListOutputParser(), + ) + result = llm_chain({"question": query}) + logger.info(f"Questions for Google Search (raw): {result}") + questions = getattr(result["text"], "lines", []) + logger.info(f"Questions for Google Search: {questions}") + + # Get urls + logger.info("Searching for relevat urls ...") + urls_to_look = [] + for query in questions: + # Google search + search_results = self.search_tool(query) + logger.info("Searching for relevat urls ...") + logger.info(f"Search results: {search_results}") + for res in search_results: + urls_to_look.append(res["link"]) + + # Load HTML to text + urls = set(urls_to_look) + logger.info(f"URLs to load: {urls}") + loader = AsyncHtmlLoader(list(urls)) + html2text = Html2TextTransformer() + + # Proect against very large documents + # This can use rate limit w/ embedding + logger.info("Grabbing most relevant splits from urls ...") + filtered_splits = [] + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=1500, chunk_overlap=50 + ) + for doc in html2text.transform_documents(loader.load()): + doc_splits = text_splitter.split_documents([doc]) + if len(doc_splits) > self.max_splits_per_doc: + logger.info( + f"Document {doc.metadata} has too many splits ({len(doc_splits)}), " + f"keeping only the first {self.max_splits_per_doc}" + ) + doc_splits = doc_splits[: self.max_splits_per_doc] + filtered_splits.extend(doc_splits) + self.vectorstore.add_documents(filtered_splits) + + # Search for relevant splits + docs = [] + for query in questions: + docs.extend(self.vectorstore.similarity_search(query)) + + # Get unique docs + unique_documents_dict = { + (doc.page_content, tuple(sorted(doc.metadata.items()))): doc for doc in docs + } + unique_documents = list(unique_documents_dict.values()) + return unique_documents + + async def _aget_relevant_documents( + self, + query: str, + *, + run_manager: AsyncCallbackManagerForRetrieverRun, + ) -> List[Document]: + raise NotImplementedError From fcf29fce327fc236c61b784a3d74a4d7b6973d7e Mon Sep 17 00:00:00 2001 From: Lance Martin Date: Fri, 21 Jul 2023 18:38:57 -0700 Subject: [PATCH 02/11] fmt --- .../retrievers/how_to/web_research.ipynb | 151 +++++++++--------- .../langchain/retrievers/web_research.py | 3 +- 2 files changed, 74 insertions(+), 80 deletions(-) diff --git a/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb index 3812cca4a1d71..b58c42912ecad 100644 --- a/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb +++ b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "4abea0a0", "metadata": {}, "outputs": [], @@ -40,18 +40,10 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 5, "id": "3d84ea47", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "ggml_metal_free: deallocating\n" - ] - } - ], + "outputs": [], "source": [ "from langchain.vectorstores import Chroma\n", "from langchain.embeddings import OpenAIEmbeddings\n", @@ -59,8 +51,8 @@ "# Set input\n", "llm = ChatOpenAI(temperature=0)\n", "vectorstore = Chroma(embedding_function=OpenAIEmbeddings())\n", - "GOOGLE_CSE_ID = \"xxx\"\n", - "GOOGLE_API_KEY = \"xxx\"" + 
"GOOGLE_CSE_ID = \"b5e84267513eb4dcf\"\n", + "GOOGLE_API_KEY = \"AIzaSyDUKwJCpdU6nNwANyA7NC2cXnMfvXD6YcM\"" ] }, { @@ -143,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "id": "e62e36e9", "metadata": {}, "outputs": [ @@ -167,8 +159,7 @@ "llama_model_load_internal: model size = 13B\n", "llama_model_load_internal: ggml ctx size = 0.09 MB\n", "llama_model_load_internal: mem required = 9132.71 MB (+ 1608.00 MB per state)\n", - "llama_new_context_with_model: kv self size = 3200.00 MB\n", - "ggml_metal_init: allocating\n" + "llama_new_context_with_model: kv self size = 3200.00 MB\n" ] }, { @@ -183,40 +174,41 @@ "name": "stderr", "output_type": "stream", "text": [ + "ggml_metal_init: allocating\n", "ggml_metal_init: using MPS\n", "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x295facb60\n", - "ggml_metal_init: loaded kernel_mul 0x295fadef0\n", - "ggml_metal_init: loaded kernel_mul_row 0x295faf260\n", - "ggml_metal_init: loaded kernel_scale 0x295fae150\n", - "ggml_metal_init: loaded kernel_silu 0x295fae3b0\n", - "ggml_metal_init: loaded kernel_relu 0x295fb00c0\n", - "ggml_metal_init: loaded kernel_gelu 0x295fb0e50\n", - "ggml_metal_init: loaded kernel_soft_max 0x295fb1330\n", - "ggml_metal_init: loaded kernel_diag_mask_inf 0x295fb1d20\n", - "ggml_metal_init: loaded kernel_get_rows_f16 0x295fb23c0\n", - "ggml_metal_init: loaded kernel_get_rows_q4_0 0x295fb1f80\n", - "ggml_metal_init: loaded kernel_get_rows_q4_1 0x295fb2ba0\n", - "ggml_metal_init: loaded kernel_get_rows_q2_K 0x295fb3360\n", - "ggml_metal_init: loaded kernel_get_rows_q3_K 0x295fb3a90\n", - "ggml_metal_init: loaded kernel_get_rows_q4_K 0x295fb41b0\n", - "ggml_metal_init: loaded kernel_get_rows_q5_K 0x295fb48d0\n", - "ggml_metal_init: loaded kernel_get_rows_q6_K 0x295fb4ff0\n", - "ggml_metal_init: loaded kernel_rms_norm 0x295fb5710\n", - "ggml_metal_init: loaded kernel_norm 0x295fb65c0\n", - "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x295fb6e70\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x295fb7620\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x295fb7dc0\n", - "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x295fb8580\n", - "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x295fb8ee0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x295fb9620\n", - "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x295fb9de0\n", - "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x295fba5f0\n", - "ggml_metal_init: loaded kernel_rope 0x295fbad00\n", - "ggml_metal_init: loaded kernel_alibi_f32 0x295fbb6f0\n", - "ggml_metal_init: loaded kernel_cpy_f32_f16 0x295fbc310\n", - "ggml_metal_init: loaded kernel_cpy_f32_f32 0x295fbcc80\n", - "ggml_metal_init: loaded kernel_cpy_f16_f16 0x295fbd5f0\n", + "ggml_metal_init: loaded kernel_add 0x2a58f3710\n", + "ggml_metal_init: loaded kernel_mul 0x2a58f4c40\n", + "ggml_metal_init: loaded kernel_mul_row 0x2a58f5af0\n", + "ggml_metal_init: loaded kernel_scale 0x2a58f3a60\n", + "ggml_metal_init: loaded kernel_silu 0x2a58f3cc0\n", + "ggml_metal_init: loaded kernel_relu 0x2a58f6260\n", + "ggml_metal_init: loaded kernel_gelu 0x2a58f68b0\n", + "ggml_metal_init: loaded kernel_soft_max 0x2a58f75b0\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x2a58f7a70\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x2a58f8530\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x2a58f8b90\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 
0x2a58f9390\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x2a58f9bc0\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x2a58fa2b0\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x2a58fa980\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x2a58fb070\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x2a58fb7c0\n", + "ggml_metal_init: loaded kernel_rms_norm 0x2a16e6940\n", + "ggml_metal_init: loaded kernel_norm 0x2a16e7440\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x2a16e79e0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x2a16e8aa0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x2a16e9290\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x2a16e8400\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x2a58fc520\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x2a58fc940\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x2a58fd930\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x2a58fe0e0\n", + "ggml_metal_init: loaded kernel_rope 0x2a58fe810\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x2a5c04330\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x2a5c04bf0\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x2a16ea2a0\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x2a16ea870\n", "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", "ggml_metal_init: hasUnifiedMemory = true\n", "ggml_metal_init: maxTransferRate = built-in GPU\n", @@ -226,8 +218,8 @@ "ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 597.00 MB, (11823.52 / 21845.34)\n", "ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (12335.52 / 21845.34)\n", "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", - "objc[77641]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2964a8208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x29acf8208). One of the two will be used. Which one is undefined.\n", - "objc[77641]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2964a8208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x29b124208). One of the two will be used. Which one is undefined.\n" + "objc[82333]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2a571c208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x2a77c8208). One of the two will be used. Which one is undefined.\n", + "objc[82333]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2a571c208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x2b4ca0208). One of the two will be used. 
Which one is undefined.\n" ] } ], @@ -256,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "id": "39ff3d75", "metadata": {}, "outputs": [], @@ -272,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "id": "0898e34c", "metadata": { "scrolled": false @@ -289,15 +281,15 @@ "name": "stdout", "output_type": "stream", "text": [ - " Sure! Based on the user input search query \"How do LLM Powered Autonomous Agents work?\", here are five search queries that could help answer the question:\n", + " Sure! Here are five search queries that could help answer the user's question about how LLM powered autonomous agents work:\n", "\n", - "1. What is an LLM (LLM Powered Autonomous Agents)?\n", - "2. How do LLMs differ from traditional AI systems?\n", - "3. What are some real-world applications of LLM Powered Autonomous Agents?\n", - "4. How do LLMs enable Autonomous Agents to make decisions in real-time?\n", - "5. What are the current challenges and limitations facing LLM Powered Autonomous Agents?\n", + "1. \"LLM powered autonomous agents architecture\" - This search query could provide information on the overall design and structure of LLM powered autonomous agents, including the components and interfaces involved in their operation.\n", + "2. \"How do LLM powered autonomous agents perceive their environment?\" - This search query could provide information on the sensors and other sources of data that LLM powered autonomous agents use to understand their environment and make decisions.\n", + "3. \"What algorithms and techniques are used in LLM powered autonomous agents for decision making?\" - This search query could provide information on the machine learning and artificial intelligence techniques that are used in LLM powered autonomous agents to enable them to make decisions and take actions based on their environment and objectives.\n", + "4. \"How do LLM powered autonomous agents learn and improve over time?\" - This search query could provide information on how LLM powered autonomous agents learn from their experiences and adapt to new situations, as well as any techniques or algorithms used for learning and improvement.\n", + "5. \"What are some examples of real-world applications of LLM powered autonomous agents?\" - This search query could provide information on the types of tasks and industries where LLM powered autonomous agents are being used successfully, such as self-driving cars, robots, or other intelligent systems.\n", "\n", - "These search queries could lead to a wealth of information about LLM Powered Autonomous Agents, such as their definition, capabilities, applications, and potential drawbacks. This information can help users better understand how these systems work and their potential uses in various industries." + "These search queries should provide a good starting point for understanding how LLM powered autonomous agents work and their potential applications in various fields." 
] }, { @@ -305,27 +297,30 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 5144.38 ms\n", - "llama_print_timings: sample time = 139.14 ms / 198 runs ( 0.70 ms per token, 1423.05 tokens per second)\n", - "llama_print_timings: prompt eval time = 5143.89 ms / 99 tokens ( 51.96 ms per token, 19.25 tokens per second)\n", - "llama_print_timings: eval time = 6624.20 ms / 197 runs ( 33.63 ms per token, 29.74 tokens per second)\n", - "llama_print_timings: total time = 12190.60 ms\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is an LLM (LLM Powered Autonomous Agents)?\\n', '2. How do LLMs differ from traditional AI systems?\\n', '3. What are some real-world applications of LLM Powered Autonomous Agents?\\n', '4. How do LLMs enable Autonomous Agents to make decisions in real-time?\\n', '5. What are the current challenges and limitations facing LLM Powered Autonomous Agents?\\n'])}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is an LLM (LLM Powered Autonomous Agents)?\\n', '2. How do LLMs differ from traditional AI systems?\\n', '3. What are some real-world applications of LLM Powered Autonomous Agents?\\n', '4. How do LLMs enable Autonomous Agents to make decisions in real-time?\\n', '5. What are the current challenges and limitations facing LLM Powered Autonomous Agents?\\n']\n", - "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT,\\xa0...'}]\n", + "llama_print_timings: load time = 7344.40 ms\n", + "llama_print_timings: sample time = 245.79 ms / 350 runs ( 0.70 ms per token, 1423.96 tokens per second)\n", + "llama_print_timings: prompt eval time = 7344.26 ms / 99 tokens ( 74.18 ms per token, 13.48 tokens per second)\n", + "llama_print_timings: eval time = 14318.16 ms / 349 runs ( 41.03 ms per token, 24.37 tokens per second)\n", + "llama_print_timings: total time = 22399.59 ms\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. \"LLM powered autonomous agents architecture\" - This search query could provide information on the overall design and structure of LLM powered autonomous agents, including the components and interfaces involved in their operation.\\n', '2. \"How do LLM powered autonomous agents perceive their environment?\" - This search query could provide information on the sensors and other sources of data that LLM powered autonomous agents use to understand their environment and make decisions.\\n', '3. \"What algorithms and techniques are used in LLM powered autonomous agents for decision making?\" - This search query could provide information on the machine learning and artificial intelligence techniques that are used in LLM powered autonomous agents to enable them to make decisions and take actions based on their environment and objectives.\\n', '4. 
\"How do LLM powered autonomous agents learn and improve over time?\" - This search query could provide information on how LLM powered autonomous agents learn from their experiences and adapt to new situations, as well as any techniques or algorithms used for learning and improvement.\\n', '5. \"What are some examples of real-world applications of LLM powered autonomous agents?\" - This search query could provide information on the types of tasks and industries where LLM powered autonomous agents are being used successfully, such as self-driving cars, robots, or other intelligent systems.\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. \"LLM powered autonomous agents architecture\" - This search query could provide information on the overall design and structure of LLM powered autonomous agents, including the components and interfaces involved in their operation.\\n', '2. \"How do LLM powered autonomous agents perceive their environment?\" - This search query could provide information on the sensors and other sources of data that LLM powered autonomous agents use to understand their environment and make decisions.\\n', '3. \"What algorithms and techniques are used in LLM powered autonomous agents for decision making?\" - This search query could provide information on the machine learning and artificial intelligence techniques that are used in LLM powered autonomous agents to enable them to make decisions and take actions based on their environment and objectives.\\n', '4. \"How do LLM powered autonomous agents learn and improve over time?\" - This search query could provide information on how LLM powered autonomous agents learn from their experiences and adapt to new situations, as well as any techniques or algorithms used for learning and improvement.\\n', '5. \"What are some examples of real-world applications of LLM powered autonomous agents?\" - This search query could provide information on the types of tasks and industries where LLM powered autonomous agents are being used successfully, such as self-driving cars, robots, or other intelligent systems.\\n']\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'What is Generative AI? Everything You Need to Know', 'link': 'https://www.techtarget.com/searchenterpriseai/definition/generative-AI', 'snippet': 'Generative AI is a type of artificial intelligence technology that can ... of Bard built on its most advanced LLM, PaLM 2, which allows Bard to be more\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... It plays a crucial role in real-world tasks where trial and error are\\xa0...'}]\n", - "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Wireless Multi-Agent Generative AI: From Connected Intelligence to ...', 'link': 'https://arxiv.org/pdf/2307.02757', 'snippet': 'Jul 6, 2023 ... intelligent decision-making happens right at the edge. This article ... 
scene for realizing on-device LLMs, where multi-agent LLMs are.'}]\n", - "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Reflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future\\xa0...'}]\n", - "INFO:langchain.retrievers.web_research:URLs to load: {'https://lilianweng.github.io/posts/2023-06-23-agent/', 'https://www.techtarget.com/searchenterpriseai/definition/generative-AI', 'https://arxiv.org/pdf/2307.02757'}\n", - "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", - "Fetching pages: 100%|##############################################################################################################################################| 3/3 [00:00<00:00, 4.07it/s]\n" + "INFO:langchain.retrievers.web_research:Search results: [{'Result': 'No good Google Search Result was found'}]\n" + ] + }, + { + "ename": "KeyError", + "evalue": "'link'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m logging\u001b[38;5;241m.\u001b[39mgetLogger(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlangchain.retrievers.web_research\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39msetLevel(logging\u001b[38;5;241m.\u001b[39mINFO)\n\u001b[1;32m 4\u001b[0m user_input \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHow do LLM Powered Autonomous Agents work?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 5\u001b[0m docs \u001b[38;5;241m=\u001b[39m \u001b[43mweb_research_retriever\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_relevant_documents\u001b[49m\u001b[43m(\u001b[49m\u001b[43muser_input\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Desktop/Code/langchain/libs/langchain/langchain/schema/retriever.py:181\u001b[0m, in \u001b[0;36mBaseRetriever.get_relevant_documents\u001b[0;34m(self, query, callbacks, tags, metadata, **kwargs)\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 180\u001b[0m run_manager\u001b[38;5;241m.\u001b[39mon_retriever_error(e)\n\u001b[0;32m--> 181\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 183\u001b[0m run_manager\u001b[38;5;241m.\u001b[39mon_retriever_end(\n\u001b[1;32m 184\u001b[0m result,\n\u001b[1;32m 185\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 186\u001b[0m )\n", + "File \u001b[0;32m~/Desktop/Code/langchain/libs/langchain/langchain/schema/retriever.py:174\u001b[0m, in \u001b[0;36mBaseRetriever.get_relevant_documents\u001b[0;34m(self, query, callbacks, tags, metadata, **kwargs)\u001b[0m\n\u001b[1;32m 172\u001b[0m _kwargs \u001b[38;5;241m=\u001b[39m kwargs \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expects_other_args \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[1;32m 173\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_new_arg_supported:\n\u001b[0;32m--> 174\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_relevant_documents\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    175\u001b[0m \u001b[43m        \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrun_manager\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m_kwargs\u001b[49m\n\u001b[1;32m    176\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    177\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    178\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_relevant_documents(query, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m_kwargs)\n",
"File \u001b[0;32m~/Desktop/Code/langchain/libs/langchain/langchain/retrievers/web_research.py:125\u001b[0m, in \u001b[0;36mWebResearchRetriever._get_relevant_documents\u001b[0;34m(self, query, run_manager)\u001b[0m\n\u001b[1;32m    123\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSearch results: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msearch_results\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    124\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m res \u001b[38;5;129;01min\u001b[39;00m search_results:\n\u001b[0;32m--> 125\u001b[0m     urls_to_look\u001b[38;5;241m.\u001b[39mappend(\u001b[43mres\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlink\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m)\n\u001b[1;32m    127\u001b[0m \u001b[38;5;66;03m# Load HTML to text\u001b[39;00m\n\u001b[1;32m    128\u001b[0m urls \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(urls_to_look)\n",
"\u001b[0;31mKeyError\u001b[0m: 'link'"
     ]
    }
   ],
diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py
index 836b178f6bf2a..1cc11ec00fa89 100644
--- a/libs/langchain/langchain/retrievers/web_research.py
+++ b/libs/langchain/langchain/retrievers/web_research.py
@@ -64,7 +64,6 @@ class WebResearchRetriever(BaseRetriever):
     vectorstore: VectorStore = Field(
         ..., description="Vector store for handling document embeddings"
     )
-    llm: BaseLLM = Field(..., description="Language model for generating questions")
     llm: Union[BaseLLM, ChatOpenAI] = Field(
         ..., description="Language model for generating questions"
     )
@@ -75,7 +74,7 @@ class WebResearchRetriever(BaseRetriever):
     )
     max_splits_per_doc: int = Field(100, description="Maximum splits per document")
 
-    def search_tool(self, query: str, num_pages: int = 1):
+    def search_tool(self, query: str, num_pages: int = 1) -> List[dict]:
         """Google search for up to 3 queries."""
         try:
             os.environ["GOOGLE_CSE_ID"] = self.GOOGLE_CSE_ID
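The `KeyError: 'link'` in the notebook run above comes from `GoogleSearchAPIWrapper.results` returning the placeholder `[{'Result': 'No good Google Search Result was found'}]` when a query has no hits; that dict carries no `'link'` key. One possible guard for the URL-collection loop in `_get_relevant_documents` (a sketch of a fix, not the change this series ships):

```python
for res in search_results:
    # Placeholder results such as {'Result': 'No good Google Search Result was found'}
    # have no 'link' key, so only keep entries that are real hits
    if "link" in res:
        urls_to_look.append(res["link"])
```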
From 0b59dec2b0856dc592ed9ea783b95d9926d8d16a Mon Sep 17 00:00:00 2001
From: Lance Martin
Date: Sun, 23 Jul 2023 10:28:24 -0700
Subject: [PATCH 03/11] Update

---
 .../retrievers/how_to/web_research.ipynb       | 250 +++++++++++-------
 .../langchain/document_loaders/async_html.py   |   7 +-
 .../langchain/retrievers/web_research.py       |  11 +-
 3 files changed, 173 insertions(+), 95 deletions(-)

diff --git a/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb
index b58c42912ecad..8b60c0ea67e64 100644
--- a/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb
+++ b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb
@@ -17,7 +17,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 3,
+  "execution_count": 2,
   "id": "4abea0a0",
   "metadata": {},
   "outputs": [],
@@ -40,7 +40,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 5,
+  "execution_count": 3,
   "id": "3d84ea47",
   "metadata": {},
   "outputs": [],
@@ -50,14 +50,14 @@
    "from langchain.chat_models.openai import ChatOpenAI\n",
    "# Set input\n",
    "llm = ChatOpenAI(temperature=0)\n",
-   "vectorstore = Chroma(embedding_function=OpenAIEmbeddings())\n",
-   "GOOGLE_CSE_ID = \"b5e84267513eb4dcf\"\n",
-   "GOOGLE_API_KEY = \"AIzaSyDUKwJCpdU6nNwANyA7NC2cXnMfvXD6YcM\""
+   "vectorstore = Chroma(embedding_function=OpenAIEmbeddings(),persist_directory=\"./chroma_db_oai\")\n",
+   "GOOGLE_CSE_ID = \"xxx\"\n",
+   "GOOGLE_API_KEY = \"xxx\""
   ]
  },
  {
   "cell_type": "code",
-  "execution_count": 6,
+  "execution_count": 4,
   "id": "f135e81d",
   "metadata": {},
   "outputs": [],
@@ -73,7 +73,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 7,
+  "execution_count": 5,
   "id": "c958adc6",
   "metadata": {
    "scrolled": false
  },
@@ -84,12 +84,20 @@
    "output_type": "stream",
    "text": [
     "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n",
-    "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is the definition of LLM Powered Autonomous Agents?', '2. What are the key features of LLM Powered Autonomous Agents?', '3. How do LLM Powered Autonomous Agents differ from traditional autonomous agents?', '4. What are the applications of LLM Powered Autonomous Agents?', '5. Are there any case studies or examples of successful implementations of LLM Powered Autonomous Agents?'])}\n",
-    "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is the definition of LLM Powered Autonomous Agents?', '2. What are the key features of LLM Powered Autonomous Agents?', '3. How do LLM Powered Autonomous Agents differ from traditional autonomous agents?', '4. What are the applications of LLM Powered Autonomous Agents?', '5. Are there any case studies or examples of successful implementations of LLM Powered Autonomous Agents?']\n",
+    "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents use machine learning algorithms?\\n', '4. What are some real-world applications of LLM Powered Autonomous Agents?\\n'])}\n",
+    "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents use machine learning algorithms?\\n', '4. 
What are some real-world applications of LLM Powered Autonomous Agents?\\n']\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Fig. 1. Overview of a LLM-powered autonomous agent system. ... This approach utilizes the Planning Domain Definition Language (PDDL) as an\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:URLs to load: {'https://towardsdatascience.com/autonomous-agents-and-multi-agent-systems-101-agents-and-deception-f4da3401f92a', 'https://lilianweng.github.io/posts/2023-06-23-agent/'}\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': \"Jun 23, 2023 ... In a LLM-powered autonomous agent system, LLM functions as the agent's brain, complemented by several key components: Planning.\"}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... 2023) applies the same idea to cross-episode trajectories in reinforcement learning tasks, where an algorithm is encapsulated in a long history-\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... It plays a crucial role in real-world tasks where trial and error are\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:URLs to load: {'https://lilianweng.github.io/posts/2023-06-23-agent/'}\n", "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", - "Fetching pages: 100%|##############################################################################################################################################| 2/2 [00:02<00:00, 1.04s/it]\n" + "Fetching pages: 100%|##############| 1/1 [00:00<00:00, 4.88it/s]\n" ] } ], @@ -104,25 +112,26 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "52c07edd", - "metadata": { - "scrolled": true - }, + "execution_count": 6, + "id": "c7a23b2a", + "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "6" + "\"LLM-powered autonomous agents work by using a large language model (LLM) as their core controller. These agents have several key components that complement the LLM:\\n\\n1. Planning: Complex tasks are broken down into simpler steps through task decomposition. This can be done by prompting the LLM, providing task-specific instructions, or using human input. Self-reflection techniques help the agents learn from experience and improve their reasoning.\\n\\n2. Memory: Autonomous agents have different types of memory, including working memory and long-term memory. They can use contextual embeddings to understand the user's intent and context by incorporating entire conversation histories. 
This allows them to respond based on collective knowledge gained throughout the interaction with a user.\\n\\n3. Tool Use: Autonomous agents have the capacity to use tools such as browsing the internet, accessing live data, or running code. They can define goals and tasks, identify the right actions to take, and generate and execute commands with the help of external tools like search engines or APIs.\\n\\nThese components work together to enable autonomous agents to perform tasks, solve problems, and interact with users in a personalized and context-aware manner. However, there are still challenges to overcome, such as the finite context length, long-term planning, and reliability of natural language interfaces. Ongoing research is focused on addressing these challenges and improving the capabilities of LLM-powered autonomous agents.\"" ] }, - "execution_count": 9, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "len(docs)" + "from langchain.chains.question_answering import load_qa_chain\n", + "chain = load_qa_chain(llm, chain_type=\"stuff\")\n", + "output = chain({\"input_documents\": docs, \"question\": user_input}, return_only_outputs=True)\n", + "output['output_text']" ] }, { @@ -159,7 +168,8 @@ "llama_model_load_internal: model size = 13B\n", "llama_model_load_internal: ggml ctx size = 0.09 MB\n", "llama_model_load_internal: mem required = 9132.71 MB (+ 1608.00 MB per state)\n", - "llama_new_context_with_model: kv self size = 3200.00 MB\n" + "llama_new_context_with_model: kv self size = 3200.00 MB\n", + "ggml_metal_init: allocating\n" ] }, { @@ -174,41 +184,40 @@ "name": "stderr", "output_type": "stream", "text": [ - "ggml_metal_init: allocating\n", "ggml_metal_init: using MPS\n", "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x2a58f3710\n", - "ggml_metal_init: loaded kernel_mul 0x2a58f4c40\n", - "ggml_metal_init: loaded kernel_mul_row 0x2a58f5af0\n", - "ggml_metal_init: loaded kernel_scale 0x2a58f3a60\n", - "ggml_metal_init: loaded kernel_silu 0x2a58f3cc0\n", - "ggml_metal_init: loaded kernel_relu 0x2a58f6260\n", - "ggml_metal_init: loaded kernel_gelu 0x2a58f68b0\n", - "ggml_metal_init: loaded kernel_soft_max 0x2a58f75b0\n", - "ggml_metal_init: loaded kernel_diag_mask_inf 0x2a58f7a70\n", - "ggml_metal_init: loaded kernel_get_rows_f16 0x2a58f8530\n", - "ggml_metal_init: loaded kernel_get_rows_q4_0 0x2a58f8b90\n", - "ggml_metal_init: loaded kernel_get_rows_q4_1 0x2a58f9390\n", - "ggml_metal_init: loaded kernel_get_rows_q2_K 0x2a58f9bc0\n", - "ggml_metal_init: loaded kernel_get_rows_q3_K 0x2a58fa2b0\n", - "ggml_metal_init: loaded kernel_get_rows_q4_K 0x2a58fa980\n", - "ggml_metal_init: loaded kernel_get_rows_q5_K 0x2a58fb070\n", - "ggml_metal_init: loaded kernel_get_rows_q6_K 0x2a58fb7c0\n", - "ggml_metal_init: loaded kernel_rms_norm 0x2a16e6940\n", - "ggml_metal_init: loaded kernel_norm 0x2a16e7440\n", - "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x2a16e79e0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x2a16e8aa0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x2a16e9290\n", - "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x2a16e8400\n", - "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x2a58fc520\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x2a58fc940\n", - "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x2a58fd930\n", - "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x2a58fe0e0\n", - 
"ggml_metal_init: loaded kernel_rope 0x2a58fe810\n", - "ggml_metal_init: loaded kernel_alibi_f32 0x2a5c04330\n", - "ggml_metal_init: loaded kernel_cpy_f32_f16 0x2a5c04bf0\n", - "ggml_metal_init: loaded kernel_cpy_f32_f32 0x2a16ea2a0\n", - "ggml_metal_init: loaded kernel_cpy_f16_f16 0x2a16ea870\n", + "ggml_metal_init: loaded kernel_add 0x2996f3910\n", + "ggml_metal_init: loaded kernel_mul 0x2996f4e40\n", + "ggml_metal_init: loaded kernel_mul_row 0x2996f5660\n", + "ggml_metal_init: loaded kernel_scale 0x2996f5cf0\n", + "ggml_metal_init: loaded kernel_silu 0x2996f6460\n", + "ggml_metal_init: loaded kernel_relu 0x2996f3c60\n", + "ggml_metal_init: loaded kernel_gelu 0x2996f3ec0\n", + "ggml_metal_init: loaded kernel_soft_max 0x2996f77e0\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x2996f7c90\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x2996f85b0\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x2996f8de0\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x2996f9570\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x2996f9de0\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x2996fa4d0\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x2996fabd0\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x2996fb2b0\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x2996fb9b0\n", + "ggml_metal_init: loaded kernel_rms_norm 0x2996fc0b0\n", + "ggml_metal_init: loaded kernel_norm 0x2996fc7e0\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x2996fd7b0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x2996fdfa0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x2996fe760\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x2996fef40\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x2996ff8a0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x2b8804080\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x2b8804840\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x2b8805000\n", + "ggml_metal_init: loaded kernel_rope 0x2b8805710\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x2b8806100\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x2b8806cd0\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x2b8807640\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x2b8807fb0\n", "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", "ggml_metal_init: hasUnifiedMemory = true\n", "ggml_metal_init: maxTransferRate = built-in GPU\n", @@ -218,8 +227,8 @@ "ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 597.00 MB, (11823.52 / 21845.34)\n", "ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (12335.52 / 21845.34)\n", "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", - "objc[82333]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2a571c208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x2a77c8208). One of the two will be used. Which one is undefined.\n", - "objc[82333]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2a571c208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x2b4ca0208). One of the two will be used. 
Which one is undefined.\n" + "objc[24358]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2b82ac208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x5de898208). One of the two will be used. Which one is undefined.\n", + "objc[24358]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2b82ac208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x5decc4208). One of the two will be used. Which one is undefined.\n" ] } ], @@ -241,7 +250,7 @@ " callback_manager=callback_manager,\n", " verbose=True,\n", ")\n", - "vectorstore_llama = Chroma(embedding_function=GPT4AllEmbeddings())\n", + "vectorstore_llama = Chroma(embedding_function=GPT4AllEmbeddings(),persist_directory=\"./chroma_db_llama\")\n", "GOOGLE_CSE_ID = \"b5e84267513eb4dcf\"\n", "GOOGLE_API_KEY = \"AIzaSyDUKwJCpdU6nNwANyA7NC2cXnMfvXD6YcM\"" ] @@ -281,15 +290,13 @@ "name": "stdout", "output_type": "stream", "text": [ - " Sure! Here are five search queries that could help answer the user's question about how LLM powered autonomous agents work:\n", - "\n", - "1. \"LLM powered autonomous agents architecture\" - This search query could provide information on the overall design and structure of LLM powered autonomous agents, including the components and interfaces involved in their operation.\n", - "2. \"How do LLM powered autonomous agents perceive their environment?\" - This search query could provide information on the sensors and other sources of data that LLM powered autonomous agents use to understand their environment and make decisions.\n", - "3. \"What algorithms and techniques are used in LLM powered autonomous agents for decision making?\" - This search query could provide information on the machine learning and artificial intelligence techniques that are used in LLM powered autonomous agents to enable them to make decisions and take actions based on their environment and objectives.\n", - "4. \"How do LLM powered autonomous agents learn and improve over time?\" - This search query could provide information on how LLM powered autonomous agents learn from their experiences and adapt to new situations, as well as any techniques or algorithms used for learning and improvement.\n", - "5. \"What are some examples of real-world applications of LLM powered autonomous agents?\" - This search query could provide information on the types of tasks and industries where LLM powered autonomous agents are being used successfully, such as self-driving cars, robots, or other intelligent systems.\n", + " Sure! Based on the user input search query \"How do LLM Powered Autonomous Agents work?\", here are five search queries that could help answer their question:\n", "\n", - "These search queries should provide a good starting point for understanding how LLM powered autonomous agents work and their potential applications in various fields." + "1. What are LLM Powered Autonomous Agents and how do they differ from traditional AI agents?\n", + "2. How do LLM Powered Autonomous Agents learn and improve their decision-making abilities over time?\n", + "3. Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or personal assistants?\n", + "4. 
How does the choice of LLM (Long Short-Term Memory) algorithm affect the performance and capabilities of an LLM Powered Autonomous Agent?\n", + "5. What are some common challenges and limitations of LLM Powered Autonomous Agents, such as dealing with unexpected events or handling conflicting goals?" ] }, { @@ -297,30 +304,25 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 7344.40 ms\n", - "llama_print_timings: sample time = 245.79 ms / 350 runs ( 0.70 ms per token, 1423.96 tokens per second)\n", - "llama_print_timings: prompt eval time = 7344.26 ms / 99 tokens ( 74.18 ms per token, 13.48 tokens per second)\n", - "llama_print_timings: eval time = 14318.16 ms / 349 runs ( 41.03 ms per token, 24.37 tokens per second)\n", - "llama_print_timings: total time = 22399.59 ms\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. \"LLM powered autonomous agents architecture\" - This search query could provide information on the overall design and structure of LLM powered autonomous agents, including the components and interfaces involved in their operation.\\n', '2. \"How do LLM powered autonomous agents perceive their environment?\" - This search query could provide information on the sensors and other sources of data that LLM powered autonomous agents use to understand their environment and make decisions.\\n', '3. \"What algorithms and techniques are used in LLM powered autonomous agents for decision making?\" - This search query could provide information on the machine learning and artificial intelligence techniques that are used in LLM powered autonomous agents to enable them to make decisions and take actions based on their environment and objectives.\\n', '4. \"How do LLM powered autonomous agents learn and improve over time?\" - This search query could provide information on how LLM powered autonomous agents learn from their experiences and adapt to new situations, as well as any techniques or algorithms used for learning and improvement.\\n', '5. \"What are some examples of real-world applications of LLM powered autonomous agents?\" - This search query could provide information on the types of tasks and industries where LLM powered autonomous agents are being used successfully, such as self-driving cars, robots, or other intelligent systems.\\n'])}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. \"LLM powered autonomous agents architecture\" - This search query could provide information on the overall design and structure of LLM powered autonomous agents, including the components and interfaces involved in their operation.\\n', '2. \"How do LLM powered autonomous agents perceive their environment?\" - This search query could provide information on the sensors and other sources of data that LLM powered autonomous agents use to understand their environment and make decisions.\\n', '3. \"What algorithms and techniques are used in LLM powered autonomous agents for decision making?\" - This search query could provide information on the machine learning and artificial intelligence techniques that are used in LLM powered autonomous agents to enable them to make decisions and take actions based on their environment and objectives.\\n', '4. 
\"How do LLM powered autonomous agents learn and improve over time?\" - This search query could provide information on how LLM powered autonomous agents learn from their experiences and adapt to new situations, as well as any techniques or algorithms used for learning and improvement.\\n', '5. \"What are some examples of real-world applications of LLM powered autonomous agents?\" - This search query could provide information on the types of tasks and industries where LLM powered autonomous agents are being used successfully, such as self-driving cars, robots, or other intelligent systems.\\n']\n", + "llama_print_timings: load time = 7308.57 ms\n", + "llama_print_timings: sample time = 136.15 ms / 194 runs ( 0.70 ms per token, 1424.94 tokens per second)\n", + "llama_print_timings: prompt eval time = 7308.44 ms / 99 tokens ( 73.82 ms per token, 13.55 tokens per second)\n", + "llama_print_timings: eval time = 6384.79 ms / 193 runs ( 33.08 ms per token, 30.23 tokens per second)\n", + "llama_print_timings: total time = 14072.87 ms\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What are LLM Powered Autonomous Agents and how do they differ from traditional AI agents?\\n', '2. How do LLM Powered Autonomous Agents learn and improve their decision-making abilities over time?\\n', '3. Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or personal assistants?\\n', '4. How does the choice of LLM (Long Short-Term Memory) algorithm affect the performance and capabilities of an LLM Powered Autonomous Agent?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What are LLM Powered Autonomous Agents and how do they differ from traditional AI agents?\\n', '2. How do LLM Powered Autonomous Agents learn and improve their decision-making abilities over time?\\n', '3. Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or personal assistants?\\n', '4. 
How does the choice of LLM (Long Short-Term Memory) algorithm affect the performance and capabilities of an LLM Powered Autonomous Agent?\\n']\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'Result': 'No good Google Search Result was found'}]\n" - ] - }, - { - "ename": "KeyError", - "evalue": "'link'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m logging\u001b[38;5;241m.\u001b[39mgetLogger(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlangchain.retrievers.web_research\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39msetLevel(logging\u001b[38;5;241m.\u001b[39mINFO)\n\u001b[1;32m 4\u001b[0m user_input \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHow do LLM Powered Autonomous Agents work?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 5\u001b[0m docs \u001b[38;5;241m=\u001b[39m \u001b[43mweb_research_retriever\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_relevant_documents\u001b[49m\u001b[43m(\u001b[49m\u001b[43muser_input\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Desktop/Code/langchain/libs/langchain/langchain/schema/retriever.py:181\u001b[0m, in \u001b[0;36mBaseRetriever.get_relevant_documents\u001b[0;34m(self, query, callbacks, tags, metadata, **kwargs)\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 180\u001b[0m run_manager\u001b[38;5;241m.\u001b[39mon_retriever_error(e)\n\u001b[0;32m--> 181\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 183\u001b[0m run_manager\u001b[38;5;241m.\u001b[39mon_retriever_end(\n\u001b[1;32m 184\u001b[0m result,\n\u001b[1;32m 185\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 186\u001b[0m )\n", - "File \u001b[0;32m~/Desktop/Code/langchain/libs/langchain/langchain/schema/retriever.py:174\u001b[0m, in \u001b[0;36mBaseRetriever.get_relevant_documents\u001b[0;34m(self, query, callbacks, tags, metadata, **kwargs)\u001b[0m\n\u001b[1;32m 172\u001b[0m _kwargs \u001b[38;5;241m=\u001b[39m kwargs \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expects_other_args \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[1;32m 173\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_new_arg_supported:\n\u001b[0;32m--> 174\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_relevant_documents\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 175\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrun_manager\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m_kwargs\u001b[49m\n\u001b[1;32m 176\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 178\u001b[0m 
result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_relevant_documents(query, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m_kwargs)\n", - "File \u001b[0;32m~/Desktop/Code/langchain/libs/langchain/langchain/retrievers/web_research.py:125\u001b[0m, in \u001b[0;36mWebResearchRetriever._get_relevant_documents\u001b[0;34m(self, query, run_manager)\u001b[0m\n\u001b[1;32m 123\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSearch results: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msearch_results\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m res \u001b[38;5;129;01min\u001b[39;00m search_results:\n\u001b[0;32m--> 125\u001b[0m urls_to_look\u001b[38;5;241m.\u001b[39mappend(\u001b[43mres\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlink\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m)\n\u001b[1;32m 127\u001b[0m \u001b[38;5;66;03m# Load HTML to text\u001b[39;00m\n\u001b[1;32m 128\u001b[0m urls \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m(urls_to_look)\n", - "\u001b[0;31mKeyError\u001b[0m: 'link'" + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'The State of Autonomous AI Agents', 'link': 'https://www.linkedin.com/pulse/state-autonomous-ai-agents-dean-meyer?utm_source=share&utm_medium=member_ios&utm_campaign=share_via', 'snippet': 'Jul 6, 2023 ... To optimize LLMs and in-context learning, a three-tiered infrastructure is taking shape (more on these tiers by Matt at a16z). The LLM Stack: (1)\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... In a LLM-powered autonomous agent system, LLM functions as the ... agents to improve iteratively by refining past action decisions and\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'SQ2. What are the most important advances in AI? | One Hundred ...', 'link': 'https://ai100.stanford.edu/gathering-strength-gathering-storms-one-hundred-year-study-artificial-intelligence-ai100-2021-1/sq2', 'snippet': 'One of the practical applications can be seen in GAN-based medical-image ... of crowds and are important for mobile robots including self-driving cars.'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Reflexion: Language Agents with Verbal Reinforcement Learning', 'link': 'https://arxiv.org/pdf/2303.11366', 'snippet': \"Jun 10, 2023 ... policy as an agent's memory encoding paired with a choice of LLM ... 
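The KeyError removed above is triggered when a query returns no hits: the search wrapper then yields [{'Result': 'No good Google Search Result was found'}], which carries no 'link' key. A defensive guard, not part of this patch, could skip such entries inside the URL-collection loop of _get_relevant_documents (variable names mirror that method):

# Hypothetical guard: only collect search results that actually carry a URL.
for res in search_results:
    if "link" in res:
        urls_to_look.append(res["link"])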
of the Reflexion process are the notion of short-term and long-term.\"}]\n", + "INFO:langchain.retrievers.web_research:URLs to load: {'https://www.linkedin.com/pulse/state-autonomous-ai-agents-dean-meyer?utm_source=share&utm_medium=member_ios&utm_campaign=share_via', 'https://arxiv.org/pdf/2303.11366', 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'https://ai100.stanford.edu/gathering-strength-gathering-storms-one-hundred-year-study-artificial-intelligence-ai100-2021-1/sq2'}\n", + "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", + "Fetching pages: 100%|##############| 4/4 [00:01<00:00, 3.41it/s]\n" ] } ], "source": [ "import logging\n", "logging.basicConfig()\n", "logging.getLogger(\"langchain.retrievers.web_research\").setLevel(logging.INFO)\n", "user_input = \"How do LLM Powered Autonomous Agents work?\"\n", "docs = web_research_retriever.get_relevant_documents(user_input)" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "id": "7e06adad", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "7" + "6" ] }, - "execution_count": 6, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(docs)" ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "bc053593", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Llama.generate: prefix-match hit\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " LLM Powered Autonomous Agents use a combination of planning, memory, and tool use to perform tasks. The agent breaks down large tasks into smaller subgoals, reflects on past actions, and learns from mistakes. It also uses external tools to access proprietary information sources and more. The agent utilizes short-term and long-term memory to retain and recall information over extended periods." + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "llama_print_timings: load time = 7308.57 ms\n", + "llama_print_timings: sample time = 63.39 ms / 85 runs ( 0.75 ms per token, 1340.88 tokens per second)\n", + "llama_print_timings: prompt eval time = 103653.11 ms / 2117 tokens ( 48.96 ms per token, 20.42 tokens per second)\n", + "llama_print_timings: eval time = 3880.31 ms / 84 runs ( 46.19 ms per token, 21.65 tokens per second)\n", + "llama_print_timings: total time = 107710.45 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "' LLM Powered Autonomous Agents use a combination of planning, memory, and tool use to perform tasks. The agent breaks down large tasks into smaller subgoals, reflects on past actions, and learns from mistakes. It also uses external tools to access proprietary information sources and more. The agent utilizes short-term and long-term memory to retain and recall information over extended periods.'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain import PromptTemplate\n", + "from langchain.chains.question_answering import load_qa_chain\n", + "\n", + "# Prompt\n", + "template = \"\"\"<<SYS>> \\n You are a QA assistant. Use the following pieces of context to answer the \n", + "question at the end. Keep the answer as concise as possible. 
\\n <</SYS>> \\n\\n [INST] Context: \n", + "{context} \\n\n", + "Question: {question} [/INST]\"\"\"\n", + "QA_CHAIN_PROMPT = PromptTemplate(\n", + " input_variables=[\"context\", \"question\"],\n", + " template=template,\n", + ")\n", + "\n", + "chain = load_qa_chain(llama, chain_type=\"stuff\", prompt=QA_CHAIN_PROMPT)\n", + "output = chain({\"input_documents\": docs, \"question\": user_input}, return_only_outputs=True)\n", + "output['output_text']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "efa99b1e", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/libs/langchain/langchain/document_loaders/async_html.py b/libs/langchain/langchain/document_loaders/async_html.py index d73e1425343f9..5a1ae4780874f 100644 --- a/libs/langchain/langchain/document_loaders/async_html.py +++ b/libs/langchain/langchain/document_loaders/async_html.py @@ -86,7 +86,12 @@ async def _fetch( headers=self.session.headers, ssl=None if self.session.verify else False, ) as response: - return await response.text() + try: + text = await response.text() + except UnicodeDecodeError: + print(f"Failed to decode content from {url}") + text = "" + return text except aiohttp.ClientConnectionError as e: if i == retries - 1: raise diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py index 1cc11ec00fa89..65848ec833e74 100644 --- a/libs/langchain/langchain/retrievers/web_research.py +++ b/libs/langchain/langchain/retrievers/web_research.py @@ -73,17 +73,18 @@ class WebResearchRetriever(BaseRetriever): DEFAULT_SEARCH_PROMPT, description="Search Prompt Template" ) max_splits_per_doc: int = Field(100, description="Maximum splits per document") + num_search_results: int = Field(1, description="Number of pages per Google search") - def search_tool(self, query: str, num_pages: int = 1) -> List[dict]: - """Google search for up to 3 queries.""" + def search_tool(self, query: str, num_search_results: int = 1) -> List[dict]: + """Returns num_search_results pages per Google search.""" try: os.environ["GOOGLE_CSE_ID"] = self.GOOGLE_CSE_ID os.environ["GOOGLE_API_KEY"] = self.GOOGLE_API_KEY search = GoogleSearchAPIWrapper() except Exception as e: print(f"Error: {str(e)}") - result = search.results(query, num_pages) - return result if isinstance(result, list) else [result] + result = search.results(query, num_search_results) + return result def _get_relevant_documents( self, @@ -117,7 +118,7 @@ def _get_relevant_documents( urls_to_look = [] for query in questions: # Google search - search_results = self.search_tool(query) + search_results = self.search_tool(query, self.num_search_results) logger.info("Searching for relevat urls ...") logger.info(f"Search results: {search_results}") for res in search_results: From cf44cb1a5a9c992d1250ff2218983649ed66aad3 Mon Sep 17 00:00:00 2001 From: Lance Martin Date: Mon, 24 Jul 2023 10:51:20 -0700 Subject: [PATCH 04/11] Address comments --- .../retrievers/how_to/web_research.ipynb | 386 +++++++++++------- .../langchain/document_loaders/async_html.py | 3 +- .../langchain/retrievers/web_research.py | 74 +++- 3 files changed, 293 insertions(+), 170 deletions(-) diff --git a/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb index 8b60c0ea67e64..829345192f3c2 100644 --- a/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb +++ 
b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "id": "4abea0a0", "metadata": {}, "outputs": [], @@ -30,51 +30,161 @@ }, { "cell_type": "markdown", - "id": "0c0e57bb", + "id": "90b1dcbd", "metadata": {}, "source": [ - "## Run\n", + "`Simple usage`\n", "\n", - "Pass the desired model and vectorstore." + "Specify the LLM to use for search query generation, and the retriever will do the rest." ] }, { "cell_type": "code", - "execution_count": 3, - "id": "3d84ea47", + "execution_count": 25, + "id": "e63d1c8b", "metadata": {}, "outputs": [], "source": [ + "import os\n", "from langchain.vectorstores import Chroma\n", "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.chat_models.openai import ChatOpenAI\n", - "# Set input\n", - "llm = ChatOpenAI(temperature=0)\n", + "from langchain.utilities import GoogleSearchAPIWrapper\n", + "\n", + "# Vectorstore\n", "vectorstore = Chroma(embedding_function=OpenAIEmbeddings(),persist_directory=\"./chroma_db_oai\")\n", - "GOOGLE_CSE_ID = \"xxx\"\n", - "GOOGLE_API_KEY = \"xxx\"" + "\n", + "# LLM\n", + "llm = ChatOpenAI(temperature=0)\n", + "\n", + "# Search \n", + "os.environ[\"GOOGLE_CSE_ID\"] = \"xxx\"\n", + "os.environ[\"GOOGLE_API_KEY\"] = \"xxx\"\n", + "search = GoogleSearchAPIWrapper()" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "f135e81d", + "execution_count": 23, + "id": "2c4e8ab3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents differ from traditional autonomous agents?\\n', '4. What are the applications of LLM Powered Autonomous Agents?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents differ from traditional autonomous agents?\\n', '4. What are the applications of LLM Powered Autonomous Agents?\\n']\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Fig. 1. Overview of a LLM-powered autonomous agent system. ... This approach utilizes the Planning Domain Definition Language (PDDL) as an\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': \"Jun 23, 2023 ... 
In a LLM-powered autonomous agent system, LLM functions as the agent's brain, complemented by several key components: Planning.\"}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Autonomous Agents & Agent Simulations', 'link': 'https://blog.langchain.dev/agents-round/', 'snippet': 'Apr 18, 2023 ... The main differences between the AutoGPT project and traditional LangChain agents can be attributed to different objectives. In AutoGPT, the\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, LLM functions ... simulacra of human behavior for interactive applications.'}]\n", + "INFO:langchain.retrievers.web_research:URLs to load: {'https://blog.langchain.dev/agents-round/', 'https://lilianweng.github.io/posts/2023-06-23-agent/'}\n", + "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", + "Fetching pages: 100%|######################################################################################################################################################################################################| 2/2 [00:00<00:00, 2.31it/s]\n" ] } ], "source": [ "# Initialize\n", - "web_research_retriever = WebResearchRetriever(\n", - " vectorstore=vectorstore, \n", + "web_research_retriever = WebResearchRetriever.from_llm(\n", + " vectorstore=vectorstore,\n", " llm=llm, \n", - " GOOGLE_CSE_ID=GOOGLE_CSE_ID, \n", - " GOOGLE_API_KEY=GOOGLE_API_KEY\n", - ")" + " search=search, \n", + ")\n", + "\n", + "# Run\n", + "user_input = \"How do LLM Powered Autonomous Agents work?\"\n", + "docs = web_research_retriever.get_relevant_documents(user_input)" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "c958adc6", + "execution_count": 24, + "id": "d39a90a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(docs)" + ] + }, + { + "cell_type": "markdown", + "id": "0c0e57bb", + "metadata": {}, + "source": [ + "`Added flexibility`\n", + "\n", + "Pass an LLM chain with a custom prompt and output parser." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3d84ea47", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "from typing import List\n", + "from langchain.chains import LLMChain\n", + "from pydantic import BaseModel, Field\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.output_parsers.pydantic import PydanticOutputParser\n", + "\n", + "# LLMChain\n", + "search_prompt = PromptTemplate(\n", + " input_variables=[\"question\"],\n", + " template=\"\"\"<<SYS>> \\n You are a web research assistant to help users\n", + " answer questions. Answer using a numeric list. Do not include any extra\n", + " text. 
\\n <</SYS>> \\n\\n [INST] Given a user input search query, \n", + " generate a numbered list of five search queries to run to help answer their \n", + " question: \\n\\n {question} [/INST]\"\"\",\n", + ")\n", + "\n", + "class LineList(BaseModel):\n", + " \"\"\"List of questions.\"\"\"\n", + "\n", + " lines: List[str] = Field(description=\"Questions\")\n", + "\n", + "class QuestionListOutputParser(PydanticOutputParser):\n", + " \"\"\"Output parser for a list of numbered questions.\"\"\"\n", + "\n", + " def __init__(self) -> None:\n", + " super().__init__(pydantic_object=LineList)\n", + "\n", + " def parse(self, text: str) -> LineList:\n", + " lines = re.findall(r\"\\d+\\..*?\\n\", text)\n", + " return LineList(lines=lines)\n", + " \n", + "llm_chain = LLMChain(\n", + " llm=llm,\n", + " prompt=search_prompt,\n", + " output_parser=QuestionListOutputParser(),\n", + " )" ] }, { "cell_type": "code", - "execution_count": 7, - "id": "c958adc6", + "execution_count": 27, + "id": "851b0471", "metadata": { "scrolled": false }, @@ -84,8 +194,9 @@ "output_type": "stream", "text": [ "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents use machine learning algorithms?\\n', '4. What are some real-world applications of LLM Powered Autonomous Agents?\\n'])}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents use machine learning algorithms?\\n', '4. What are some real-world applications of LLM Powered Autonomous Agents?\\n']\n", + "ERROR:langchain.callbacks.tracers.langchain:Failed to post https://api.langchain.plus/runs in LangSmith API. {\"detail\":\"Internal server error\"}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents use machine learning algorithms?\\n', '4. What are the applications of LLM Powered Autonomous Agents?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents use machine learning algorithms?\\n', '4. What are the applications of LLM Powered Autonomous Agents?\\n']\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Fig. 1. Overview of a LLM-powered autonomous agent system. ... 
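The parser can be sanity-checked on its own before wiring it into the chain. A small sketch with invented sample text; the regex keeps only lines that start with a number and a period and end with a newline:

# Standalone check of QuestionListOutputParser; the sample string is made up.
parser = QuestionListOutputParser()
sample = "Sure! Here are two queries:\n1. What is an LLM?\n2. How do agents plan?\n"
parser.parse(sample)
# -> LineList(lines=['1. What is an LLM?\n', '2. How do agents plan?\n'])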
This approach utilizes the Planning Domain Definition Language (PDDL) as an\\xa0...'}]\n", @@ -94,58 +205,61 @@ "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... 2023) applies the same idea to cross-episode trajectories in reinforcement learning tasks, where an algorithm is encapsulated in a long history-\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... It plays a crucial role in real-world tasks where trial and error are\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, LLM functions ... simulacra of human behavior for interactive applications.'}]\n", "INFO:langchain.retrievers.web_research:URLs to load: {'https://lilianweng.github.io/posts/2023-06-23-agent/'}\n", "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", - "Fetching pages: 100%|##############| 1/1 [00:00<00:00, 4.88it/s]\n" + "Fetching pages: 100%|######################################################################################################################################################################################################| 1/1 [00:00<00:00, 5.13it/s]\n", + "ERROR:langchain.callbacks.tracers.langchain:Failed to patch https://api.langchain.plus/runs/f347c1aa-c27a-4855-9fa5-c3cd772c62f6 in LangSmith API. {\"detail\":\"Internal server error\"}\n" ] } ], "source": [ + "# Initialize\n", + "web_research_retriever_llm_chain = WebResearchRetriever(\n", + " vectorstore=vectorstore,\n", + " llm_chain=llm_chain, \n", + " search=search, \n", + ")\n", + "\n", "# Run\n", - "import logging\n", - "logging.basicConfig()\n", - "logging.getLogger(\"langchain.retrievers.web_research\").setLevel(logging.INFO)\n", - "user_input = \"How do LLM Powered Autonomous Agents work?\"\n", - "docs = web_research_retriever.get_relevant_documents(user_input)" + "docs = web_research_retriever_llm_chain.get_relevant_documents(user_input)" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "c7a23b2a", + "execution_count": 28, + "id": "1ee52163", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\"LLM-powered autonomous agents work by using a large language model (LLM) as their core controller. These agents have several key components that complement the LLM:\\n\\n1. Planning: Complex tasks are broken down into simpler steps through task decomposition. This can be done by prompting the LLM, providing task-specific instructions, or using human input. Self-reflection techniques help the agents learn from experience and improve their reasoning.\\n\\n2. Memory: Autonomous agents have different types of memory, including working memory and long-term memory. They can use contextual embeddings to understand the user's intent and context by incorporating entire conversation histories. 
This allows them to respond based on collective knowledge gained throughout the interaction with a user.\\n\\n3. Tool Use: Autonomous agents have the capacity to use tools such as browsing the internet, accessing live data, or running code. They can define goals and tasks, identify the right actions to take, and generate and execute commands with the help of external tools like search engines or APIs.\\n\\nThese components work together to enable autonomous agents to perform tasks, solve problems, and interact with users in a personalized and context-aware manner. However, there are still challenges to overcome, such as the finite context length, long-term planning, and reliability of natural language interfaces. Ongoing research is focused on addressing these challenges and improving the capabilities of LLM-powered autonomous agents.\"" + "5" ] }, - "execution_count": 6, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from langchain.chains.question_answering import load_qa_chain\n", - "chain = load_qa_chain(llm, chain_type=\"stuff\")\n", - "output = chain({\"input_documents\": docs, \"question\": user_input}, return_only_outputs=True)\n", - "output['output_text']" + "len(docs)" ] }, { "cell_type": "markdown", - "id": "3663b2ba", + "id": "4f9530c0", "metadata": {}, "source": [ - "`Local -`" + "`Run locally`\n", + "\n", + "Specify LLM and embeddings that will run locally." ] }, { "cell_type": "code", - "execution_count": 7, - "id": "e62e36e9", + "execution_count": 29, + "id": "8cf0d155", "metadata": {}, "outputs": [ { @@ -168,8 +282,7 @@ "llama_model_load_internal: model size = 13B\n", "llama_model_load_internal: ggml ctx size = 0.09 MB\n", "llama_model_load_internal: mem required = 9132.71 MB (+ 1608.00 MB per state)\n", - "llama_new_context_with_model: kv self size = 3200.00 MB\n", - "ggml_metal_init: allocating\n" + "llama_new_context_with_model: kv self size = 3200.00 MB\n" ] }, { @@ -184,40 +297,41 @@ "name": "stderr", "output_type": "stream", "text": [ + "ggml_metal_init: allocating\n", "ggml_metal_init: using MPS\n", "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x2996f3910\n", - "ggml_metal_init: loaded kernel_mul 0x2996f4e40\n", - "ggml_metal_init: loaded kernel_mul_row 0x2996f5660\n", - "ggml_metal_init: loaded kernel_scale 0x2996f5cf0\n", - "ggml_metal_init: loaded kernel_silu 0x2996f6460\n", - "ggml_metal_init: loaded kernel_relu 0x2996f3c60\n", - "ggml_metal_init: loaded kernel_gelu 0x2996f3ec0\n", - "ggml_metal_init: loaded kernel_soft_max 0x2996f77e0\n", - "ggml_metal_init: loaded kernel_diag_mask_inf 0x2996f7c90\n", - "ggml_metal_init: loaded kernel_get_rows_f16 0x2996f85b0\n", - "ggml_metal_init: loaded kernel_get_rows_q4_0 0x2996f8de0\n", - "ggml_metal_init: loaded kernel_get_rows_q4_1 0x2996f9570\n", - "ggml_metal_init: loaded kernel_get_rows_q2_K 0x2996f9de0\n", - "ggml_metal_init: loaded kernel_get_rows_q3_K 0x2996fa4d0\n", - "ggml_metal_init: loaded kernel_get_rows_q4_K 0x2996fabd0\n", - "ggml_metal_init: loaded kernel_get_rows_q5_K 0x2996fb2b0\n", - "ggml_metal_init: loaded kernel_get_rows_q6_K 0x2996fb9b0\n", - "ggml_metal_init: loaded kernel_rms_norm 0x2996fc0b0\n", - "ggml_metal_init: loaded kernel_norm 0x2996fc7e0\n", - "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x2996fd7b0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x2996fdfa0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 
0x2996fe760\n", - "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x2996fef40\n", - "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x2996ff8a0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x2b8804080\n", - "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x2b8804840\n", - "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x2b8805000\n", - "ggml_metal_init: loaded kernel_rope 0x2b8805710\n", - "ggml_metal_init: loaded kernel_alibi_f32 0x2b8806100\n", - "ggml_metal_init: loaded kernel_cpy_f32_f16 0x2b8806cd0\n", - "ggml_metal_init: loaded kernel_cpy_f32_f32 0x2b8807640\n", - "ggml_metal_init: loaded kernel_cpy_f16_f16 0x2b8807fb0\n", + "ggml_metal_init: loaded kernel_add 0x2cdaed120\n", + "ggml_metal_init: loaded kernel_mul 0x2cdaee650\n", + "ggml_metal_init: loaded kernel_mul_row 0x2cdaeede0\n", + "ggml_metal_init: loaded kernel_scale 0x2cdaef460\n", + "ggml_metal_init: loaded kernel_silu 0x2cdaefbe0\n", + "ggml_metal_init: loaded kernel_relu 0x2cdaed470\n", + "ggml_metal_init: loaded kernel_gelu 0x2cdaed6d0\n", + "ggml_metal_init: loaded kernel_soft_max 0x2cdaf0f20\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x2cdaf13b0\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x2cdaf1e70\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x2cdaf2540\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x2cdaf2cd0\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x28c9914a0\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x28c991700\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x28c991960\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x28c991bc0\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x28c991e20\n", + "ggml_metal_init: loaded kernel_rms_norm 0x28c992210\n", + "ggml_metal_init: loaded kernel_norm 0x28c992470\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x28c992a10\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x28c992c70\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x28c992ed0\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x28c993130\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x10f7b2be0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x10f7b2ef0\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x10f7b3150\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x10f7b33b0\n", + "ggml_metal_init: loaded kernel_rope 0x10f7b3610\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x10f7b3c10\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x10f7b41e0\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x10f7b47b0\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x10f7b4d80\n", "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", "ggml_metal_init: hasUnifiedMemory = true\n", "ggml_metal_init: maxTransferRate = built-in GPU\n", @@ -227,14 +341,13 @@ "ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 597.00 MB, (11823.52 / 21845.34)\n", "ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (12335.52 / 21845.34)\n", "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", - "objc[24358]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2b82ac208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x5de898208). One of the two will be used. 
Which one is undefined.\n", - "objc[24358]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2b82ac208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x5decc4208). One of the two will be used. Which one is undefined.\n" + "objc[39697]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2cd900208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x5f2400208). One of the two will be used. Which one is undefined.\n", + "objc[39697]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2cd900208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x5f282c208). One of the two will be used. Which one is undefined.\n" ] } ], "source": [ "from langchain.llms import LlamaCpp\n", - "from langchain.vectorstores import Chroma\n", "from langchain.embeddings import GPT4AllEmbeddings\n", "\n", "n_gpu_layers = 1 # Metal set to 1 is enough.\n", @@ -250,34 +363,15 @@ " callback_manager=callback_manager,\n", " verbose=True,\n", ")\n", - "vectorstore_llama = Chroma(embedding_function=GPT4AllEmbeddings(),persist_directory=\"./chroma_db_llama\")\n", - "GOOGLE_CSE_ID = \"b5e84267513eb4dcf\"\n", - "GOOGLE_API_KEY = \"AIzaSyDUKwJCpdU6nNwANyA7NC2cXnMfvXD6YcM\"" + "\n", + "vectorstore_llama = Chroma(embedding_function=GPT4AllEmbeddings(),persist_directory=\"./chroma_db_llama\")" ] }, { "cell_type": "code", - "execution_count": 8, - "id": "39ff3d75", + "execution_count": 30, + "id": "3e0561ca", "metadata": {}, - "outputs": [], - "source": [ - "# Initialize WebResearchRetriever\n", - "web_research_retriever = WebResearchRetriever(\n", - " vectorstore=vectorstore_llama, \n", - " llm=llama, \n", - " GOOGLE_CSE_ID=GOOGLE_CSE_ID, \n", - " GOOGLE_API_KEY=GOOGLE_API_KEY\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "0898e34c", - "metadata": { - "scrolled": false - }, "outputs": [ { "name": "stderr", @@ -290,13 +384,14 @@ "name": "stdout", "output_type": "stream", "text": [ - " Sure! Based on the user input search query \"How do LLM Powered Autonomous Agents work?\", here are five search queries that could help answer their question:\n", + " Sure! Based on the user input search query \"How do LLM Powered Autonomous Agents work?\", here are five search queries that could be used to help answer this question:\n", "\n", - "1. What are LLM Powered Autonomous Agents and how do they differ from traditional AI agents?\n", - "2. How do LLM Powered Autonomous Agents learn and improve their decision-making abilities over time?\n", - "3. Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or personal assistants?\n", - "4. How does the choice of LLM (Long Short-Term Memory) algorithm affect the performance and capabilities of an LLM Powered Autonomous Agent?\n", - "5. What are some common challenges and limitations of LLM Powered Autonomous Agents, such as dealing with unexpected events or handling conflicting goals?" + "1. What is an LLM (Large Language Model) and how does it differ from other machine learning models?\n", + "2. 
How do autonomous agents use LLMs to make decisions and take actions?\n", + "3. Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or virtual assistants?\n", + "4. What are some potential risks or limitations associated with using LLM Powered Autonomous Agents in various industries or contexts?\n", + "5. How do experts predict the future of LLM Powered Autonomous Agents will evolve as technology advances and becomes more integrated into our daily lives?\n", + "These search queries could help provide a comprehensive overview of how LLM Powered Autonomous Agents work, their potential applications, risks and limitations, as well as the future outlook of this technology." ] }, { @@ -304,49 +399,56 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 7308.57 ms\n", - "llama_print_timings: sample time = 136.15 ms / 194 runs ( 0.70 ms per token, 1424.94 tokens per second)\n", - "llama_print_timings: prompt eval time = 7308.44 ms / 99 tokens ( 73.82 ms per token, 13.55 tokens per second)\n", - "llama_print_timings: eval time = 6384.79 ms / 193 runs ( 33.08 ms per token, 30.23 tokens per second)\n", - "llama_print_timings: total time = 14072.87 ms\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What are LLM Powered Autonomous Agents and how do they differ from traditional AI agents?\\n', '2. How do LLM Powered Autonomous Agents learn and improve their decision-making abilities over time?\\n', '3. Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or personal assistants?\\n', '4. How does the choice of LLM (Long Short-Term Memory) algorithm affect the performance and capabilities of an LLM Powered Autonomous Agent?\\n'])}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What are LLM Powered Autonomous Agents and how do they differ from traditional AI agents?\\n', '2. How do LLM Powered Autonomous Agents learn and improve their decision-making abilities over time?\\n', '3. Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or personal assistants?\\n', '4. How does the choice of LLM (Long Short-Term Memory) algorithm affect the performance and capabilities of an LLM Powered Autonomous Agent?\\n']\n", + "llama_print_timings: load time = 12546.80 ms\n", + "llama_print_timings: sample time = 166.15 ms / 236 runs ( 0.70 ms per token, 1420.44 tokens per second)\n", + "llama_print_timings: prompt eval time = 12546.65 ms / 99 tokens ( 126.73 ms per token, 7.89 tokens per second)\n", + "llama_print_timings: eval time = 9499.58 ms / 235 runs ( 40.42 ms per token, 24.74 tokens per second)\n", + "llama_print_timings: total time = 22535.38 ms\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is an LLM (Large Language Model) and how does it differ from other machine learning models?\\n', '2. How do autonomous agents use LLMs to make decisions and take actions?\\n', '3. Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or virtual assistants?\\n', '4. 
What are some potential risks or limitations associated with using LLM Powered Autonomous Agents in various industries or contexts?\\n', '5. How do experts predict the future of LLM Powered Autonomous Agents will evolve as technology advances and becomes more integrated into our daily lives?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is an LLM (Large Language Model) and how does it differ from other machine learning models?\\n', '2. How do autonomous agents use LLMs to make decisions and take actions?\\n', '3. Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or virtual assistants?\\n', '4. What are some potential risks or limitations associated with using LLM Powered Autonomous Agents in various industries or contexts?\\n', '5. How do experts predict the future of LLM Powered Autonomous Agents will evolve as technology advances and becomes more integrated into our daily lives?\\n']\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'The State of Autonomous AI Agents', 'link': 'https://www.linkedin.com/pulse/state-autonomous-ai-agents-dean-meyer?utm_source=share&utm_medium=member_ios&utm_campaign=share_via', 'snippet': 'Jul 6, 2023 ... To optimize LLMs and in-context learning, a three-tiered infrastructure is taking shape (more on these tiers by Matt at a16z). The LLM Stack: (1)\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'What Are Large Language Models and Why Are They Important ...', 'link': 'https://blogs.nvidia.com/blog/2023/01/26/what-are-large-language-models-used-for/', 'snippet': 'Jan 26, 2023 ... A large language model, or LLM, is a deep learning algorithm that ... languages or scenarios in which communication of different types is\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... In a LLM-powered autonomous agent system, LLM functions as the ... agents to improve iteratively by refining past action decisions and\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Demystifying LLMs (Part 2): Autonomous Agents ... - The Agency Fund', 'link': 'https://www.agency.fund/post/demystifying-llms-part-2', 'snippet': 'Jun 23, 2023 ... Autonomous LLM agents can aid in data analysis tasks, ... they use LLMs to define goals and tasks, identify the right actions to take,\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'SQ2. What are the most important advances in AI? | One Hundred ...', 'link': 'https://ai100.stanford.edu/gathering-strength-gathering-storms-one-hundred-year-study-artificial-intelligence-ai100-2021-1/sq2', 'snippet': 'One of the practical applications can be seen in GAN-based medical-image ... of crowds and are important for mobile robots including self-driving cars.'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'What are the 3 types of AI? 
A guide to narrow, general, and super ...', 'link': 'https://codebots.com/artificial-intelligence/the-3-types-of-ai-is-the-third-even-possible', 'snippet': 'Oct 24, 2017 ... There are 3 types of artificial intelligence (AI): narrow or weak AI, general or strong AI, and artificial superintelligence. We have\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Reflexion: Language Agents with Verbal Reinforcement Learning', 'link': 'https://arxiv.org/pdf/2303.11366', 'snippet': \"Jun 10, 2023 ... policy as an agent's memory encoding paired with a choice of LLM ... of the Reflexion process are the notion of short-term and long-term.\"}]\n", - "INFO:langchain.retrievers.web_research:URLs to load: {'https://www.linkedin.com/pulse/state-autonomous-ai-agents-dean-meyer?utm_source=share&utm_medium=member_ios&utm_campaign=share_via', 'https://arxiv.org/pdf/2303.11366', 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'https://ai100.stanford.edu/gathering-strength-gathering-storms-one-hundred-year-study-artificial-intelligence-ai100-2021-1/sq2'}\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Economic potential of generative AI | McKinsey', 'link': 'https://www.mckinsey.com/capabilities/mckinsey-digital/our-insights/the-economic-potential-of-generative-ai-the-next-productivity-frontier', 'snippet': 'Jun 14, 2023 ... Deep learning has powered many of the recent advances in AI, but the ... Notably, the potential value of using generative AI for several\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Predictions for the State of AI and Robotics in 2025 | Pew Research ...', 'link': 'https://www.pewresearch.org/internet/2014/08/06/predictions-for-the-state-of-ai-and-robotics-in-2025/', 'snippet': 'Aug 6, 2014 ... 
As computer intelligence becomes increasingly integrated in daily life, a number of experts expect major changes in the way people manage their\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:URLs to load: {'https://codebots.com/artificial-intelligence/the-3-types-of-ai-is-the-third-even-possible', 'https://www.pewresearch.org/internet/2014/08/06/predictions-for-the-state-of-ai-and-robotics-in-2025/', 'https://www.mckinsey.com/capabilities/mckinsey-digital/our-insights/the-economic-potential-of-generative-ai-the-next-productivity-frontier', 'https://blogs.nvidia.com/blog/2023/01/26/what-are-large-language-models-used-for/', 'https://www.agency.fund/post/demystifying-llms-part-2'}\n", "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", - "Fetching pages: 100%|##############| 4/4 [00:01<00:00, 3.41it/s]\n" + "Fetching pages: 100%|######################################################################################################################################################################################################| 5/5 [00:02<00:00, 1.78it/s]\n" ] } ], "source": [ - "import logging\n", - "logging.basicConfig()\n", - "logging.getLogger(\"langchain.retrievers.web_research\").setLevel(logging.INFO)\n", + "# Initialize\n", + "web_research_retriever = WebResearchRetriever.from_llm(\n", + " vectorstore=vectorstore_llama,\n", + " llm=llama, \n", + " search=search, \n", + ")\n", + "\n", + "# Run\n", "user_input = \"How do LLM Powered Autonomous Agents work?\"\n", "docs = web_research_retriever.get_relevant_documents(user_input)" ] }, { "cell_type": "code", - "execution_count": 10, - "id": "7e06adad", + "execution_count": 31, + "id": "9c6304d8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "6" + "10" ] }, - "execution_count": 10, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -357,8 +459,8 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "bc053593", + "execution_count": 32, + "id": "f135e81d", "metadata": {}, "outputs": [ { @@ -372,7 +474,7 @@ "name": "stdout", "output_type": "stream", "text": [ - " LLM Powered Autonomous Agents use a combination of planning, memory, and tool use to perform tasks. The agent breaks down large tasks into smaller subgoals, reflects on past actions, and learns from mistakes. It also uses external tools to access proprietary information sources and more. The agent utilizes short-term and long-term memory to retain and recall information over extended periods." + " LLM powered autonomous agents use large language models as their core controller to perform various tasks. These agents consist of three components: planning, memory, and tools. The LLM is responsible for generating actions based on the current state of the agent and its goals. The planning component breaks down complex tasks into smaller ones, while the memory component stores knowledge gained throughout the interaction with a user. Finally, the tool use component allows the agent to fetch current information, access live data, or perform dynamic computations using external tools such as search engines or APIs." 
] }, { @@ -380,26 +482,26 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 7308.57 ms\n", - "llama_print_timings: sample time = 63.39 ms / 85 runs ( 0.75 ms per token, 1340.88 tokens per second)\n", - "llama_print_timings: prompt eval time = 103653.11 ms / 2117 tokens ( 48.96 ms per token, 20.42 tokens per second)\n", - "llama_print_timings: eval time = 3880.31 ms / 84 runs ( 46.19 ms per token, 21.65 tokens per second)\n", - "llama_print_timings: total time = 107710.45 ms\n" + "llama_print_timings: load time = 12546.80 ms\n", + "llama_print_timings: sample time = 83.38 ms / 114 runs ( 0.73 ms per token, 1367.27 tokens per second)\n", + "llama_print_timings: prompt eval time = 176621.27 ms / 3027 tokens ( 58.35 ms per token, 17.14 tokens per second)\n", + "llama_print_timings: eval time = 6285.97 ms / 113 runs ( 55.63 ms per token, 17.98 tokens per second)\n", + "llama_print_timings: total time = 183147.12 ms\n" ] }, { "data": { "text/plain": [ - "' LLM Powered Autonomous Agents use a combination of planning, memory, and tool use to perform tasks. The agent breaks down large tasks into smaller subgoals, reflects on past actions, and learns from mistakes. It also uses external tools to access proprietary information sources and more. The agent utilizes short-term and long-term memory to retain and recall information over extended periods.'" + "' LLM powered autonomous agents use large language models as their core controller to perform various tasks. These agents consist of three components: planning, memory, and tools. The LLM is responsible for generating actions based on the current state of the agent and its goals. The planning component breaks down complex tasks into smaller ones, while the memory component stores knowledge gained throughout the interaction with a user. 
Finally, the tool use component allows the agent to fetch current information, access live data, or perform dynamic computations using external tools such as search engines or APIs.'"
      ]
     },
-    "execution_count": 11,
+    "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "from langchain import PromptTemplate\n",
+    "# Generate answer\n",
     "from langchain.chains.question_answering import load_qa_chain\n",
     "\n",
     "# Prompt\n",
     "template = \"\"\"<<SYS>> \\n You are a QA assistant. Use the following pieces of context to answer the \n",
     "question at the end. Keep the answer as concise as possible. \\n <</SYS>> \\n\\n [INST] Context: \n",
     "{context} \\n\n",
     "Question: {question} [/INST]\"\"\"\n",
     "QA_CHAIN_PROMPT = PromptTemplate(\n",
     "    input_variables=[\"context\", \"question\"],\n",
     "    template=template,\n",
     ")\n",
     "\n",
     "chain = load_qa_chain(llama, chain_type=\"stuff\", prompt=QA_CHAIN_PROMPT)\n",
     "output = chain({\"input_documents\": docs, \"question\": user_input}, return_only_outputs=True)\n",
     "output['output_text']"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "efa99b1e",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
diff --git a/libs/langchain/langchain/document_loaders/async_html.py b/libs/langchain/langchain/document_loaders/async_html.py
index 5a1ae4780874f..95e55e7dfc752 100644
--- a/libs/langchain/langchain/document_loaders/async_html.py
+++ b/libs/langchain/langchain/document_loaders/async_html.py
@@ -1,4 +1,3 @@
-"""Web base loader class."""
 import asyncio
 import logging
 import warnings
@@ -89,7 +88,7 @@ async def _fetch(
         try:
             text = await response.text()
         except UnicodeDecodeError:
-            print(f"Failed to decode content from {url}")
+            logger.error(f"Failed to decode content from {url}")
             text = ""
             return text
         except aiohttp.ClientConnectionError as e:
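With the change above, decode failures surface through the module logger instead of stdout, so callers control their visibility with standard logging configuration. A minimal sketch, assuming the logger name follows the module path:

```python
# Minimal sketch: surface AsyncHtmlLoader's decode-failure messages, which the
# change above routes through logger.error rather than print(). The logger name
# is assumed to follow the module path; adjust if the package layout differs.
import logging

logging.basicConfig()
logging.getLogger("langchain.document_loaders.async_html").setLevel(logging.ERROR)
```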
diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py
index 65848ec833e74..06db113ddc118 100644
--- a/libs/langchain/langchain/retrievers/web_research.py
+++ b/libs/langchain/langchain/retrievers/web_research.py
@@ -1,7 +1,6 @@
 import logging
-import os
 import re
-from typing import List, Union
+from typing import List
 
 from pydantic import BaseModel, Field
 
@@ -10,7 +9,6 @@
     CallbackManagerForRetrieverRun,
 )
 from langchain.chains import LLMChain
-from langchain.chat_models.openai import ChatOpenAI
 from langchain.document_loaders import AsyncHtmlLoader
 from langchain.document_transformers import Html2TextTransformer
 from langchain.llms.base import BaseLLM
@@ -64,26 +62,65 @@ class WebResearchRetriever(BaseRetriever):
     vectorstore: VectorStore = Field(
         ..., description="Vector store for handling document embeddings"
     )
-    llm: Union[BaseLLM, ChatOpenAI] = Field(
-        ..., description="Language model for generating questions"
-    )
-    GOOGLE_CSE_ID: str = Field(..., description="Google Custom Search Engine ID")
-    GOOGLE_API_KEY: str = Field(..., description="Google API Key")
+    llm_chain: LLMChain
+    search: GoogleSearchAPIWrapper = Field(..., description="Google Search API Wrapper")
     search_prompt: PromptTemplate = Field(
         DEFAULT_SEARCH_PROMPT, description="Search Prompt Template"
     )
     max_splits_per_doc: int = Field(100, description="Maximum splits per document")
     num_search_results: int = Field(1, description="Number of pages per Google search")
+    text_splitter: RecursiveCharacterTextSplitter = Field(
+        RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=50),
+        description="Text splitter for splitting web pages into chunks",
+    )
+
+    @classmethod
+    def from_llm(
+        cls,
+        vectorstore: VectorStore,
+        llm: BaseLLM,
+        search: GoogleSearchAPIWrapper,
+        search_prompt: PromptTemplate = DEFAULT_SEARCH_PROMPT,
+        max_splits_per_doc: int = 100,
+        num_search_results: int = 1,
+        text_splitter: RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter(
+            chunk_size=1500, chunk_overlap=50
+        ),
+    ) -> "WebResearchRetriever":
+        """Initialize from llm using default template.
+
+        Args:
+            search: GoogleSearchAPIWrapper
+            llm: llm for search question generation using DEFAULT_SEARCH_PROMPT
+            search_prompt: prompt for generating search questions
+            max_splits_per_doc: Maximum splits per document to keep
+            num_search_results: Number of pages per Google search
+            text_splitter: Text splitter for splitting web pages into chunks
+
+        Returns:
+            WebResearchRetriever
+        """
+        llm_chain = LLMChain(
+            llm=llm,
+            prompt=search_prompt,
+            output_parser=QuestionListOutputParser(),
+        )
+        return cls(
+            vectorstore=vectorstore,
+            llm_chain=llm_chain,
+            search=search,
+            search_prompt=search_prompt,
+            max_splits_per_doc=max_splits_per_doc,
+            num_search_results=num_search_results,
+            text_splitter=text_splitter,
+        )
 
     def search_tool(self, query: str, num_search_results: int = 1) -> List[dict]:
         """Returns num_serch_results pages per Google search."""
         try:
-            os.environ["GOOGLE_CSE_ID"] = self.GOOGLE_CSE_ID
-            os.environ["GOOGLE_API_KEY"] = self.GOOGLE_API_KEY
-            search = GoogleSearchAPIWrapper()
+            result = self.search.results(query, num_search_results)
         except Exception as e:
-            print(f"Error: {str(e)}")
-            result = search.results(query, num_search_results)
+            raise Exception(f"Error: {str(e)}")
         return result
 
     def _get_relevant_documents(
@@ -103,12 +140,7 @@ def _get_relevant_documents(
 
         # Get search questions
         logger.info("Generating questions for Google Search ...")
-        llm_chain = LLMChain(
-            llm=self.llm,
-            prompt=self.search_prompt,
-            output_parser=QuestionListOutputParser(),
-        )
-        result = llm_chain({"question": query})
+        result = self.llm_chain({"question": query})
         logger.info(f"Questions for Google Search (raw): {result}")
         questions = getattr(result["text"], "lines", [])
         logger.info(f"Questions for Google Search: {questions}")
@@ -134,9 +166,7 @@ def _get_relevant_documents(
         # This can use rate limit w/ embedding
         logger.info("Grabbing most relevant splits from urls ...")
         filtered_splits = []
-        text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=1500, chunk_overlap=50
-        )
+        text_splitter = self.text_splitter
         for doc in html2text.transform_documents(loader.load()):
             doc_splits = text_splitter.split_documents([doc])
             if len(doc_splits) > self.max_splits_per_doc:

From 4f4f98ede054fb726ce392ec0a5d3388653fe39f Mon Sep 17 00:00:00 2001
From: Lance Martin 
Date: Mon, 24 Jul 2023 12:06:06 -0700
Subject: [PATCH 05/11] Add local database of urls we've already loaded into
 vectorstore

---
 .../retrievers/how_to/web_research.ipynb      | 116 ++++++++++++++----
 .../langchain/retrievers/web_research.py      |  56 ++++++---
 2 files changed, 126 insertions(+), 46 deletions(-)

diff --git a/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb
index 829345192f3c2..929fc2abb6a85 100644
--- a/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb
+++ b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb
@@ -17,7 +17,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 2,
   "id": "4abea0a0",
   "metadata": {},
   "outputs": [],
@@ -33,18 +33,19 @@
   "id": "90b1dcbd",
   "metadata": {},
   "source": [
-    "`Simple usage`\n",
+    "### Simple usage\n",
    "\n",
-    "Specify the LLM to use for search query generation, and the retriver will do the rest."
+    "Specify the LLM to use for Google search query generation."
] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 3, "id": "e63d1c8b", "metadata": {}, "outputs": [], "source": [ + "import os\n", "from langchain.vectorstores import Chroma\n", "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.chat_models.openai import ChatOpenAI\n", @@ -64,7 +65,30 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 8, + "id": "118b50aa", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize\n", + "web_research_retriever = WebResearchRetriever.from_llm(\n", + " vectorstore=vectorstore,\n", + " llm=llm, \n", + " search=search, \n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "357559fd", + "metadata": {}, + "source": [ + "Run with optional logging." + ] + }, + { + "cell_type": "code", + "execution_count": 10, "id": "2c4e8ab3", "metadata": {}, "outputs": [ @@ -73,55 +97,95 @@ "output_type": "stream", "text": [ "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents differ from traditional autonomous agents?\\n', '4. What are the applications of LLM Powered Autonomous Agents?\\n'])}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents differ from traditional autonomous agents?\\n', '4. What are the applications of LLM Powered Autonomous Agents?\\n']\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is LLM technology and how does it work?\\n', '2. What are the key features of LLM powered autonomous agents?\\n', '3. How do LLM powered autonomous agents make decisions?\\n', '4. What are the advantages of using LLM technology in autonomous agents?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is LLM technology and how does it work?\\n', '2. What are the key features of LLM powered autonomous agents?\\n', '3. How do LLM powered autonomous agents make decisions?\\n', '4. What are the advantages of using LLM technology in autonomous agents?\\n']\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Fig. 1. Overview of a LLM-powered autonomous agent system. ... This approach utilizes the Planning Domain Definition Language (PDDL) as an\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'How Does ChatGPT Really Work?', 'link': 'https://www.nytimes.com/2023/03/28/technology/ai-chatbots-chatgpt-bing-bard-llm.html', 'snippet': \"Apr 4, 2023 ... Let's pretend that we're trying to build an L.L.M. to help you with replying to your emails. We'll call it MailBot. Step 1: Set a goal. 
Every\\xa0...\"}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': \"Jun 23, 2023 ... In a LLM-powered autonomous agent system, LLM functions as the agent's brain, complemented by several key components: Planning.\"}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Autonomous Agents & Agent Simulations', 'link': 'https://blog.langchain.dev/agents-round/', 'snippet': 'Apr 18, 2023 ... The main differences between the AutoGPT project and traditional LangChain agents can be attributed to different objectives. In AutoGPT, the\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... In the API-Bank workflow, LLMs need to make a couple of decisions and at\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, LLM functions ... simulacra of human behavior for interactive applications.'}]\n", - "INFO:langchain.retrievers.web_research:URLs to load: {'https://blog.langchain.dev/agents-round/', 'https://lilianweng.github.io/posts/2023-06-23-agent/'}\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': 'A comprehensive and hands-on guide to autonomous agents with ...', 'link': 'https://bootcamp.uxdesign.cc/a-comprehensive-and-hands-on-guide-to-autonomous-agents-with-gpt-b58d54724d50', 'snippet': 'May 2, 2023 ... Databases keep the context and memory for LLMs. 
Because each call to the LLM (or GPT API) is constrained to that single conversation, to provide\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:New URLs to load: ['https://bootcamp.uxdesign.cc/a-comprehensive-and-hands-on-guide-to-autonomous-agents-with-gpt-b58d54724d50', 'https://www.nytimes.com/2023/03/28/technology/ai-chatbots-chatgpt-bing-bard-llm.html']\n", "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", - "Fetching pages: 100%|######################################################################################################################################################################################################| 2/2 [00:00<00:00, 2.31it/s]\n" + "Fetching pages: 100%|######################################################################################################################################################################################################| 2/2 [00:01<00:00, 1.36it/s]\n" ] } ], "source": [ - "# Initialize\n", - "web_research_retriever = WebResearchRetriever.from_llm(\n", - " vectorstore=vectorstore,\n", - " llm=llm, \n", - " search=search, \n", - ")\n", - "\n", "# Run\n", + "import logging\n", + "logging.basicConfig()\n", + "logging.getLogger(\"langchain.retrievers.web_research\").setLevel(logging.INFO)\n", "user_input = \"How do LLM Powered Autonomous Agents work?\"\n", "docs = web_research_retriever.get_relevant_documents(user_input)" ] }, + { + "cell_type": "markdown", + "id": "94d30c39", + "metadata": {}, + "source": [ + "Look at the URLs loaded." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "51621ebd", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['https://lilianweng.github.io/posts/2023-06-23-agent/',\n", + " 'https://bootcamp.uxdesign.cc/a-comprehensive-and-hands-on-guide-to-autonomous-agents-with-gpt-b58d54724d50',\n", + " 'https://www.nytimes.com/2023/03/28/technology/ai-chatbots-chatgpt-bing-bard-llm.html']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "web_research_retriever.get_urls()" + ] + }, + { + "cell_type": "markdown", + "id": "b681a846", + "metadata": {}, + "source": [ + "Generate answer." + ] + }, { "cell_type": "code", - "execution_count": 24, - "id": "d39a90a8", + "execution_count": 16, + "id": "ceca5681", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "5" + "\"LLM Powered Autonomous Agents work by using a large language model (LLM) as the core controller of the agent. The LLM serves as the agent's brain and is complemented by several key components.\\n\\n1. Planning: Complex tasks are broken down into simpler steps through task decomposition. This can be done by prompting the LLM, providing task-specific instructions, or using human input. Self-reflection techniques help the agent learn from experience and improve its reasoning abilities.\\n\\n2. Memory: Autonomous LLM agents have the capacity to use tools and access live data. They can fetch current information, perform dynamic computations, and interact with external resources. 
This allows them to go beyond the limitations of turn-by-turn conversational interfaces.\n\nOverall, LLM Powered Autonomous Agents leverage the capabilities of LLMs to solve problems, generate solutions, and perform tasks in various domains.\""
      ]
     },
-    "execution_count": 24,
+    "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "len(docs)"
+    "from langchain.chains.question_answering import load_qa_chain\n",
+    "chain = load_qa_chain(llm, chain_type=\"stuff\")\n",
+    "output = chain({\"input_documents\": docs, \"question\": user_input},return_only_outputs=True)\n",
+    "output['output_text']"
    ]
   },
   {
@@ -129,7 +193,7 @@
    "id": "0c0e57bb",
    "metadata": {},
    "source": [
-    "`Added flexibility`\n",
+    "### More flexibility\n",
     "\n",
     "Pass an LLM chain with custom prompt and output parsing"
    ]
@@ -251,7 +315,7 @@
    "id": "4f9530c0",
    "metadata": {},
    "source": [
-    "`Run locally`\n",
+    "### Run locally\n",
     "\n",
     "Specify LLM and embeddings that will run locally."
    ]
diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py
index 06db113ddc118..69a43367e4ba9 100644
--- a/libs/langchain/langchain/retrievers/web_research.py
+++ b/libs/langchain/langchain/retrievers/web_research.py
@@ -73,6 +73,12 @@ class WebResearchRetriever(BaseRetriever):
         RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=50),
         description="Text splitter for splitting web pages into chunks",
     )
+    urls: List[str] = Field(
+        default_factory=list, description="Current URLs being processed"
+    )
+    url_database: List[str] = Field(
+        default_factory=list, description="List of processed URLs"
+    )
 
     @classmethod
     def from_llm(
@@ -123,6 +129,10 @@ def search_tool(self, query: str, num_search_results: int = 1) -> List[dict]:
             raise Exception(f"Error: {str(e)}")
         return result
 
+    def get_urls(self) -> List[str]:
+        """Return the list of URLs fetched during the most recent query."""
+        return self.urls
+
    def _get_relevant_documents(
        self,
        query: str,
@@ -156,27 +166,33 @@ def _get_relevant_documents(
         for res in search_results:
             urls_to_look.append(res["link"])
 
-        # Load HTML to text
+        # Relevant urls
         urls = set(urls_to_look)
-        logger.info(f"URLs to load: {urls}")
-        loader = AsyncHtmlLoader(list(urls))
-        html2text = Html2TextTransformer()
-
-        # Proect against very large documents
-        # This can use rate limit w/ embedding
-        logger.info("Grabbing most relevant splits from urls ...")
-        filtered_splits = []
-        text_splitter = self.text_splitter
-        for doc in html2text.transform_documents(loader.load()):
-            doc_splits = text_splitter.split_documents([doc])
-            if len(doc_splits) > self.max_splits_per_doc:
-                logger.info(
-                    f"Document {doc.metadata} has too many splits ({len(doc_splits)}), "
-                    f"keeping only the first {self.max_splits_per_doc}"
-                )
-                doc_splits = doc_splits[: self.max_splits_per_doc]
-            filtered_splits.extend(doc_splits)
-        self.vectorstore.add_documents(filtered_splits)
+        self.urls = list(urls)
+
+        # Check for any new urls that we have not processed
+        new_urls = list(urls.difference(self.url_database))
+
+        logger.info(f"New URLs to load: {new_urls}")
+        # Load, split, and add new urls to vectorstore
+        if new_urls:
+            loader = AsyncHtmlLoader(new_urls)
+            html2text = Html2TextTransformer()
+            logger.info("Grabbing most relevant splits from urls ...")
+            filtered_splits = []
+            text_splitter = self.text_splitter
+            for doc in html2text.transform_documents(loader.load()):
+                doc_splits = text_splitter.split_documents([doc])
+                # Protect against very large documents
+                if 
len(doc_splits) > self.max_splits_per_doc: + logger.info( + f"{doc.metadata} has too many splits ({len(doc_splits)}), " + f"keeping only the first {self.max_splits_per_doc}" + ) + doc_splits = doc_splits[: self.max_splits_per_doc] + filtered_splits.extend(doc_splits) + self.vectorstore.add_documents(filtered_splits) + self.url_database.extend(new_urls) # Search for relevant splits docs = [] From e142d8871f0f4bde2300a42b1a93b30180a3730b Mon Sep 17 00:00:00 2001 From: Lance Martin Date: Mon, 24 Jul 2023 18:00:56 -0700 Subject: [PATCH 06/11] Address comments --- .../retrievers/how_to/web_research.ipynb | 332 ++++++++---------- .../langchain/retrievers/web_research.py | 34 +- 2 files changed, 169 insertions(+), 197 deletions(-) diff --git a/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb index 929fc2abb6a85..b342ae5a8d750 100644 --- a/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb +++ b/docs/extras/modules/data_connection/retrievers/how_to/web_research.ipynb @@ -17,15 +17,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "id": "4abea0a0", "metadata": {}, "outputs": [], "source": [ - "from langchain.callbacks.manager import CallbackManager\n", - "from langchain.retrievers.web_research import WebResearchRetriever\n", - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler" + "from langchain.retrievers.web_research import WebResearchRetriever" ] }, { @@ -40,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 10, "id": "e63d1c8b", "metadata": {}, "outputs": [], @@ -65,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "id": "118b50aa", "metadata": {}, "outputs": [], @@ -78,17 +75,65 @@ ")" ] }, + { + "cell_type": "markdown", + "id": "39114da4", + "metadata": {}, + "source": [ + "`Run with citations`\n", + "\n", + "We can use `RetrievalQAWithSourcesChain` to retrieve docs and provide citations" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0b330acd", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Fetching pages: 100%|######################################################################################################################################################################################################| 1/1 [00:00<00:00, 3.60it/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "{'question': 'How do LLM Powered Autonomous Agents work?',\n", + " 'answer': 'LLM Powered Autonomous Agents work by utilizing a large language model (LLM) as the core controller of the agent. The agent is complemented by several key components, including planning, memory, and tool use. In terms of planning, the agent breaks down tasks into smaller subgoals and can reflect on past actions to improve future results. The memory component includes both short-term and long-term memory, allowing the agent to learn in-context and retain and recall information over extended periods. Tool use involves the agent calling external APIs for additional information. There are also challenges associated with LLM-powered autonomous agents, such as finite context length, long-term planning and task decomposition, and the reliability of natural language interfaces. 
\\n\\n',\n", + " 'sources': '\\n- https://lilianweng.github.io/posts/2023-06-23-agent/'}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.chains import RetrievalQAWithSourcesChain\n", + "user_input = \"How do LLM Powered Autonomous Agents work?\"\n", + "qa_chain = RetrievalQAWithSourcesChain.from_chain_type(llm,retriever=web_research_retriever)\n", + "result = qa_chain({\"question\": user_input})\n", + "result" + ] + }, { "cell_type": "markdown", "id": "357559fd", "metadata": {}, "source": [ - "Run with optional logging." + "`Run with logging`\n", + "\n", + "Here, we use `get_relevant_documents` method to return docs." ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 16, "id": "2c4e8ab3", "metadata": {}, "outputs": [ @@ -97,20 +142,18 @@ "output_type": "stream", "text": [ "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is LLM technology and how does it work?\\n', '2. What are the key features of LLM powered autonomous agents?\\n', '3. How do LLM powered autonomous agents make decisions?\\n', '4. What are the advantages of using LLM technology in autonomous agents?\\n'])}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is LLM technology and how does it work?\\n', '2. What are the key features of LLM powered autonomous agents?\\n', '3. How do LLM powered autonomous agents make decisions?\\n', '4. What are the advantages of using LLM technology in autonomous agents?\\n']\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?', 'text': LineList(lines=['1. How do LLM powered autonomous agents utilize task decomposition?\\n', '2. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '3. What role does task decomposition play in the functioning of LLM powered autonomous agents?\\n', '4. Why is task decomposition important for LLM powered autonomous agents?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. How do LLM powered autonomous agents utilize task decomposition?\\n', '2. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '3. What role does task decomposition play in the functioning of LLM powered autonomous agents?\\n', '4. Why is task decomposition important for LLM powered autonomous agents?\\n']\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'How Does ChatGPT Really Work?', 'link': 'https://www.nytimes.com/2023/03/28/technology/ai-chatbots-chatgpt-bing-bard-llm.html', 'snippet': \"Apr 4, 2023 ... Let's pretend that we're trying to build an L.L.M. to help you with replying to your emails. We'll call it MailBot. Step 1: Set a goal. Every\\xa0...\"}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... 
Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': \"Jun 23, 2023 ... In a LLM-powered autonomous agent system, LLM functions as the agent's brain, complemented by several key components: Planning.\"}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?\" , (2)\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... In the API-Bank workflow, LLMs need to make a couple of decisions and at\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... In a LLM-powered autonomous agent system, LLM functions as the ... Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'A comprehensive and hands-on guide to autonomous agents with ...', 'link': 'https://bootcamp.uxdesign.cc/a-comprehensive-and-hands-on-guide-to-autonomous-agents-with-gpt-b58d54724d50', 'snippet': 'May 2, 2023 ... Databases keep the context and memory for LLMs. Because each call to the LLM (or GPT API) is constrained to that single conversation, to provide\\xa0...'}]\n", - "INFO:langchain.retrievers.web_research:New URLs to load: ['https://bootcamp.uxdesign.cc/a-comprehensive-and-hands-on-guide-to-autonomous-agents-with-gpt-b58d54724d50', 'https://www.nytimes.com/2023/03/28/technology/ai-chatbots-chatgpt-bing-bard-llm.html']\n", - "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", - "Fetching pages: 100%|######################################################################################################################################################################################################| 2/2 [00:01<00:00, 1.36it/s]\n" + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... 
Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:New URLs to load: []\n" ] } ], @@ -119,7 +162,7 @@ "import logging\n", "logging.basicConfig()\n", "logging.getLogger(\"langchain.retrievers.web_research\").setLevel(logging.INFO)\n", - "user_input = \"How do LLM Powered Autonomous Agents work?\"\n", + "user_input = \"What is Task Decomposition in LLM Powered Autonomous Agents?\"\n", "docs = web_research_retriever.get_relevant_documents(user_input)" ] }, @@ -128,12 +171,12 @@ "id": "94d30c39", "metadata": {}, "source": [ - "Look at the URLs loaded." + "`Look at the URLs loaded`" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 17, "id": "51621ebd", "metadata": { "scrolled": true @@ -142,12 +185,10 @@ { "data": { "text/plain": [ - "['https://lilianweng.github.io/posts/2023-06-23-agent/',\n", - " 'https://bootcamp.uxdesign.cc/a-comprehensive-and-hands-on-guide-to-autonomous-agents-with-gpt-b58d54724d50',\n", - " 'https://www.nytimes.com/2023/03/28/technology/ai-chatbots-chatgpt-bing-bard-llm.html']" + "['https://lilianweng.github.io/posts/2023-06-23-agent/']" ] }, - "execution_count": 11, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -161,22 +202,24 @@ "id": "b681a846", "metadata": {}, "source": [ - "Generate answer." + "`Generate answer using retrieved docs`\n", + "\n", + "We can use `load_qa_chain` for QA using the retrieved docs" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "id": "ceca5681", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "\"LLM Powered Autonomous Agents work by using a large language model (LLM) as the core controller of the agent. The LLM serves as the agent's brain and is complemented by several key components.\\n\\n1. Planning: Complex tasks are broken down into simpler steps through task decomposition. This can be done by prompting the LLM, providing task-specific instructions, or using human input. Self-reflection techniques help the agent learn from experience and improve its reasoning abilities.\\n\\n2. Memory: Autonomous LLM agents have the capacity to use tools and access live data. They can fetch current information, perform dynamic computations, and interact with external resources. This allows them to go beyond the limitations of turn-by-turn conversational interfaces.\\n\\nOverall, LLM Powered Autonomous Agents leverage the capabilities of LLMs to solve problems, generate solutions, and perform tasks in various domains.\"" + "'Task decomposition in LLM-powered autonomous agents refers to the process of breaking down a complex task into smaller, more manageable subgoals. This allows the agent to efficiently handle and solve complex tasks by tackling them step by step. By decomposing a task, the agent can plan ahead and determine the sequence of actions required to achieve the overall goal. Task decomposition is an important component of planning in LLM-powered agents.'" ] }, - "execution_count": 16, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -216,11 +259,10 @@ "# LLMChain\n", "search_prompt = PromptTemplate(\n", " input_variables=[\"question\"],\n", - " template=\"\"\"<> \\n You are a web research assistant to help users\n", - " answer questions. Answer using a numeric list. Do not include any extra\n", - " test. 
\\n <> \\n\\n [INST] Given a user input search query, \n", - " generate a numbered list of five search queries to run to help answer their \n", - " question: \\n\\n {question} [/INST]\"\"\",\n", + " template=\"\"\"You are an assistant tasked with improving Google search \n", + " results. Generate FIVE Google search queries that are similar to\n", + " this question. The output should be a numbered list of questions and each\n", + " should have a question mark at the end: {question}\"\"\",\n", ")\n", "\n", "class LineList(BaseModel):\n", @@ -317,12 +359,12 @@ "source": [ "### Run locally\n", "\n", - "Specify LLM and embeddings that will run locally." + "Specify LLM and embeddings that will run locally (e.g., on your laptop)" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 6, "id": "8cf0d155", "metadata": {}, "outputs": [ @@ -364,38 +406,38 @@ "ggml_metal_init: allocating\n", "ggml_metal_init: using MPS\n", "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x2cdaed120\n", - "ggml_metal_init: loaded kernel_mul 0x2cdaee650\n", - "ggml_metal_init: loaded kernel_mul_row 0x2cdaeede0\n", - "ggml_metal_init: loaded kernel_scale 0x2cdaef460\n", - "ggml_metal_init: loaded kernel_silu 0x2cdaefbe0\n", - "ggml_metal_init: loaded kernel_relu 0x2cdaed470\n", - "ggml_metal_init: loaded kernel_gelu 0x2cdaed6d0\n", - "ggml_metal_init: loaded kernel_soft_max 0x2cdaf0f20\n", - "ggml_metal_init: loaded kernel_diag_mask_inf 0x2cdaf13b0\n", - "ggml_metal_init: loaded kernel_get_rows_f16 0x2cdaf1e70\n", - "ggml_metal_init: loaded kernel_get_rows_q4_0 0x2cdaf2540\n", - "ggml_metal_init: loaded kernel_get_rows_q4_1 0x2cdaf2cd0\n", - "ggml_metal_init: loaded kernel_get_rows_q2_K 0x28c9914a0\n", - "ggml_metal_init: loaded kernel_get_rows_q3_K 0x28c991700\n", - "ggml_metal_init: loaded kernel_get_rows_q4_K 0x28c991960\n", - "ggml_metal_init: loaded kernel_get_rows_q5_K 0x28c991bc0\n", - "ggml_metal_init: loaded kernel_get_rows_q6_K 0x28c991e20\n", - "ggml_metal_init: loaded kernel_rms_norm 0x28c992210\n", - "ggml_metal_init: loaded kernel_norm 0x28c992470\n", - "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x28c992a10\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x28c992c70\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x28c992ed0\n", - "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x28c993130\n", - "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x10f7b2be0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x10f7b2ef0\n", - "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x10f7b3150\n", - "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x10f7b33b0\n", - "ggml_metal_init: loaded kernel_rope 0x10f7b3610\n", - "ggml_metal_init: loaded kernel_alibi_f32 0x10f7b3c10\n", - "ggml_metal_init: loaded kernel_cpy_f32_f16 0x10f7b41e0\n", - "ggml_metal_init: loaded kernel_cpy_f32_f32 0x10f7b47b0\n", - "ggml_metal_init: loaded kernel_cpy_f16_f16 0x10f7b4d80\n", + "ggml_metal_init: loaded kernel_add 0x2c147aa30\n", + "ggml_metal_init: loaded kernel_mul 0x2c147bfd0\n", + "ggml_metal_init: loaded kernel_mul_row 0x2c147c820\n", + "ggml_metal_init: loaded kernel_scale 0x16c293d10\n", + "ggml_metal_init: loaded kernel_silu 0x16c294770\n", + "ggml_metal_init: loaded kernel_relu 0x16c2954f0\n", + "ggml_metal_init: loaded kernel_gelu 0x16c295b90\n", + "ggml_metal_init: loaded kernel_soft_max 0x16c296210\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x16c296960\n", + 
"ggml_metal_init: loaded kernel_get_rows_f16 0x16c2970e0\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x16c297810\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x16c297f90\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x2c147adc0\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x2c147d350\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x2c147ded0\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x2c147e5f0\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x2c147ece0\n", + "ggml_metal_init: loaded kernel_rms_norm 0x2c147f3e0\n", + "ggml_metal_init: loaded kernel_norm 0x2c147fb50\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x14afce5a0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x14af8b5e0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x109306050\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x14af89f80\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x16c298b50\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x16c298db0\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x16c299780\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x2c147d740\n", + "ggml_metal_init: loaded kernel_rope 0x2c1480280\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x2c1481000\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x2c14819d0\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x2c1482680\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x2c14828e0\n", "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", "ggml_metal_init: hasUnifiedMemory = true\n", "ggml_metal_init: maxTransferRate = built-in GPU\n", @@ -405,14 +447,16 @@ "ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 597.00 MB, (11823.52 / 21845.34)\n", "ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (12335.52 / 21845.34)\n", "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", - "objc[39697]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2cd900208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x5f2400208). One of the two will be used. Which one is undefined.\n", - "objc[39697]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2cd900208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x5f282c208). One of the two will be used. Which one is undefined.\n" + "objc[77075]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c1268208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x5e671c208). One of the two will be used. Which one is undefined.\n", + "objc[77075]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c1268208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x5e6b48208). One of the two will be used. 
Which one is undefined.\n" ] } ], "source": [ "from langchain.llms import LlamaCpp\n", "from langchain.embeddings import GPT4AllEmbeddings\n", + "from langchain.callbacks.manager import CallbackManager\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", "\n", "n_gpu_layers = 1 # Metal set to 1 is enough.\n", "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n", @@ -432,113 +476,31 @@ ] }, { - "cell_type": "code", - "execution_count": 30, - "id": "3e0561ca", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Sure! Based on the user input search query \"How do LLM Powered Autonomous Agents work?\", here are five search queries that could be used to help answer this question:\n", - "\n", - "1. What is an LLM (Large Language Model) and how does it differ from other machine learning models?\n", - "2. How do autonomous agents use LLMs to make decisions and take actions?\n", - "3. Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or virtual assistants?\n", - "4. What are some potential risks or limitations associated with using LLM Powered Autonomous Agents in various industries or contexts?\n", - "5. How do experts predict the future of LLM Powered Autonomous Agents will evolve as technology advances and becomes more integrated into our daily lives?\n", - "These search queries could help provide a comprehensive overview of how LLM Powered Autonomous Agents work, their potential applications, risks and limitations, as well as the future outlook of this technology." - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "llama_print_timings: load time = 12546.80 ms\n", - "llama_print_timings: sample time = 166.15 ms / 236 runs ( 0.70 ms per token, 1420.44 tokens per second)\n", - "llama_print_timings: prompt eval time = 12546.65 ms / 99 tokens ( 126.73 ms per token, 7.89 tokens per second)\n", - "llama_print_timings: eval time = 9499.58 ms / 235 runs ( 40.42 ms per token, 24.74 tokens per second)\n", - "llama_print_timings: total time = 22535.38 ms\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is an LLM (Large Language Model) and how does it differ from other machine learning models?\\n', '2. How do autonomous agents use LLMs to make decisions and take actions?\\n', '3. Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or virtual assistants?\\n', '4. What are some potential risks or limitations associated with using LLM Powered Autonomous Agents in various industries or contexts?\\n', '5. How do experts predict the future of LLM Powered Autonomous Agents will evolve as technology advances and becomes more integrated into our daily lives?\\n'])}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is an LLM (Large Language Model) and how does it differ from other machine learning models?\\n', '2. How do autonomous agents use LLMs to make decisions and take actions?\\n', '3. 
Can you provide examples of real-world applications of LLM Powered Autonomous Agents, such as self-driving cars or virtual assistants?\\n', '4. What are some potential risks or limitations associated with using LLM Powered Autonomous Agents in various industries or contexts?\\n', '5. How do experts predict the future of LLM Powered Autonomous Agents will evolve as technology advances and becomes more integrated into our daily lives?\\n']\n", - "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'What Are Large Language Models and Why Are They Important ...', 'link': 'https://blogs.nvidia.com/blog/2023/01/26/what-are-large-language-models-used-for/', 'snippet': 'Jan 26, 2023 ... A large language model, or LLM, is a deep learning algorithm that ... languages or scenarios in which communication of different types is\\xa0...'}]\n", - "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Demystifying LLMs (Part 2): Autonomous Agents ... - The Agency Fund', 'link': 'https://www.agency.fund/post/demystifying-llms-part-2', 'snippet': 'Jun 23, 2023 ... Autonomous LLM agents can aid in data analysis tasks, ... they use LLMs to define goals and tasks, identify the right actions to take,\\xa0...'}]\n", - "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'What are the 3 types of AI? A guide to narrow, general, and super ...', 'link': 'https://codebots.com/artificial-intelligence/the-3-types-of-ai-is-the-third-even-possible', 'snippet': 'Oct 24, 2017 ... There are 3 types of artificial intelligence (AI): narrow or weak AI, general or strong AI, and artificial superintelligence. We have\\xa0...'}]\n", - "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Economic potential of generative AI | McKinsey', 'link': 'https://www.mckinsey.com/capabilities/mckinsey-digital/our-insights/the-economic-potential-of-generative-ai-the-next-productivity-frontier', 'snippet': 'Jun 14, 2023 ... Deep learning has powered many of the recent advances in AI, but the ... Notably, the potential value of using generative AI for several\\xa0...'}]\n", - "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': 'Predictions for the State of AI and Robotics in 2025 | Pew Research ...', 'link': 'https://www.pewresearch.org/internet/2014/08/06/predictions-for-the-state-of-ai-and-robotics-in-2025/', 'snippet': 'Aug 6, 2014 ... 
As computer intelligence becomes increasingly integrated in daily life, a number of experts expect major changes in the way people manage their\\xa0...'}]\n", - "INFO:langchain.retrievers.web_research:URLs to load: {'https://codebots.com/artificial-intelligence/the-3-types-of-ai-is-the-third-even-possible', 'https://www.pewresearch.org/internet/2014/08/06/predictions-for-the-state-of-ai-and-robotics-in-2025/', 'https://www.mckinsey.com/capabilities/mckinsey-digital/our-insights/the-economic-potential-of-generative-ai-the-next-productivity-frontier', 'https://blogs.nvidia.com/blog/2023/01/26/what-are-large-language-models-used-for/', 'https://www.agency.fund/post/demystifying-llms-part-2'}\n", - "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", - "Fetching pages: 100%|######################################################################################################################################################################################################| 5/5 [00:02<00:00, 1.78it/s]\n" - ] - } - ], - "source": [ - "# Initialize\n", - "web_research_retriever = WebResearchRetriever.from_llm(\n", - " vectorstore=vectorstore_llama,\n", - " llm=llama, \n", - " search=search, \n", - ")\n", - "\n", - "# Run\n", - "user_input = \"How do LLM Powered Autonomous Agents work?\"\n", - "docs = web_research_retriever.get_relevant_documents(user_input)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "9c6304d8", + "cell_type": "markdown", + "id": "00f93dd4", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "10" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "len(docs)" + "We supplied `StreamingStdOutCallbackHandler()`, so model outputs are streamed " ] }, { "cell_type": "code", - "execution_count": 32, - "id": "f135e81d", + "execution_count": 7, + "id": "3e0561ca", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Llama.generate: prefix-match hit\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - " LLM powered autonomous agents use large language models as their core controller to perform various tasks. These agents consist of three components: planning, memory, and tools. The LLM is responsible for generating actions based on the current state of the agent and its goals. The planning component breaks down complex tasks into smaller ones, while the memory component stores knowledge gained throughout the interaction with a user. Finally, the tool use component allows the agent to fetch current information, access live data, or perform dynamic computations using external tools such as search engines or APIs." + "Using LlamaCpp\n", + " Sure, here are five Google search queries that are similar to \"How do LLM Powered Autonomous Agents work?\":\n", + "\n", + "1. What are the key components of an LLM-powered autonomous agent?\n", + "2. How do LLMs enable autonomous agents to make decisions?\n", + "3. Can you explain the training process for an LLM-powered autonomous agent?\n", + "4. What are some real-world applications of LLM-powered autonomous agents?\n", + "5. How do LLM-powered autonomous agents handle unexpected events or situations?" 
] }, { @@ -546,41 +508,39 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 12546.80 ms\n", - "llama_print_timings: sample time = 83.38 ms / 114 runs ( 0.73 ms per token, 1367.27 tokens per second)\n", - "llama_print_timings: prompt eval time = 176621.27 ms / 3027 tokens ( 58.35 ms per token, 17.14 tokens per second)\n", - "llama_print_timings: eval time = 6285.97 ms / 113 runs ( 55.63 ms per token, 17.98 tokens per second)\n", - "llama_print_timings: total time = 183147.12 ms\n" + "llama_print_timings: load time = 8929.09 ms\n", + "llama_print_timings: sample time = 88.29 ms / 125 runs ( 0.71 ms per token, 1415.76 tokens per second)\n", + "llama_print_timings: prompt eval time = 8928.89 ms / 97 tokens ( 92.05 ms per token, 10.86 tokens per second)\n", + "llama_print_timings: eval time = 8130.48 ms / 124 runs ( 65.57 ms per token, 15.25 tokens per second)\n", + "llama_print_timings: total time = 17310.78 ms\n", + "Fetching pages: 100%|######################################################################################################################################################################################################| 1/1 [00:00<00:00, 3.90it/s]\n" ] }, { "data": { "text/plain": [ - "' LLM powered autonomous agents use large language models as their core controller to perform various tasks. These agents consist of three components: planning, memory, and tools. The LLM is responsible for generating actions based on the current state of the agent and its goals. The planning component breaks down complex tasks into smaller ones, while the memory component stores knowledge gained throughout the interaction with a user. Finally, the tool use component allows the agent to fetch current information, access live data, or perform dynamic computations using external tools such as search engines or APIs.'" + "{'question': 'How do LLM Powered Autonomous Agents work?',\n", + " 'answer': 'LLM Powered Autonomous Agents work by utilizing a large language model (LLM) as the core controller of the agent. The agent system consists of several key components, including planning, memory, and tool use. In terms of planning, the agent breaks down complex tasks into smaller subgoals and can reflect on past actions to improve future results. The memory component includes both short-term and long-term memory, allowing the agent to learn in-context and retain and recall information over extended periods. The tool use component involves the agent calling external APIs for additional information. There are also case studies, such as scientific discovery agents and generative agents simulations, that demonstrate the capabilities of LLM-powered autonomous agents. However, there are challenges, such as the limited context length, difficulties in long-term planning and task decomposition, and the reliability of natural language interfaces. Self-reflection is also an important aspect of LLM-powered autonomous agents, allowing them to refine past actions and learn from mistakes. There are different approaches to implementing LLM-powered agents, including LLM+P, which involves using an external classical planner for long-horizon planning, and ReAct, which integrates reasoning and acting within LLM. 
\\n\\n',\n", +    "     'sources': '\\n- https://lilianweng.github.io/posts/2023-06-23-agent/'}" ]  },      - "execution_count": 32, + "execution_count": 7,      "metadata": {},      "output_type": "execute_result"     }    ],    "source": [ -    "# Gengerate answer\n", -    "from langchain.chains.question_answering import load_qa_chain\n", -    "\n", -    "# Prompt\n", -    "template = \"\"\"<<SYS>> \\n You are a QA assistant. Use the following pieces of context to answer the \n", -    "question at the end. Keep the answer as concise as possible. \\n <</SYS>> \\n\\n [INST] Context: \n", -    "{context} \\n\n", -    "Question: {question} [/INST]\"\"\"\n", -    "QA_CHAIN_PROMPT = PromptTemplate(\n", -    "    input_variables=[\"context\", \"question\"],\n", -    "    template=template,\n", +    "# Initialize\n", +    "web_research_retriever = WebResearchRetriever.from_llm(\n", +    "    vectorstore=vectorstore_llama,\n", +    "    llm=llama, \n", +    "    search=search, \n",     ")\n",     "\n", -    "chain = load_qa_chain(llama, chain_type=\"stuff\", prompt=QA_CHAIN_PROMPT)\n", -    "output = chain({\"input_documents\": docs, \"question\": user_input}, return_only_outputs=True)\n", -    "output['output_text']" +    "# Run\n", +    "qa_chain = RetrievalQAWithSourcesChain.from_chain_type(llm, retriever=web_research_retriever)\n", +    "result = qa_chain({\"question\": user_input})\n", +    "result"    ]   }  ], diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py index 69a43367e4ba9..d7816638b11ed 100644 --- a/libs/langchain/langchain/retrievers/web_research.py +++ b/libs/langchain/langchain/retrievers/web_research.py @@ -11,6 +11,7 @@  from langchain.chains import LLMChain  from langchain.document_loaders import AsyncHtmlLoader  from langchain.document_transformers import Html2TextTransformer +from langchain.llms import LlamaCpp  from langchain.llms.base import BaseLLM  from langchain.output_parsers.pydantic import PydanticOutputParser  from langchain.prompts import PromptTemplate @@ -30,13 +31,20 @@ class SearchQueries(BaseModel):  )  +DEFAULT_LLAMA_SEARCH_PROMPT = PromptTemplate( +    input_variables=["question"], +    template="""<<SYS>> \n You are an assistant tasked with improving Google search +    results. \n <</SYS>> \n\n [INST] Generate FIVE Google search queries that +    are similar to this question. The output should be a numbered list of questions +    and each should have a question mark at the end: \n\n {question} [/INST]""", +) +  DEFAULT_SEARCH_PROMPT = PromptTemplate(      input_variables=["question"], -    template="""<<SYS>> \n You are a web research assistant to help users -    answer questions. Answer using a numeric list. Do not include any extra -    test. \n <</SYS>> \n\n [INST] Given a user input search query, -    generate a numbered list of five search queries to run to help answer their -    question: \n\n {question} [/INST]""", +    template="""You are an assistant tasked with improving Google search +    results. Generate FIVE Google search queries that are similar to +    this question. 
The output should be a numbered list of questions and each +    should have a question mark at the end: {question}""",  )  @@ -64,9 +72,6 @@ class WebResearchRetriever(BaseRetriever):      )      llm_chain: LLMChain      search: GoogleSearchAPIWrapper = Field(..., description="Google Search API Wrapper") -    search_prompt: PromptTemplate = Field( -        DEFAULT_SEARCH_PROMPT, description="Search Prompt Template" -    )      max_splits_per_doc: int = Field(100, description="Maximum splits per document")      num_search_results: int = Field(1, description="Number of pages per Google search")      text_splitter: RecursiveCharacterTextSplitter = Field( @@ -86,7 +91,6 @@ def from_llm(          vectorstore: VectorStore,          llm: BaseLLM,          search: GoogleSearchAPIWrapper, -        search_prompt: PromptTemplate = DEFAULT_SEARCH_PROMPT,          max_splits_per_doc: int = 100,          num_search_results: int = 1,          text_splitter: RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter( @@ -106,16 +110,24 @@ def from_llm(          Returns:              WebResearchRetriever          """ + +        if isinstance(llm, LlamaCpp): +            prompt = DEFAULT_LLAMA_SEARCH_PROMPT + +        else: +            prompt = DEFAULT_SEARCH_PROMPT + +        # Use chat model prompt          llm_chain = LLMChain(              llm=llm, -            prompt=search_prompt, +            prompt=prompt,              output_parser=QuestionListOutputParser(),          ) +          return cls(              vectorstore=vectorstore,              llm_chain=llm_chain,              search=search, -            search_prompt=search_prompt,              max_splits_per_doc=max_splits_per_doc,              num_search_results=num_search_results,              text_splitter=text_splitter,  From a7cd5784215905c0f609b712b6db054ef2d0948f Mon Sep 17 00:00:00 2001 From: Lance Martin Date: Tue, 25 Jul 2023 11:22:06 -0700 Subject: [PATCH 07/11] Address comments --- .../retrievers/web_research.ipynb             | 207 ++++++++++-------- .../langchain/retrievers/web_research.py     |  24 +- 2 files changed, 134 insertions(+), 97 deletions(-) diff --git a/docs/extras/modules/data_connection/retrievers/web_research.ipynb b/docs/extras/modules/data_connection/retrievers/web_research.ipynb index b342ae5a8d750..43c38010f0bb5 100644 --- a/docs/extras/modules/data_connection/retrievers/web_research.ipynb +++ b/docs/extras/modules/data_connection/retrievers/web_research.ipynb @@ -17,7 +17,7 @@  },  {    "cell_type": "code", -   "execution_count": 8, +   "execution_count": 2,    "id": "4abea0a0",    "metadata": {},    "outputs": [],    "source": [ @@ -37,7 +37,7 @@  },  {    "cell_type": "code", -   "execution_count": 10, +   "execution_count": 6,    "id": "e63d1c8b",    "metadata": {},    "outputs": [],    "source": [ @@ -55,14 +55,14 @@    "llm = ChatOpenAI(temperature=0)\n",    "\n",    "# Search \n", -    "os.environ[\"GOOGLE_CSE_ID\"] = \"xxx\"\n", -    "os.environ[\"GOOGLE_API_KEY\"] = \"xxx\"\n", +    "os.environ[\"GOOGLE_CSE_ID\"] = \"xxx\"\n", +    "os.environ[\"GOOGLE_API_KEY\"] = \"xxx\"\n",    "search = GoogleSearchAPIWrapper()"   ]  },  {    "cell_type": "code", -   "execution_count": 11, +   "execution_count": 7,    "id": "118b50aa",    "metadata": {},    "outputs": [], @@ -87,7 +87,7 @@  },  {    "cell_type": "code", -   "execution_count": 5, +   "execution_count": 8,    "id": "0b330acd",    "metadata": {     "scrolled": false    },    "outputs": [     {      "name": "stderr",      "output_type": "stream",      "text": [ -      "Fetching pages: 100%|######################################################################################################################################################################################################| 1/1 [00:00<00:00,  3.60it/s]\n" +      "Fetching pages: 100%|#######################################################################################################################################| 3/3 [00:01<00:00,  1.79it/s]\n"      ]     },     {
"data": { "text/plain": [ "{'question': 'How do LLM Powered Autonomous Agents work?',\n", - " 'answer': 'LLM Powered Autonomous Agents work by utilizing a large language model (LLM) as the core controller of the agent. The agent is complemented by several key components, including planning, memory, and tool use. In terms of planning, the agent breaks down tasks into smaller subgoals and can reflect on past actions to improve future results. The memory component includes both short-term and long-term memory, allowing the agent to learn in-context and retain and recall information over extended periods. Tool use involves the agent calling external APIs for additional information. There are also challenges associated with LLM-powered autonomous agents, such as finite context length, long-term planning and task decomposition, and the reliability of natural language interfaces. \\n\\n',\n", + " 'answer': 'LLM Powered Autonomous Agents work by utilizing a large language model (LLM) as the core controller of the agent. The agent is complemented by several key components, including planning, memory, and tool use. The planning component involves task decomposition and self-reflection. The memory component includes short-term and long-term memory, which allows the agent to learn and retain information. The tool use component enables the agent to call external APIs for additional information. There are also case studies and challenges associated with building LLM-powered autonomous agents. \\n\\n',\n", " 'sources': '\\n- https://lilianweng.github.io/posts/2023-06-23-agent/'}" ] }, - "execution_count": 5, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -133,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 9, "id": "2c4e8ab3", "metadata": {}, "outputs": [ @@ -142,17 +142,17 @@ "output_type": "stream", "text": [ "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?', 'text': LineList(lines=['1. How do LLM powered autonomous agents utilize task decomposition?\\n', '2. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '3. What role does task decomposition play in the functioning of LLM powered autonomous agents?\\n', '4. Why is task decomposition important for LLM powered autonomous agents?\\n'])}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. How do LLM powered autonomous agents utilize task decomposition?\\n', '2. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '3. What role does task decomposition play in the functioning of LLM powered autonomous agents?\\n', '4. Why is task decomposition important for LLM powered autonomous agents?\\n']\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?', 'text': LineList(lines=['1. How do LLM powered autonomous agents use task decomposition?\\n', '2. Why is task decomposition important for LLM powered autonomous agents?\\n', '3. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '4. What are the benefits of task decomposition in LLM powered autonomous agents?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. 
How do LLM powered autonomous agents use task decomposition?\\n', '2. Why is task decomposition important for LLM powered autonomous agents?\\n', '3. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '4. What are the benefits of task decomposition in LLM powered autonomous agents?\\n']\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?\" , (2)\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... In a LLM-powered autonomous agent system, LLM functions as the ... Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... 
Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", "INFO:langchain.retrievers.web_research:New URLs to load: []\n" ] } @@ -176,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 10, "id": "51621ebd", "metadata": { "scrolled": true @@ -188,7 +188,7 @@ "['https://lilianweng.github.io/posts/2023-06-23-agent/']" ] }, - "execution_count": 17, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -209,17 +209,17 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 11, "id": "ceca5681", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'Task decomposition in LLM-powered autonomous agents refers to the process of breaking down a complex task into smaller, more manageable subgoals. This allows the agent to efficiently handle and solve complex tasks by tackling them step by step. By decomposing a task, the agent can plan ahead and determine the sequence of actions required to achieve the overall goal. Task decomposition is an important component of planning in LLM-powered agents.'" + "'Task decomposition in LLM-powered autonomous agents refers to the process of breaking down complex tasks into smaller, more manageable subtasks. This allows the agent to efficiently handle and solve complex problems by dividing them into smaller steps. Task decomposition can be done using various techniques, such as prompting the LLM with specific instructions or using human inputs. The goal is to transform a large task into multiple smaller tasks that can be easily understood and executed by the agent.'" ] }, - "execution_count": 19, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -243,7 +243,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "id": "3d84ea47", "metadata": {}, "outputs": [], @@ -289,7 +289,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 13, "id": "851b0471", "metadata": { "scrolled": false @@ -300,22 +300,20 @@ "output_type": "stream", "text": [ "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n", - "ERROR:langchain.callbacks.tracers.langchain:Failed to post https://api.langchain.plus/runs in LangSmith API. {\"detail\":\"Internal server error\"}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'How do LLM Powered Autonomous Agents work?', 'text': LineList(lines=['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents use machine learning algorithms?\\n', '4. What are the applications of LLM Powered Autonomous Agents?\\n'])}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What is the definition of LLM Powered Autonomous Agents?\\n', '2. What are the key features of LLM Powered Autonomous Agents?\\n', '3. How do LLM Powered Autonomous Agents use machine learning algorithms?\\n', '4. What are the applications of LLM Powered Autonomous Agents?\\n']\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?', 'text': LineList(lines=['1. How do LLM powered autonomous agents utilize task decomposition?\\n', '2. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '3. 
What role does task decomposition play in the functioning of LLM powered autonomous agents?\\n', '4. Why is task decomposition important for LLM powered autonomous agents?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. How do LLM powered autonomous agents utilize task decomposition?\\n', '2. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '3. What role does task decomposition play in the functioning of LLM powered autonomous agents?\\n', '4. Why is task decomposition important for LLM powered autonomous agents?\\n']\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Fig. 1. Overview of a LLM-powered autonomous agent system. ... This approach utilizes the Planning Domain Definition Language (PDDL) as an\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': \"Jun 23, 2023 ... In a LLM-powered autonomous agent system, LLM functions as the agent's brain, complemented by several key components: Planning.\"}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?\" , (2)\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... 2023) applies the same idea to cross-episode trajectories in reinforcement learning tasks, where an algorithm is encapsulated in a long history-\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... In a LLM-powered autonomous agent system, LLM functions as the ... Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, LLM functions ... 
simulacra of human behavior for interactive applications.'}]\n", - "INFO:langchain.retrievers.web_research:URLs to load: {'https://lilianweng.github.io/posts/2023-06-23-agent/'}\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:New URLs to load: ['https://lilianweng.github.io/posts/2023-06-23-agent/']\n", "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", - "Fetching pages: 100%|######################################################################################################################################################################################################| 1/1 [00:00<00:00, 5.13it/s]\n", - "ERROR:langchain.callbacks.tracers.langchain:Failed to patch https://api.langchain.plus/runs/f347c1aa-c27a-4855-9fa5-c3cd772c62f6 in LangSmith API. {\"detail\":\"Internal server error\"}\n" + "Fetching pages: 100%|#######################################################################################################################################| 1/1 [00:00<00:00, 8.41it/s]\n" ] } ], @@ -333,17 +331,17 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 14, "id": "1ee52163", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "5" + "3" ] }, - "execution_count": 28, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -364,7 +362,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 15, "id": "8cf0d155", "metadata": {}, "outputs": [ @@ -406,38 +404,38 @@ "ggml_metal_init: allocating\n", "ggml_metal_init: using MPS\n", "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x2c147aa30\n", - "ggml_metal_init: loaded kernel_mul 0x2c147bfd0\n", - "ggml_metal_init: loaded kernel_mul_row 0x2c147c820\n", - "ggml_metal_init: loaded kernel_scale 0x16c293d10\n", - "ggml_metal_init: loaded kernel_silu 0x16c294770\n", - "ggml_metal_init: loaded kernel_relu 0x16c2954f0\n", - "ggml_metal_init: loaded kernel_gelu 0x16c295b90\n", - "ggml_metal_init: loaded kernel_soft_max 0x16c296210\n", - "ggml_metal_init: loaded kernel_diag_mask_inf 0x16c296960\n", - "ggml_metal_init: loaded kernel_get_rows_f16 0x16c2970e0\n", - "ggml_metal_init: loaded kernel_get_rows_q4_0 0x16c297810\n", - "ggml_metal_init: loaded kernel_get_rows_q4_1 0x16c297f90\n", - "ggml_metal_init: loaded kernel_get_rows_q2_K 0x2c147adc0\n", - "ggml_metal_init: loaded kernel_get_rows_q3_K 0x2c147d350\n", - "ggml_metal_init: loaded kernel_get_rows_q4_K 0x2c147ded0\n", - "ggml_metal_init: loaded kernel_get_rows_q5_K 0x2c147e5f0\n", - "ggml_metal_init: loaded kernel_get_rows_q6_K 0x2c147ece0\n", - "ggml_metal_init: loaded kernel_rms_norm 0x2c147f3e0\n", - "ggml_metal_init: loaded kernel_norm 0x2c147fb50\n", - "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x14afce5a0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x14af8b5e0\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x109306050\n", - "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x14af89f80\n", - "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x16c298b50\n", - "ggml_metal_init: loaded 
kernel_mul_mat_q4_K_f32 0x16c298db0\n", - "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x16c299780\n", - "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x2c147d740\n", - "ggml_metal_init: loaded kernel_rope 0x2c1480280\n", - "ggml_metal_init: loaded kernel_alibi_f32 0x2c1481000\n", - "ggml_metal_init: loaded kernel_cpy_f32_f16 0x2c14819d0\n", - "ggml_metal_init: loaded kernel_cpy_f32_f32 0x2c1482680\n", - "ggml_metal_init: loaded kernel_cpy_f16_f16 0x2c14828e0\n", + "ggml_metal_init: loaded kernel_add 0x105bdb800\n", + "ggml_metal_init: loaded kernel_mul 0x105bdcec0\n", + "ggml_metal_init: loaded kernel_mul_row 0x105bde260\n", + "ggml_metal_init: loaded kernel_scale 0x105bdd120\n", + "ggml_metal_init: loaded kernel_silu 0x105bdd380\n", + "ggml_metal_init: loaded kernel_relu 0x105bdf760\n", + "ggml_metal_init: loaded kernel_gelu 0x2cc9deb10\n", + "ggml_metal_init: loaded kernel_soft_max 0x105bdf9c0\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x105bdff80\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x105be0620\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x105be0d20\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x105be14b0\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x105be1cc0\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x105be2390\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x105be2a70\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x1099db7f0\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x29fd2e530\n", + "ggml_metal_init: loaded kernel_rms_norm 0x105be3400\n", + "ggml_metal_init: loaded kernel_norm 0x105be37f0\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x105be4880\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x1099dc280\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x29fd326a0\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x29fd32900\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x105be4d80\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x105be51a0\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x105be6160\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x105be6900\n", + "ggml_metal_init: loaded kernel_rope 0x29fd40a00\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x29fd41000\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x29f5d3a70\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x29f5d3cd0\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x29f5d4d80\n", "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", "ggml_metal_init: hasUnifiedMemory = true\n", "ggml_metal_init: maxTransferRate = built-in GPU\n", @@ -447,8 +445,8 @@ "ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 597.00 MB, (11823.52 / 21845.34)\n", "ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (12335.52 / 21845.34)\n", "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", - "objc[77075]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c1268208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x5e671c208). One of the two will be used. 
Which one is undefined.\n", -    "objc[77075]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c1268208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x5e6b48208). One of the two will be used. Which one is undefined.\n" +    "objc[14995]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c6758208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x5f1248208). One of the two will be used. Which one is undefined.\n", +    "objc[14995]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c6758208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x5f1674208). One of the two will be used. Which one is undefined.\n"    ]   }  ], @@ -480,27 +478,39 @@    "id": "00f93dd4",    "metadata": {},    "source": [ -    "We supplied `StreamingStdOutCallbackHandler()`, so model outputs are streamed " +    "We supplied `StreamingStdOutCallbackHandler()`, so model outputs (e.g., generated questions) are streamed. \n", +    "\n", +    "We also have logging on, so we see them there too."   ]  },  {    "cell_type": "code", -   "execution_count": 7, +   "execution_count": 16,    "id": "3e0561ca",    "metadata": {},    "outputs": [ +    { +     "name": "stderr", +     "output_type": "stream", +     "text": [ +      "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n" +     ] +    },     {      "name": "stdout",      "output_type": "stream",      "text": [ -      "Using LlamaCpp\n", -      " Sure, here are five Google search queries that are similar to \"How do LLM Powered Autonomous Agents work?\":\n", +      " Sure, here are five Google search queries that are similar to \"What is Task Decomposition in LLM Powered Autonomous Agents?\"\n",       "\n", -      "1. What are the key components of an LLM-powered autonomous agent?\n", -      "2. How do LLMs enable autonomous agents to make decisions?\n", -      "3. Can you explain the training process for an LLM-powered autonomous agent?\n", -      "4. What are some real-world applications of LLM-powered autonomous agents?\n", -      "5. How do LLM-powered autonomous agents handle unexpected events or situations?" +      "1. How does Task Decomposition work in LLM Powered Autonomous Agents?\n", +      "\n", +      "2. What are the benefits of using Task Decomposition in LLM Powered Autonomous Agents?\n", +      "\n", +      "3. Can you explain the process of Task Decomposition in LLM Powered Autonomous Agents with examples?\n", +      "\n", +      "4. How does Task Decomposition improve the performance of LLM Powered Autonomous Agents?\n", +      "\n", +      "5. What are some common tasks that can be decomposed using Task Decomposition in LLM Powered Autonomous Agents?"
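The streamed text above is the raw model output that the retriever's QuestionListOutputParser then splits into individual search queries. A standalone sketch of that numbered-list extraction (the regex here is an assumption for illustration and may differ from the parser in this patch):

import re
from typing import List

def extract_numbered_questions(text: str) -> List[str]:
    # Keep only lines that start with '1.', '2.', ... and trim surrounding whitespace.
    return [match.strip() for match in re.findall(r"\d+\..*?(?:\n|$)", text)]

raw = (
    " Sure, here are five Google search queries:\n\n"
    "1. How does Task Decomposition work in LLM Powered Autonomous Agents?\n\n"
    "2. What are the benefits of using Task Decomposition in LLM Powered Autonomous Agents?\n"
)
print(extract_numbered_questions(raw))
# ['1. How does Task Decomposition work in LLM Powered Autonomous Agents?',
#  '2. What are the benefits of using Task Decomposition in LLM Powered Autonomous Agents?']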
] }, { @@ -508,23 +518,36 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 8929.09 ms\n", - "llama_print_timings: sample time = 88.29 ms / 125 runs ( 0.71 ms per token, 1415.76 tokens per second)\n", - "llama_print_timings: prompt eval time = 8928.89 ms / 97 tokens ( 92.05 ms per token, 10.86 tokens per second)\n", - "llama_print_timings: eval time = 8130.48 ms / 124 runs ( 65.57 ms per token, 15.25 tokens per second)\n", - "llama_print_timings: total time = 17310.78 ms\n", - "Fetching pages: 100%|######################################################################################################################################################################################################| 1/1 [00:00<00:00, 3.90it/s]\n" + "llama_print_timings: load time = 16109.97 ms\n", + "llama_print_timings: sample time = 113.70 ms / 160 runs ( 0.71 ms per token, 1407.25 tokens per second)\n", + "llama_print_timings: prompt eval time = 16109.88 ms / 101 tokens ( 159.50 ms per token, 6.27 tokens per second)\n", + "llama_print_timings: eval time = 10335.34 ms / 159 runs ( 65.00 ms per token, 15.38 tokens per second)\n", + "llama_print_timings: total time = 26759.93 ms\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?', 'text': LineList(lines=['1. How does Task Decomposition work in LLM Powered Autonomous Agents?\\n', '2. What are the benefits of using Task Decomposition in LLM Powered Autonomous Agents?\\n', '3. Can you explain the process of Task Decomposition in LLM Powered Autonomous Agents with examples?\\n', '4. How does Task Decomposition improve the performance of LLM Powered Autonomous Agents?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. How does Task Decomposition work in LLM Powered Autonomous Agents?\\n', '2. What are the benefits of using Task Decomposition in LLM Powered Autonomous Agents?\\n', '3. Can you explain the process of Task Decomposition in LLM Powered Autonomous Agents with examples?\\n', '4. How does Task Decomposition improve the performance of LLM Powered Autonomous Agents?\\n']\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?\" , (2)\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... 
Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", + "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:New URLs to load: ['https://lilianweng.github.io/posts/2023-06-23-agent/']\n", + "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", + "Fetching pages: 100%|#######################################################################################################################################| 1/1 [00:00<00:00, 10.01it/s]\n" ] }, { "data": { "text/plain": [ - "{'question': 'How do LLM Powered Autonomous Agents work?',\n", - " 'answer': 'LLM Powered Autonomous Agents work by utilizing a large language model (LLM) as the core controller of the agent. The agent system consists of several key components, including planning, memory, and tool use. In terms of planning, the agent breaks down complex tasks into smaller subgoals and can reflect on past actions to improve future results. The memory component includes both short-term and long-term memory, allowing the agent to learn in-context and retain and recall information over extended periods. The tool use component involves the agent calling external APIs for additional information. There are also case studies, such as scientific discovery agents and generative agents simulations, that demonstrate the capabilities of LLM-powered autonomous agents. However, there are challenges, such as the limited context length, difficulties in long-term planning and task decomposition, and the reliability of natural language interfaces. Self-reflection is also an important aspect of LLM-powered autonomous agents, allowing them to refine past actions and learn from mistakes. There are different approaches to implementing LLM-powered agents, including LLM+P, which involves using an external classical planner for long-horizon planning, and ReAct, which integrates reasoning and acting within LLM. \\n\\n',\n", - " 'sources': '\\n- https://lilianweng.github.io/posts/2023-06-23-agent/'}" + "{'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?',\n", + " 'answer': 'Task Decomposition in LLM Powered Autonomous Agents refers to the process of breaking down large tasks into smaller, manageable subgoals. This allows the agent to efficiently handle complex tasks. Task decomposition is one of the components of the planning phase in LLM-powered autonomous agents. 
\\n',\n", + " 'sources': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}" ] }, - "execution_count": 7, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -542,6 +565,14 @@ "result = qa_chain({\"question\": user_input})\n", "result" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "212439ca", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py index d7816638b11ed..a60128b21b519 100644 --- a/libs/langchain/langchain/retrievers/web_research.py +++ b/libs/langchain/langchain/retrievers/web_research.py @@ -1,6 +1,6 @@ import logging import re -from typing import List +from typing import List, Optional from pydantic import BaseModel, Field @@ -9,6 +9,7 @@ CallbackManagerForRetrieverRun, ) from langchain.chains import LLMChain +from langchain.chains.prompt_selector import ConditionalPromptSelector from langchain.document_loaders import AsyncHtmlLoader from langchain.document_transformers import Html2TextTransformer from langchain.llms import LlamaCpp @@ -68,7 +69,7 @@ def parse(self, text: str) -> LineList: class WebResearchRetriever(BaseRetriever): # Inputs vectorstore: VectorStore = Field( - ..., description="Vector store for handling document embeddings" + ..., description="Vector store for storing web pages" ) llm_chain: LLMChain search: GoogleSearchAPIWrapper = Field(..., description="Google Search API Wrapper") @@ -91,6 +92,7 @@ def from_llm( vectorstore: VectorStore, llm: BaseLLM, search: GoogleSearchAPIWrapper, + prompt: Optional[PromptTemplate] = None, max_splits_per_doc: int = 100, num_search_results: int = 1, text_splitter: RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter( @@ -100,9 +102,10 @@ def from_llm( """Initialize from llm using default template. 
Args: +            vectorstore: Vector store for storing web pages +            llm: llm for search question generation              search: GoogleSearchAPIWrapper -            llm: llm for search question generation using DEFAULT_SEARCH_PROMPT -            search_prompt: prompt to generating search questions +            prompt: prompt for generating search questions              max_splits_per_doc: Maximum splits per document to keep              num_search_results: Number of pages per Google search              text_splitter: Text splitter for splitting web pages into chunks @@ -111,11 +114,14 @@              WebResearchRetriever          """  -        if isinstance(llm, LlamaCpp): -            prompt = DEFAULT_LLAMA_SEARCH_PROMPT - -        else: -            prompt = DEFAULT_SEARCH_PROMPT +        if not prompt: +            QUESTION_PROMPT_SELECTOR = ConditionalPromptSelector( +                default_prompt=DEFAULT_SEARCH_PROMPT, +                conditionals=[ +                    (lambda llm: isinstance(llm, LlamaCpp), DEFAULT_LLAMA_SEARCH_PROMPT) +                ], +            ) +            prompt = QUESTION_PROMPT_SELECTOR.get_prompt(llm)          # Use chat model prompt          llm_chain = LLMChain(  From 305ca5e40d58639acbe0d8814c3e0a6ff907ac30 Mon Sep 17 00:00:00 2001 From: Lance Martin Date: Tue, 25 Jul 2023 11:42:33 -0700 Subject: [PATCH 08/11] fmt --- .../modules/data_connection/retrievers/web_research.ipynb | 4 ++-- libs/langchain/langchain/retrievers/web_research.py           | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/extras/modules/data_connection/retrievers/web_research.ipynb b/docs/extras/modules/data_connection/retrievers/web_research.ipynb index 43c38010f0bb5..9f929f84223bc 100644 --- a/docs/extras/modules/data_connection/retrievers/web_research.ipynb +++ b/docs/extras/modules/data_connection/retrievers/web_research.ipynb @@ -55,8 +55,8 @@    "llm = ChatOpenAI(temperature=0)\n",    "\n",    "# Search \n", -    "os.environ[\"GOOGLE_CSE_ID\"] = \"xxx\"\n", -    "os.environ[\"GOOGLE_API_KEY\"] = \"xxx\"\n", +    "os.environ[\"GOOGLE_CSE_ID\"] = \"xxx\"\n", +    "os.environ[\"GOOGLE_API_KEY\"] = \"xxx\"\n",    "search = GoogleSearchAPIWrapper()"   ]  }, diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py index a60128b21b519..675347f20c895 100644 --- a/libs/langchain/langchain/retrievers/web_research.py +++ b/libs/langchain/langchain/retrievers/web_research.py @@ -15,7 +15,7 @@  from langchain.llms import LlamaCpp  from langchain.llms.base import BaseLLM  from langchain.output_parsers.pydantic import PydanticOutputParser -from langchain.prompts import PromptTemplate +from langchain.prompts import BasePromptTemplate, PromptTemplate  from langchain.schema import BaseRetriever, Document  from langchain.text_splitter import RecursiveCharacterTextSplitter  from langchain.utilities import GoogleSearchAPIWrapper @@ -92,7 +92,7 @@ def from_llm(          vectorstore: VectorStore,          llm: BaseLLM,          search: GoogleSearchAPIWrapper, -        prompt: Optional[PromptTemplate] = None, +        prompt: Optional[BasePromptTemplate] = None,          max_splits_per_doc: int = 100,          num_search_results: int = 1,          text_splitter: RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter(  From b7859104e4ca958d732225828605965e5a5ad92a Mon Sep 17 00:00:00 2001 From: Lance Martin Date: Tue, 25 Jul 2023 14:44:58 -0700 Subject: [PATCH 09/11] Address comments --- .../retrievers/web_research.ipynb             | 233 ++++++++---------- .../langchain/retrievers/web_research.py     |  26 +- 2 files changed, 111 insertions(+), 148 deletions(-) diff --git a/docs/extras/modules/data_connection/retrievers/web_research.ipynb b/docs/extras/modules/data_connection/retrievers/web_research.ipynb index 
9f929f84223bc..2d1a35f9bdde6 100644 --- a/docs/extras/modules/data_connection/retrievers/web_research.ipynb +++ b/docs/extras/modules/data_connection/retrievers/web_research.ipynb @@ -18,7 +18,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "4abea0a0", + "id": "13548212", "metadata": {}, "outputs": [], "source": [ @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "e63d1c8b", "metadata": {}, "outputs": [], @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "id": "118b50aa", "metadata": {}, "outputs": [], @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "0b330acd", "metadata": { "scrolled": false @@ -97,18 +97,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "Fetching pages: 100%|#######################################################################################################################################| 3/3 [00:01<00:00, 1.79it/s]\n" + "Fetching pages: 100%|###################################################################################################################################| 1/1 [00:00<00:00, 3.33it/s]\n" ] }, { "data": { "text/plain": [ "{'question': 'How do LLM Powered Autonomous Agents work?',\n", - " 'answer': 'LLM Powered Autonomous Agents work by utilizing a large language model (LLM) as the core controller of the agent. The agent is complemented by several key components, including planning, memory, and tool use. The planning component involves task decomposition and self-reflection. The memory component includes short-term and long-term memory, which allows the agent to learn and retain information. The tool use component enables the agent to call external APIs for additional information. There are also case studies and challenges associated with building LLM-powered autonomous agents. \\n\\n',\n", - " 'sources': '\\n- https://lilianweng.github.io/posts/2023-06-23-agent/'}" + " 'answer': \"LLM Powered Autonomous Agents work by using LLM (large language model) as the core controller of the agent's brain. It is complemented by several key components, including planning, memory, and tool use. The agent system is designed to be a powerful general problem solver. \\n\",\n", + " 'sources': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}" ] }, - "execution_count": 8, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -133,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "id": "2c4e8ab3", "metadata": {}, "outputs": [ @@ -142,17 +142,17 @@ "output_type": "stream", "text": [ "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?', 'text': LineList(lines=['1. How do LLM powered autonomous agents use task decomposition?\\n', '2. Why is task decomposition important for LLM powered autonomous agents?\\n', '3. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '4. What are the benefits of task decomposition in LLM powered autonomous agents?\\n'])}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. How do LLM powered autonomous agents use task decomposition?\\n', '2. Why is task decomposition important for LLM powered autonomous agents?\\n', '3. 
Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '4. What are the benefits of task decomposition in LLM powered autonomous agents?\\n']\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?', 'text': LineList(lines=['1. How do LLM powered autonomous agents utilize task decomposition?\\n', '2. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '3. What role does task decomposition play in the functioning of LLM powered autonomous agents?\\n', '4. Why is task decomposition important for LLM powered autonomous agents?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. How do LLM powered autonomous agents utilize task decomposition?\\n', '2. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '3. What role does task decomposition play in the functioning of LLM powered autonomous agents?\\n', '4. Why is task decomposition important for LLM powered autonomous agents?\\n']\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?\" , (2)\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... In a LLM-powered autonomous agent system, LLM functions as the ... Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... 
Agent System Overview In a LLM-powered autonomous agent system, ... Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:New URLs to load: []\n" ] } @@ -166,37 +166,6 @@ "docs = web_research_retriever.get_relevant_documents(user_input)" ] }, - { - "cell_type": "markdown", - "id": "94d30c39", - "metadata": {}, - "source": [ - "`Look at the URLs loaded`" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "51621ebd", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['https://lilianweng.github.io/posts/2023-06-23-agent/']" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "web_research_retriever.get_urls()" - ] - }, { "cell_type": "markdown", "id": "b681a846", @@ -209,17 +178,17 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 7, "id": "ceca5681", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'Task decomposition in LLM-powered autonomous agents refers to the process of breaking down complex tasks into smaller, more manageable subtasks. This allows the agent to efficiently handle and solve complex problems by dividing them into smaller steps. Task decomposition can be done using various techniques, such as prompting the LLM with specific instructions or using human inputs. The goal is to transform a large task into multiple smaller tasks that can be easily understood and executed by the agent.'" + "'Task decomposition in LLM-powered autonomous agents refers to the process of breaking down a complex task into smaller, more manageable subgoals. This allows the agent to efficiently handle and execute the individual steps required to complete the overall task. By decomposing the task, the agent can prioritize and organize its actions, making it easier to plan and execute the necessary steps towards achieving the desired outcome.'" ] }, - "execution_count": 11, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -243,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 8, "id": "3d84ea47", "metadata": {}, "outputs": [], @@ -289,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 9, "id": "851b0471", "metadata": { "scrolled": false @@ -300,20 +269,20 @@ "output_type": "stream", "text": [ "INFO:langchain.retrievers.web_research:Generating questions for Google Search ...\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?', 'text': LineList(lines=['1. How do LLM powered autonomous agents utilize task decomposition?\\n', '2. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '3. What role does task decomposition play in the functioning of LLM powered autonomous agents?\\n', '4. Why is task decomposition important for LLM powered autonomous agents?\\n'])}\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. How do LLM powered autonomous agents utilize task decomposition?\\n', '2. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '3. What role does task decomposition play in the functioning of LLM powered autonomous agents?\\n', '4. 
Why is task decomposition important for LLM powered autonomous agents?\\n']\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?', 'text': LineList(lines=['1. How do LLM powered autonomous agents use task decomposition?\\n', '2. Why is task decomposition important for LLM powered autonomous agents?\\n', '3. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '4. What are the benefits of task decomposition in LLM powered autonomous agents?\\n'])}\n", + "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. How do LLM powered autonomous agents use task decomposition?\\n', '2. Why is task decomposition important for LLM powered autonomous agents?\\n', '3. Can you explain the concept of task decomposition in LLM powered autonomous agents?\\n', '4. What are the benefits of task decomposition in LLM powered autonomous agents?\\n']\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?\" , (2)\\xa0...'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... In a LLM-powered autonomous agent system, LLM functions as the ... Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", "INFO:langchain.retrievers.web_research:Searching for relevat urls ...\n", - "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n", + "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... 
Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n", "INFO:langchain.retrievers.web_research:New URLs to load: ['https://lilianweng.github.io/posts/2023-06-23-agent/']\n", "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n", - "Fetching pages: 100%|#######################################################################################################################################| 1/1 [00:00<00:00, 8.41it/s]\n" + "Fetching pages: 100%|###################################################################################################################################| 1/1 [00:00<00:00, 6.32it/s]\n" ] } ], @@ -331,17 +300,17 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 10, "id": "1ee52163", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "3" + "1" ] }, - "execution_count": 14, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -362,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 11, "id": "8cf0d155", "metadata": {}, "outputs": [ @@ -386,7 +355,8 @@ "llama_model_load_internal: model size = 13B\n", "llama_model_load_internal: ggml ctx size = 0.09 MB\n", "llama_model_load_internal: mem required = 9132.71 MB (+ 1608.00 MB per state)\n", - "llama_new_context_with_model: kv self size = 3200.00 MB\n" + "llama_new_context_with_model: kv self size = 3200.00 MB\n", + "ggml_metal_init: allocating\n" ] }, { @@ -401,41 +371,40 @@ "name": "stderr", "output_type": "stream", "text": [ - "ggml_metal_init: allocating\n", "ggml_metal_init: using MPS\n", "ggml_metal_init: loading '/Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/ggml-metal.metal'\n", - "ggml_metal_init: loaded kernel_add 0x105bdb800\n", - "ggml_metal_init: loaded kernel_mul 0x105bdcec0\n", - "ggml_metal_init: loaded kernel_mul_row 0x105bde260\n", - "ggml_metal_init: loaded kernel_scale 0x105bdd120\n", - "ggml_metal_init: loaded kernel_silu 0x105bdd380\n", - "ggml_metal_init: loaded kernel_relu 0x105bdf760\n", - "ggml_metal_init: loaded kernel_gelu 0x2cc9deb10\n", - "ggml_metal_init: loaded kernel_soft_max 0x105bdf9c0\n", - "ggml_metal_init: loaded kernel_diag_mask_inf 0x105bdff80\n", - "ggml_metal_init: loaded kernel_get_rows_f16 0x105be0620\n", - "ggml_metal_init: loaded kernel_get_rows_q4_0 0x105be0d20\n", - "ggml_metal_init: loaded kernel_get_rows_q4_1 0x105be14b0\n", - "ggml_metal_init: loaded kernel_get_rows_q2_K 0x105be1cc0\n", - "ggml_metal_init: loaded kernel_get_rows_q3_K 0x105be2390\n", - "ggml_metal_init: loaded kernel_get_rows_q4_K 0x105be2a70\n", - "ggml_metal_init: loaded kernel_get_rows_q5_K 0x1099db7f0\n", - "ggml_metal_init: loaded kernel_get_rows_q6_K 0x29fd2e530\n", - "ggml_metal_init: loaded kernel_rms_norm 0x105be3400\n", - "ggml_metal_init: loaded kernel_norm 0x105be37f0\n", - "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x105be4880\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x1099dc280\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x29fd326a0\n", - "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x29fd32900\n", - "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x105be4d80\n", - "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x105be51a0\n", - "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x105be6160\n", - "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x105be6900\n", - "ggml_metal_init: loaded kernel_rope 0x29fd40a00\n", - "ggml_metal_init: 
loaded kernel_alibi_f32 0x29fd41000\n", - "ggml_metal_init: loaded kernel_cpy_f32_f16 0x29f5d3a70\n", - "ggml_metal_init: loaded kernel_cpy_f32_f32 0x29f5d3cd0\n", - "ggml_metal_init: loaded kernel_cpy_f16_f16 0x29f5d4d80\n", + "ggml_metal_init: loaded kernel_add 0x110fbd600\n", + "ggml_metal_init: loaded kernel_mul 0x110fbeb30\n", + "ggml_metal_init: loaded kernel_mul_row 0x110fbf350\n", + "ggml_metal_init: loaded kernel_scale 0x110fbf9e0\n", + "ggml_metal_init: loaded kernel_silu 0x110fc0150\n", + "ggml_metal_init: loaded kernel_relu 0x110fbd950\n", + "ggml_metal_init: loaded kernel_gelu 0x110fbdbb0\n", + "ggml_metal_init: loaded kernel_soft_max 0x110fc14d0\n", + "ggml_metal_init: loaded kernel_diag_mask_inf 0x110fc1980\n", + "ggml_metal_init: loaded kernel_get_rows_f16 0x110fc22a0\n", + "ggml_metal_init: loaded kernel_get_rows_q4_0 0x110fc2ad0\n", + "ggml_metal_init: loaded kernel_get_rows_q4_1 0x110fc3260\n", + "ggml_metal_init: loaded kernel_get_rows_q2_K 0x110fc3ad0\n", + "ggml_metal_init: loaded kernel_get_rows_q3_K 0x110fc41c0\n", + "ggml_metal_init: loaded kernel_get_rows_q4_K 0x110fc48c0\n", + "ggml_metal_init: loaded kernel_get_rows_q5_K 0x110fc4fa0\n", + "ggml_metal_init: loaded kernel_get_rows_q6_K 0x110fc56a0\n", + "ggml_metal_init: loaded kernel_rms_norm 0x110fc5da0\n", + "ggml_metal_init: loaded kernel_norm 0x110fc64d0\n", + "ggml_metal_init: loaded kernel_mul_mat_f16_f32 0x2a5c19990\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_0_f32 0x2a5c1d4a0\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_1_f32 0x2a5c19fc0\n", + "ggml_metal_init: loaded kernel_mul_mat_q2_K_f32 0x2a5c1dcc0\n", + "ggml_metal_init: loaded kernel_mul_mat_q3_K_f32 0x2a5c1e420\n", + "ggml_metal_init: loaded kernel_mul_mat_q4_K_f32 0x2a5c1edc0\n", + "ggml_metal_init: loaded kernel_mul_mat_q5_K_f32 0x2a5c1fd90\n", + "ggml_metal_init: loaded kernel_mul_mat_q6_K_f32 0x2a5c20540\n", + "ggml_metal_init: loaded kernel_rope 0x2a5c20d40\n", + "ggml_metal_init: loaded kernel_alibi_f32 0x2a5c21730\n", + "ggml_metal_init: loaded kernel_cpy_f32_f16 0x2a5c21ab0\n", + "ggml_metal_init: loaded kernel_cpy_f32_f32 0x2a5c22080\n", + "ggml_metal_init: loaded kernel_cpy_f16_f16 0x2a5c231d0\n", "ggml_metal_init: recommendedMaxWorkingSetSize = 21845.34 MB\n", "ggml_metal_init: hasUnifiedMemory = true\n", "ggml_metal_init: maxTransferRate = built-in GPU\n", @@ -443,10 +412,10 @@ "ggml_metal_add_buffer: allocated 'eval ' buffer, size = 1040.00 MB, ( 8024.52 / 21845.34)\n", "ggml_metal_add_buffer: allocated 'kv ' buffer, size = 3202.00 MB, (11226.52 / 21845.34)\n", "ggml_metal_add_buffer: allocated 'scr0 ' buffer, size = 597.00 MB, (11823.52 / 21845.34)\n", - "ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (12335.52 / 21845.34)\n", "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n", - "objc[14995]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c6758208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x5f1248208). One of the two will be used. 
Which one is undefined.\n", - "objc[14995]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c6758208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x5f1674208). One of the two will be used. Which one is undefined.\n" + "ggml_metal_add_buffer: allocated 'scr1 ' buffer, size = 512.00 MB, (12335.52 / 21845.34)\n", + "objc[33471]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c7368208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libreplit-mainline-metal.dylib (0x5ebf48208). One of the two will be used. Which one is undefined.\n", + "objc[33471]: Class GGMLMetalClass is implemented in both /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/llama_cpp/libllama.dylib (0x2c7368208) and /Users/rlm/miniforge3/envs/llama/lib/python3.9/site-packages/gpt4all/llmodel_DO_NOT_MODIFY/build/libllamamodel-mainline-metal.dylib (0x5ec374208). One of the two will be used. Which one is undefined.\n" ] } ], @@ -485,7 +454,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "id": "3e0561ca", "metadata": {}, "outputs": [ @@ -500,17 +469,13 @@ "name": "stdout", "output_type": "stream", "text": [ - " Sure, here are five Google search queries that are similar to \"What is Task Decomposition in LLM Powered Autonomous Agents?\"\n", + " Sure, here are five Google search queries that are similar to \"What is Task Decomposition in LLM Powered Autonomous Agents?\":\n", "\n", - "1. How does Task Decomposition work in LLM Powered Autonomous Agents?\n", - "\n", - "2. What are the benefits of using Task Decomposition in LLM Powered Autonomous Agents?\n", - "\n", - "3. Can you explain the process of Task Decomposition in LLM Powered Autonomous Agents with examples?\n", - "\n", - "4. How does Task Decomposition improve the performance of LLM Powered Autonomous Agents?\n", - "\n", - "5. What are some common tasks that can be decomposed using Task Decomposition in LLM Powered Autonomous Agents?" + "1. How does Task Decomposition work in LLM Powered Autonomous Agents? \n", + "2. What are the benefits of using Task Decomposition in LLM Powered Autonomous Agents? \n", + "3. Can you provide examples of Task Decomposition in LLM Powered Autonomous Agents? \n", + "4. How does Task Decomposition improve the performance of LLM Powered Autonomous Agents? \n", + "5. What are some common challenges or limitations of using Task Decomposition in LLM Powered Autonomous Agents, and how can they be addressed?" ] }, { @@ -518,41 +483,64 @@ "output_type": "stream", "text": [ "\n", - "llama_print_timings: load time = 16109.97 ms\n", - "llama_print_timings: sample time = 113.70 ms / 160 runs ( 0.71 ms per token, 1407.25 tokens per second)\n", - "llama_print_timings: prompt eval time = 16109.88 ms / 101 tokens ( 159.50 ms per token, 6.27 tokens per second)\n", - "llama_print_timings: eval time = 10335.34 ms / 159 runs ( 65.00 ms per token, 15.38 tokens per second)\n", - "llama_print_timings: total time = 26759.93 ms\n", - "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?', 'text': LineList(lines=['1. How does Task Decomposition work in LLM Powered Autonomous Agents?\\n', '2. 
What are the benefits of using Task Decomposition in LLM Powered Autonomous Agents?\\n', '3. Can you explain the process of Task Decomposition in LLM Powered Autonomous Agents with examples?\\n', '4. How does Task Decomposition improve the performance of LLM Powered Autonomous Agents?\\n'])}\n",
-    "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. How does Task Decomposition work in LLM Powered Autonomous Agents?\\n', '2. What are the benefits of using Task Decomposition in LLM Powered Autonomous Agents?\\n', '3. Can you explain the process of Task Decomposition in LLM Powered Autonomous Agents with examples?\\n', '4. How does Task Decomposition improve the performance of LLM Powered Autonomous Agents?\\n']\n",
+    "llama_print_timings: load time = 8585.01 ms\n",
+    "llama_print_timings: sample time = 124.24 ms / 164 runs ( 0.76 ms per token, 1320.04 tokens per second)\n",
+    "llama_print_timings: prompt eval time = 8584.83 ms / 101 tokens ( 85.00 ms per token, 11.76 tokens per second)\n",
+    "llama_print_timings: eval time = 7268.55 ms / 163 runs ( 44.59 ms per token, 22.43 tokens per second)\n",
+    "llama_print_timings: total time = 16236.13 ms\n",
+    "INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?', 'text': LineList(lines=['1. How does Task Decomposition work in LLM Powered Autonomous Agents? \\n', '2. What are the benefits of using Task Decomposition in LLM Powered Autonomous Agents? \\n', '3. Can you provide examples of Task Decomposition in LLM Powered Autonomous Agents? \\n', '4. How does Task Decomposition improve the performance of LLM Powered Autonomous Agents? \\n'])}\n",
+    "INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. How does Task Decomposition work in LLM Powered Autonomous Agents? \\n', '2. What are the benefits of using Task Decomposition in LLM Powered Autonomous Agents? \\n', '3. Can you provide examples of Task Decomposition in LLM Powered Autonomous Agents? \\n', '4. How does Task Decomposition improve the performance of LLM Powered Autonomous Agents? \\n']\n",
     "INFO:langchain.retrievers.web_research:Searching for relevant urls ...\n",
     "INFO:langchain.retrievers.web_research:Searching for relevant urls ...\n",
     "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n",
     "INFO:langchain.retrievers.web_research:Searching for relevant urls ...\n",
     "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?\" , (2)\\xa0...'}]\n",
     "INFO:langchain.retrievers.web_research:Searching for relevant urls ...\n",
-    "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... 
Task decomposition can be done (1) by LLM with simple prompting like \"Steps for XYZ.\\\\n1.\" , \"What are the subgoals for achieving XYZ?'}]\n",
+    "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... A complicated task usually involves many steps. An agent needs to know what they are and plan ahead. Task Decomposition#. Chain of thought (CoT;\\xa0...'}]\n",
     "INFO:langchain.retrievers.web_research:Searching for relevant urls ...\n",
     "INFO:langchain.retrievers.web_research:Search results: [{'title': \"LLM Powered Autonomous Agents | Lil'Log\", 'link': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'snippet': 'Jun 23, 2023 ... Agent System Overview In a LLM-powered autonomous agent system, ... Task decomposition can be done (1) by LLM with simple prompting like\\xa0...'}]\n",
     "INFO:langchain.retrievers.web_research:New URLs to load: ['https://lilianweng.github.io/posts/2023-06-23-agent/']\n",
     "INFO:langchain.retrievers.web_research:Grabbing most relevant splits from urls ...\n",
-    "Fetching pages: 100%|#######################################################################################################################################| 1/1 [00:00<00:00, 10.01it/s]\n"
+    "Fetching pages: 100%|###################################################################################################################################| 1/1 [00:00<00:00, 10.49it/s]\n",
+    "Llama.generate: prefix-match hit\n"
    ]
   },
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    " The content discusses Task Decomposition in LLM Powered Autonomous Agents, which involves breaking down large tasks into smaller, manageable subgoals for efficient handling of complex tasks.\n",
+    "SOURCES:\n",
+    "https://lilianweng.github.io/posts/2023-06-23-agent/"
+   ]
+  },
+  {
+   "name": "stderr",
+   "output_type": "stream",
+   "text": [
+    "\n",
+    "llama_print_timings: load time = 8585.01 ms\n",
+    "llama_print_timings: sample time = 52.88 ms / 72 runs ( 0.73 ms per token, 1361.55 tokens per second)\n",
+    "llama_print_timings: prompt eval time = 125925.13 ms / 2358 tokens ( 53.40 ms per token, 18.73 tokens per second)\n",
+    "llama_print_timings: eval time = 3504.16 ms / 71 runs ( 49.35 ms per token, 20.26 tokens per second)\n",
+    "llama_print_timings: total time = 129584.60 ms\n"
+   ]
+  },
   {
    "data": {
     "text/plain": [
      "{'question': 'What is Task Decomposition in LLM Powered Autonomous Agents?',\n",
-      " 'answer': 'Task Decomposition in LLM Powered Autonomous Agents refers to the process of breaking down large tasks into smaller, manageable subgoals. This allows the agent to efficiently handle complex tasks. Task decomposition is one of the components of the planning phase in LLM-powered autonomous agents. 
\\n',\n",
+      " 'answer': ' The content discusses Task Decomposition in LLM Powered Autonomous Agents, which involves breaking down large tasks into smaller, manageable subgoals for efficient handling of complex tasks.\\n',\n",
       " 'sources': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}"
      ]
     },
-    "execution_count": 16,
+    "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
+   "from langchain.chains import RetrievalQAWithSourcesChain\n",
    "# Initialize\n",
    "web_research_retriever = WebResearchRetriever.from_llm(\n",
    "    vectorstore=vectorstore_llama,\n",
    "    llm=llama, \n",
    "    search=search, \n",
    ")\n",
    "\n",
    "# Run\n",
-   "qa_chain = RetrievalQAWithSourcesChain.from_chain_type(llm,retriever=web_research_retriever)\n",
+   "user_input = \"What is Task Decomposition in LLM Powered Autonomous Agents?\"\n",
+   "qa_chain = RetrievalQAWithSourcesChain.from_chain_type(llama,retriever=web_research_retriever)\n",
    "result = qa_chain({\"question\": user_input})\n",
    "result"
   ]
- },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "id": "212439ca",
-  "metadata": {},
-  "outputs": [],
-  "source": []
  }
 ],
 "metadata": {
diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py
index 675347f20c895..ba266691eebe4 100644
--- a/libs/langchain/langchain/retrievers/web_research.py
+++ b/libs/langchain/langchain/retrievers/web_research.py
@@ -79,9 +79,6 @@ class WebResearchRetriever(BaseRetriever):
         RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=50),
         description="Text splitter for splitting web pages into chunks",
     )
-    urls: List[str] = Field(
-        default_factory=list, description="Current URLs being processed"
-    )
     url_database: List[str] = Field(
         default_factory=list, description="List of processed URLs"
     )
@@ -141,16 +138,9 @@ def from_llm(
 
     def search_tool(self, query: str, num_search_results: int = 1) -> List[dict]:
         """Returns num_search_results pages per Google search."""
-        try:
-            result = self.search.results(query, num_search_results)
-        except Exception as e:
-            raise Exception(f"Error: {str(e)}")
+        result = self.search.results(query, num_search_results)
         return result
 
-    def get_urls(self) -> List[str]:
-        """Return the list of URLs fetched during the most recent query."""
-        return self.urls
-
     def _get_relevant_documents(
         self,
         query: str,
@@ -186,7 +176,6 @@ def _get_relevant_documents(
 
         # Relevant urls
         urls = set(urls_to_look)
-        self.urls = list(urls)
 
         # Check for any new urls that we have not processed
         new_urls = list(urls.difference(self.url_database))
@@ -197,19 +186,12 @@ def _get_relevant_documents(
             loader = AsyncHtmlLoader(new_urls)
             html2text = Html2TextTransformer()
             logger.info("Grabbing most relevant splits from urls ...")
-            filtered_splits = []
+            _splits = []
             text_splitter = self.text_splitter
             for doc in html2text.transform_documents(loader.load()):
                 doc_splits = text_splitter.split_documents([doc])
-                # Protect against very large documents
-                if len(doc_splits) > self.max_splits_per_doc:
-                    logger.info(
-                        f"{doc.metadata} has too many splits ({len(doc_splits)}), "
-                        f"keeping only the first {self.max_splits_per_doc}"
-                    )
-                    doc_splits = doc_splits[: self.max_splits_per_doc]
-                filtered_splits.extend(doc_splits)
-            self.vectorstore.add_documents(filtered_splits)
+                _splits.extend(doc_splits)
+            self.vectorstore.add_documents(_splits)
             self.url_database.extend(new_urls)
 
         # Search for relevant splits

From e5963591ba103d08acff89f4bf95a2a236037e79 Mon Sep 17 00:00:00 2001
From: Harrison Chase
Date: Tue, 25 Jul 2023 18:35:35 -0700
Subject: [PATCH 10/11] cr --- .../langchain/langchain/retrievers/web_research.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py index ba266691eebe4..10ac9d06ef72e 100644 --- a/libs/langchain/langchain/retrievers/web_research.py +++ b/libs/langchain/langchain/retrievers/web_research.py @@ -185,16 +185,16 @@ def _get_relevant_documents( if new_urls: loader = AsyncHtmlLoader(new_urls) html2text = Html2TextTransformer() - logger.info("Grabbing most relevant splits from urls ...") - _splits = [] - text_splitter = self.text_splitter - for doc in html2text.transform_documents(loader.load()): - doc_splits = text_splitter.split_documents([doc]) - _splits.extend(doc_splits) - self.vectorstore.add_documents(_splits) + logger.info("Indexing new urls...") + docs = loader.load() + docs = html2text.transform_documents(docs) + docs = self.text_splitter.split_documents(docs) + self.vectorstore.add_documents(docs) self.url_database.extend(new_urls) # Search for relevant splits + # TODO: make this async + logger.info("Grabbing most relevant splits from urls...") docs = [] for query in questions: docs.extend(self.vectorstore.similarity_search(query)) From a9ca6c7f3feb3e39800efe079eb72644e19bef19 Mon Sep 17 00:00:00 2001 From: Harrison Chase Date: Tue, 25 Jul 2023 18:48:39 -0700 Subject: [PATCH 11/11] cr --- libs/langchain/langchain/retrievers/web_research.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain/langchain/retrievers/web_research.py b/libs/langchain/langchain/retrievers/web_research.py index 10ac9d06ef72e..a5ad41007f370 100644 --- a/libs/langchain/langchain/retrievers/web_research.py +++ b/libs/langchain/langchain/retrievers/web_research.py @@ -187,7 +187,7 @@ def _get_relevant_documents( html2text = Html2TextTransformer() logger.info("Indexing new urls...") docs = loader.load() - docs = html2text.transform_documents(docs) + docs = list(html2text.transform_documents(docs)) docs = self.text_splitter.split_documents(docs) self.vectorstore.add_documents(docs) self.url_database.extend(new_urls)
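After these last two patches, `_get_relevant_documents` has a clear two-phase shape: first index any not-yet-seen URLs (async load, HTML-to-text, split, `add_documents`), then run one `similarity_search` against the vectorstore per generated question. For reference, a minimal end-to-end sketch of the API as it stands at PATCH 11, built from the notebook cells above; the `GoogleSearchAPIWrapper` construction and the environment variables are assumptions, since neither appears in these hunks:

    # Sketch only: assumes GOOGLE_API_KEY, GOOGLE_CSE_ID, and OPENAI_API_KEY are
    # set in the environment, and that GoogleSearchAPIWrapper is the `search`
    # object passed to from_llm (its setup is not shown in the hunks above).
    from langchain.chains import RetrievalQAWithSourcesChain
    from langchain.chat_models.openai import ChatOpenAI
    from langchain.embeddings import OpenAIEmbeddings
    from langchain.retrievers.web_research import WebResearchRetriever
    from langchain.utilities import GoogleSearchAPIWrapper
    from langchain.vectorstores import Chroma

    llm = ChatOpenAI(temperature=0)
    vectorstore = Chroma(embedding_function=OpenAIEmbeddings())
    search = GoogleSearchAPIWrapper()

    retriever = WebResearchRetriever.from_llm(
        vectorstore=vectorstore,
        llm=llm,
        search=search,
    )

    # get_relevant_documents() generates related queries, searches Google for
    # each, indexes any new URLs (AsyncHtmlLoader -> Html2TextTransformer ->
    # text splitter -> vectorstore), then similarity-searches per question.
    qa_chain = RetrievalQAWithSourcesChain.from_chain_type(llm, retriever=retriever)
    result = qa_chain({"question": "What is Task Decomposition in LLM Powered Autonomous Agents?"})
    print(result["answer"], result["sources"])

Because `url_database` persists on the retriever instance, a second call with an overlapping query should skip re-indexing already-seen URLs and go straight to the similarity search.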