Skip to content

Commit

Permalink
Merge branch 'main' into default_model_provider_v1
Browse files Browse the repository at this point in the history
  • Loading branch information
aws-khatria authored Nov 4, 2023
2 parents fad17c5 + ec75ba8 commit 8608b1e
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 12 deletions.
17 changes: 15 additions & 2 deletions docs/source/users/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ Jupyter AI supports the following model providers:
| AI21 | `ai21` | `AI21_API_KEY` | `ai21` |
| Anthropic | `anthropic` | `ANTHROPIC_API_KEY` | `anthropic` |
| Anthropic (chat) | `anthropic-chat` | `ANTHROPIC_API_KEY` | `anthropic` |
| Bedrock | `amazon-bedrock` | N/A | `boto3` |
| Bedrock (chat) | `amazon-bedrock-chat`| N/A | `boto3` |
| Bedrock | `bedrock` | N/A | `boto3` |
| Bedrock (chat) | `bedrock-chat` | N/A | `boto3` |
| Cohere | `cohere` | `COHERE_API_KEY` | `cohere` |
| Hugging Face Hub | `huggingface_hub` | `HUGGINGFACEHUB_API_TOKEN` | `huggingface_hub`, `ipywidgets`, `pillow` |
| OpenAI | `openai` | `OPENAI_API_KEY` | `openai` |
Expand Down Expand Up @@ -492,6 +492,19 @@ use the `-c` or `--chunk-size` option and the `-o` or `--chunk-overlap` option.
/learn --chunk-size 1000 --chunk-overlap 200 <directory>
```

By default, `/learn` skips directories named `node_modules`, `lib`, or `build`,
as well as hidden files and hidden directories (those whose names start with a `.`).
To force `/learn` to read all supported file types in all directories,
use the `-a` or `--all-files` option.

```
# default: skip hidden files and directories, and any node_modules, lib, or build directories
/learn <directory>
# learn from all supported files, including those in hidden and normally excluded directories
/learn -a <directory>
```

### Additional chat commands

To clear the chat panel, use the `/clear` command. This does not reset the AI model; the model may still remember previous messages that you sent it, and it may use them to inform its responses.
Expand Down
11 changes: 8 additions & 3 deletions packages/jupyter-ai/jupyter_ai/chat_handlers/learn.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def __init__(
self.root_dir = root_dir
self.dask_client_future = dask_client_future
self.parser.prog = "/learn"
self.parser.add_argument("-a", "--all-files", action="store_true")
self.parser.add_argument("-v", "--verbose", action="store_true")
self.parser.add_argument("-d", "--delete", action="store_true")
self.parser.add_argument("-l", "--list", action="store_true")
Expand Down Expand Up @@ -115,7 +116,9 @@ async def _process_message(self, message: HumanChatMessage):
if args.verbose:
self.reply(f"Loading and splitting files for {load_path}", message)

await self.learn_dir(load_path, args.chunk_size, args.chunk_overlap)
await self.learn_dir(
load_path, args.chunk_size, args.chunk_overlap, args.all_files
)
self.save()

response = f"""🎉 I have learned documents at **{load_path}** and I am ready to answer questions about them.
Expand All @@ -132,7 +135,9 @@ def _build_list_response(self):
{dir_list}"""
return message

async def learn_dir(self, path: str, chunk_size: int, chunk_overlap: int):
async def learn_dir(
self, path: str, chunk_size: int, chunk_overlap: int, all_files: bool
):
dask_client = await self.dask_client_future
splitter_kwargs = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
splitters = {
Expand All @@ -146,7 +151,7 @@ async def learn_dir(self, path: str, chunk_size: int, chunk_overlap: int):
default_splitter=RecursiveCharacterTextSplitter(**splitter_kwargs),
)

delayed = split(path, splitter=splitter)
delayed = split(path, all_files, splitter=splitter)
doc_chunks = await dask_client.compute(delayed)

em_provider_cls, em_provider_args = self.get_embedding_provider()
Expand Down
15 changes: 8 additions & 7 deletions packages/jupyter-ai/jupyter_ai/document_loaders/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,11 @@ def path_to_doc(path):
return Document(page_content=text, metadata=metadata)


# Unless /learn is passed the "all files" option, files and directories whose names begin
# with '.' are excluded, as are directories with any of the names below
EXCLUDE_DIRS = {
".ipynb_checkpoints",
"node_modules",
"lib",
"build",
".git",
".DS_Store",
}
SUPPORTED_EXTS = {
".py",
Expand All @@ -50,12 +48,15 @@ def flatten(*chunk_lists):
return list(itertools.chain(*chunk_lists))


def split(path, splitter):
def split(path, all_files: bool, splitter):
chunks = []

for dir, _, filenames in os.walk(path):
if dir in EXCLUDE_DIRS:
continue
for dir, subdirs, filenames in os.walk(path):
# Filter out hidden filenames, hidden directories, and excluded directories,
# unless "all files" are requested
if not all_files:
subdirs[:] = [d for d in subdirs if not (d[0] == "." or d in EXCLUDE_DIRS)]
filenames = [f for f in filenames if not f[0] == "."]

for filename in filenames:
filepath = Path(os.path.join(dir, filename))
Expand Down

0 comments on commit 8608b1e

Please sign in to comment.