llama_index examples

rafalposwiata · Apr 15, 2023 · 2f0db9f
1 parent 1fdf898
commit 2f0db9f
Show file tree

Hide file tree

Showing 4 changed files with 56 additions and 1 deletion.
diff --git a/1_qna.py b/1_qna.py
@@ -32,4 +32,5 @@ def query(q):
 
 query("What are the effects of legislations surrounding emissions on the Australia coal market?")
 query("What are China's plans with renewable energy?")
-query("Is there an export ban on Coal in Indonesia? Why?")
+query("Is there an export ban on Coal in Indonesia? Why?")
+query("Who are the main exporters of Coal to China? What is the role of Indonesia in this?")
diff --git a/2_llama.py b/2_llama.py
@@ -4,6 +4,7 @@
 load_dotenv()
 
 documents = SimpleDirectoryReader('news').load_data()
+
 index = GPTSimpleVectorIndex.from_documents(documents)
 
 # save to disk

diff --git a/2b_llama_chroma.py b/2b_llama_chroma.py
@@ -0,0 +1,29 @@
+from llama_index import GPTChromaIndex, SimpleDirectoryReader
+import chromadb
+
+from dotenv import load_dotenv
+
+load_dotenv()
+# Chroma embeddings reference: https://docs.trychroma.com/embeddings
+# Create a Chroma client; by default it operates purely in-memory.
+chroma_client = chromadb.Client()
+
+# create a collection
+chroma_collection = chroma_client.create_collection("newspieces")
+# Chroma API reference (e.g. count, get): https://docs.trychroma.com/api-reference
+print(chroma_collection.count())
+
+documents = SimpleDirectoryReader('news').load_data()
+
+index = GPTChromaIndex.from_documents(documents, chroma_collection=chroma_collection)
+print(chroma_collection.count())
+print(chroma_collection.get()['documents'])
+print(chroma_collection.get()['metadatas'])
+
+index.save_to_disk("newspieces.json")
+
+# At query time, the index uses Chroma to retrieve the top-k most similar
+# nodes and synthesizes an answer from the retrieved nodes.
+
+r = index.query("Who are the main exporters of Coal to China? What is the role of Indonesia in this?")
+print(r)
diff --git a/newspieces.json b/newspieces.json
@@ -0,0 +1,24 @@
+{
+    "index_struct": {
+        "__type__": "chroma",
+        "__data__": {
+            "index_id": "6c885f2c-9b0d-44b6-ad81-ce34ad3ccc87",
+            "summary": null,
+            "nodes_dict": {},
+            "doc_id_dict": {},
+            "embeddings_dict": {}
+        }
+    },
+    "docstore": {
+        "docs": {},
+        "ref_doc_info": {
+            "eba4909c-2870-445c-90a2-8f07c29c31ff": {
+                "doc_hash": "1abedf1e096b34455d8e4cd5b65b87329a34b30e8ce3c6fabd000241522e5081"
+            },
+            "9b7ab721-a18f-4c48-b103-242c790390f6": {
+                "doc_hash": "e55b12ed31aceb1a6b09488c2686b3290405c6d682d19a0c3719f1502681f416"
+            }
+        }
+    },
+    "vector_store": {}
+}