Setting things up
alexpinel committed Mar 15, 2024
0 parents commit 586b55c
Showing 65 changed files with 41,507 additions and 0 deletions.
10 changes: 10 additions & 0 deletions .gitignore
@@ -0,0 +1,10 @@
python/
llama-2-7b-chat.Q4_K_M.gguf
node_modules/
mistral-7b-v0.1.Q4_K_M.gguf
mistral-7b-instruct-v0.2.Q4_K_M.gguf
src/mystuff/
src/.env
dist
.DS_Store
mpnet/
13 changes: 13 additions & 0 deletions Bluepoint-Dot-added-improvements.code-workspace
@@ -0,0 +1,13 @@
{
  "folders": [
    {
      "name": "Bluepoint-Dot-added-improvements",
      "path": "."
    },
    {
      "name": "src",
      "path": "../../Library/Mobile Documents/com~apple~CloudDocs/.Trash/src"
    }
  ],
  "settings": {}
}
674 changes: 674 additions & 0 deletions LICENSE


Empty file added README.md
Empty file.
24 changes: 24 additions & 0 deletions entitlements.mac.plist
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
  <dict>
    <key>com.apple.security.cs.allow-dyld-environment-variables</key>
    <true/>
    <key>com.apple.security.cs.disable-library-validation</key>
    <true/>
    <key>com.apple.security.cs.allow-jit</key>
    <true/>
    <key>com.apple.security.cs.allow-unsigned-executable-memory</key>
    <true/>
    <key>com.apple.security.cs.debugger</key>
    <true/>
    <key>com.apple.security.network.client</key>
    <true/>
    <key>com.apple.security.files.user-selected.read-only</key>
    <true/>
    <key>com.apple.security.inherit</key>
    <true/>
    <key>com.apple.security.automation.apple-events</key>
    <true/>
  </dict>
</plist>
38 changes: 38 additions & 0 deletions forge.config.js
@@ -0,0 +1,38 @@
const path = require('path');
const fs = require('fs');
const ncp = require('ncp').ncp;

module.exports = {
  packagerConfig: {
    icon: "./src/Assets/icon.icns",
    asar: true,
    name: "dot",
  },
  rebuildConfig: {},
  makers: [
    {
      name: '@electron-forge/maker-squirrel',
      config: {},
    },
    {
      name: '@electron-forge/maker-dmg',
      config: {
        name: 'dot', // Replace with your app name
      },
    },
    {
      name: '@electron-forge/maker-deb',
      config: {},
    },
    {
      name: '@electron-forge/maker-rpm',
      config: {},
    },
  ],
  plugins: [
    {
      name: '@electron-forge/plugin-auto-unpack-natives',
      config: {},
    },
  ],
};
6 changes: 6 additions & 0 deletions lib/utils.ts
@@ -0,0 +1,6 @@
import { clsx, type ClassValue } from "clsx"
import { twMerge } from "tailwind-merge"

export function cn(...inputs: ClassValue[]) {
  return twMerge(clsx(inputs))
}
80 changes: 80 additions & 0 deletions llm/scripts/bigdot.py
@@ -0,0 +1,80 @@
import sys
import json
import os

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import LlamaCpp
from langchain.memory import ConversationBufferWindowMemory


n_gpu_layers = 1  # For Metal, 1 GPU layer is enough.
n_batch = 512  # Should be between 1 and n_ctx; adjust for the amount of RAM on your Apple Silicon chip.


# Find the current script's directory
script_dir = os.path.dirname(__file__)

# Construct the relative path
relative_model_path = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
model_path = os.path.join(script_dir, relative_model_path)


llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    f16_kv=True,  # MUST be set to True on Mac, otherwise you will run into problems after a couple of calls
    # callback_manager=callback_manager,
    # verbose=True,  # Verbose is required to pass to the callback manager
    max_tokens=2000,
    temperature=0.6,
    n_ctx=8000,
)
# Notice that "chat_history" is present in the prompt template
template = """You are called Dot, you were made by Bluepoint, You are a helpful and honest assistant. Always answer as helpfully as possible.
Previous conversation:
{chat_history}
New conversation: {question}
Response:"""
prompt = PromptTemplate.from_template(template)
# Notice that we need to align the `memory_key`
memory = ConversationBufferWindowMemory(memory_key="chat_history", k=2)
conversation = LLMChain(
    llm=llm,
    prompt=prompt,
    verbose=False,
    memory=memory,
)




def send_response(response):
    # Convert the response to JSON
    response_json = json.dumps({"result": response})

    # Print the JSON to stdout
    print(response_json)

    # Flush stdout to ensure the message is sent immediately
    sys.stdout.flush()

if __name__ == "__main__":
    while True:
        user_input = sys.stdin.readline().strip()
        if not user_input:
            break

        prompt = user_input
        result = conversation({"question": prompt})['text']

        # Split the result into chunks of maximum length (e.g., 1000 characters)
        max_chunk_length = 1000
        chunks = [result[i:i + max_chunk_length] for i in range(0, len(result), max_chunk_length)]

        # Send the chunks as an array
        send_response(chunks)
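
bigdot.py speaks a simple line protocol: one prompt per line on stdin, one JSON object of the form {"result": [chunks]} per answer on stdout. A minimal test client for that protocol is sketched below, assuming the script is launched with a plain `python` interpreter and the model file already sits in llm/scripts/ (the interpreter name and paths are assumptions, not part of this commit):

import json
import subprocess

# Launch bigdot.py as a child process with text-mode pipes (sketch; interpreter and path are assumptions).
proc = subprocess.Popen(
    ["python", "llm/scripts/bigdot.py"],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    text=True,
)

def ask(question: str) -> list:
    # Write one prompt per line, then read back one JSON line: {"result": [chunk, ...]}
    proc.stdin.write(question.strip() + "\n")
    proc.stdin.flush()
    reply = proc.stdout.readline()
    return json.loads(reply)["result"]

print("".join(ask("What can you help me with?")))

In the app itself this role is presumably played by the Electron main process rather than a Python client, but the wire format it has to handle is the same.
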
143 changes: 143 additions & 0 deletions llm/scripts/docdot.py
@@ -0,0 +1,143 @@
import sys
import json
import os

from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager
from langchain.llms import LlamaCpp


# Path to the user's Documents folder
documents_path = os.path.join(os.path.expanduser("~"), "Documents")

# Name of the folder that holds the FAISS index
folder_name = "Dot-data"

# Combine the Documents path and folder name
folder_path = os.path.join(documents_path, folder_name)

# Create the folder if it doesn't exist
if not os.path.exists(folder_path):
    os.makedirs(folder_path)




current_directory = os.path.dirname(os.path.realpath(__file__))
model_directory = os.path.join(current_directory, '..', 'mpnet')

#print("Model Directory:", os.path.abspath(model_directory))

### LOAD EMBEDDING SETTINGS
embeddings = HuggingFaceEmbeddings(model_name=model_directory, model_kwargs={'device': 'mps'})  # Set to 'cpu' for PC
vector_store = FAISS.load_local(os.path.join(folder_path, "Dot-data"), embeddings)
n_gpu_layers = 1  # For Metal, 1 GPU layer is enough.
n_batch = 512  # Should be between 1 and n_ctx; adjust for the amount of RAM on your Apple Silicon chip.


# Find the current script's directory
script_dir = os.path.dirname(__file__)

# Construct the relative path
relative_model_path = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
model_path = os.path.join(script_dir, relative_model_path)


llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    f16_kv=True,  # MUST be set to True on Mac, otherwise you will run into problems after a couple of calls
    max_tokens=2000,
    temperature=0.01,
    n_ctx=8000,
)

DEFAULT_SYSTEM_PROMPT ="""
You are a good, honest assistant.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you do not know the answer to a question, make it clear you do not know the answer instead of making up false information.
""".strip()

def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    return f"""
[INST] <<SYS>>
{system_prompt}
<</SYS>>
{prompt} [/INST]
""".strip()

SYSTEM_PROMPT ="Use the following pieces of context to answer the question at the end. If you do not know the answer, just say you don't know, don't try to make up an answer."

template = generate_prompt(
    """
{context}
Question: {question}
""",
    system_prompt=SYSTEM_PROMPT,
)


qa_prompt = PromptTemplate(template=template, input_variables=['context', 'question'])

#start=timeit.default_timer()

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=vector_store.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True,
    chain_type_kwargs={'prompt': qa_prompt},
)


def chat(input_text):
    # Run the retrieval QA chain on the user's question and return the answer text
    user_input = str(input_text)
    result = chain({'query': user_input})['result']
    return result



def send_response(response):
    # Convert the response to JSON
    response_json = json.dumps({"result": response})

    # Print the JSON to stdout
    print(response_json)

    # Flush stdout to ensure the message is sent immediately
    sys.stdout.flush()

if __name__ == "__main__":
    while True:
        # Read input continuously from stdin
        line = sys.stdin.readline().strip()
        if not line:
            break

        # Use the entire line as user input
        user_input = line

        # Perform your processing on user_input
        result = chat(user_input)

        # Split the result into chunks of maximum length (e.g., 1000 characters)
        max_chunk_length = 1000
        chunks = [result[i:i + max_chunk_length] for i in range(0, len(result), max_chunk_length)]

        # Send the chunks as an array
        send_response(chunks)
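
docdot.py only loads an existing FAISS index from ~/Documents/Dot-data/Dot-data; the commit never shows how that index is built, though the otherwise unused PyPDFLoader, DirectoryLoader, and RecursiveCharacterTextSplitter imports hint at the intended pipeline. A minimal ingestion sketch under that assumption follows (the source folder, glob pattern, and chunk sizes are illustrative, not part of this commit):

import os

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Hypothetical folder of PDFs to index; not part of the commit.
source_dir = os.path.join(os.path.expanduser("~"), "Documents", "Dot-docs")
# docdot.py calls FAISS.load_local(os.path.join(folder_path, "Dot-data"), ...), i.e. ~/Documents/Dot-data/Dot-data.
index_dir = os.path.join(os.path.expanduser("~"), "Documents", "Dot-data", "Dot-data")

# Load every PDF in the folder and split it into overlapping chunks for retrieval.
documents = DirectoryLoader(source_dir, glob="**/*.pdf", loader_cls=PyPDFLoader).load()
chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(documents)

# Embed with the same local mpnet model docdot.py resolves, then persist the index layout it expects.
model_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "mpnet")
embeddings = HuggingFaceEmbeddings(model_name=model_directory, model_kwargs={"device": "cpu"})
FAISS.from_documents(chunks, embeddings).save_local(index_dir)

Keeping ingestion separate from docdot.py means the chat path never pays the embedding cost at question time; it only has to find the folder layout that FAISS.load_local expects.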
