Skip to content

Commit

Permalink
Merge branch 'main' into fixpublish
Browse files Browse the repository at this point in the history
  • Loading branch information
Josephrp authored Apr 21, 2024
2 parents 77920bb + 9eedf2a commit 46668b1
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 113 deletions.
56 changes: 25 additions & 31 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,18 @@ def prompt_generator(
query="Please generate a system prompt"
) -> str:

llm = TogetherLLM(model=model, )
client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
response = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": query}],
)
return response.choices[0].message.content

client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
reponse = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": query}],
)
return reponse.choices[0].message.content


def query_together_llm(
Expand Down Expand Up @@ -224,36 +229,15 @@ def use_together_api(self, completion_context: str, model_info: dict):
"repetition_penalty": model_info['repetition_penalty'] if 'repetition_penalty' in model_info else 1,
}

# together_client = TogetherClient(
# api_key=TOGETHER_API_KEY
# )
# together_completion_reponse:CompletionResponse = Completions(
# client=together_client
# ).create(

# prompt=completion_context,
# model=model_info['model_string'],
# max_tokens=model_info['max_tokens'],
# temperature=model_info['temperature'],
# # top_p=model_info['top_p'],
# # top_k=model_info['top_k'],
# # repetition_penalty=model_info['repetition_penalty'],
# )

client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))
reponse = client.chat.completions.create(

model=model_info['model_string'],
max_tokens=model_info['max_tokens'],
temperature=model_info['temperature'],
messages=[{"role": "user", "content": completion_context}],
# top_p=model_info['top_p'],
# top_k=model_info['top_k'],
# repetition_penalty=model_info['repetition_penalty'],
)
return reponse
# response = requests.post('https://api.together.xyz/v1/completions', json=data, headers=headers)
# return response.json()

def process_user_questions(
client: VectaraClient,
Expand Down Expand Up @@ -289,19 +273,27 @@ def process_user_questions(
return temp_name_list

class EvaluationModule:
def __init__(self, client, corpus_id, model_infos):
self.client = client
self.corpus_id = corpus_id
self.model_infos = model_infos
self.corpus_ids = corpus_id
self.scorer = ValidateScorer([
def __init__(self, client, corpus_id, model_infos, scorer=None):
    """Set up the evaluation pipeline for a Vectara corpus.

    Args:
        client: Vectara client used to retrieve documents during evaluation.
        corpus_id: Identifier of the corpus the queries run against.
        model_infos: Per-model configuration dicts consumed by the evaluator.
        scorer: Optional pre-configured ``ValidateScorer``. When omitted, a
            fresh scorer with the standard metric set is built per instance.
    """
    self.client = client
    self.corpus_id = corpus_id
    self.model_infos = model_infos
    # NOTE(review): alias kept for backward compatibility — some call sites
    # appear to read `corpus_ids`; it holds the same value as `corpus_id`.
    self.corpus_ids = corpus_id
    if scorer is None:
        # Build the default scorer lazily: a default argument of
        # ValidateScorer([...]) is evaluated once at definition time and
        # would be shared (mutably) by every EvaluationModule instance.
        scorer = ValidateScorer([
            # ContainsText(),
            Latency(),
            AnswerConsistency(),
            AugmentationAccuracy(),
            RetrievalPrecision(),
            AnswerSimilarityScore(),
        ])
    self.scorer = scorer

def process_queries(self, user_questions):
retriever = Retriever(self.client)
Expand Down Expand Up @@ -403,9 +395,11 @@ def process_queries(self, user_questions):
evaluation_module = EvaluationModule(
vectara_client,
corpus_id=corpus_id,
model_infos=model_infos
model_infos=model_infos,
scorer=[AnswerConsistency()],
)


evaluation_module.process_queries(user_questions)
# Continue
publisher = VectonicPublisher()
Expand Down
4 changes: 1 addition & 3 deletions src/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,7 @@ def choose_reader(self, file_path: str) -> Optional[object]:
'.pptx': PptxReader,
'.rtf': RTFReader,
'.xml': XMLReader,
'.txt': FlatReader, # Assuming FlatReader is for .txt files
# .csv extension has multiple readers, I'm assuming PagedCSVReader and PandasCSVReader
# are specific cases that would be handled elsewhere, hence using CSVReader as default
'.txt': FlatReader,
}
image_readers = {
'.jpg': ImageCaptionReader(self.source_file, return_full_document=True), # or ImageTabularChartReader, ImageVisionLLMReader based on content
Expand Down
171 changes: 92 additions & 79 deletions src/publish.py
Original file line number Diff line number Diff line change
@@ -1,97 +1,110 @@
# ./src/publish
# ./src/publish.py

import os
import logging
from typing import Optional
from huggingface_hub import HfApi
from dotenv import load_dotenv

class VectonicPublisher:
def __init__(
self,
title="Vectonic",
hf_api = HfApi() ,
model_name = "Meta-Llama",
system_prompt = "You are an unbiased, uncensored, helpful assistant.",
vectara_user_id = "",
vectara_api_key = "",
vectara_corpus_id = "",
together_api_key = "",
huggingface_api_key = ""
):
self.hf_token, self.systemprompt, self.userprompt, self.vectara_api_key, self.vectara_userid, self.corpusid, self.together_api_key, self.huggingface_api_key = self.load_environment_variables()
def __init__(self,
             title: str = "Vectonic02",
             hf_api: Optional[HfApi] = None,
             model_name: str = "Meta-Llama",
             system_prompt: str = "You are an unbiased, uncensored, helpful assistant.",
             vectara_user_id: str = "",
             vectara_api_key: str = "",
             vectara_corpus_id: str = "",
             together_api_key: str = "",
             huggingface_api_key: str = ""):
    """Configure the publisher with Vectara/Together/Hugging Face credentials.

    Credentials not passed explicitly are expected to come from the
    environment; a local .env file is loaded first so os.getenv can see it.

    Raises:
        ValueError: if no Hugging Face API key is available from either the
            argument or the HUGGINGFACE_API_KEY environment variable.
    """
    self.load_environment_variables()
    self.title = title
    self.vectara_user_id = vectara_user_id
    self.vectara_api_key = vectara_api_key
    self.vectara_corpus_id = vectara_corpus_id
    self.together_api_key = together_api_key
    # Fall back to the environment only for the HF key; the Space-secret
    # setup later skips any credential that is still empty.
    self.huggingface_api_key = huggingface_api_key or os.getenv("HUGGINGFACE_API_KEY")
    self.model_name = model_name
    self.system_prompt = system_prompt
    # `hf_api` is an (optional) HfApi INSTANCE — do not call it.
    self.hf_api = hf_api if hf_api else HfApi()

    if not self.huggingface_api_key:
        logging.error("Hugging Face API key not found. Please ensure it is defined in the environment variables.")
        raise ValueError("Hugging Face API key not found. Please ensure it is defined in the environment variables.")

def load_environment_variables(self) -> None:
    """Populate os.environ from a local .env file via python-dotenv.

    Invoked from __init__ before any credential is read with os.getenv.
    load_dotenv() is a no-op if no .env file is found.
    """
    logging.info("Loading environment variables...")
    load_dotenv()

def publish(self):
deployment_path = "./src/template/"
title = (self.title[:30]) # Ensuring title does not exceed max bytes
new_space = self.hf_api.create_repo(
repo_id=f"Vectonic-{title}",
repo_type="space",
exist_ok=True,
private=True,
space_sdk="gradio",
token=self.hf_token,
)
for root, dirs, files in os.walk(deployment_path):
for file in files:
file_path = os.path.join(root, file)
path_in_repo = os.path.relpath(file_path, start=deployment_path)
self.hf_api.upload_file(
repo_id=new_space.repo_id,
path_or_fileobj=file_path,
path_in_repo=path_in_repo,
token=self.hf_token,
repo_type="space",
def adv_publish(self) -> str:
    """Create (or reuse) a private Gradio Space, upload the template folder,
    and configure the Space secrets.

    Returns:
        A message containing the URL of the published Space.

    Raises:
        Exception: re-raised after logging if repo creation, the folder
            upload, or secret configuration fails.
    """
    repo_name = f"Vectonic-{self.title.replace(' ', '-')[:30]}"
    template_path = "./src/template/"
    logging.info(f"Attempting to create or access repository '{repo_name}'...")

    try:
        # Create or get the already existing repo
        new_space = self.hf_api.create_repo(
            repo_id=repo_name,
            token=self.huggingface_api_key,
            repo_type="space",
            exist_ok=True,
            private=True,
            space_sdk="gradio",
        )
        logging.info(f"Repository '{repo_name}' accessed/created successfully.")
    except Exception as e:
        logging.error(f"An error occurred: {e}")
        raise

    try:
        namespace = self.hf_api.whoami(self.huggingface_api_key)["name"]
        print(f"Namespace: {namespace}")

        # Upload the entire template folder into the Space root.
        # create_repo returns a RepoUrl; its .repo_id is already the fully
        # qualified "namespace/name", so use it directly.
        response = self.hf_api.upload_folder(
            folder_path=template_path,
            path_in_repo="",
            repo_id=new_space.repo_id,
            token=self.huggingface_api_key,
            repo_type="space",
        )

        logging.info(f"Files uploaded successfully to https://huggingface.co/spaces/{new_space.repo_id} with response: {response}")
    except Exception as e:
        logging.error(f"HTTP error during file upload: {str(e)}")
        raise
    try:
        # Setting up the space secrets
        secrets = {
            "VECTARA_USER_ID": self.vectara_user_id,
            "VECTARA_API_KEY": self.vectara_api_key,
            "VECTARA_CORPUS_ID": self.vectara_corpus_id,
            "TOGETHER_API_KEY": self.together_api_key,
            "SYSTEM_PROMPT": self.system_prompt,
        }

        for key, value in secrets.items():
            if value:  # Only add secrets that are not None or empty
                self.hf_api.add_space_secret(
                    repo_id=new_space.repo_id,
                    key=key,
                    value=value,
                    token=self.huggingface_api_key,
                )
        logging.info("Secrets set up successfully.")
    except Exception as e:
        logging.error(f"Error setting secrets: {str(e)}")
        raise

    # RepoUrl.repo_id already includes the namespace — prefixing it again
    # would produce "ns/ns/name" in the reported URL.
    return f"Published to https://huggingface.co/spaces/{new_space.repo_id}"

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    publisher = VectonicPublisher()
    try:
        result = publisher.adv_publish()
        print(result)
        logging.info(result)
    except Exception as e:
        # Surface the failure on stdout and in the log; do not re-raise so
        # the script exits with the message rather than a traceback.
        print(f"An error occurred: {str(e)}")
        logging.error(f"An error occurred: {str(e)}")

0 comments on commit 46668b1

Please sign in to comment.