Merge pull request #210 from acon96/release/v0.3.6
Release v0.3.6
acon96 authored Aug 21, 2024
2 parents 9c24ff8 + 506aaf4 commit f037241
Showing 7 changed files with 171 additions and 44 deletions.
23 changes: 21 additions & 2 deletions .github/workflows/create-release.yml
@@ -54,7 +54,26 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4


- name: Verify version match
if: startsWith(github.event.ref, 'refs/tags/v')
run: |
tag_version=$(echo ${{ github.ref }} | sed 's/refs\/tags\/v//')
component_version_manifest=$(jq -r '.version' custom_components/llama_conversation/manifest.json)
component_version_const=$(cat custom_components/llama_conversation/const.py | grep "INTEGRATION_VERSION" | tr -d ' ' | tr -d '"' | tr -d 'INTEGRATION_VERSION=')
if [ "$tag_version" != "$component_version_manifest" ]; then
echo "The version in the GitHub tag ($tag_version) does not match the version in the Home Assistant custom component manifest ($component_version_manifest)!"
exit 1
fi
if [ "$tag_version" != "$component_version_const" ]; then
echo "The version in the GitHub tag ($tag_version) does not match the version in const.py ($component_version_const)!"
exit 1
fi
echo "All required versions match."
- name: Read llama-cpp-python version
run: cat custom_components/llama_conversation/const.py | grep "EMBEDDED_LLAMA_CPP_PYTHON_VERSION" | tr -d ' ' | tr -d '"' >> $GITHUB_ENV

@@ -109,7 +128,7 @@ jobs:
name: Create Release
needs: [ build_wheels ]
runs-on: ubuntu-latest
if: "startsWith(github.event.ref, 'refs/tags/v')" # only create a release if this was run on a tag
if: startsWith(github.event.ref, 'refs/tags/v')

steps:
- name: Download artifacts
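For context, the new "Verify version match" step is a three-way string comparison between the git tag, `manifest.json`, and `const.py`. A minimal Python sketch of the same check (not part of the commit; it swaps the workflow's `grep`/`tr` pipeline for `json`/`re` parsing and assumes the repository layout shown above):

```python
# Hedged sketch of the "Verify version match" step, reimplemented in Python.
# Assumes the file layout shown in the diff; not the workflow's actual code.
import json
import re

def versions_match(ref: str) -> bool:
    """Return True if a 'refs/tags/vX.Y.Z' ref matches both version strings."""
    tag_version = ref.removeprefix("refs/tags/v")

    # Version declared in the Home Assistant custom component manifest.
    with open("custom_components/llama_conversation/manifest.json") as f:
        manifest_version = json.load(f)["version"]

    # Version declared as INTEGRATION_VERSION = "X.Y.Z" in const.py.
    with open("custom_components/llama_conversation/const.py") as f:
        match = re.search(r'INTEGRATION_VERSION\s*=\s*"([^"]+)"', f.read())
    const_version = match.group(1) if match else ""

    return tag_version == manifest_version == const_version
```

On a checkout tagged `v0.3.6`, `versions_match("refs/tags/v0.3.6")` should return `True` once both files carry that version, mirroring the workflow's pass/fail behavior.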
1 change: 1 addition & 0 deletions README.md
@@ -150,6 +150,7 @@ In order to facilitate running the project entirely on the system where Home Ass
## Version History
| Version | Description |
|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| v0.3.6 | Small llama.cpp backend fixes |
| v0.3.5 | Fix for llama.cpp backend installation, Fix for Home LLM v1-3 API parameters, add Polish ICL examples |
| v0.3.4 | Significantly improved language support including full Polish translation, Update bundled llama-cpp-python to support new models, various bug fixes |
| v0.3.3 | Improvements to the Generic OpenAI Backend, improved area handling, fix issue using RGB colors, remove EOS token from responses, replace requests dependency with aiohttp included with Home Assistant |
2 changes: 1 addition & 1 deletion custom_components/llama_conversation/const.py
@@ -383,5 +383,5 @@
},
}

INTEGRATION_VERSION = "0.3.4"
INTEGRATION_VERSION = "0.3.6"
EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.2.88"
9 changes: 7 additions & 2 deletions custom_components/llama_conversation/conversation.py
@@ -732,7 +732,7 @@ def _generate_icl_examples(self, num_examples, entity_names):

return examples

def _generate_system_prompt(self, prompt_template: str, llm_api: llm.APIInstance) -> str:
def _generate_system_prompt(self, prompt_template: str, llm_api: llm.APIInstance | None) -> str:
"""Generate the system prompt with current entity states"""
entities_to_expose, domains = self._async_get_exposed_entities()

@@ -1076,7 +1076,7 @@ async def _async_cache_prompt(self, entity, old_state, new_state):
refresh_end = time.time()
_LOGGER.debug(f"cache refresh took {(refresh_end - refresh_start):.2f} sec")

def _cache_prompt(self, llm_api: llm.API) -> None:
def _cache_prompt(self, llm_api: llm.APIInstance | None) -> None:
# if a refresh is already scheduled then exit
if self.cache_refresh_after_cooldown:
return
@@ -1165,6 +1165,11 @@ def _generate(self, conversation: dict) -> str:
)

context_len = self.entry.options.get(CONF_CONTEXT_LENGTH, DEFAULT_CONTEXT_LENGTH)
if len(input_tokens) >= context_len:
num_entities = len(self._async_get_exposed_entities()[0])
context_size = self.entry.options.get(CONF_CONTEXT_LENGTH, DEFAULT_CONTEXT_LENGTH)
self._warn_context_size()
raise Exception(f"The model failed to produce a result because too many devices are exposed ({num_entities} devices) for the context size ({context_size} tokens)!")
if len(input_tokens) + max_tokens >= context_len:
self._warn_context_size()

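The guard added to `_generate` separates two failure modes: a prompt that already fills the context window (hard error) and one that only risks truncating the completion (warning). A hedged sketch of that logic, with illustrative names standing in for the integration's own helpers:

```python
# Hedged sketch of the context-window guard added to _generate above.
# check_context_window and the print() warning are stand-ins for the
# integration's internals; only the two threshold checks mirror the diff.
def check_context_window(input_tokens: list[int], max_tokens: int,
                         context_len: int, num_entities: int) -> None:
    if len(input_tokens) >= context_len:
        # The prompt alone fills the window: generation cannot succeed.
        raise Exception(
            f"The model failed to produce a result because too many devices "
            f"are exposed ({num_entities} devices) for the context size "
            f"({context_len} tokens)!")
    if len(input_tokens) + max_tokens >= context_len:
        # The prompt fits, but the completion may be cut short.
        print("warning: prompt plus max_tokens may exceed the context window")
```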
2 changes: 1 addition & 1 deletion custom_components/llama_conversation/manifest.json
@@ -1,7 +1,7 @@
{
"domain": "llama_conversation",
"name": "Local LLM Conversation",
"version": "0.3.5",
"version": "0.3.6",
"codeowners": ["@acon96"],
"config_flow": true,
"dependencies": ["conversation"],
59 changes: 41 additions & 18 deletions data/generate_home_assistant_data.py
@@ -371,6 +371,14 @@ def get_random_state(self, extra_exposed_attributes=[]):
"spanish": "H:m EEEE, d 'de' MMMM 'de' yyyy"
}

USER_INSTRUCTION_PROMPT = {
"english": "User instruction",
"german": "Benutzeranweisung",
"french": "Instruction de l'utilisateur ",
"spanish": "Instrucción del usuario",
"polish": "Instrukcja użytkownika"
}


class NoResponseAvailableException(Exception):
pass
@@ -827,16 +835,21 @@ def generate_dpo_extra_service_call(template: dict, persona: str, max_devices: i
def generate_dpo_incorrect_persona(template: dict, persona: str, max_devices: int = 32):
pass

def format_example_raw_chatml(example, persona, language):
def format_example_raw_chatml(example, persona, language, use_system_role):
"""Don't use this one anymore"""
sys_prompt = pile_of_system_prompts[persona]
services_block = f"{SERVICES_PROMPT[language]}: " + ", ".join(sorted(example["available_services"]))
states_block = f"{DEVICES_PROMPT[language]}:\n" + "\n".join(example["states"])
question = example["question"]
answers = " ".join(example["answers"])

system_block = "\n".join([ "<|im_start|>system", sys_prompt, services_block, states_block ]) + "<|im_end|>"
user_block = "\n".join([ "<|im_start|>user", question]) + "<|im_end|>"
if use_system_role:
system_block = "\n".join([ "<|im_start|>system", sys_prompt, services_block, states_block ]) + "<|im_end|>"
user_block = "\n".join([ "<|im_start|>user", question]) + "<|im_end|>"
else:
user_instruction_words = USER_INSTRUCTION_PROMPT[language] + ":"
system_block = ""
user_block = "\n".join([ "<|im_start|>user", sys_prompt, services_block, states_block, user_instruction_words, question]) + "<|im_end|>"

assistant_block = "<|im_start|>assistant\n" + answers
if len(example["service_calls"]) > 0:
@@ -855,7 +868,7 @@ def format_example_raw_chatml(example, persona, language):
result = result.replace("garage_door.", "cover.")
return { "text": result }

def format_example_sharegpt(example, persona, language):
def format_example_sharegpt(example, persona, language, use_system_role):
sys_prompt = pile_of_system_prompts[persona]
random_datetime = generate_random_datetime()
translate_datetime = babel.dates.format_datetime(random_datetime, BABEL_FORMAT[language], locale=BABEL_LOCALE[language])
Expand All @@ -876,11 +889,18 @@ def format_example_sharegpt(example, persona, language):
states_block = states_block.replace("blinds.", "cover.").replace("garage_door.", "cover.")
services_block = services_block.replace("blinds.", "cover.").replace("garage_door.", "cover.")

conversation = [
{ "from": "system", "value": "\n".join([ sys_prompt, time_block, services_block, states_block ])},
{ "from": "user", "value": question },
{ "from": "assistant", "value": assistant_block },
]
if use_system_role:
conversation = [
{ "from": "system", "value": "\n".join([ sys_prompt, time_block, services_block, states_block ])},
{ "from": "user", "value": question },
{ "from": "assistant", "value": assistant_block },
]
else:
user_instruction_words = USER_INSTRUCTION_PROMPT[language] + ":"
conversation = [
{ "from": "user", "value": "\n".join([ sys_prompt, time_block, services_block, states_block, user_instruction_words, question ]) },
{ "from": "assistant", "value": assistant_block },
]

return { "conversations": conversation }

@@ -918,7 +938,7 @@ def format_example_dpo(example, persona, language):
"rejected": rejected_assistant_block,
}

def generate_sft_file(filename: str, seed: int, format_func: Callable, personas: list[str], language: str, *, static_factor: int, template_factor: int, status_request_factor: int):
def generate_sft_file(filename: str, seed: int, format_func: Callable, use_system_role: bool, personas: list[str], language: str, *, static_factor: int, template_factor: int, status_request_factor: int):
random.seed(seed)
np.random.seed(seed)

@@ -927,10 +947,10 @@ def generate_sft_file(filename: str, seed: int, format_func: Callable, personas:
def run_factor_times(func, examples, data, persona, factor, language):
if factor >= 1:
for i in range(factor):
examples.append(format_func(func(data, persona), persona, language))
examples.append(format_func(func(data, persona), persona, language, use_system_role))
else:
if random.random() < factor:
examples.append(format_func(func(data, persona), persona, language))
examples.append(format_func(func(data, persona), persona, language, use_system_role))

generated_examples = []

@@ -1139,6 +1159,7 @@ def main():
parser.add_argument("--dpo", action="store_true", help="Set this flag to enable generation of the DPO dataset.")
parser.add_argument("--merge", help="Set this flag to merge the generated datasets with the specified dataset.")
parser.add_argument("--language", nargs="+", default=["english"], help="List of languages to generate: english, german, french, spanish, polish")
parser.add_argument("--no-system-role", action="store_true", help="Set this flag to disable the system role. It will be combined with the user role")

train_size_group = parser.add_mutually_exclusive_group()
train_size_group.add_argument('--small', action='store_const', const='small', dest='size')
@@ -1165,26 +1186,28 @@ def main():
elif args.format == "sharegpt":
format_func = format_example_sharegpt

use_system_role = not args.no_system_role

for language in args.language:
load_dataset_piles(language)
personas = list(pile_of_system_prompts.keys())
suffix = f"_{language}" if len(args.language) > 1 else ""

if args.sample:
generate_sft_file(f"sample{suffix}", 42, format_func, personas, language, static_factor=1, template_factor=1, status_request_factor=1)
generate_sft_file(f"sample{suffix}", 42, format_func, use_system_role, personas, language, static_factor=1, template_factor=1, status_request_factor=1)
if args.train:
if args.size == "small":
generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, personas, language, static_factor=1, template_factor=10, status_request_factor=8)
generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, use_system_role, personas, language, static_factor=1, template_factor=10, status_request_factor=8)
elif args.size == "medium":
generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, personas, language, static_factor=5, template_factor=15, status_request_factor=12)
generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, use_system_role, personas, language, static_factor=5, template_factor=15, status_request_factor=12)
elif args.size == "large":
generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, personas, language, static_factor=5, template_factor=20, status_request_factor=15)
generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, use_system_role, personas, language, static_factor=5, template_factor=20, status_request_factor=15)
elif args.size == "xl":
generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, personas, language, static_factor=7, template_factor=25, status_request_factor=18)
generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, use_system_role, personas, language, static_factor=7, template_factor=25, status_request_factor=18)
else:
raise Exception(f"Unrecognized dataset size: {args.size}")
if args.test:
generate_sft_file(f"home_assistant_test{suffix}", 12345, format_func, personas, language, static_factor=0.25, template_factor=1, status_request_factor=2)
generate_sft_file(f"home_assistant_test{suffix}", 12345, format_func, use_system_role, personas, language, static_factor=0.25, template_factor=1, status_request_factor=2)

if len(args.language) > 1:
if args.sample:
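The common thread in this file is the new `use_system_role` flag, threaded from `--no-system-role` down into the formatters: when disabled, the system prompt, context blocks, and a localized "User instruction:" label are folded into the user turn instead of a separate system message. A stripped-down sketch of that branching (illustrative names; the real formatters also build time/services/state blocks and handle ChatML framing):

```python
# Stripped-down sketch of the use_system_role branching added above.
# build_conversation is illustrative; the real format_example_* functions
# assemble time/services/state blocks, collapsed here into one context string.
def build_conversation(sys_prompt: str, context: str, question: str,
                       use_system_role: bool,
                       instruction_label: str = "User instruction") -> list[dict]:
    if use_system_role:
        return [
            {"from": "system", "value": "\n".join([sys_prompt, context])},
            {"from": "user", "value": question},
        ]
    # No system role: prepend the would-be system content to the user turn.
    return [
        {"from": "user", "value": "\n".join(
            [sys_prompt, context, instruction_label + ":", question])},
    ]
```

This keeps models without a system role trainable on the same data: the generator emits one merged user turn rather than dropping the system content.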
