diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml
index b758aec..6473b08 100644
--- a/.github/workflows/create-release.yml
+++ b/.github/workflows/create-release.yml
@@ -54,7 +54,26 @@ jobs:
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
-
+
+      - name: Verify version match
+        if: startsWith(github.event.ref, 'refs/tags/v')
+        run: |
+          tag_version=$(echo ${{ github.ref }} | sed 's/refs\/tags\/v//')
+          component_version_manifest=$(jq -r '.version' custom_components/llama_conversation/manifest.json)
+          component_version_const=$(cat custom_components/llama_conversation/const.py | grep "INTEGRATION_VERSION" | tr -d ' ' | tr -d '"' | tr -d 'INTEGRATION_VERSION=')
+
+          if [ "$tag_version" != "$component_version_manifest" ]; then
+            echo "The version in the GitHub tag ($tag_version) does not match the version in the Home Assistant custom component manifest ($component_version_manifest)!"
+            exit 1
+          fi
+
+          if [ "$tag_version" != "$component_version_const" ]; then
+            echo "The version in the GitHub tag ($tag_version) does not match the version in const.py ($component_version_const)!"
+            exit 1
+          fi
+
+          echo "All required versions match."
+
       - name: Read llama-cpp-python version
         run: cat custom_components/llama_conversation/const.py | grep "EMBEDDED_LLAMA_CPP_PYTHON_VERSION" | tr -d ' ' | tr -d '"' >> $GITHUB_ENV
 
@@ -109,7 +128,7 @@ jobs:
     name: Create Release
     needs: [ build_wheels ]
    runs-on: ubuntu-latest
-    if: "startsWith(github.event.ref, 'refs/tags/v')" # only create a release if this was run on a tag
+    if: startsWith(github.event.ref, 'refs/tags/v')
 
     steps:
       - name: Download artifacts
diff --git a/README.md b/README.md
index 2875a2d..855739a 100644
--- a/README.md
+++ b/README.md
@@ -150,6 +150,7 @@ In order to facilitate running the project entirely on the system where Home Ass
 ## Version History
 | Version | Description |
 |---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| v0.3.6 | Small llama.cpp backend fixes |
 | v0.3.5 | Fix for llama.cpp backend installation, Fix for Home LLM v1-3 API parameters, add Polish ICL examples |
 | v0.3.4 | Significantly improved language support including full Polish translation, Update bundled llama-cpp-python to support new models, various bug fixes |
 | v0.3.3 | Improvements to the Generic OpenAI Backend, improved area handling, fix issue using RGB colors, remove EOS token from responses, replace requests dependency with aiohttp included with Home Assistant |
diff --git a/custom_components/llama_conversation/const.py b/custom_components/llama_conversation/const.py
index e147990..c2f88f2 100644
--- a/custom_components/llama_conversation/const.py
+++ b/custom_components/llama_conversation/const.py
@@ -383,5 +383,5 @@
     },
 }
 
-INTEGRATION_VERSION = "0.3.4"
+INTEGRATION_VERSION = "0.3.6"
 EMBEDDED_LLAMA_CPP_PYTHON_VERSION = "0.2.88"
\ No newline at end of file
diff --git a/custom_components/llama_conversation/conversation.py b/custom_components/llama_conversation/conversation.py
index 76d19a3..6458287 100644
--- a/custom_components/llama_conversation/conversation.py
+++ b/custom_components/llama_conversation/conversation.py
@@ -732,7 +732,7 @@ def _generate_icl_examples(self, num_examples, entity_names):
 
         return examples
 
-    def _generate_system_prompt(self, prompt_template: str, llm_api: llm.APIInstance) -> str:
+    def _generate_system_prompt(self, prompt_template: str, llm_api: llm.APIInstance | None) -> str:
         """Generate the system prompt with current entity states"""
         entities_to_expose, domains = self._async_get_exposed_entities()
 
@@ -1076,7 +1076,7 @@ async def _async_cache_prompt(self, entity, old_state, new_state):
         refresh_end = time.time()
         _LOGGER.debug(f"cache refresh took {(refresh_end - refresh_start):.2f} sec")
 
-    def _cache_prompt(self, llm_api: llm.API) -> None:
+    def _cache_prompt(self, llm_api: llm.APIInstance | None) -> None:
         # if a refresh is already scheduled then exit
         if self.cache_refresh_after_cooldown:
             return
@@ -1165,6 +1165,11 @@ def _generate(self, conversation: dict) -> str:
         )
 
         context_len = self.entry.options.get(CONF_CONTEXT_LENGTH, DEFAULT_CONTEXT_LENGTH)
+        if len(input_tokens) >= context_len:
+            num_entities = len(self._async_get_exposed_entities()[0])
+            context_size = self.entry.options.get(CONF_CONTEXT_LENGTH, DEFAULT_CONTEXT_LENGTH)
+            self._warn_context_size()
+            raise Exception(f"The model failed to produce a result because too many devices are exposed ({num_entities} devices) for the context size ({context_size} tokens)!")
 
         if len(input_tokens) + max_tokens >= context_len:
             self._warn_context_size()
diff --git a/custom_components/llama_conversation/manifest.json b/custom_components/llama_conversation/manifest.json
index 3b8f0a2..a36be09 100644
--- a/custom_components/llama_conversation/manifest.json
+++ b/custom_components/llama_conversation/manifest.json
@@ -1,7 +1,7 @@
 {
   "domain": "llama_conversation",
   "name": "Local LLM Conversation",
-  "version": "0.3.5",
+  "version": "0.3.6",
   "codeowners": ["@acon96"],
   "config_flow": true,
   "dependencies": ["conversation"],
diff --git a/data/generate_home_assistant_data.py b/data/generate_home_assistant_data.py
index daeec4b..4d3dcc5 100644
--- a/data/generate_home_assistant_data.py
+++ b/data/generate_home_assistant_data.py
@@ -371,6 +371,14 @@ def get_random_state(self, extra_exposed_attributes=[]):
     "spanish": "H:m EEEE, d 'de' MMMM 'de' yyyy"
 }
 
+USER_INSTRUCTION_PROMPT = {
+    "english": "User instruction",
+    "german": "Benutzeranweisung",
+    "french": "Instruction de l'utilisateur ",
+    "spanish": "Instrucción del usuario",
+    "polish": "Instrukcja użytkownika"
+}
+
 class NoResponseAvailableException(Exception):
     pass
 
@@ -827,7 +835,7 @@ def generate_dpo_extra_service_call(template: dict, persona: str, max_devices: i
 def generate_dpo_incorrect_persona(template: dict, persona: str, max_devices: int = 32):
     pass
 
-def format_example_raw_chatml(example, persona, language):
+def format_example_raw_chatml(example, persona, language, use_system_role):
     """Don't use this one anymore"""
     sys_prompt = pile_of_system_prompts[persona]
     services_block = f"{SERVICES_PROMPT[language]}: " + ", ".join(sorted(example["available_services"]))
@@ -835,8 +843,13 @@
     question = example["question"]
     answers = " ".join(example["answers"])
 
-    system_block = "\n".join([ "<|im_start|>system", sys_prompt, services_block, states_block ]) + "<|im_end|>"
-    user_block = "\n".join([ "<|im_start|>user", question]) + "<|im_end|>"
+    if use_system_role:
+        system_block = "\n".join([ "<|im_start|>system", sys_prompt, services_block, states_block ]) + "<|im_end|>"
+        user_block = "\n".join([ "<|im_start|>user", question]) + "<|im_end|>"
+    else:
+        user_instruction_words = USER_INSTRUCTION_PROMPT[language] + ":"
+        system_block = ""
+        user_block = "\n".join([ "<|im_start|>user", sys_prompt, services_block, states_block, user_instruction_words, question]) + "<|im_end|>"
     assistant_block = "<|im_start|>assistant\n" + answers
 
     if len(example["service_calls"]) > 0:
@@ -855,7 +868,7 @@
         result = result.replace("garage_door.", "cover.")
     return { "text": result }
 
-def format_example_sharegpt(example, persona, language):
+def format_example_sharegpt(example, persona, language, use_system_role):
     sys_prompt = pile_of_system_prompts[persona]
     random_datetime = generate_random_datetime()
     translate_datetime = babel.dates.format_datetime(random_datetime, BABEL_FORMAT[language], locale=BABEL_LOCALE[language])
@@ -876,11 +889,18 @@
     states_block = states_block.replace("blinds.", "cover.").replace("garage_door.", "cover.")
     services_block = services_block.replace("blinds.", "cover.").replace("garage_door.", "cover.")
 
-    conversation = [
-        { "from": "system", "value": "\n".join([ sys_prompt, time_block, services_block, states_block ])},
-        { "from": "user", "value": question },
-        { "from": "assistant", "value": assistant_block },
-    ]
+    if use_system_role:
+        conversation = [
+            { "from": "system", "value": "\n".join([ sys_prompt, time_block, services_block, states_block ])},
+            { "from": "user", "value": question },
+            { "from": "assistant", "value": assistant_block },
+        ]
+    else:
+        user_instruction_words = USER_INSTRUCTION_PROMPT[language] + ":"
+        conversation = [
+            { "from": "user", "value": "\n".join([ sys_prompt, time_block, services_block, states_block, user_instruction_words, question ]) },
+            { "from": "assistant", "value": assistant_block },
+        ]
 
     return { "conversations": conversation }
 
@@ -918,7 +938,7 @@
         "rejected": rejected_assistant_block,
     }
 
-def generate_sft_file(filename: str, seed: int, format_func: Callable, personas: list[str], language: str, *, static_factor: int, template_factor: int, status_request_factor: int):
+def generate_sft_file(filename: str, seed: int, format_func: Callable, use_system_role: bool, personas: list[str], language: str, *, static_factor: int, template_factor: int, status_request_factor: int):
     random.seed(seed)
     np.random.seed(seed)
 
@@ -927,10 +947,10 @@ def generate_sft_file(filename: str, seed: int, format_func: Callable, personas:
     def run_factor_times(func, examples, data, persona, factor, language):
         if factor >= 1:
             for i in range(factor):
-                examples.append(format_func(func(data, persona), persona, language))
+                examples.append(format_func(func(data, persona), persona, language, use_system_role))
         else:
             if random.random() < factor:
-                examples.append(format_func(func(data, persona), persona, language))
+                examples.append(format_func(func(data, persona), persona, language, use_system_role))
 
     generated_examples = []
 
@@ -1139,6 +1159,7 @@ def main():
     parser.add_argument("--dpo", action="store_true", help="Set this flag to enable generation of the DPO dataset.")
     parser.add_argument("--merge", help="Set this flag to merge the generated datasets with the specified dataset.")
     parser.add_argument("--language", nargs="+", default=["english"], help="List of languages to generate: english, german, french, spanish, polish")
+    parser.add_argument("--no-system-role", action="store_true", help="Set this flag to disable the system role. It will be combined with the user role")
 
     train_size_group = parser.add_mutually_exclusive_group()
     train_size_group.add_argument('--small', action='store_const', const='small', dest='size')
@@ -1165,26 +1186,28 @@
     elif args.format == "sharegpt":
         format_func = format_example_sharegpt
 
+    use_system_role = not args.no_system_role
+
     for language in args.language:
         load_dataset_piles(language)
         personas = list(pile_of_system_prompts.keys())
 
         suffix = f"_{language}" if len(args.language) > 1 else ""
         if args.sample:
-            generate_sft_file(f"sample{suffix}", 42, format_func, personas, language, static_factor=1, template_factor=1, status_request_factor=1)
+            generate_sft_file(f"sample{suffix}", 42, format_func, use_system_role, personas, language, static_factor=1, template_factor=1, status_request_factor=1)
        if args.train:
             if args.size == "small":
-                generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, personas, language, static_factor=1, template_factor=10, status_request_factor=8)
+                generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, use_system_role, personas, language, static_factor=1, template_factor=10, status_request_factor=8)
             elif args.size == "medium":
-                generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, personas, language, static_factor=5, template_factor=15, status_request_factor=12)
+                generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, use_system_role, personas, language, static_factor=5, template_factor=15, status_request_factor=12)
             elif args.size == "large":
-                generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, personas, language, static_factor=5, template_factor=20, status_request_factor=15)
+                generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, use_system_role, personas, language, static_factor=5, template_factor=20, status_request_factor=15)
             elif args.size == "xl":
-                generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, personas, language, static_factor=7, template_factor=25, status_request_factor=18)
+                generate_sft_file(f"home_assistant_train{suffix}", 42, format_func, use_system_role, personas, language, static_factor=7, template_factor=25, status_request_factor=18)
             else:
                 raise Exception(f"Unrecognized dataset size: {args.size}")
         if args.test:
-            generate_sft_file(f"home_assistant_test{suffix}", 12345, format_func, personas, language, static_factor=0.25, template_factor=1, status_request_factor=2)
+            generate_sft_file(f"home_assistant_test{suffix}", 12345, format_func, use_system_role, personas, language, static_factor=0.25, template_factor=1, status_request_factor=2)
 
     if len(args.language) > 1:
         if args.sample:
diff --git a/find_split.py b/find_split.py
index 419d056..609be6d 100644
--- a/find_split.py
+++ b/find_split.py
@@ -1,29 +1,79 @@
 # this script attempts to figure out the correct prefix_ids and suffix_ids for the given model
 # usage: python3 find_split.py
 from transformers import AutoTokenizer
+from jinja2.exceptions import TemplateError
 import sys
 if len(sys.argv) > 1:
     model = sys.argv[1]
 else:
-    model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+    print(f"Usage: {sys.argv[0]} <model>")
+    exit(-1)
 
 prefix_ids = None
 suffix_ids = None
 
 tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)
 
-assistant_prompt = tokenizer.apply_chat_template(
-    conversation=[{"role": "assistant", "content": r"%%%%%%%%%%%%%%%%"}],
+test_prompt = tokenizer.apply_chat_template(
+    conversation=[
+        {"role": "user", "content": r"HA_REQUEST"},
+        {"role": "assistant", "content": r"HA_RESPONSE"}
+    ],
     tokenize=False,
     add_generation_prompt=False,
-).split( r"%%%%%%%%%%%%%%%%")
+)
+
+print("Chat template:")
+print("-" * 100)
+print(test_prompt)
+print("-" * 100)
+
+# Added real example to test the tokenizer
+test_prompt_tokens = tokenizer.apply_chat_template(
+    conversation=[
+        {"role": "system", "content": "this is a system prompt"},
+        {"role": "user", "content": "a user request goes here"},
+        {"role": "assistant", "content": "the response is in here"}
+    ],
+    tokenize=True,
+    add_generation_prompt=False
+)
+
+print("Chat template tokens:")
+print("-" * 100)
+print(test_prompt_tokens)
+print("-" * 100)
+
+try:
+    assistant_prompt = tokenizer.apply_chat_template(
+        conversation=[{"role": "assistant", "content": r"%%%%%%%%%%%%%%%%"}],
+        tokenize=False,
+        add_generation_prompt=False,
+    ).split( r"%%%%%%%%%%%%%%%%")
+except TemplateError:
+    user_prompt = tokenizer.apply_chat_template(
+        conversation=[
+            {"role": "user", "content": r"$$$$$$$$$$$$$$$$"}
+        ],
+        tokenize=False,
+        add_generation_prompt=True,
+    )
+    # some prompt templates require user/assistant alternating
+    assistant_prompt = tokenizer.apply_chat_template(
+        conversation=[
+            {"role": "user", "content": r"$$$$$$$$$$$$$$$$"},
+            {"role": "assistant", "content": r"%%%%%%%%%%%%%%%%"},
+        ],
+        tokenize=False,
+        add_generation_prompt=True,
+    ).split(r"$$$$$$$$$$$$$$$$")[-1].strip().split(r"%%%%%%%%%%%%%%%%")
 
 response_prefix = assistant_prompt[0]
 response_suffix = assistant_prompt[1]
 
 # check for inserted system prompt and remove it
 if tokenizer.eos_token in response_prefix:
-    response_prefix = response_prefix.split(tokenizer.eos_token)[1].lstrip()
+    response_prefix = response_prefix.split(tokenizer.eos_token)[-1].lstrip()
 
 # some chat templates ALWAYS add the bos token
 if tokenizer.bos_token in response_prefix:
@@ -38,20 +88,25 @@
 prefix_ids3 = tokenizer("\n" + response_prefix, add_special_tokens=False)["input_ids"]
 suffix_ids3 = tokenizer("\n" + response_suffix, add_special_tokens=False)["input_ids"]
 
+prefix_ids4 = tokenizer(response_prefix.strip(), add_special_tokens=False)["input_ids"]
+suffix_ids4 = tokenizer(response_suffix.strip(), add_special_tokens=False)["input_ids"]
+
 print(f"Estimated tokens for {model}")
 
 print("response prefix:")
 print(response_prefix)
 print("tokens with no leading whitespace:", prefix_ids)
 print("tokens with leading whitespace:", prefix_ids2)
 print("tokens with leading newline:", prefix_ids3)
+print("tokens with stripped whitespace:", prefix_ids4)
 
-print("---------------")
+print("-" * 100)
 print("response suffix:")
 print(response_suffix)
 print("tokens with no leading whitespace:", suffix_ids)
 print("tokens with leading whitespace:", suffix_ids2)
 print("tokens with leading newline:", suffix_ids3)
+print("tokens with stripped whitespace:", suffix_ids4)
 
 
 def _find_mask_ranges(input_ids, prefix_ids, suffix_ids):
@@ -105,28 +160,52 @@
 
     return inverse_ranges
 
-label = tokenizer.apply_chat_template(
-    conversation=[
-        {"role": "system", "content": "this is a system prompt"},
-        {"role": "user", "content": "a user request goes here"},
-        {"role": "assistant", "content": "the response is in here"}],
-    add_generation_prompt=False,
-)
+try:
+    label = tokenizer.apply_chat_template(
+        conversation=[
+            {"role": "system", "content": "this is a system prompt"},
+            {"role": "user", "content": "a user request goes here"},
+            {"role": "assistant", "content": "the response is in here"}
+        ],
+        add_generation_prompt=False,
+    )
+except TemplateError:
+    # some chat templates don't have a system prompt option
+    label = tokenizer.apply_chat_template(
+        conversation=[
+            {"role": "user", "content": "a user request goes here"},
+            {"role": "assistant", "content": "the response is in here"}
+        ],
+        add_generation_prompt=False,
+    )
 
 
 def check_range(label, name, prefix_ids, suffix_ids):
     label = label[:]
     mask_ranges = _find_mask_ranges(label, prefix_ids, suffix_ids)
+    found = False
     for start, end in mask_ranges:
         if end - start == len(label) - 1:
             print(f"'{name}' did not find the assistant response")
         else:
-            print(f"'{name}' found the assistant response!")
-            print(f"\t--prefix-ids {','.join([str(x) for x in prefix_ids])}")
-            print(f"\t--suffix-ids {','.join([str(x) for x in suffix_ids])}")
-            break
-
-print("---------------")
-check_range(label, "no whitespace", prefix_ids, suffix_ids)
+            found = True
+            # label[start:end] = [-100] * (end - start)
+
+    # assistant_tokens = [x for x in label if x != -100]
+    # decoded_string = tokenizer.decode(assistant_tokens)
+    # expected_decoded_string = "the response is in here" + tokenizer.decode(suffix_ids)
+    # if decoded_string == expected_decoded_string:
+    #     found = True
+
+    if found:
+        print(f"'{name}' found the assistant response!")
+        print(f"\t--prefix_ids {','.join([str(x) for x in prefix_ids])}")
+        print(f"\t--suffix_ids {','.join([str(x) for x in suffix_ids])}")
+    # else:
+    #     print(f"'{decoded_string}' != '{expected_decoded_string}'")
+
+print("-" * 100)
+check_range(label, "no added whitespace", prefix_ids, suffix_ids)
 check_range(label, "leading space", prefix_ids2, suffix_ids2)
 check_range(label, "leading newline", prefix_ids3, suffix_ids3)
+check_range(label, "stripped whitespace", prefix_ids4, suffix_ids4)