From 5aaa62fd01c3ae4d73580c748eae81925ced99d1 Mon Sep 17 00:00:00 2001 From: Zlobin Vladimir Date: Mon, 6 May 2024 13:13:08 +0400 Subject: [PATCH 01/10] Migrate to optimum-cli from llm_bench usage (#417) Ticket 128657 I can't remove `convert_tokenizer` call because `optimum-cli` reports: > OpenVINO Tokenizer version is not compatible with OpenVINO version. Installed OpenVINO version: 2024.1.0,OpenVINO Tokenizers requires 2024.0.0. OpenVINO Tokenizers models will not be added during export. --- .github/dependabot.yml | 4 + .github/workflows/causal_lm_cpp.yml | 97 ++++++++++--------- text_generation/causal_lm/cpp/README.md | 28 +++--- .../causal_lm/cpp/requirements.txt | 4 + 4 files changed, 76 insertions(+), 57 deletions(-) create mode 100644 text_generation/causal_lm/cpp/requirements.txt diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a9b468dff..9ab4587c2 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -8,3 +8,7 @@ updates: directory: "image_generation/lcm_dreamshaper_v7/cpp/scripts/" schedule: interval: "weekly" + - package-ecosystem: "pip" + directory: "text_generation/causal_lm/cpp/" + schedule: + interval: "weekly" diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 9017cd544..b4a38838c 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -3,7 +3,6 @@ on: pull_request: paths: - .github/workflows/causal_lm_cpp.yml - - llm_bench/python/** - text_generation/causal_lm/cpp/* - thirdparty/openvino_tokenizers - "!**.md" @@ -29,15 +28,16 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id openlm-research/open_llama_3b_v2 --output_dir ./open_llama_3b_v2/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: convert_tokenizer and run run: | source ./ov/setupvars.sh - convert_tokenizer ./open_llama_3b_v2/pytorch/dldt/FP16/ --output ./open_llama_3b_v2/pytorch/dldt/FP16/ --with-detokenizer - ./build/greedy_causal_lm ./open_llama_3b_v2/pytorch/dldt/FP16/ "return 0" + convert_tokenizer ./open_llama_3b_v2/ --output ./open_llama_3b_v2/ --with-detokenizer + ./build/greedy_causal_lm ./open_llama_3b_v2/ "return 0" cpp-beam_search_causal_lm-ubuntu: runs-on: ubuntu-20.04 @@ -56,16 +56,17 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./TinyLlama-1.1B-Chat-v1.0/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install 
./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --output ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --with-detokenizer + convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer - timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ 69 > ./pred.txt + timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt python -c " import transformers with open('pred.txt', 'r') as file: @@ -81,7 +82,7 @@ jobs: " echo "69" passed - timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ Hi > ./pred.txt + timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ Hi > ./pred.txt python -c " import transformers with open('pred.txt', 'r') as file: @@ -97,7 +98,7 @@ jobs: " echo "Hi" passed - timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "return 0" > ./pred.txt + timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "return 0" > ./pred.txt python -c " import transformers with open('pred.txt', 'r') as file: @@ -113,7 +114,7 @@ jobs: " echo "return 0" passed - ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "你好! 你好嗎?" > ./pred.txt + ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "你好! 你好嗎?" > ./pred.txt python -c " import transformers with open('pred.txt', 'r') as file: @@ -147,17 +148,18 @@ jobs: shell: cmd run: | call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu - python ./llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./TinyLlama-1.1B-Chat-v1.0/ --precision FP16 + python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - name: Compare shell: cmd run: | call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat - convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --output .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --with-detokenizer + convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer - .\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ "69" > .\pred.txt + .\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt echo import transformers > ref.py echo predictions = open('pred.txt', 'r').read() >> ref.py echo tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') >> ref.py @@ -187,15 +189,16 @@ jobs: - name: Download, convert and build run: | 
source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id Qwen/Qwen-7B-Chat --output_dir ./Qwen-7B-Chat/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./Qwen-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code - timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/pytorch/dldt/FP16/ 69 > ./pred.txt + convert_tokenizer Qwen/Qwen-7B-Chat --output ./Qwen-7B-Chat/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/ 69 > ./pred.txt cpp-beam_search_causal_lm-Qwen1_5-7B-Chat: runs-on: ubuntu-20.04-16-cores @@ -214,15 +217,16 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id Qwen/Qwen1.5-7B-Chat --output_dir ./Qwen1.5-7B-Chat/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen1.5-7B-Chat Qwen1.5-7B-Chat cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: Run run: | source ./ov/setupvars.sh - convert_tokenizer ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code - timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ "你好!" > ./pred_qwen15.txt + convert_tokenizer ./Qwen1.5-7B-Chat/ --output ./Qwen1.5-7B-Chat/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好!" 
> ./pred_qwen15.txt cpp-beam_search_causal_lm-Phi-2: runs-on: ubuntu-20.04-16-cores @@ -241,15 +245,16 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id microsoft/phi-2 --output_dir ./Phi-2/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-2 phi-2 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j 15 - wait - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./Phi-2/pytorch/dldt/FP16/ --output ./Phi-2/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code - timeout 50s ./build/beam_search_causal_lm ./Phi-2/pytorch/dldt/FP16/ 69 > ./pred.txt + convert_tokenizer ./phi-2/ --output ./phi-2/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm ./phi-2/ 69 > ./pred.txt cpp-beam_search_causal_lm-notus-7b-v1: runs-on: ubuntu-20.04-16-cores @@ -268,15 +273,16 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id argilla/notus-7b-v1 --output_dir ./notus-7b-v1/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model argilla/notus-7b-v1 notus-7b-v1 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./notus-7b-v1/pytorch/dldt/FP16/ --output ./notus-7b-v1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code - timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/pytorch/dldt/FP16/ 69 > ./pred.txt + convert_tokenizer ./notus-7b-v1/ --output ./notus-7b-v1/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/ 69 > ./pred.txt cpp-speculative_decoding_lm-ubuntu: runs-on: ubuntu-20.04-16-cores @@ -295,19 +301,19 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu - python ./llm_bench/python/convert.py --model_id databricks/dolly-v2-3b --output_dir ./dolly-v2-3b/ --precision FP16 - python ./llm_bench/python/convert.py --model_id databricks/dolly-v2-7b --output_dir ./dolly-v2-7b/ --precision FP16 - convert_tokenizer ./dolly-v2-3b/pytorch/dldt/FP16/ --output ./dolly-v2-3b/pytorch/dldt/FP16/ --with-detokenizer - convert_tokenizer ./dolly-v2-7b/pytorch/dldt/FP16/ --output ./dolly-v2-7b/pytorch/dldt/FP16/ --with-detokenizer + python -m pip install 
--upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b + convert_tokenizer ./dolly-v2-3b/ --output ./dolly-v2-3b/ --with-detokenizer + convert_tokenizer ./dolly-v2-7b/ --output ./dolly-v2-7b/ --with-detokenizer cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: run and compare run: | source ./ov/setupvars.sh - ./build/speculative_decoding_lm ./dolly-v2-3b/pytorch/dldt/FP16/ ./dolly-v2-7b/pytorch/dldt/FP16/ "Alan Turing was a" > predictions_speculative.txt - ./build/greedy_causal_lm ./dolly-v2-7b/pytorch/dldt/FP16/ "Alan Turing was a" > predictions_greedy.txt + ./build/speculative_decoding_lm ./dolly-v2-3b/ ./dolly-v2-7b/ "Alan Turing was a" > predictions_speculative.txt + ./build/greedy_causal_lm ./dolly-v2-7b/ "Alan Turing was a" > predictions_greedy.txt python -c " with open('predictions_greedy.txt', 'r') as f: predicted_greedy = f.readline() @@ -334,16 +340,17 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id microsoft/phi-1_5 --output_dir ./Phi-1_5/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-1_5 phi-1_5 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j 15 - wait - name: Run Generation run: | source ./ov/setupvars.sh - convert_tokenizer ./Phi-1_5/pytorch/dldt/FP16/ --output ./Phi-1_5/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code - timeout 50s ./build/greedy_causal_lm ./Phi-1_5/pytorch/dldt/FP16/ "Alan Turing was a" > ./pred_greedy.txt - timeout 50s ./build/beam_search_causal_lm ./Phi-1_5/pytorch/dldt/FP16/ "Alan Turing was a" > ./pred_beam.txt + convert_tokenizer ./phi-1_5/ --output ./phi-1_5/ --with-detokenizer --trust-remote-code + timeout 50s ./build/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt + timeout 50s ./build/beam_search_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_beam.txt - name: Compare run: | python -c " diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md index 5ba4f8110..a0b4a0a1b 100644 --- a/text_generation/causal_lm/cpp/README.md +++ b/text_generation/causal_lm/cpp/README.md @@ -77,18 +77,22 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upg ```sh source /setupvars.sh -python3 -m pip install --upgrade-strategy eager "transformers<4.38" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu -python3 ../../../llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./TinyLlama-1.1B-Chat-v1.0/ --precision FP16 -convert_tokenizer 
./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --output ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code +python3 -m pip install --upgrade-strategy eager -r requirements.txt +# Update openvino_tokenizers from the submodule +python3 -m pip install ./../../../thirdparty/openvino_tokenizers/[transformers] +optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 +convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer --trust-remote-code ``` #### Windows ```bat \setupvars.bat -python -m pip install --upgrade-strategy eager "transformers<4.38" -r ..\..\..\llm_bench\python\requirements.txt ..\..\..\thirdparty\openvino_tokenizers\[transformers] --extra-index-url https://download.pytorch.org/whl/cpu -python ..\..\..\llm_bench\python\convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir .\TinyLlama-1.1B-Chat-v1.0\ --precision FP16 -convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --output .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --with-detokenizer --trust-remote-code +python -m pip install --upgrade-strategy eager -r requirements.txt +REM Update openvino_tokenizers from the submodule +python -m pip install .\..\..\..\thirdparty\openvino_tokenizers\[transformers] +optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 +convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer --trust-remote-code ``` ## Run @@ -100,14 +104,14 @@ convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --output .\TinyL ### Examples: #### Windows: -1. `/build/Release/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "Why is the Sun yellow?"` -2. `/build/Release/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "Why is the Sun yellow?"` -3. `/build/Release/speculative_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ ./Llama-2-7b-chat-hf/pytorch/dldt/FP16/ "Why is the Sun yellow?"` +1. `/build/Release/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?"` +2. `/build/Release/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?"` +3. `/build/Release/speculative_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ ./Llama-2-7b-chat-hf/ "Why is the Sun yellow?"` #### Linux/MacOS: -1. `./build/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "Why is the Sun yellow?"` -2. `./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "Why is the Sun yellow?"` -3. `./build/speculative_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ ./Llama-2-7b-chat-hf/pytorch/dldt/FP16/ "Why is the Sun yellow?"` +1. `./build/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?"` +2. `./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?"` +3. `./build/speculative_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ ./Llama-2-7b-chat-hf/ "Why is the Sun yellow?"` To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot. 
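The README hunks above replace the llm_bench conversion script with `optimum-cli export openvino`; as the commit message notes, the separate `convert_tokenizer` step stays until the OpenVINO / OpenVINO Tokenizers version mismatch is resolved. For reference, a minimal Python sketch of the same two steps through the optimum-intel and openvino-tokenizers APIs is shown below. It is an assumption-laden illustration, not part of this patch: it presumes the packages pinned in the new requirements.txt, keeps the default weight precision instead of the CLI's `--weight-format fp16`, and uses conventional tokenizer output file names.

```python
# Hedged sketch of the export flow this patch switches the samples to.
# Assumes optimum[openvino] and openvino-tokenizers are installed; the
# fp16 weight compression done by `--weight-format fp16` is omitted here.
from pathlib import Path

from openvino import save_model
from openvino_tokenizers import convert_tokenizer
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
out_dir = Path("TinyLlama-1.1B-Chat-v1.0")

# export=True converts the PyTorch checkpoint to OpenVINO IR on the fly,
# mirroring `optimum-cli export openvino --model <id> <out_dir>`.
model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True)
model.save_pretrained(out_dir)

# Mirrors the separate `convert_tokenizer <dir> --output <dir> --with-detokenizer`
# call that cannot be dropped yet. Output names below are the conventional ones.
hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer, detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)
save_model(tokenizer, str(out_dir / "openvino_tokenizer.xml"))
save_model(detokenizer, str(out_dir / "openvino_detokenizer.xml"))
```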
diff --git a/text_generation/causal_lm/cpp/requirements.txt b/text_generation/causal_lm/cpp/requirements.txt new file mode 100644 index 000000000..89c6a4b4d --- /dev/null +++ b/text_generation/causal_lm/cpp/requirements.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cpu +optimum[openvino]==1.19.1 +einops==0.7.0 # For Qwen +transformers_stream_generator==0.0.4 # For Qwen From b0169c9fa02f7ee99b30233f97bdd85099e43f7e Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Tue, 7 May 2024 15:01:44 +0800 Subject: [PATCH 02/10] Update openvino and nncf versions (#397) Co-authored-by: Chen Peter Co-authored-by: Zlobin Vladimir --- llm_bench/python/requirements.txt | 4 ++-- llm_bench/python/requirements_2024.1.txt | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llm_bench/python/requirements.txt b/llm_bench/python/requirements.txt index bb0eceba5..1f25fe8ce 100644 --- a/llm_bench/python/requirements.txt +++ b/llm_bench/python/requirements.txt @@ -1,6 +1,7 @@ --extra-index-url https://download.pytorch.org/whl/cpu numpy -openvino>=2024.0.0 +openvino~=2024.1.0 +nncf~=2.10.0 auto-gptq>=0.5.1 # for gptq pillow torch @@ -8,7 +9,6 @@ transformers>=4.33.0 diffusers>=0.22.0 #optimum is in dependency list of optimum-intel git+https://github.com/huggingface/optimum-intel.git@ff792c278502a85444dd116413dbca71aa660599#egg=optimum-intel -git+https://github.com/openvinotoolkit/nncf.git@ec497ce0781fe867d73d5c5bdf8310fdb40604a4#egg=nncf packaging psutil timm diff --git a/llm_bench/python/requirements_2024.1.txt b/llm_bench/python/requirements_2024.1.txt index a0d438887..0ee019d1e 100644 --- a/llm_bench/python/requirements_2024.1.txt +++ b/llm_bench/python/requirements_2024.1.txt @@ -54,6 +54,7 @@ networkx==3.3 ninja==1.11.1.1 numpy==1.26.4 onnx==1.16.0 +openvino==2024.1.0 openvino-telemetry==2024.1.0 optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@ff792c278502a85444dd116413dbca71aa660599 packaging==24.0 From af92812e7231e3ee675fc9aa2618087c7f5b77fe Mon Sep 17 00:00:00 2001 From: Zlobin Vladimir Date: Wed, 8 May 2024 13:40:02 +0400 Subject: [PATCH 03/10] Upgrade openvino and tokenizers (#395) Co-authored-by: yatarkan --- .github/workflows/causal_lm_cpp.yml | 20 +++++++++---------- .github/workflows/lcm_dreamshaper_cpp.yml | 10 +++++----- .../workflows/stable_diffusion_1_5_cpp.yml | 6 ++++-- .../stable_diffusion_1_5/cpp/README.md | 2 +- thirdparty/openvino_tokenizers | 2 +- 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index b4a38838c..cc69e414b 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -23,7 +23,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -51,7 +51,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar 
--directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -142,12 +142,12 @@ jobs: - name: Install OpenVINO shell: bash run: | - curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64.zip + curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/windows/w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64.zip unzip ov.zip - name: Download, convert and build shell: cmd run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat + call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64\setupvars.bat python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 @@ -156,7 +156,7 @@ jobs: - name: Compare shell: cmd run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat + call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64\setupvars.bat convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer .\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt @@ -184,7 +184,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -212,7 +212,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -240,7 +240,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -268,7 +268,7 @@ jobs: - name: Install 
OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -335,7 +335,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index 9ce91bc60..001265e36 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -22,7 +22,7 @@ jobs: - name: Initialize OpenVINO run: | mkdir openvino - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./openvino/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./openvino/ --strip-components 1 -xz sudo ./openvino/install_dependencies/install_openvino_dependencies.sh - name: Download / convert a model / tokenizer run: | @@ -55,12 +55,12 @@ jobs: - name: Initialize OpenVINO shell: cmd run: | - curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64.zip + curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/windows/w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64.zip unzip ov.zip - name: Download / convert a model / tokenizer shell: cmd run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64/setupvars.bat + call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat cd ./image_generation/lcm_dreamshaper_v7/cpp/scripts/ python -m pip install -r ./requirements.txt python -m pip install ../../../../thirdparty/openvino_tokenizers/ @@ -68,13 +68,13 @@ jobs: - name: Build app shell: cmd run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64/setupvars.bat + call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat cd ./image_generation/lcm_dreamshaper_v7/cpp/ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release --parallel - name: Run app shell: cmd run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64/setupvars.bat + call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat cd ./image_generation/lcm_dreamshaper_v7/cpp/build/ call "./Release/lcm_dreamshaper.exe" diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml 
b/.github/workflows/stable_diffusion_1_5_cpp.yml index f3f7e5285..16ba21891 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -38,7 +38,7 @@ jobs: - name: Install OpenVINO and other conda dependencies run: | conda activate openvino_sd_cpp - conda install -c conda-forge openvino=2024.0.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH - name: Install python dependencies @@ -85,13 +85,15 @@ jobs: - name: Install OpenVINO and other conda dependencies run: | conda activate openvino_sd_cpp - conda install -c conda-forge openvino=2024.0.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake - name: Install python dependencies working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp python -m pip install -r requirements.txt + $env:SPM_PROTOBUF_PROVIDER = 'internal' + $env:SPM_ABSL_PROVIDER = 'internal' python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - name: Download and convert model and tokenizer diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md index 2b4cdb031..2dfa32628 100644 --- a/image_generation/stable_diffusion_1_5/cpp/README.md +++ b/image_generation/stable_diffusion_1_5/cpp/README.md @@ -18,7 +18,7 @@ Prepare a python environment and install dependencies: ```shell conda create -n openvino_sd_cpp python==3.10 conda activate openvino_sd_cpp -conda install -c conda-forge openvino=2024.0.0 c-compiler cxx-compiler make cmake +conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake # Ensure that Conda standard libraries are used conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH ``` diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers index 0e4bb32ca..60d3ccc42 160000 --- a/thirdparty/openvino_tokenizers +++ b/thirdparty/openvino_tokenizers @@ -1 +1 @@ -Subproject commit 0e4bb32ca3412f589e1d094faa8b0aad19ee47ca +Subproject commit 60d3ccc426984acc623630a8e4d8c8878ec74eb7 From a3d4153382e2d86b7a7ae274bee5b8b8cb97999f Mon Sep 17 00:00:00 2001 From: Zlobin Vladimir Date: Wed, 8 May 2024 14:36:59 +0400 Subject: [PATCH 04/10] Add bandit scan (#386) --- .github/workflows/bandit.yml | 16 ++ bandit.yml | 398 ++++++++++++++++++++++++++++++++++ llm_bench/python/benchmark.py | 6 +- 3 files changed, 417 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/bandit.yml create mode 100644 bandit.yml diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml new file mode 100644 index 000000000..9faa853a2 --- /dev/null +++ b/.github/workflows/bandit.yml @@ -0,0 +1,16 @@ +name: python -m bandit --recursive --configfile bandit.yml . +on: + pull_request: + paths-ignore: + - 'thirdparty' + - '**.md' +jobs: + bandit: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: 3.11 + - run: python -m pip install bandit + - run: python -m bandit --recursive --configfile bandit.yml . diff --git a/bandit.yml b/bandit.yml new file mode 100644 index 000000000..be2fd3da5 --- /dev/null +++ b/bandit.yml @@ -0,0 +1,398 @@ +### This config may optionally select a subset of tests to run or skip by +### filling out the 'tests' and 'skips' lists given below. 
If no tests are +### specified for inclusion then it is assumed all tests are desired. The skips +### set will remove specific tests from the include set. This can be controlled +### using the -t/-s CLI options. Note that the same test ID should not appear +### in both 'tests' and 'skips', this would be nonsensical and is detected by +### Bandit at runtime. + +# Available tests: +# B101 : assert_used +# B102 : exec_used +# B103 : set_bad_file_permissions +# B104 : hardcoded_bind_all_interfaces +# B105 : hardcoded_password_string +# B106 : hardcoded_password_funcarg +# B107 : hardcoded_password_default +# B108 : hardcoded_tmp_directory +# B110 : try_except_pass +# B112 : try_except_continue +# B201 : flask_debug_true +# B301 : pickle +# B302 : marshal +# B303 : md5 +# B304 : ciphers +# B305 : cipher_modes +# B306 : mktemp_q +# B307 : eval +# B308 : mark_safe +# B310 : urllib_urlopen +# B311 : random +# B312 : telnetlib +# B313 : xml_bad_cElementTree +# B314 : xml_bad_ElementTree +# B315 : xml_bad_expatreader +# B316 : xml_bad_expatbuilder +# B317 : xml_bad_sax +# B318 : xml_bad_minidom +# B319 : xml_bad_pulldom +# B320 : xml_bad_etree +# B321 : ftplib +# B323 : unverified_context +# B324 : hashlib_new_insecure_functions +# B401 : import_telnetlib +# B402 : import_ftplib +# B403 : import_pickle +# B404 : import_subprocess +# B405 : import_xml_etree +# B406 : import_xml_sax +# B407 : import_xml_expat +# B408 : import_xml_minidom +# B409 : import_xml_pulldom +# B410 : import_lxml +# B411 : import_xmlrpclib +# B412 : import_httpoxy +# B413 : import_pycrypto +# B501 : request_with_no_cert_validation +# B502 : ssl_with_bad_version +# B503 : ssl_with_bad_defaults +# B504 : ssl_with_no_version +# B505 : weak_cryptographic_key +# B506 : yaml_load +# B507 : ssh_no_host_key_verification +# B601 : paramiko_calls +# B602 : subprocess_popen_with_shell_equals_true +# B603 : subprocess_without_shell_equals_true +# B604 : any_other_function_with_shell_equals_true +# B605 : start_process_with_a_shell +# B606 : start_process_with_no_shell +# B607 : start_process_with_partial_path +# B608 : hardcoded_sql_expressions +# B609 : linux_commands_wildcard_injection +# B610 : django_extra_used +# B611 : django_rawsql_used +# B701 : jinja2_autoescape_false +# B702 : use_of_mako_templates +# B703 : django_mark_safe + +# (optional) list included test IDs here, eg '[B101, B406]': +# IPAS Required Checkers. Do not disable these +# Additional checkers may be added if desired +tests: + [ 'B301', 'B302', 'B303', 'B304', 'B305', 'B306', 'B308', 'B310', 'B311', 'B312', 'B313', 'B314', 'B315', 'B316', 'B317', 'B318', 'B319', 'B320', 'B321', 'B323', 'B324', 'B401', 'B402', 'B403', 'B404', 'B405', 'B406', 'B407', 'B408', 'B409', 'B410', 'B411', 'B412', 'B413'] + +# (optional) list skipped test IDs here, eg '[B101, B406]': +# The following checkers are not required but be added to tests list if desired +skips: + [ 'B101', 'B102', 'B103', 'B104', 'B105', 'B106', 'B107', 'B108', 'B110', 'B112', 'B201', 'B501', 'B502', 'B503', 'B504', 'B505', 'B506', 'B507', 'B601', 'B602', 'B603', 'B604', 'B605', 'B606', 'B607', 'B608', 'B609', 'B610', 'B611', 'B701', 'B702', 'B703'] + +### (optional) plugin settings - some test plugins require configuration data +### that may be given here, per-plugin. All bandit test plugins have a built in +### set of sensible defaults and these will be used if no configuration is +### provided. It is not necessary to provide settings for every (or any) plugin +### if the defaults are acceptable. 
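+### Note (not in the original config): B324 (hashlib_new_insecure_functions)
+### is enabled in the 'tests' list above. hashlib calls that pass
+### usedforsecurity=False, as the benchmark.py hunk in this patch does for
+### its md5 output checksums, are treated as non-security uses by bandit
+### and are not reported.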
+ +any_other_function_with_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +assert_used: + skips: [] +hardcoded_tmp_directory: + tmp_dirs: + - /tmp + - /var/tmp + - /dev/shm +linux_commands_wildcard_injection: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +ssl_with_bad_defaults: + bad_protocol_versions: + - PROTOCOL_SSLv2 + - SSLv2_METHOD + - SSLv23_METHOD + - PROTOCOL_SSLv3 + - PROTOCOL_TLSv1 + - SSLv3_METHOD + - TLSv1_METHOD +ssl_with_bad_version: + bad_protocol_versions: + - PROTOCOL_SSLv2 + - SSLv2_METHOD + - SSLv23_METHOD + - PROTOCOL_SSLv3 + - PROTOCOL_TLSv1 + - SSLv3_METHOD + - TLSv1_METHOD +start_process_with_a_shell: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +start_process_with_no_shell: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +start_process_with_partial_path: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: 
+ - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +subprocess_popen_with_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +subprocess_without_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +try_except_continue: + check_typed_exception: false +try_except_pass: + check_typed_exception: false +weak_cryptographic_key: + weak_key_size_dsa_high: 1024 + weak_key_size_dsa_medium: 2048 + weak_key_size_ec_high: 160 + weak_key_size_ec_medium: 224 + weak_key_size_rsa_high: 1024 + weak_key_size_rsa_medium: 2048 +exclude_dirs: + - thirdparty diff --git a/llm_bench/python/benchmark.py b/llm_bench/python/benchmark.py index 3f1d1fa11..6b39fc936 100644 --- a/llm_bench/python/benchmark.py +++ b/llm_bench/python/benchmark.py @@ -129,7 +129,7 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list, result_text = generated_text[bs_idx] if args["output_dir"] is not None: utils.output_file.output_gen_text(result_text, args, model_precision, prompt_index, num, bs_idx, proc_id) - result_md5_list.append(hashlib.md5(result_text.encode()).hexdigest()) + result_md5_list.append(hashlib.md5(result_text.encode(), usedforsecurity=False).hexdigest()) if num == 0: warmup_md5[prompt_index] = result_md5_list per_token_time = generation_time * 1000 / (num_tokens / args['batch_size']) @@ -239,7 +239,7 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list, mem_consumption.clear_max_memory_consumption() for bs_idx in range(args['batch_size']): rslt_img_fn = utils.output_file.output_gen_image(res[bs_idx], args, image_id, num, bs_idx, proc_id, '.png') - result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes()).hexdigest()) + result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes(), usedforsecurity=False).hexdigest()) generation_time = end - start iter_data = gen_iterate_data( iter_idx=num, @@ -339,7 +339,7 @@ def run_ldm_super_resolution(img, num, pipe, args, framework, iter_data_list, im result_md5_list = [] if framework == 'ov': rslt_img_fn = utils.output_file.output_gen_image(res[0], args, image_id, num, None, proc_id, '.png') - result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes()).hexdigest()) + result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes(), usedforsecurity=False).hexdigest()) generation_time = end - start 
iter_data = gen_iterate_data( From 387c72865b40052e4047c1e26318ba1caf2d06bd Mon Sep 17 00:00:00 2001 From: Yaroslav Tarkan Date: Wed, 8 May 2024 15:49:31 +0300 Subject: [PATCH 05/10] [Port 24.1] Move from image_generation python conversion scripts to optimum-cli (LCM Dreamshaper v7 model) (#402) --- .github/workflows/lcm_dreamshaper_cpp.yml | 142 +++++++++++------- .../workflows/stable_diffusion_1_5_cpp.yml | 2 - .../common/diffusers/src/scheduler_lcm.cpp | 2 +- .../lcm_dreamshaper_v7/cpp/README.md | 50 +++--- .../np_latents_512x512.txt | 0 .../torch_noise_step_0.txt | 0 .../torch_noise_step_1.txt | 0 .../torch_noise_step_2.txt | 0 .../cpp/{scripts => }/requirements.txt | 0 .../cpp/scripts/convert_model.py | 41 ----- .../lcm_dreamshaper_v7/cpp/src/main.cpp | 120 ++++++++++++--- thirdparty/openvino_tokenizers | 2 +- 12 files changed, 221 insertions(+), 138 deletions(-) rename image_generation/lcm_dreamshaper_v7/cpp/{scripts => latents}/np_latents_512x512.txt (100%) rename image_generation/lcm_dreamshaper_v7/cpp/{scripts => latents}/torch_noise_step_0.txt (100%) rename image_generation/lcm_dreamshaper_v7/cpp/{scripts => latents}/torch_noise_step_1.txt (100%) rename image_generation/lcm_dreamshaper_v7/cpp/{scripts => latents}/torch_noise_step_2.txt (100%) rename image_generation/lcm_dreamshaper_v7/cpp/{scripts => }/requirements.txt (100%) delete mode 100644 image_generation/lcm_dreamshaper_v7/cpp/scripts/convert_model.py diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index 001265e36..131927d76 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -1,4 +1,5 @@ name: lcm_dreamshaper + on: pull_request: paths: @@ -6,75 +7,112 @@ on: - image_generation/common/** - .github/workflows/lcm_dreamshaper_cpp.yml - thirdparty/openvino_tokenizers + +env: + working_directory: "./image_generation/lcm_dreamshaper_v7/cpp/" + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true + jobs: lcm_dreamshaper_v7_cpp-linux: runs-on: ubuntu-20.04 + defaults: + run: + # Do not ignore bash profile files. 
From: + # https://github.com/marketplace/actions/setup-miniconda#important + shell: bash -l {0} steps: - uses: actions/checkout@v4 with: submodules: recursive - - uses: actions/setup-python@v4 + + - name: Setup conda + uses: conda-incubator/setup-miniconda@v3 with: - python-version: 3.8 - - name: Initialize OpenVINO + miniconda-version: "latest" + activate-environment: openvino_lcm_cpp + python-version: "3.10" + + - name: Install OpenVINO and other conda dependencies run: | - mkdir openvino - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./openvino/ --strip-components 1 -xz - sudo ./openvino/install_dependencies/install_openvino_dependencies.sh - - name: Download / convert a model / tokenizer + conda activate openvino_lcm_cpp + conda update -c conda-forge --all + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH + + - name: Install python dependencies + working-directory: ${{ env.working_directory }} run: | - source ./openvino/setupvars.sh - cd ./image_generation/lcm_dreamshaper_v7/cpp/scripts/ - python -m pip install -U pip - python -m pip install -r ./requirements.txt - python -m pip install ../../../../thirdparty/openvino_tokenizers/ - python convert_model.py -lcm "SimianLuo/LCM_Dreamshaper_v7" -t "FP16" + conda activate openvino_lcm_cpp + python -m pip install -r requirements.txt + python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] + + - name: Download and convert model and tokenizer + working-directory: ${{ env.working_directory }} + run: | + conda activate openvino_lcm_cpp + export MODEL_PATH="models/lcm_dreamshaper_v7/FP16" + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH + convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ + - name: Build app + working-directory: ${{ env.working_directory }} run: | - source ./openvino/setupvars.sh - cd ./image_generation/lcm_dreamshaper_v7/cpp/ + conda activate openvino_lcm_cpp cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release --parallel + - name: Run app + working-directory: ${{ env.working_directory }} run: | - source ./openvino/setupvars.sh - cd ./image_generation/lcm_dreamshaper_v7/cpp/build/ - ./lcm_dreamshaper + ./build/lcm_dreamshaper + lcm_dreamshaper_v7_cpp-windows: - runs-on: windows-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: Initialize OpenVINO - shell: cmd - run: | - curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/windows/w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64.zip - unzip ov.zip - - name: Download / convert a model / tokenizer - shell: cmd - run: | - call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat - cd ./image_generation/lcm_dreamshaper_v7/cpp/scripts/ - python -m pip install -r ./requirements.txt - python -m pip install ../../../../thirdparty/openvino_tokenizers/ - python convert_model.py -lcm "SimianLuo/LCM_Dreamshaper_v7" -t "FP16" - - name: Build app - shell: cmd - run: | - call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat - cd ./image_generation/lcm_dreamshaper_v7/cpp/ - cmake -DCMAKE_BUILD_TYPE=Release -S 
./ -B ./build/ - cmake --build ./build/ --config Release --parallel - - name: Run app - shell: cmd - run: | - call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat - cd ./image_generation/lcm_dreamshaper_v7/cpp/build/ - call "./Release/lcm_dreamshaper.exe" + runs-on: windows-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Setup conda + uses: conda-incubator/setup-miniconda@v3 + with: + miniconda-version: "latest" + activate-environment: openvino_lcm_cpp + python-version: "3.10" + + - name: Install OpenVINO and other conda dependencies + run: | + conda activate openvino_lcm_cpp + conda update -c conda-forge --all + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH + + - name: Install python dependencies + working-directory: ${{ env.working_directory }} + run: | + conda activate openvino_lcm_cpp + python -m pip install -r requirements.txt + python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] + + - name: Download and convert model and tokenizer + working-directory: ${{ env.working_directory }} + run: | + conda activate openvino_lcm_cpp + $env:MODEL_PATH='models/lcm_dreamshaper_v7/FP16' + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $env:MODEL_PATH + convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/ + + - name: Build app + working-directory: ${{ env.working_directory }} + run: | + conda activate openvino_lcm_cpp + cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ + cmake --build ./build/ --config Release --parallel + + - name: Run app + working-directory: ${{ env.working_directory }} + run: | + & "./build/Release/lcm_dreamshaper.exe" -r --dynamic diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index 16ba21891..ad929ea06 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -92,8 +92,6 @@ jobs: run: | conda activate openvino_sd_cpp python -m pip install -r requirements.txt - $env:SPM_PROTOBUF_PROVIDER = 'internal' - $env:SPM_ABSL_PROVIDER = 'internal' python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - name: Download and convert model and tokenizer diff --git a/image_generation/common/diffusers/src/scheduler_lcm.cpp b/image_generation/common/diffusers/src/scheduler_lcm.cpp index af82c981a..d5f97b677 100644 --- a/image_generation/common/diffusers/src/scheduler_lcm.cpp +++ b/image_generation/common/diffusers/src/scheduler_lcm.cpp @@ -192,7 +192,7 @@ std::map LCMScheduler::step(ov::Tensor noise_pred, ov:: if (inference_step != num_inference_steps - 1) { std::vector noise; if (read_torch_noise) { - std::string noise_file = "../scripts/torch_noise_step_" + std::to_string(inference_step) + ".txt"; + std::string noise_file = "./latents/torch_noise_step_" + std::to_string(inference_step) + ".txt"; noise = read_vector_from_txt(noise_file); } else { noise = randn_function(noise_pred.get_size(), seed); diff --git a/image_generation/lcm_dreamshaper_v7/cpp/README.md b/image_generation/lcm_dreamshaper_v7/cpp/README.md index f7f6a7615..d4a62fb27 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/README.md +++ b/image_generation/lcm_dreamshaper_v7/cpp/README.md @@ -2,19 +2,25 @@ The pure C++ text-to-image pipeline, driven by the OpenVINO native API for SD v1.5 Latent 
Consistency Model with LCM Scheduler. It includes advanced features like LoRA integration with safetensors and [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers). Loading `openvino_tokenizers` to `ov::Core` enables tokenization. [The common folder](../../common/) contains schedulers for image generation and `imwrite()` for saving `bmp` images. This demo has been tested for Linux platform only. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/263-latent-consistency-models-image-generation/263-lcm-lora-controlnet.ipynb) which provides an example of image generaztion in Python. > [!NOTE] ->This tutorial assumes that the current working directory is `/image_generation/lcm_dreamshaper_v7/cpp/` and all paths are relative to this folder. +> This tutorial assumes that the current working directory is `/image_generation/lcm_dreamshaper_v7/cpp/` and all paths are relative to this folder. ## Step 1: Prepare build environment +Prerequisites: +- Conda ([installation guide](https://conda.io/projects/conda/en/latest/user-guide/install/index.html)) + C++ Packages: * [CMake](https://cmake.org/download/): Cross-platform build tool -* [OpenVINO](https://docs.openvino.ai/2023.2/openvino_docs_install_guides_overview.html): Model inference +* [OpenVINO](https://docs.openvino.ai/2024/get-started/install-openvino.html): Model inference Prepare a python environment and install dependencies: ```shell conda create -n openvino_lcm_cpp python==3.10 conda activate openvino_lcm_cpp -conda install -c conda-forge openvino c-compiler cxx-compiler make +conda update -c conda-forge --all +conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake +# Ensure that Conda standard libraries are used +conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH ``` ## Step 2: Latent Consistency Model and Tokenizer models @@ -26,20 +32,19 @@ conda install -c conda-forge openvino c-compiler cxx-compiler make ```shell git submodule update --init conda activate openvino_lcm_cpp - python -m pip install -r scripts/requirements.txt + python -m pip install -r requirements.txt python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] ``` -2. Run model conversion script to download and convert PyTorch model to OpenVINO IR via [optimum-intel](https://github.com/huggingface/optimum-intel). Please, use the script `scripts/convert_model.py` to convert the model: - +2. Download the model from Huggingface and convert it to OpenVINO IR via [optimum-intel CLI](https://github.com/huggingface/optimum-intel). Example command for downloading and exporting FP16 model: ```shell - cd scripts - python convert_model.py -lcm "SimianLuo/LCM_Dreamshaper_v7" -t FP16 + export MODEL_PATH="models/lcm_dreamshaper_v7/FP16" + # Using optimum-cli for exporting model to OpenVINO format + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH + # Converting tokenizer + convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ ``` -> [!NOTE] ->Only static model is currently supported for this sample. - ### LoRA enabling with safetensors Refer to [python pipeline blog](https://blog.openvino.ai/blog-posts/enable-lora-weights-with-stable-diffusion-controlnet-pipeline). @@ -65,19 +70,20 @@ Usage: lcm_dreamshaper [OPTION...] 
```

-* `-p, --posPrompt arg` Initial positive prompt for SD (default: cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting)
+* `-p, --posPrompt arg` Initial positive prompt for LCM (default: a beautiful pink unicorn)
* `-d, --device arg` AUTO, CPU, or GPU. Doesn't apply to Tokenizer model, OpenVINO Tokenizers can be inferred on a CPU device only (default: CPU)
-* `--step arg` Number of diffusion step ( default: 20)
+* `--step arg` Number of diffusion steps (default: 4)
* `-s, --seed arg` Number of random seed to generate latent (default: 42)
-* `--num arg` Number of image output(default: 1)
+* `--num arg` Number of image output (default: 1)
* `--height arg` Height of output image (default: 512)
* `--width arg` Width of output image (default: 512)
* `-c, --useCache` Use model caching
-* `-r, --readNPLatent` Read numpy generated latents from file
-* `-m, --modelPath arg` Specify path of SD model IR (default: ../scripts/SimianLuo/LCM_Dreamshaper_v7)
-* `-t, --type arg` Specify the type of SD model IR (FP16_static or FP16_dyn) (default: FP16_static)
-* `-l, --loraPath arg` Specify path of lora file. (*.safetensors). (default: )
-* `-a, --alpha arg` alpha for lora (default: 0.75)
+* `-r, --readNPLatent` Read numpy generated latents from file, only supported for one output image
+* `-m, --modelPath arg` Specify path to LCM model IRs (default: ./models/lcm_dreamshaper_v7)
+* `-t, --type arg` Specify the type of LCM model IRs (e.g., FP32, FP16 or INT8) (default: FP16)
+* `--dynamic` Specify the model input shape to use dynamic shape
+* `-l, --loraPath arg` Specify path to LoRA file (*.safetensors) (default: )
+* `-a, --alpha arg` Specify alpha for LoRA (default: 0.75)
* `-h, --help` Print usage

> [!NOTE]
@@ -89,15 +95,15 @@ Positive prompt: a beautiful pink unicorn

Read the numpy latent input and noise for the scheduler from files, instead of generating them with the C++ standard library, to stay aligned with the Python pipeline.
-* Generate image with random data generated by Python `./build/lcm_dreamshaper -r`
+* Generate image with random data generated by Python: `./build/lcm_dreamshaper -r`

![image](./python_random.bmp)

-* Generate image with C++ lib generated latent and noise : `./build/lcm_dreamshaper`
+* Generate image with C++ lib generated latent and noise: `./build/lcm_dreamshaper`

![image](./cpp_random.bmp)

-* Generate image with soulcard lora and C++ generated latent and noise `./stable_diffusion -r -l path/to/soulcard.safetensors`
+* Generate image with soulcard LoRA and C++ generated latent and noise: `./build/lcm_dreamshaper -r -l path/to/soulcard.safetensors`

![image](./lora_cpp_random.bmp)
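The `-r` examples above read pre-generated latents from the text files that this patch moves into `latents/`. As a rough sketch of the format those files are assumed to use (whitespace-separated floats), the standalone `read_floats` helper below is hypothetical and only mirrors what `read_vector_from_txt` in `scheduler_lcm.cpp` appears to do:

```cpp
// Hypothetical reader for files such as latents/np_latents_512x512.txt.
// Assumes plain whitespace-separated float values, e.g. a numpy.savetxt dump.
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>

std::vector<float> read_floats(const std::string& file_name) {
    std::ifstream file(file_name);
    if (!file.is_open())
        throw std::runtime_error("Cannot open " + file_name);
    std::vector<float> values;
    for (float value; file >> value;)
        values.push_back(value);
    return values;
}

int main() {
    // A 1x4x64x64 latent for a 512x512 image should yield 16384 values.
    std::vector<float> latents = read_floats("./latents/np_latents_512x512.txt");
    return latents.size() == 16384 ? 0 : 1;
}
```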
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/np_latents_512x512.txt b/image_generation/lcm_dreamshaper_v7/cpp/latents/np_latents_512x512.txt
similarity index 100%
rename from image_generation/lcm_dreamshaper_v7/cpp/scripts/np_latents_512x512.txt
rename to image_generation/lcm_dreamshaper_v7/cpp/latents/np_latents_512x512.txt
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_0.txt b/image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_0.txt
similarity index 100%
rename from image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_0.txt
rename to image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_0.txt
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_1.txt b/image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_1.txt
similarity index 100%
rename from image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_1.txt
rename to image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_1.txt
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_2.txt b/image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_2.txt
similarity index 100%
rename from image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_2.txt
rename to image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_2.txt
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/requirements.txt b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
similarity index 100%
rename from image_generation/lcm_dreamshaper_v7/cpp/scripts/requirements.txt
rename to image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/convert_model.py b/image_generation/lcm_dreamshaper_v7/cpp/scripts/convert_model.py
deleted file mode 100644
index c55ec0ecc..000000000
--- a/image_generation/lcm_dreamshaper_v7/cpp/scripts/convert_model.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from pathlib import Path
-import argparse
-from optimum.intel.openvino import OVLatentConsistencyModelPipeline
-from transformers import AutoTokenizer
-from openvino_tokenizers import convert_tokenizer
-from openvino import Type, save_model
-
-
-def parse_args() -> argparse.Namespace:
-    """Parse and return command line arguments."""
-    parser = argparse.ArgumentParser(add_help=False)
-    args = parser.add_argument_group('Options')
-    args.add_argument('-h', '--help', action = 'help',
-                      help='Show this help message and exit.')
-    args.add_argument('-t', '--type', type = str, default = "FP32", required = True,
-                      help='Required. data type, FP32, FP16.')
-    args.add_argument('-lcm','--lcm_weights', type = str, default="SimianLuo/LCM_Dreamshaper_v7", required = True,
-                      help='Specify the path of lcm model')
-    return parser.parse_args()
-
-args = parse_args()
-output_path = Path(args.lcm_weights) / (args.type + "_static")
-
-###convert LCM model to IR
-
-model = OVLatentConsistencyModelPipeline.from_pretrained(args.lcm_weights, trust_remote_code=True, export=True, compile=False)
-if args.type == "FP16":
-    model.half()
-
-model.reshape(1, 512, 512, 1)
-
-model.compile()
-model.save_pretrained(output_path)
-
-# convert tokenizer
-
-tokenizer_path = output_path / "tokenizer"
-hf_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
-ov_tokenizer_encoder = convert_tokenizer(hf_tokenizer, tokenizer_output_type=Type.i32)
-
-save_model(ov_tokenizer_encoder, tokenizer_path / "openvino_tokenizer.xml", compress_to_fp16=False)
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
index 1df11bee2..20e240fa8 100644
--- a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
+++ b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
@@ -21,11 +21,17 @@
 #include "lora.hpp"
 #include "imwrite.hpp"

+const size_t TOKENIZER_MODEL_MAX_LENGTH = 77; // 'model_max_length' parameter from 'tokenizer_config.json'
+const int64_t UNET_IN_CHANNELS = 4; // 'in_channels' parameter from 'unet/config.json'
+const int64_t UNET_TIME_COND_PROJ_DIM = 256; // 'time_cond_proj_dim' parameter from 'unet/config.json'
+const int64_t VAE_DECODER_LATENT_CHANNELS = 4; // 'latent_channels' parameter from 'vae_decoder/config.json'
+const size_t VAE_SCALE_FACTOR = 8;
+
 ov::Tensor randn_tensor(uint32_t height, uint32_t width, bool use_np_latents, uint32_t seed = 42) {
-    ov::Tensor noise(ov::element::f32, {1, 4, height / 8, width / 8});
+    ov::Tensor noise(ov::element::f32, {1, UNET_IN_CHANNELS, height / VAE_SCALE_FACTOR, width / VAE_SCALE_FACTOR});
     if (use_np_latents) {
         // read np generated latents with default seed 42
-        const char * latent_file_name = "../scripts/np_latents_512x512.txt";
+        const char * latent_file_name = "./latents/np_latents_512x512.txt";
         std::ifstream latent_copy_file(latent_file_name, std::ios::ate);
         OPENVINO_ASSERT(latent_copy_file.is_open(), "Cannot open ", latent_file_name);
@@ -60,13 +66,67 @@ void apply_lora(std::shared_ptr<ov::Model> model, InsertLoRA::LoRAMap& lora_map)
     }
 }

-StableDiffusionModels compile_models(const std::string& model_path, const std::string& device,
-                                     const std::string& lora_path, const float alpha, const bool use_cache) {
+void reshape_text_encoder(std::shared_ptr<ov::Model> model, size_t batch_size, size_t tokenizer_model_max_length) {
+    ov::PartialShape input_shape = model->input(0).get_partial_shape();
+    input_shape[0] = batch_size;
+    input_shape[1] = tokenizer_model_max_length;
+    std::map<size_t, ov::PartialShape> idx_to_shape{{0, input_shape}};
+    model->reshape(idx_to_shape);
+}
+
+void reshape_unet(std::shared_ptr<ov::Model> model,
+                  int64_t batch_size,
+                  int64_t height,
+                  int64_t width,
+                  int64_t tokenizer_model_max_length) {
+    height = height / VAE_SCALE_FACTOR;
+    width = width / VAE_SCALE_FACTOR;
+
+    std::map<std::string, ov::PartialShape> name_to_shape;
+
+    for (auto input : model->inputs()) {
+        std::string input_name = input.get_any_name();
+        name_to_shape[input_name] = input.get_partial_shape();
+        if (input_name == "timestep") {
+            name_to_shape[input_name][0] = 1;
+        } else if (input_name == "sample") {
+            name_to_shape[input_name] = {batch_size, UNET_IN_CHANNELS, height, width};
+        } else if (input_name == "time_ids") {
+            name_to_shape[input_name][0] = batch_size;
+        } else if (input_name == "timestep_cond") {
+            name_to_shape[input_name] = {batch_size, UNET_TIME_COND_PROJ_DIM};
+        } else {
+            name_to_shape[input_name][0] = batch_size;
+            name_to_shape[input_name][1] = TOKENIZER_MODEL_MAX_LENGTH;
+        }
+    }
+
+    model->reshape(name_to_shape);
+}
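Taken together, the new helpers freeze every network to the requested resolution before compilation. A minimal sketch of how they might combine for the default batch-1, 512x512 case, assuming the constants and helpers from this diff (including `reshape_vae_decoder`, defined just below) are in scope; the model directory is illustrative:

```cpp
#include <openvino/openvino.hpp>
#include <string>

// Sketch: static reshape of all three models prior to compile_model(), using
// the helpers and constants introduced by this patch. Not the full pipeline.
void prepare_static_models(ov::Core& core, const std::string& dir) {
    auto text_encoder = core.read_model(dir + "/text_encoder/openvino_model.xml");
    auto unet = core.read_model(dir + "/unet/openvino_model.xml");
    auto vae_decoder = core.read_model(dir + "/vae_decoder/openvino_model.xml");

    const size_t batch_size = 1, height = 512, width = 512;
    reshape_text_encoder(text_encoder, batch_size, TOKENIZER_MODEL_MAX_LENGTH); // input_ids -> [1, 77]
    reshape_unet(unet, batch_size, height, width, TOKENIZER_MODEL_MAX_LENGTH);  // sample -> [1, 4, 64, 64]
    reshape_vae_decoder(vae_decoder, height, width);                            // latents -> [1, 4, 64, 64]
}
```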
+
+void reshape_vae_decoder(std::shared_ptr<ov::Model> model, int64_t height, int64_t width) {
+    height = height / VAE_SCALE_FACTOR;
+    width = width / VAE_SCALE_FACTOR;
+
+    std::map<size_t, ov::PartialShape> idx_to_shape{{0, {1, VAE_DECODER_LATENT_CHANNELS, height, width}}};
+    model->reshape(idx_to_shape);
+}
+
+StableDiffusionModels compile_models(const std::string& model_path,
+                                     const std::string& device,
+                                     const std::string& lora_path,
+                                     const float alpha,
+                                     const bool use_cache,
+                                     const bool use_dynamic_shapes,
+                                     const size_t batch_size,
+                                     const size_t height,
+                                     const size_t width) {
     StableDiffusionModels models;
     ov::Core core;

     if (use_cache)
         core.set_property(ov::cache_dir("./cache_dir"));
+    core.add_extension(TOKENIZERS_LIBRARY_PATH);

     // read LoRA weights
@@ -78,6 +138,9 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
     // Text encoder
     {
         auto text_encoder_model = core.read_model(model_path + "/text_encoder/openvino_model.xml");
+        if (!use_dynamic_shapes) {
+            reshape_text_encoder(text_encoder_model, batch_size, TOKENIZER_MODEL_MAX_LENGTH);
+        }
         apply_lora(text_encoder_model, lora_weights["text_encoder"]);
         models.text_encoder = core.compile_model(text_encoder_model, device);
     }
@@ -85,6 +148,9 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
     // UNet
     {
         auto unet_model = core.read_model(model_path + "/unet/openvino_model.xml");
+        if (!use_dynamic_shapes) {
+            reshape_unet(unet_model, batch_size, height, width, TOKENIZER_MODEL_MAX_LENGTH);
+        }
         apply_lora(unet_model, lora_weights["unet"]);
         models.unet = core.compile_model(unet_model, device);
     }
@@ -92,6 +158,9 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
     // VAE decoder
     {
         auto vae_decoder_model = core.read_model(model_path + "/vae_decoder/openvino_model.xml");
+        if (!use_dynamic_shapes) {
+            reshape_vae_decoder(vae_decoder_model, height, width);
+        }
         ov::preprocess::PrePostProcessor ppp(vae_decoder_model);
         ppp.output().model().set_layout("NCHW");
         ppp.output().tensor().set_layout("NHWC");
@@ -108,15 +177,14 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
 }

 ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt) {
-    const size_t MAX_LENGTH = 77; // 'model_max_length' from 'tokenizer_config.json'
     const size_t HIDDEN_SIZE = static_cast<size_t>(models.text_encoder.output(0).get_partial_shape()[2].get_length());
     const int32_t EOS_TOKEN_ID = 49407, PAD_TOKEN_ID = EOS_TOKEN_ID;
-    const ov::Shape input_ids_shape({1, MAX_LENGTH});
+    const ov::Shape input_ids_shape({1, TOKENIZER_MODEL_MAX_LENGTH});

     ov::InferRequest tokenizer_req = models.tokenizer.create_infer_request();
     ov::InferRequest text_encoder_req = models.text_encoder.create_infer_request();

-    ov::Tensor text_embeddings(ov::element::f32, {1, MAX_LENGTH, HIDDEN_SIZE});
+    ov::Tensor text_embeddings(ov::element::f32, {1, TOKENIZER_MODEL_MAX_LENGTH, HIDDEN_SIZE});
     ov::Tensor input_ids(ov::element::i32, input_ids_shape);
     std::fill_n(input_ids.data<int32_t>(), input_ids.get_size(), PAD_TOKEN_ID);
@@ -192,20 +260,23 @@ ov::Tensor postprocess_image(ov::Tensor decoded_image) {
 }

 int32_t main(int32_t argc, char* argv[]) try {
-    cxxopts::Options options("stable_diffusion", "Stable
Diffusion implementation in C++ using OpenVINO\n");
+    cxxopts::Options options("lcm_dreamshaper", "LCM_Dreamshaper_v7 implementation in C++ using OpenVINO\n");
     options.add_options()
-    ("p,posPrompt", "Initial positive prompt for LCM ", cxxopts::value<std::string>()->default_value("a beautiful pink unicorn"))
+    ("p,posPrompt", "Initial positive prompt for LCM", cxxopts::value<std::string>()->default_value("a beautiful pink unicorn"))
     ("d,device", "AUTO, CPU, or GPU.\nDoesn't apply to Tokenizer model, OpenVINO Tokenizers can be inferred on a CPU device only", cxxopts::value<std::string>()->default_value("CPU"))
     ("step", "Number of diffusion steps", cxxopts::value<uint32_t>()->default_value("4"))
     ("s,seed", "Number of random seed to generate latent for one image output", cxxopts::value<uint32_t>()->default_value("42"))
     ("num", "Number of image output", cxxopts::value<uint32_t>()->default_value("1"))
+    ("height", "Height of output image", cxxopts::value<uint32_t>()->default_value("512"))
+    ("width", "Width of output image", cxxopts::value<uint32_t>()->default_value("512"))
     ("c,useCache", "Use model caching", cxxopts::value<bool>()->default_value("false"))
     ("r,readNPLatent", "Read numpy generated latents from file, only supported for one output image", cxxopts::value<bool>()->default_value("false"))
-    ("m,modelPath", "Specify path of LCM model IRs", cxxopts::value<std::string>()->default_value("../scripts/SimianLuo/LCM_Dreamshaper_v7"))
-    ("t,type", "Specify the type of LCM model IRs (e.g., FP16_static or FP16_dyn)", cxxopts::value<std::string>()->default_value("FP16_static"))
+    ("m,modelPath", "Specify path to LCM model IRs", cxxopts::value<std::string>()->default_value("./models/lcm_dreamshaper_v7"))
+    ("t,type", "Specify the type of LCM model IRs (e.g., FP32, FP16 or INT8)", cxxopts::value<std::string>()->default_value("FP16"))
+    ("dynamic", "Specify the model input shape to use dynamic shape", cxxopts::value<bool>()->default_value("false"))
     ("l,loraPath", "Specify path of LoRA file. (*.safetensors).", cxxopts::value<std::string>()->default_value(""))
-    ("a,alpha", "alpha for LoRA", cxxopts::value<float>()->default_value("0.75"))
+    ("a,alpha", "Specify alpha for LoRA", cxxopts::value<float>()->default_value("0.75"))
     ("h,help", "Print usage");

     cxxopts::ParseResult result;
@@ -227,13 +298,15 @@ int32_t main(int32_t argc, char* argv[]) try {
     const uint32_t num_inference_steps = result["step"].as<uint32_t>();
     const uint32_t user_seed = result["seed"].as<uint32_t>();
     const uint32_t num_images = result["num"].as<uint32_t>();
+    const uint32_t height = result["height"].as<uint32_t>();
+    const uint32_t width = result["width"].as<uint32_t>();
     const bool use_cache = result["useCache"].as<bool>();
     const bool read_np_latent = result["readNPLatent"].as<bool>();
     const std::string model_base_path = result["modelPath"].as<std::string>();
     const std::string model_type = result["type"].as<std::string>();
+    const bool use_dynamic_shapes = result["dynamic"].as<bool>();
     const std::string lora_path = result["loraPath"].as<std::string>();
     const float alpha = result["alpha"].as<float>();
-    const uint32_t height = 512, width = 512;

     OPENVINO_ASSERT(!read_np_latent || (read_np_latent && (num_images == 1)),
         "\"readNPLatent\" option is only supported for one output image. Number of image output was set to: " + std::to_string(num_images));
@@ -248,14 +321,23 @@ int32_t main(int32_t argc, char* argv[]) try {
     std::cout << "OpenVINO version: " << ov::get_openvino_version() << std::endl;
     std::cout << "Running (may take some time) ..."
<< std::endl;

-    // Stable Diffusion pipeline
+    const std::string model_path = model_base_path + "/" + model_type;
+    if (!std::filesystem::exists(model_path)) {
+        std::cerr << "Model IRs for type " << model_type << " don't exist in directory " << model_path << "\n";
+        std::cerr << "Refer to README.md for instructions on exporting an OpenVINO model with a particular data type." << std::endl;
+        return EXIT_FAILURE;
+    }

-    StableDiffusionModels models = compile_models(model_base_path + "/" + model_type, device, lora_path, alpha, use_cache);
+    // Stable Diffusion pipeline
+    const size_t batch_size = 1;
+    StableDiffusionModels models =
+        compile_models(model_path, device, lora_path, alpha, use_cache, use_dynamic_shapes, batch_size, height, width);
     ov::InferRequest unet_infer_request = models.unet.create_infer_request();

     ov::PartialShape sample_shape = models.unet.input("sample").get_partial_shape();
-    OPENVINO_ASSERT(sample_shape.is_dynamic() || (sample_shape[2] * 8 == width && sample_shape[3] * 8 == height),
-        "UNet model has static shapes [1, 4, H/8, W/8] or dynamic shapes [?, 4, ?, ?]");
+    OPENVINO_ASSERT(sample_shape.is_dynamic() ||
+        (sample_shape[2] * VAE_SCALE_FACTOR == height && sample_shape[3] * VAE_SCALE_FACTOR == width),
+        "UNet model must have static shapes [1, 4, H/8, W/8] or dynamic shapes [?, 4, ?, ?]");

     // no negative prompt for LCM model:
     // https://huggingface.co/docs/diffusers/api/pipelines/latent_consistency_models#diffusers.LatentConsistencyModelPipeline
@@ -269,9 +351,9 @@ int32_t main(int32_t argc, char* argv[]) try {
     std::vector<int64_t> timesteps = scheduler->get_timesteps();
     float guidance_scale = 8.0;
-    ov::Tensor guidance_scale_embedding = get_w_embedding(guidance_scale, 256);
+    ov::Tensor guidance_scale_embedding = get_w_embedding(guidance_scale, UNET_TIME_COND_PROJ_DIM);

-    ov::Tensor denoised(ov::element::f32, {1, 4, height / 8, width / 8});
+    ov::Tensor denoised(ov::element::f32, {1, UNET_IN_CHANNELS, height / VAE_SCALE_FACTOR, width / VAE_SCALE_FACTOR});
     for (uint32_t n = 0; n < num_images; n++) {
         std::uint32_t seed = num_images == 1 ?
user_seed: user_seed + n; ov::Tensor latent_model_input = randn_tensor(height, width, read_np_latent, seed); diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers index 60d3ccc42..c55f8e256 160000 --- a/thirdparty/openvino_tokenizers +++ b/thirdparty/openvino_tokenizers @@ -1 +1 @@ -Subproject commit 60d3ccc426984acc623630a8e4d8c8878ec74eb7 +Subproject commit c55f8e2568fe0093f6558b9ef7b49c512a412c14 From 5b9195c51fadf7f2b57fcaf254f1a5b3d1d59062 Mon Sep 17 00:00:00 2001 From: Zlobin Vladimir Date: Thu, 9 May 2024 15:00:10 +0400 Subject: [PATCH 06/10] Remove convert_tokenizer (#425) --- .github/workflows/causal_lm_cpp.yml | 12 +----------- .github/workflows/lcm_dreamshaper_cpp.yml | 18 ++++++------------ .github/workflows/stable_diffusion_1_5_cpp.yml | 18 ++++++------------ .../lcm_dreamshaper_v7/cpp/README.md | 11 +++-------- .../lcm_dreamshaper_v7/cpp/requirements.txt | 2 +- .../lcm_dreamshaper_v7/cpp/src/main.cpp | 2 +- .../stable_diffusion_1_5/cpp/README.md | 11 +++-------- .../stable_diffusion_1_5/cpp/requirements.txt | 2 +- .../stable_diffusion_1_5/cpp/src/main.cpp | 2 +- text_generation/causal_lm/cpp/README.md | 6 ++---- text_generation/causal_lm/cpp/requirements.txt | 1 + thirdparty/openvino_tokenizers | 2 +- 12 files changed, 27 insertions(+), 60 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index cc69e414b..0a6e9dd5a 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -33,10 +33,9 @@ jobs: optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - - name: convert_tokenizer and run + - name: greedy_causal_lm run: | source ./ov/setupvars.sh - convert_tokenizer ./open_llama_3b_v2/ --output ./open_llama_3b_v2/ --with-detokenizer ./build/greedy_causal_lm ./open_llama_3b_v2/ "return 0" cpp-beam_search_causal_lm-ubuntu: @@ -64,7 +63,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt python -c " @@ -157,7 +155,6 @@ jobs: shell: cmd run: | call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64\setupvars.bat - convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer .\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt echo import transformers > ref.py @@ -197,7 +194,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer Qwen/Qwen-7B-Chat --output ./Qwen-7B-Chat/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/ 69 > ./pred.txt cpp-beam_search_causal_lm-Qwen1_5-7B-Chat: @@ -225,7 +221,6 @@ jobs: - name: Run run: | source ./ov/setupvars.sh - convert_tokenizer ./Qwen1.5-7B-Chat/ --output ./Qwen1.5-7B-Chat/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好!" 
> ./pred_qwen15.txt cpp-beam_search_causal_lm-Phi-2: @@ -253,7 +248,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./phi-2/ --output ./phi-2/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./phi-2/ 69 > ./pred.txt cpp-beam_search_causal_lm-notus-7b-v1: @@ -281,7 +275,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./notus-7b-v1/ --output ./notus-7b-v1/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/ 69 > ./pred.txt cpp-speculative_decoding_lm-ubuntu: @@ -305,8 +298,6 @@ jobs: python -m pip install ./thirdparty/openvino_tokenizers/[transformers] optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b - convert_tokenizer ./dolly-v2-3b/ --output ./dolly-v2-3b/ --with-detokenizer - convert_tokenizer ./dolly-v2-7b/ --output ./dolly-v2-7b/ --with-detokenizer cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - name: run and compare @@ -348,7 +339,6 @@ jobs: - name: Run Generation run: | source ./ov/setupvars.sh - convert_tokenizer ./phi-1_5/ --output ./phi-1_5/ --with-detokenizer --trust-remote-code timeout 50s ./build/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt timeout 50s ./build/beam_search_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_beam.txt - name: Compare diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index 131927d76..427fada3a 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -39,7 +39,7 @@ jobs: run: | conda activate openvino_lcm_cpp conda update -c conda-forge --all - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH - name: Install python dependencies @@ -53,9 +53,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - export MODEL_PATH="models/lcm_dreamshaper_v7/FP16" - optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH - convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -66,8 +64,7 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - ./build/lcm_dreamshaper + run: ./build/lcm_dreamshaper lcm_dreamshaper_v7_cpp-windows: runs-on: windows-latest @@ -87,7 +84,7 @@ jobs: run: | conda activate openvino_lcm_cpp conda update -c conda-forge --all - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH - name: Install python dependencies @@ -101,9 +98,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - $env:MODEL_PATH='models/lcm_dreamshaper_v7/FP16' - optimum-cli export openvino --model 
SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $env:MODEL_PATH - convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/ + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -114,5 +109,4 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - & "./build/Release/lcm_dreamshaper.exe" -r --dynamic + run: '& "./build/Release/lcm_dreamshaper.exe" -r --dynamic' diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index ad929ea06..0d0d3ea8b 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -38,7 +38,7 @@ jobs: - name: Install OpenVINO and other conda dependencies run: | conda activate openvino_sd_cpp - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH - name: Install python dependencies @@ -52,9 +52,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp - export MODEL_PATH="models/stable_diffusion_v1_5_ov/FP16" - optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH - convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ + optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -65,8 +63,7 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16 + run: ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16 stable_diffusion_1_5_cpp-windows: runs-on: windows-latest @@ -85,7 +82,7 @@ jobs: - name: Install OpenVINO and other conda dependencies run: | conda activate openvino_sd_cpp - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake - name: Install python dependencies working-directory: ${{ env.working_directory }} @@ -98,9 +95,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp - $env:MODEL_PATH='models/stable_diffusion_v1_5_ov/FP16' - optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $env:MODEL_PATH - convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/ + optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -111,5 +106,4 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - & "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic + run: '& "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic' diff --git a/image_generation/lcm_dreamshaper_v7/cpp/README.md 
b/image_generation/lcm_dreamshaper_v7/cpp/README.md
index d4a62fb27..c993f80c5 100644
--- a/image_generation/lcm_dreamshaper_v7/cpp/README.md
+++ b/image_generation/lcm_dreamshaper_v7/cpp/README.md
@@ -18,7 +18,7 @@ Prepare a python environment and install dependencies:
 conda create -n openvino_lcm_cpp python==3.10
 conda activate openvino_lcm_cpp
 conda update -c conda-forge --all
-conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
 # Ensure that Conda standard libraries are used
 conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
 ```
@@ -37,13 +37,8 @@ conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
 ```

 2. Download the model from Hugging Face and convert it to OpenVINO IR via the [optimum-intel CLI](https://github.com/huggingface/optimum-intel). Example command for downloading and exporting the FP16 model:
-    ```shell
-    export MODEL_PATH="models/lcm_dreamshaper_v7/FP16"
-    # Using optimum-cli for exporting model to OpenVINO format
-    optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH
-    # Converting tokenizer
-    convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
-    ```
+
+    `optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16`

 ### LoRA enabling with safetensors

diff --git a/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
index dbf28af22..7ffbb9213 100644
--- a/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
+++ b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch==2.2.2+cpu
 diffusers==0.27.2
-optimum-intel[nncf,openvino]==1.16.0
+optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@0029e9165a2dad4cfcf787aa63181d9dc0cd49d5
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
index 20e240fa8..546bd170b 100644
--- a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
+++ b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
@@ -192,7 +192,7 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt) {
     tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &pos_prompt});
     tokenizer_req.infer();
     ov::Tensor input_ids_token = tokenizer_req.get_tensor("input_ids");
-    std::copy_n(input_ids_token.data<int32_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());
+    std::copy_n(input_ids_token.data<int64_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());

     // text embeddings
     text_encoder_req.set_tensor("input_ids", input_ids);
diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md
index 2dfa32628..daf638443 100644
--- a/image_generation/stable_diffusion_1_5/cpp/README.md
+++ b/image_generation/stable_diffusion_1_5/cpp/README.md
@@ -18,7 +18,7 @@ Prepare a python environment and install dependencies:
 ```shell
 conda create -n openvino_sd_cpp python==3.10
 conda activate openvino_sd_cpp
-conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
 # Ensure that Conda standard libraries are used
 conda env config vars set
LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
 ```

@@ -40,13 +40,8 @@ python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
 - [dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) to run Stable Diffusion with LoRA adapters.

 Example command for downloading and exporting the FP16 model:
-    ```shell
-    export MODEL_PATH="models/dreamlike_anime_1_0_ov/FP16"
-    # Using optimum-cli for exporting model to OpenVINO format
-    optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH
-    # Converting tokenizer manually (`--convert-tokenizer` flag of `optimum-cli` results in "OpenVINO Tokenizer export for CLIPTokenizer is not supported.")
-    convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
-    ```
+
+    `optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 models/dreamlike_anime_1_0_ov/FP16`

 You can also choose another precision and export an FP32 or INT8 model.

diff --git a/image_generation/stable_diffusion_1_5/cpp/requirements.txt b/image_generation/stable_diffusion_1_5/cpp/requirements.txt
index 289149d13..5e6bfe037 100644
--- a/image_generation/stable_diffusion_1_5/cpp/requirements.txt
+++ b/image_generation/stable_diffusion_1_5/cpp/requirements.txt
@@ -2,5 +2,5 @@
 torch==2.2.2+cpu
 diffusers==0.27.2
 transformers==4.39.3
-optimum-intel[nncf,openvino]==1.16.0
+optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@0029e9165a2dad4cfcf787aa63181d9dc0cd49d5
 huggingface_hub[cli]==0.22.2
diff --git a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
index d1c24c32a..3d6c8a799 100644
--- a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
+++ b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
@@ -216,7 +216,7 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt, s
     tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &prompt});
     tokenizer_req.infer();
     ov::Tensor input_ids_token = tokenizer_req.get_tensor("input_ids");
-    std::copy_n(input_ids_token.data<int32_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());
+    std::copy_n(input_ids_token.data<int64_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());

     // text embeddings
     text_encoder_req.set_tensor("input_ids", input_ids);
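Both `std::copy_n` hunks above are easy to misread because the element types were the whole change: with `convert_tokenizer` and its `--tokenizer-output-type i32` option gone, the exported tokenizer is assumed to emit `i64` token IDs, while the `input_ids` tensor fed to the text encoder stays `i32`, so the copy now narrows element by element. A minimal sketch of the same idea in isolation:

```cpp
// Sketch: narrowing copy from 64-bit tokenizer output into a 32-bit
// input_ids buffer pre-filled with PAD/EOS (49407 for CLIP), as in the
// text_encoder() functions patched above.
#include <algorithm>
#include <cstdint>
#include <vector>

int main() {
    std::vector<std::int64_t> tokenizer_ids{49406, 320, 1125, 49407}; // i64 IDs
    std::vector<std::int32_t> input_ids(77, 49407);                   // i32 buffer
    // Implicit narrowing per element; safe while IDs fit in int32_t.
    std::copy_n(tokenizer_ids.begin(), tokenizer_ids.size(), input_ids.begin());
    return 0;
}
```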
diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md
index a0b4a0a1b..d65c79bad 100644
--- a/text_generation/causal_lm/cpp/README.md
+++ b/text_generation/causal_lm/cpp/README.md
@@ -1,6 +1,6 @@
 # Text generation C++ samples that support most popular models like LLaMA 2

-These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `convert_tokenizer` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.
+These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options, to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `optimum-cli` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of an LLM-powered chatbot in Python.

 ## How it works

@@ -49,7 +49,7 @@ This approach reduces the need for multiple infer requests to the main model, en

 ## Install OpenVINO

-Install [OpenVINO Archives >= 2024.0](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for these branches early testing. `<INSTALL_DIR>` below refers to the extraction location.
+Install [OpenVINO Archives >= 2024.1](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not-yet-released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for early testing of these branches. `<INSTALL_DIR>` below refers to the extraction location.

 ## Build `greedy_causal_lm`, `beam_search_causal_lm` and `openvino_tokenizers`

@@ -81,7 +81,6 @@ python3 -m pip install --upgrade-strategy eager -r requirements.txt
 # Update openvino_tokenizers from the submodule
 python3 -m pip install ./../../../thirdparty/openvino_tokenizers/[transformers]
 optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
-convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer --trust-remote-code
 ```

 #### Windows

@@ -92,7 +91,6 @@
 python -m pip install --upgrade-strategy eager -r requirements.txt
 REM Update openvino_tokenizers from the submodule
 python -m pip install .\..\..\..\thirdparty\openvino_tokenizers\[transformers]
 optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
-convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer --trust-remote-code
 ```

 ## Run
diff --git a/text_generation/causal_lm/cpp/requirements.txt b/text_generation/causal_lm/cpp/requirements.txt
index 89c6a4b4d..019e172dd 100644
--- a/text_generation/causal_lm/cpp/requirements.txt
+++ b/text_generation/causal_lm/cpp/requirements.txt
@@ -1,4 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 optimum[openvino]==1.19.1
+optimum-intel[openvino] @ git+https://github.com/apaniukov/optimum-intel.git@0029e9165a2dad4cfcf787aa63181d9dc0cd49d5
 einops==0.7.0 # For Qwen
 transformers_stream_generator==0.0.4 # For Qwen
diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers
index c55f8e256..37d20ce20 160000
--- a/thirdparty/openvino_tokenizers
+++ b/thirdparty/openvino_tokenizers
@@ -1 +1 @@
-Subproject commit c55f8e2568fe0093f6558b9ef7b49c512a412c14
+Subproject commit 37d20ce209b120f6ffd450484e207ef71f8c8d03

From 6ac17bf5978a60bb6302195c200840c48886f4ef Mon Sep 17 00:00:00 2001
From: Wovchena
Date: Thu, 9 May 2024 15:26:28 +0400
Subject: [PATCH 07/10] upgrade openvino --- .github/workflows/causal_lm_cpp.yml | 2 +- thirdparty/openvino_tokenizers | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 49f91241d..dd22b88d0 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -453,7 +453,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.0/linux/l_openvino_toolkit_ubuntu20_2024.0.0.14509.34caeefd078_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers index 37d20ce20..0e4bb32ca 160000 --- a/thirdparty/openvino_tokenizers +++ b/thirdparty/openvino_tokenizers @@ -1 +1 @@ -Subproject commit 37d20ce209b120f6ffd450484e207ef71f8c8d03 +Subproject commit 0e4bb32ca3412f589e1d094faa8b0aad19ee47ca From 78d0914778ff665800b9e3c13e4bce8ee1a37013 Mon Sep 17 00:00:00 2001 From: Wovchena Date: Thu, 9 May 2024 15:36:25 +0400 Subject: [PATCH 08/10] update newer pipelines --- .github/workflows/causal_lm_cpp.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index dd22b88d0..22a6e6905 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -144,7 +144,7 @@ jobs: " echo "你好! 你好嗎?" passed - timeout 1m ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "Alan Turing was a" "return 0" "你好! 你好嗎?" > ./pred.txt + timeout 1m ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Alan Turing was a" "return 0" "你好! 你好嗎?" 
> ./pred.txt
 python -c "
 import transformers
 with open('pred.txt', 'r') as file:
@@ -369,12 +369,11 @@ jobs:
 - name: Download, convert and build
 run: |
 source ./ov/setupvars.sh
- python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu
- python ./llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./TinyLlama-1.1B-Chat-v1.0/ --precision FP16
- convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --output ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --with-detokenizer
+ python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
+ python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
+ optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
 cmake --build ./build/ --config Release -j
- wait
 - name: run and compare
 run: |
 source ./ov/setupvars.sh
@@ -386,8 +385,8 @@

 Question: Can you please add 2 and 3
 A:' > ./prompt.txt

- ./build/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
- ./build/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "$(<prompt.txt)" > predictions_greedy.txt
+ ./build/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
+ ./build/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_greedy.txt
 python -c "
 with open('predictions_greedy.txt', 'r') as f:
 predicted_greedy = f.readline()
@@ -458,15 +457,16 @@
 - name: Download, convert and build
 run: |
 source ./ov/setupvars.sh
- python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id ikala/redpajama-3b-chat --output_dir ./redpajama-3b-chat/ --precision FP16 &
+ python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
+ python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
+ optimum-cli export openvino --trust-remote-code --weight-format fp16 --model ikala/redpajama-3b-chat redpajama-3b-chat
 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
 cmake --build ./build/ --config Release -j
- wait
 - run: source ./ov/setupvars.sh && convert_tokenizer ./redpajama-3b-chat/pytorch/dldt/FP16/ --output ./redpajama-3b-chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
 - name: Run Generation
 run: |
 source ./ov/setupvars.sh
- timeout 50s ./build/greedy_causal_lm ./redpajama-3b-chat/pytorch/dldt/FP16/ "Alan Turing was a" > ./pred_greedy.txt
+ timeout 50s ./build/greedy_causal_lm ./redpajama-3b-chat/ "Alan Turing was a" > ./pred_greedy.txt
 - name: Compare
 run: |
 python -c "

From f2211a0feb83098c2e1650b089f4ab5804d8c86a Mon Sep 17 00:00:00 2001
From: Wovchena
Date: Thu, 9 May 2024 15:49:40 +0400
Subject: [PATCH 09/10] fix causal_lm_cpp workflows

---
 .github/workflows/causal_lm_cpp.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 22a6e6905..52f865634 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++
b/.github/workflows/causal_lm_cpp.yml @@ -364,7 +364,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -462,7 +462,7 @@ jobs: optimum-cli export openvino --trust-remote-code --weight-format fp16 --model ikala/redpajama-3b-chat redpajama-3b-chat cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - - run: source ./ov/setupvars.sh && convert_tokenizer ./redpajama-3b-chat/pytorch/dldt/FP16/ --output ./redpajama-3b-chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code + - run: source ./ov/setupvars.sh && convert_tokenizer ./redpajama-3b-chat/ --output ./redpajama-3b-chat/ --with-detokenizer --trust-remote-code - name: Run Generation run: | source ./ov/setupvars.sh From c9f56539d227cd1d82e3c08659ff0fd4299e7ec1 Mon Sep 17 00:00:00 2001 From: Wovchena Date: Thu, 9 May 2024 15:53:10 +0400 Subject: [PATCH 10/10] set openvino_tokenizers to releases/2024/1 --- thirdparty/openvino_tokenizers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers index 0e4bb32ca..37d20ce20 160000 --- a/thirdparty/openvino_tokenizers +++ b/thirdparty/openvino_tokenizers @@ -1 +1 @@ -Subproject commit 0e4bb32ca3412f589e1d094faa8b0aad19ee47ca +Subproject commit 37d20ce209b120f6ffd450484e207ef71f8c8d03
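Since the last three patches pin the workflows, archives, and `openvino_tokenizers` submodule to the 2024.1 release line, a quick way to confirm which runtime a local build actually links against is the same `ov::get_openvino_version()` call the samples already print — a minimal sketch:

```cpp
// Prints the linked OpenVINO runtime version; expect a 2024.1.* build number
// when the environment matches the workflows above.
#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Version version = ov::get_openvino_version();
    std::cout << "Description:  " << version.description << '\n';
    std::cout << "Build number: " << version.buildNumber << '\n';
    return 0;
}
```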