From 5aaa62fd01c3ae4d73580c748eae81925ced99d1 Mon Sep 17 00:00:00 2001 From: Zlobin Vladimir Date: Mon, 6 May 2024 13:13:08 +0400 Subject: [PATCH 01/10] Migrate to optimum-cli from llm_bench usage (#417) Ticket 128657 I can't remove `convert_tokenizer` call because `optimum-cli` reports: > OpenVINO Tokenizer version is not compatible with OpenVINO version. Installed OpenVINO version: 2024.1.0,OpenVINO Tokenizers requires 2024.0.0. OpenVINO Tokenizers models will not be added during export. --- .github/dependabot.yml | 4 + .github/workflows/causal_lm_cpp.yml | 97 ++++++++++--------- text_generation/causal_lm/cpp/README.md | 28 +++--- .../causal_lm/cpp/requirements.txt | 4 + 4 files changed, 76 insertions(+), 57 deletions(-) create mode 100644 text_generation/causal_lm/cpp/requirements.txt diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a9b468dff..9ab4587c2 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -8,3 +8,7 @@ updates: directory: "image_generation/lcm_dreamshaper_v7/cpp/scripts/" schedule: interval: "weekly" + - package-ecosystem: "pip" + directory: "text_generation/causal_lm/cpp/" + schedule: + interval: "weekly" diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 9017cd544..b4a38838c 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -3,7 +3,6 @@ on: pull_request: paths: - .github/workflows/causal_lm_cpp.yml - - llm_bench/python/** - text_generation/causal_lm/cpp/* - thirdparty/openvino_tokenizers - "!**.md" @@ -29,15 +28,16 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id openlm-research/open_llama_3b_v2 --output_dir ./open_llama_3b_v2/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: convert_tokenizer and run run: | source ./ov/setupvars.sh - convert_tokenizer ./open_llama_3b_v2/pytorch/dldt/FP16/ --output ./open_llama_3b_v2/pytorch/dldt/FP16/ --with-detokenizer - ./build/greedy_causal_lm ./open_llama_3b_v2/pytorch/dldt/FP16/ "return 0" + convert_tokenizer ./open_llama_3b_v2/ --output ./open_llama_3b_v2/ --with-detokenizer + ./build/greedy_causal_lm ./open_llama_3b_v2/ "return 0" cpp-beam_search_causal_lm-ubuntu: runs-on: ubuntu-20.04 @@ -56,16 +56,17 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./TinyLlama-1.1B-Chat-v1.0/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install 
./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --output ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --with-detokenizer + convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer - timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ 69 > ./pred.txt + timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt python -c " import transformers with open('pred.txt', 'r') as file: @@ -81,7 +82,7 @@ jobs: " echo "69" passed - timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ Hi > ./pred.txt + timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ Hi > ./pred.txt python -c " import transformers with open('pred.txt', 'r') as file: @@ -97,7 +98,7 @@ jobs: " echo "Hi" passed - timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "return 0" > ./pred.txt + timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "return 0" > ./pred.txt python -c " import transformers with open('pred.txt', 'r') as file: @@ -113,7 +114,7 @@ jobs: " echo "return 0" passed - ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "你好! 你好嗎?" > ./pred.txt + ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "你好! 你好嗎?" > ./pred.txt python -c " import transformers with open('pred.txt', 'r') as file: @@ -147,17 +148,18 @@ jobs: shell: cmd run: | call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu - python ./llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./TinyLlama-1.1B-Chat-v1.0/ --precision FP16 + python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - name: Compare shell: cmd run: | call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat - convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --output .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --with-detokenizer + convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer - .\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ "69" > .\pred.txt + .\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt echo import transformers > ref.py echo predictions = open('pred.txt', 'r').read() >> ref.py echo tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') >> ref.py @@ -187,15 +189,16 @@ jobs: - name: Download, convert and build run: | 
source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id Qwen/Qwen-7B-Chat --output_dir ./Qwen-7B-Chat/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./Qwen-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code - timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/pytorch/dldt/FP16/ 69 > ./pred.txt + convert_tokenizer Qwen/Qwen-7B-Chat --output ./Qwen-7B-Chat/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/ 69 > ./pred.txt cpp-beam_search_causal_lm-Qwen1_5-7B-Chat: runs-on: ubuntu-20.04-16-cores @@ -214,15 +217,16 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id Qwen/Qwen1.5-7B-Chat --output_dir ./Qwen1.5-7B-Chat/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen1.5-7B-Chat Qwen1.5-7B-Chat cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: Run run: | source ./ov/setupvars.sh - convert_tokenizer ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code - timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ "你好!" > ./pred_qwen15.txt + convert_tokenizer ./Qwen1.5-7B-Chat/ --output ./Qwen1.5-7B-Chat/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好!" 
> ./pred_qwen15.txt cpp-beam_search_causal_lm-Phi-2: runs-on: ubuntu-20.04-16-cores @@ -241,15 +245,16 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id microsoft/phi-2 --output_dir ./Phi-2/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-2 phi-2 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j 15 - wait - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./Phi-2/pytorch/dldt/FP16/ --output ./Phi-2/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code - timeout 50s ./build/beam_search_causal_lm ./Phi-2/pytorch/dldt/FP16/ 69 > ./pred.txt + convert_tokenizer ./phi-2/ --output ./phi-2/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm ./phi-2/ 69 > ./pred.txt cpp-beam_search_causal_lm-notus-7b-v1: runs-on: ubuntu-20.04-16-cores @@ -268,15 +273,16 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id argilla/notus-7b-v1 --output_dir ./notus-7b-v1/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model argilla/notus-7b-v1 notus-7b-v1 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./notus-7b-v1/pytorch/dldt/FP16/ --output ./notus-7b-v1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code - timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/pytorch/dldt/FP16/ 69 > ./pred.txt + convert_tokenizer ./notus-7b-v1/ --output ./notus-7b-v1/ --with-detokenizer --trust-remote-code + timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/ 69 > ./pred.txt cpp-speculative_decoding_lm-ubuntu: runs-on: ubuntu-20.04-16-cores @@ -295,19 +301,19 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu - python ./llm_bench/python/convert.py --model_id databricks/dolly-v2-3b --output_dir ./dolly-v2-3b/ --precision FP16 - python ./llm_bench/python/convert.py --model_id databricks/dolly-v2-7b --output_dir ./dolly-v2-7b/ --precision FP16 - convert_tokenizer ./dolly-v2-3b/pytorch/dldt/FP16/ --output ./dolly-v2-3b/pytorch/dldt/FP16/ --with-detokenizer - convert_tokenizer ./dolly-v2-7b/pytorch/dldt/FP16/ --output ./dolly-v2-7b/pytorch/dldt/FP16/ --with-detokenizer + python -m pip install 
--upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b + convert_tokenizer ./dolly-v2-3b/ --output ./dolly-v2-3b/ --with-detokenizer + convert_tokenizer ./dolly-v2-7b/ --output ./dolly-v2-7b/ --with-detokenizer cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - wait - name: run and compare run: | source ./ov/setupvars.sh - ./build/speculative_decoding_lm ./dolly-v2-3b/pytorch/dldt/FP16/ ./dolly-v2-7b/pytorch/dldt/FP16/ "Alan Turing was a" > predictions_speculative.txt - ./build/greedy_causal_lm ./dolly-v2-7b/pytorch/dldt/FP16/ "Alan Turing was a" > predictions_greedy.txt + ./build/speculative_decoding_lm ./dolly-v2-3b/ ./dolly-v2-7b/ "Alan Turing was a" > predictions_speculative.txt + ./build/greedy_causal_lm ./dolly-v2-7b/ "Alan Turing was a" > predictions_greedy.txt python -c " with open('predictions_greedy.txt', 'r') as f: predicted_greedy = f.readline() @@ -334,16 +340,17 @@ jobs: - name: Download, convert and build run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id microsoft/phi-1_5 --output_dir ./Phi-1_5/ --precision FP16 & + python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt + python -m pip install ./thirdparty/openvino_tokenizers/[transformers] + optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-1_5 phi-1_5 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j 15 - wait - name: Run Generation run: | source ./ov/setupvars.sh - convert_tokenizer ./Phi-1_5/pytorch/dldt/FP16/ --output ./Phi-1_5/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code - timeout 50s ./build/greedy_causal_lm ./Phi-1_5/pytorch/dldt/FP16/ "Alan Turing was a" > ./pred_greedy.txt - timeout 50s ./build/beam_search_causal_lm ./Phi-1_5/pytorch/dldt/FP16/ "Alan Turing was a" > ./pred_beam.txt + convert_tokenizer ./phi-1_5/ --output ./phi-1_5/ --with-detokenizer --trust-remote-code + timeout 50s ./build/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt + timeout 50s ./build/beam_search_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_beam.txt - name: Compare run: | python -c " diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md index 5ba4f8110..a0b4a0a1b 100644 --- a/text_generation/causal_lm/cpp/README.md +++ b/text_generation/causal_lm/cpp/README.md @@ -77,18 +77,22 @@ The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upg ```sh source /setupvars.sh -python3 -m pip install --upgrade-strategy eager "transformers<4.38" -r ../../../llm_bench/python/requirements.txt ../../../thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu -python3 ../../../llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./TinyLlama-1.1B-Chat-v1.0/ --precision FP16 -convert_tokenizer 
./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --output ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code +python3 -m pip install --upgrade-strategy eager -r requirements.txt +# Update openvino_tokenizers from the submodule +python3 -m pip install ./../../../thirdparty/openvino_tokenizers/[transformers] +optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 +convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer --trust-remote-code ``` #### Windows ```bat \setupvars.bat -python -m pip install --upgrade-strategy eager "transformers<4.38" -r ..\..\..\llm_bench\python\requirements.txt ..\..\..\thirdparty\openvino_tokenizers\[transformers] --extra-index-url https://download.pytorch.org/whl/cpu -python ..\..\..\llm_bench\python\convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir .\TinyLlama-1.1B-Chat-v1.0\ --precision FP16 -convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --output .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --with-detokenizer --trust-remote-code +python -m pip install --upgrade-strategy eager -r requirements.txt +REM Update openvino_tokenizers from the submodule +python -m pip install .\..\..\..\thirdparty\openvino_tokenizers\[transformers] +optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 +convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer --trust-remote-code ``` ## Run @@ -100,14 +104,14 @@ convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\pytorch\dldt\FP16\ --output .\TinyL ### Examples: #### Windows: -1. `/build/Release/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "Why is the Sun yellow?"` -2. `/build/Release/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "Why is the Sun yellow?"` -3. `/build/Release/speculative_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ ./Llama-2-7b-chat-hf/pytorch/dldt/FP16/ "Why is the Sun yellow?"` +1. `/build/Release/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?"` +2. `/build/Release/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?"` +3. `/build/Release/speculative_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ ./Llama-2-7b-chat-hf/ "Why is the Sun yellow?"` #### Linux/MacOS: -1. `./build/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "Why is the Sun yellow?"` -2. `./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "Why is the Sun yellow?"` -3. `./build/speculative_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ ./Llama-2-7b-chat-hf/pytorch/dldt/FP16/ "Why is the Sun yellow?"` +1. `./build/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?"` +2. `./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?"` +3. `./build/speculative_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ ./Llama-2-7b-chat-hf/ "Why is the Sun yellow?"` To enable Unicode characters for Windows cmd open `Region` settings from `Control panel`. `Administrative`->`Change system locale`->`Beta: Use Unicode UTF-8 for worldwide language support`->`OK`. Reboot. 
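The README hunks above replace the llm_bench conversion script with `optimum-cli export openvino`; as the commit message notes, the separate `convert_tokenizer` step stays until the OpenVINO / OpenVINO Tokenizers version mismatch is resolved. For reference, a minimal Python sketch of the same two steps through the optimum-intel and openvino-tokenizers APIs is shown below. It is an assumption-laden illustration, not part of this patch: it presumes the packages pinned in the new requirements.txt, keeps the default weight precision instead of the CLI's `--weight-format fp16`, and uses conventional tokenizer output file names.

```python
# Hedged sketch of the export flow this patch switches the samples to.
# Assumes optimum[openvino] and openvino-tokenizers are installed; the
# fp16 weight compression done by `--weight-format fp16` is omitted here.
from pathlib import Path

from openvino import save_model
from openvino_tokenizers import convert_tokenizer
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
out_dir = Path("TinyLlama-1.1B-Chat-v1.0")

# export=True converts the PyTorch checkpoint to OpenVINO IR on the fly,
# mirroring `optimum-cli export openvino --model <id> <out_dir>`.
model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True)
model.save_pretrained(out_dir)

# Mirrors the separate `convert_tokenizer <dir> --output <dir> --with-detokenizer`
# call that cannot be dropped yet. Output names below are the conventional ones.
hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer, detokenizer = convert_tokenizer(hf_tokenizer, with_detokenizer=True)
save_model(tokenizer, str(out_dir / "openvino_tokenizer.xml"))
save_model(detokenizer, str(out_dir / "openvino_detokenizer.xml"))
```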
diff --git a/text_generation/causal_lm/cpp/requirements.txt b/text_generation/causal_lm/cpp/requirements.txt new file mode 100644 index 000000000..89c6a4b4d --- /dev/null +++ b/text_generation/causal_lm/cpp/requirements.txt @@ -0,0 +1,4 @@ +--extra-index-url https://download.pytorch.org/whl/cpu +optimum[openvino]==1.19.1 +einops==0.7.0 # For Qwen +transformers_stream_generator==0.0.4 # For Qwen From b0169c9fa02f7ee99b30233f97bdd85099e43f7e Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Tue, 7 May 2024 15:01:44 +0800 Subject: [PATCH 02/10] Update openvino and nncf versions (#397) Co-authored-by: Chen Peter Co-authored-by: Zlobin Vladimir --- llm_bench/python/requirements.txt | 4 ++-- llm_bench/python/requirements_2024.1.txt | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llm_bench/python/requirements.txt b/llm_bench/python/requirements.txt index bb0eceba5..1f25fe8ce 100644 --- a/llm_bench/python/requirements.txt +++ b/llm_bench/python/requirements.txt @@ -1,6 +1,7 @@ --extra-index-url https://download.pytorch.org/whl/cpu numpy -openvino>=2024.0.0 +openvino~=2024.1.0 +nncf~=2.10.0 auto-gptq>=0.5.1 # for gptq pillow torch @@ -8,7 +9,6 @@ transformers>=4.33.0 diffusers>=0.22.0 #optimum is in dependency list of optimum-intel git+https://github.com/huggingface/optimum-intel.git@ff792c278502a85444dd116413dbca71aa660599#egg=optimum-intel -git+https://github.com/openvinotoolkit/nncf.git@ec497ce0781fe867d73d5c5bdf8310fdb40604a4#egg=nncf packaging psutil timm diff --git a/llm_bench/python/requirements_2024.1.txt b/llm_bench/python/requirements_2024.1.txt index a0d438887..0ee019d1e 100644 --- a/llm_bench/python/requirements_2024.1.txt +++ b/llm_bench/python/requirements_2024.1.txt @@ -54,6 +54,7 @@ networkx==3.3 ninja==1.11.1.1 numpy==1.26.4 onnx==1.16.0 +openvino==2024.1.0 openvino-telemetry==2024.1.0 optimum-intel @ git+https://github.com/huggingface/optimum-intel.git@ff792c278502a85444dd116413dbca71aa660599 packaging==24.0 From af92812e7231e3ee675fc9aa2618087c7f5b77fe Mon Sep 17 00:00:00 2001 From: Zlobin Vladimir Date: Wed, 8 May 2024 13:40:02 +0400 Subject: [PATCH 03/10] Upgrade openvino and tokenizers (#395) Co-authored-by: yatarkan --- .github/workflows/causal_lm_cpp.yml | 20 +++++++++---------- .github/workflows/lcm_dreamshaper_cpp.yml | 10 +++++----- .../workflows/stable_diffusion_1_5_cpp.yml | 6 ++++-- .../stable_diffusion_1_5/cpp/README.md | 2 +- thirdparty/openvino_tokenizers | 2 +- 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index b4a38838c..cc69e414b 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -23,7 +23,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -51,7 +51,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar 
--directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -142,12 +142,12 @@ jobs: - name: Install OpenVINO shell: bash run: | - curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64.zip + curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/windows/w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64.zip unzip ov.zip - name: Download, convert and build shell: cmd run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat + call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64\setupvars.bat python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 @@ -156,7 +156,7 @@ jobs: - name: Compare shell: cmd run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64\setupvars.bat + call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64\setupvars.bat convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer .\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt @@ -184,7 +184,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -212,7 +212,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -240,7 +240,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -268,7 +268,7 @@ jobs: - name: Install 
OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -335,7 +335,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index 9ce91bc60..001265e36 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -22,7 +22,7 @@ jobs: - name: Initialize OpenVINO run: | mkdir openvino - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./openvino/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./openvino/ --strip-components 1 -xz sudo ./openvino/install_dependencies/install_openvino_dependencies.sh - name: Download / convert a model / tokenizer run: | @@ -55,12 +55,12 @@ jobs: - name: Initialize OpenVINO shell: cmd run: | - curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64.zip + curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/windows/w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64.zip unzip ov.zip - name: Download / convert a model / tokenizer shell: cmd run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64/setupvars.bat + call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat cd ./image_generation/lcm_dreamshaper_v7/cpp/scripts/ python -m pip install -r ./requirements.txt python -m pip install ../../../../thirdparty/openvino_tokenizers/ @@ -68,13 +68,13 @@ jobs: - name: Build app shell: cmd run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64/setupvars.bat + call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat cd ./image_generation/lcm_dreamshaper_v7/cpp/ cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release --parallel - name: Run app shell: cmd run: | - call w_openvino_toolkit_windows_2024.1.0.dev20240304_x86_64/setupvars.bat + call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat cd ./image_generation/lcm_dreamshaper_v7/cpp/build/ call "./Release/lcm_dreamshaper.exe" diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml 
b/.github/workflows/stable_diffusion_1_5_cpp.yml index f3f7e5285..16ba21891 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -38,7 +38,7 @@ jobs: - name: Install OpenVINO and other conda dependencies run: | conda activate openvino_sd_cpp - conda install -c conda-forge openvino=2024.0.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH - name: Install python dependencies @@ -85,13 +85,15 @@ jobs: - name: Install OpenVINO and other conda dependencies run: | conda activate openvino_sd_cpp - conda install -c conda-forge openvino=2024.0.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake - name: Install python dependencies working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp python -m pip install -r requirements.txt + $env:SPM_PROTOBUF_PROVIDER = 'internal' + $env:SPM_ABSL_PROVIDER = 'internal' python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - name: Download and convert model and tokenizer diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md index 2b4cdb031..2dfa32628 100644 --- a/image_generation/stable_diffusion_1_5/cpp/README.md +++ b/image_generation/stable_diffusion_1_5/cpp/README.md @@ -18,7 +18,7 @@ Prepare a python environment and install dependencies: ```shell conda create -n openvino_sd_cpp python==3.10 conda activate openvino_sd_cpp -conda install -c conda-forge openvino=2024.0.0 c-compiler cxx-compiler make cmake +conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake # Ensure that Conda standard libraries are used conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH ``` diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers index 0e4bb32ca..60d3ccc42 160000 --- a/thirdparty/openvino_tokenizers +++ b/thirdparty/openvino_tokenizers @@ -1 +1 @@ -Subproject commit 0e4bb32ca3412f589e1d094faa8b0aad19ee47ca +Subproject commit 60d3ccc426984acc623630a8e4d8c8878ec74eb7 From a3d4153382e2d86b7a7ae274bee5b8b8cb97999f Mon Sep 17 00:00:00 2001 From: Zlobin Vladimir Date: Wed, 8 May 2024 14:36:59 +0400 Subject: [PATCH 04/10] Add bandit scan (#386) --- .github/workflows/bandit.yml | 16 ++ bandit.yml | 398 ++++++++++++++++++++++++++++++++++ llm_bench/python/benchmark.py | 6 +- 3 files changed, 417 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/bandit.yml create mode 100644 bandit.yml diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml new file mode 100644 index 000000000..9faa853a2 --- /dev/null +++ b/.github/workflows/bandit.yml @@ -0,0 +1,16 @@ +name: python -m bandit --recursive --configfile bandit.yml . +on: + pull_request: + paths-ignore: + - 'thirdparty' + - '**.md' +jobs: + bandit: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: 3.11 + - run: python -m pip install bandit + - run: python -m bandit --recursive --configfile bandit.yml . diff --git a/bandit.yml b/bandit.yml new file mode 100644 index 000000000..be2fd3da5 --- /dev/null +++ b/bandit.yml @@ -0,0 +1,398 @@ +### This config may optionally select a subset of tests to run or skip by +### filling out the 'tests' and 'skips' lists given below. 
If no tests are +### specified for inclusion then it is assumed all tests are desired. The skips +### set will remove specific tests from the include set. This can be controlled +### using the -t/-s CLI options. Note that the same test ID should not appear +### in both 'tests' and 'skips', this would be nonsensical and is detected by +### Bandit at runtime. + +# Available tests: +# B101 : assert_used +# B102 : exec_used +# B103 : set_bad_file_permissions +# B104 : hardcoded_bind_all_interfaces +# B105 : hardcoded_password_string +# B106 : hardcoded_password_funcarg +# B107 : hardcoded_password_default +# B108 : hardcoded_tmp_directory +# B110 : try_except_pass +# B112 : try_except_continue +# B201 : flask_debug_true +# B301 : pickle +# B302 : marshal +# B303 : md5 +# B304 : ciphers +# B305 : cipher_modes +# B306 : mktemp_q +# B307 : eval +# B308 : mark_safe +# B310 : urllib_urlopen +# B311 : random +# B312 : telnetlib +# B313 : xml_bad_cElementTree +# B314 : xml_bad_ElementTree +# B315 : xml_bad_expatreader +# B316 : xml_bad_expatbuilder +# B317 : xml_bad_sax +# B318 : xml_bad_minidom +# B319 : xml_bad_pulldom +# B320 : xml_bad_etree +# B321 : ftplib +# B323 : unverified_context +# B324 : hashlib_new_insecure_functions +# B401 : import_telnetlib +# B402 : import_ftplib +# B403 : import_pickle +# B404 : import_subprocess +# B405 : import_xml_etree +# B406 : import_xml_sax +# B407 : import_xml_expat +# B408 : import_xml_minidom +# B409 : import_xml_pulldom +# B410 : import_lxml +# B411 : import_xmlrpclib +# B412 : import_httpoxy +# B413 : import_pycrypto +# B501 : request_with_no_cert_validation +# B502 : ssl_with_bad_version +# B503 : ssl_with_bad_defaults +# B504 : ssl_with_no_version +# B505 : weak_cryptographic_key +# B506 : yaml_load +# B507 : ssh_no_host_key_verification +# B601 : paramiko_calls +# B602 : subprocess_popen_with_shell_equals_true +# B603 : subprocess_without_shell_equals_true +# B604 : any_other_function_with_shell_equals_true +# B605 : start_process_with_a_shell +# B606 : start_process_with_no_shell +# B607 : start_process_with_partial_path +# B608 : hardcoded_sql_expressions +# B609 : linux_commands_wildcard_injection +# B610 : django_extra_used +# B611 : django_rawsql_used +# B701 : jinja2_autoescape_false +# B702 : use_of_mako_templates +# B703 : django_mark_safe + +# (optional) list included test IDs here, eg '[B101, B406]': +# IPAS Required Checkers. Do not disable these +# Additional checkers may be added if desired +tests: + [ 'B301', 'B302', 'B303', 'B304', 'B305', 'B306', 'B308', 'B310', 'B311', 'B312', 'B313', 'B314', 'B315', 'B316', 'B317', 'B318', 'B319', 'B320', 'B321', 'B323', 'B324', 'B401', 'B402', 'B403', 'B404', 'B405', 'B406', 'B407', 'B408', 'B409', 'B410', 'B411', 'B412', 'B413'] + +# (optional) list skipped test IDs here, eg '[B101, B406]': +# The following checkers are not required but be added to tests list if desired +skips: + [ 'B101', 'B102', 'B103', 'B104', 'B105', 'B106', 'B107', 'B108', 'B110', 'B112', 'B201', 'B501', 'B502', 'B503', 'B504', 'B505', 'B506', 'B507', 'B601', 'B602', 'B603', 'B604', 'B605', 'B606', 'B607', 'B608', 'B609', 'B610', 'B611', 'B701', 'B702', 'B703'] + +### (optional) plugin settings - some test plugins require configuration data +### that may be given here, per-plugin. All bandit test plugins have a built in +### set of sensible defaults and these will be used if no configuration is +### provided. It is not necessary to provide settings for every (or any) plugin +### if the defaults are acceptable. 
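+### Note (not in the original config): B324 (hashlib_new_insecure_functions)
+### is enabled in the 'tests' list above. hashlib calls that pass
+### usedforsecurity=False, as the benchmark.py hunk in this patch does for
+### its md5 output checksums, are treated as non-security uses by bandit
+### and are not reported.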
+ +any_other_function_with_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +assert_used: + skips: [] +hardcoded_tmp_directory: + tmp_dirs: + - /tmp + - /var/tmp + - /dev/shm +linux_commands_wildcard_injection: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +ssl_with_bad_defaults: + bad_protocol_versions: + - PROTOCOL_SSLv2 + - SSLv2_METHOD + - SSLv23_METHOD + - PROTOCOL_SSLv3 + - PROTOCOL_TLSv1 + - SSLv3_METHOD + - TLSv1_METHOD +ssl_with_bad_version: + bad_protocol_versions: + - PROTOCOL_SSLv2 + - SSLv2_METHOD + - SSLv23_METHOD + - PROTOCOL_SSLv3 + - PROTOCOL_TLSv1 + - SSLv3_METHOD + - TLSv1_METHOD +start_process_with_a_shell: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +start_process_with_no_shell: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +start_process_with_partial_path: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: 
+ - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +subprocess_popen_with_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +subprocess_without_shell_equals_true: + no_shell: + - os.execl + - os.execle + - os.execlp + - os.execlpe + - os.execv + - os.execve + - os.execvp + - os.execvpe + - os.spawnl + - os.spawnle + - os.spawnlp + - os.spawnlpe + - os.spawnv + - os.spawnve + - os.spawnvp + - os.spawnvpe + - os.startfile + shell: + - os.system + - os.popen + - os.popen2 + - os.popen3 + - os.popen4 + - popen2.popen2 + - popen2.popen3 + - popen2.popen4 + - popen2.Popen3 + - popen2.Popen4 + - commands.getoutput + - commands.getstatusoutput + subprocess: + - subprocess.Popen + - subprocess.call + - subprocess.check_call + - subprocess.check_output + - subprocess.run +try_except_continue: + check_typed_exception: false +try_except_pass: + check_typed_exception: false +weak_cryptographic_key: + weak_key_size_dsa_high: 1024 + weak_key_size_dsa_medium: 2048 + weak_key_size_ec_high: 160 + weak_key_size_ec_medium: 224 + weak_key_size_rsa_high: 1024 + weak_key_size_rsa_medium: 2048 +exclude_dirs: + - thirdparty diff --git a/llm_bench/python/benchmark.py b/llm_bench/python/benchmark.py index 3f1d1fa11..6b39fc936 100644 --- a/llm_bench/python/benchmark.py +++ b/llm_bench/python/benchmark.py @@ -129,7 +129,7 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list, result_text = generated_text[bs_idx] if args["output_dir"] is not None: utils.output_file.output_gen_text(result_text, args, model_precision, prompt_index, num, bs_idx, proc_id) - result_md5_list.append(hashlib.md5(result_text.encode()).hexdigest()) + result_md5_list.append(hashlib.md5(result_text.encode(), usedforsecurity=False).hexdigest()) if num == 0: warmup_md5[prompt_index] = result_md5_list per_token_time = generation_time * 1000 / (num_tokens / args['batch_size']) @@ -239,7 +239,7 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list, mem_consumption.clear_max_memory_consumption() for bs_idx in range(args['batch_size']): rslt_img_fn = utils.output_file.output_gen_image(res[bs_idx], args, image_id, num, bs_idx, proc_id, '.png') - result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes()).hexdigest()) + result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes(), usedforsecurity=False).hexdigest()) generation_time = end - start iter_data = gen_iterate_data( iter_idx=num, @@ -339,7 +339,7 @@ def run_ldm_super_resolution(img, num, pipe, args, framework, iter_data_list, im result_md5_list = [] if framework == 'ov': rslt_img_fn = utils.output_file.output_gen_image(res[0], args, image_id, num, None, proc_id, '.png') - result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes()).hexdigest()) + result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes(), usedforsecurity=False).hexdigest()) generation_time = end - start 
iter_data = gen_iterate_data( From 387c72865b40052e4047c1e26318ba1caf2d06bd Mon Sep 17 00:00:00 2001 From: Yaroslav Tarkan Date: Wed, 8 May 2024 15:49:31 +0300 Subject: [PATCH 05/10] [Port 24.1] Move from image_generation python conversion scripts to optimum-cli (LCM Dreamshaper v7 model) (#402) --- .github/workflows/lcm_dreamshaper_cpp.yml | 142 +++++++++++------- .../workflows/stable_diffusion_1_5_cpp.yml | 2 - .../common/diffusers/src/scheduler_lcm.cpp | 2 +- .../lcm_dreamshaper_v7/cpp/README.md | 50 +++--- .../np_latents_512x512.txt | 0 .../torch_noise_step_0.txt | 0 .../torch_noise_step_1.txt | 0 .../torch_noise_step_2.txt | 0 .../cpp/{scripts => }/requirements.txt | 0 .../cpp/scripts/convert_model.py | 41 ----- .../lcm_dreamshaper_v7/cpp/src/main.cpp | 120 ++++++++++++--- thirdparty/openvino_tokenizers | 2 +- 12 files changed, 221 insertions(+), 138 deletions(-) rename image_generation/lcm_dreamshaper_v7/cpp/{scripts => latents}/np_latents_512x512.txt (100%) rename image_generation/lcm_dreamshaper_v7/cpp/{scripts => latents}/torch_noise_step_0.txt (100%) rename image_generation/lcm_dreamshaper_v7/cpp/{scripts => latents}/torch_noise_step_1.txt (100%) rename image_generation/lcm_dreamshaper_v7/cpp/{scripts => latents}/torch_noise_step_2.txt (100%) rename image_generation/lcm_dreamshaper_v7/cpp/{scripts => }/requirements.txt (100%) delete mode 100644 image_generation/lcm_dreamshaper_v7/cpp/scripts/convert_model.py diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index 001265e36..131927d76 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -1,4 +1,5 @@ name: lcm_dreamshaper + on: pull_request: paths: @@ -6,75 +7,112 @@ on: - image_generation/common/** - .github/workflows/lcm_dreamshaper_cpp.yml - thirdparty/openvino_tokenizers + +env: + working_directory: "./image_generation/lcm_dreamshaper_v7/cpp/" + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true + jobs: lcm_dreamshaper_v7_cpp-linux: runs-on: ubuntu-20.04 + defaults: + run: + # Do not ignore bash profile files. 
From: + # https://github.com/marketplace/actions/setup-miniconda#important + shell: bash -l {0} steps: - uses: actions/checkout@v4 with: submodules: recursive - - uses: actions/setup-python@v4 + + - name: Setup conda + uses: conda-incubator/setup-miniconda@v3 with: - python-version: 3.8 - - name: Initialize OpenVINO + miniconda-version: "latest" + activate-environment: openvino_lcm_cpp + python-version: "3.10" + + - name: Install OpenVINO and other conda dependencies run: | - mkdir openvino - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./openvino/ --strip-components 1 -xz - sudo ./openvino/install_dependencies/install_openvino_dependencies.sh - - name: Download / convert a model / tokenizer + conda activate openvino_lcm_cpp + conda update -c conda-forge --all + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH + + - name: Install python dependencies + working-directory: ${{ env.working_directory }} run: | - source ./openvino/setupvars.sh - cd ./image_generation/lcm_dreamshaper_v7/cpp/scripts/ - python -m pip install -U pip - python -m pip install -r ./requirements.txt - python -m pip install ../../../../thirdparty/openvino_tokenizers/ - python convert_model.py -lcm "SimianLuo/LCM_Dreamshaper_v7" -t "FP16" + conda activate openvino_lcm_cpp + python -m pip install -r requirements.txt + python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] + + - name: Download and convert model and tokenizer + working-directory: ${{ env.working_directory }} + run: | + conda activate openvino_lcm_cpp + export MODEL_PATH="models/lcm_dreamshaper_v7/FP16" + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH + convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ + - name: Build app + working-directory: ${{ env.working_directory }} run: | - source ./openvino/setupvars.sh - cd ./image_generation/lcm_dreamshaper_v7/cpp/ + conda activate openvino_lcm_cpp cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ cmake --build ./build/ --config Release --parallel + - name: Run app + working-directory: ${{ env.working_directory }} run: | - source ./openvino/setupvars.sh - cd ./image_generation/lcm_dreamshaper_v7/cpp/build/ - ./lcm_dreamshaper + ./build/lcm_dreamshaper + lcm_dreamshaper_v7_cpp-windows: - runs-on: windows-latest - steps: - - uses: actions/checkout@v4 - with: - submodules: recursive - - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: Initialize OpenVINO - shell: cmd - run: | - curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/windows/w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64.zip - unzip ov.zip - - name: Download / convert a model / tokenizer - shell: cmd - run: | - call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat - cd ./image_generation/lcm_dreamshaper_v7/cpp/scripts/ - python -m pip install -r ./requirements.txt - python -m pip install ../../../../thirdparty/openvino_tokenizers/ - python convert_model.py -lcm "SimianLuo/LCM_Dreamshaper_v7" -t "FP16" - - name: Build app - shell: cmd - run: | - call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat - cd ./image_generation/lcm_dreamshaper_v7/cpp/ - cmake -DCMAKE_BUILD_TYPE=Release -S 
./ -B ./build/ - cmake --build ./build/ --config Release --parallel - - name: Run app - shell: cmd - run: | - call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64/setupvars.bat - cd ./image_generation/lcm_dreamshaper_v7/cpp/build/ - call "./Release/lcm_dreamshaper.exe" + runs-on: windows-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Setup conda + uses: conda-incubator/setup-miniconda@v3 + with: + miniconda-version: "latest" + activate-environment: openvino_lcm_cpp + python-version: "3.10" + + - name: Install OpenVINO and other conda dependencies + run: | + conda activate openvino_lcm_cpp + conda update -c conda-forge --all + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH + + - name: Install python dependencies + working-directory: ${{ env.working_directory }} + run: | + conda activate openvino_lcm_cpp + python -m pip install -r requirements.txt + python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] + + - name: Download and convert model and tokenizer + working-directory: ${{ env.working_directory }} + run: | + conda activate openvino_lcm_cpp + $env:MODEL_PATH='models/lcm_dreamshaper_v7/FP16' + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $env:MODEL_PATH + convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/ + + - name: Build app + working-directory: ${{ env.working_directory }} + run: | + conda activate openvino_lcm_cpp + cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ + cmake --build ./build/ --config Release --parallel + + - name: Run app + working-directory: ${{ env.working_directory }} + run: | + & "./build/Release/lcm_dreamshaper.exe" -r --dynamic diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index 16ba21891..ad929ea06 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -92,8 +92,6 @@ jobs: run: | conda activate openvino_sd_cpp python -m pip install -r requirements.txt - $env:SPM_PROTOBUF_PROVIDER = 'internal' - $env:SPM_ABSL_PROVIDER = 'internal' python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - name: Download and convert model and tokenizer diff --git a/image_generation/common/diffusers/src/scheduler_lcm.cpp b/image_generation/common/diffusers/src/scheduler_lcm.cpp index af82c981a..d5f97b677 100644 --- a/image_generation/common/diffusers/src/scheduler_lcm.cpp +++ b/image_generation/common/diffusers/src/scheduler_lcm.cpp @@ -192,7 +192,7 @@ std::map LCMScheduler::step(ov::Tensor noise_pred, ov:: if (inference_step != num_inference_steps - 1) { std::vector noise; if (read_torch_noise) { - std::string noise_file = "../scripts/torch_noise_step_" + std::to_string(inference_step) + ".txt"; + std::string noise_file = "./latents/torch_noise_step_" + std::to_string(inference_step) + ".txt"; noise = read_vector_from_txt(noise_file); } else { noise = randn_function(noise_pred.get_size(), seed); diff --git a/image_generation/lcm_dreamshaper_v7/cpp/README.md b/image_generation/lcm_dreamshaper_v7/cpp/README.md index f7f6a7615..d4a62fb27 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/README.md +++ b/image_generation/lcm_dreamshaper_v7/cpp/README.md @@ -2,19 +2,25 @@ The pure C++ text-to-image pipeline, driven by the OpenVINO native API for SD v1.5 Latent 
Consistency Model with LCM Scheduler. It includes advanced features like LoRA integration with safetensors and [OpenVINO Tokenizers](https://github.com/openvinotoolkit/openvino_tokenizers). Loading `openvino_tokenizers` to `ov::Core` enables tokenization. [The common folder](../../common/) contains schedulers for image generation and `imwrite()` for saving `bmp` images. This demo has been tested for Linux platform only. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/263-latent-consistency-models-image-generation/263-lcm-lora-controlnet.ipynb) which provides an example of image generaztion in Python. > [!NOTE] ->This tutorial assumes that the current working directory is `/image_generation/lcm_dreamshaper_v7/cpp/` and all paths are relative to this folder. +> This tutorial assumes that the current working directory is `/image_generation/lcm_dreamshaper_v7/cpp/` and all paths are relative to this folder. ## Step 1: Prepare build environment +Prerequisites: +- Conda ([installation guide](https://conda.io/projects/conda/en/latest/user-guide/install/index.html)) + C++ Packages: * [CMake](https://cmake.org/download/): Cross-platform build tool -* [OpenVINO](https://docs.openvino.ai/2023.2/openvino_docs_install_guides_overview.html): Model inference +* [OpenVINO](https://docs.openvino.ai/2024/get-started/install-openvino.html): Model inference Prepare a python environment and install dependencies: ```shell conda create -n openvino_lcm_cpp python==3.10 conda activate openvino_lcm_cpp -conda install -c conda-forge openvino c-compiler cxx-compiler make +conda update -c conda-forge --all +conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake +# Ensure that Conda standard libraries are used +conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH ``` ## Step 2: Latent Consistency Model and Tokenizer models @@ -26,20 +32,19 @@ conda install -c conda-forge openvino c-compiler cxx-compiler make ```shell git submodule update --init conda activate openvino_lcm_cpp - python -m pip install -r scripts/requirements.txt + python -m pip install -r requirements.txt python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] ``` -2. Run model conversion script to download and convert PyTorch model to OpenVINO IR via [optimum-intel](https://github.com/huggingface/optimum-intel). Please, use the script `scripts/convert_model.py` to convert the model: - +2. Download the model from Huggingface and convert it to OpenVINO IR via [optimum-intel CLI](https://github.com/huggingface/optimum-intel). Example command for downloading and exporting FP16 model: ```shell - cd scripts - python convert_model.py -lcm "SimianLuo/LCM_Dreamshaper_v7" -t FP16 + export MODEL_PATH="models/lcm_dreamshaper_v7/FP16" + # Using optimum-cli for exporting model to OpenVINO format + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH + # Converting tokenizer + convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ ``` -> [!NOTE] ->Only static model is currently supported for this sample. - ### LoRA enabling with safetensors Refer to [python pipeline blog](https://blog.openvino.ai/blog-posts/enable-lora-weights-with-stable-diffusion-controlnet-pipeline). @@ -65,19 +70,20 @@ Usage: lcm_dreamshaper [OPTION...] 
```

-* `-p, --posPrompt arg` Initial positive prompt for SD (default: cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting)
+* `-p, --posPrompt arg` Initial positive prompt for LCM (default: a beautiful pink unicorn)
* `-d, --device arg` AUTO, CPU, or GPU. Doesn't apply to Tokenizer model, OpenVINO Tokenizers can be inferred on a CPU device only (default: CPU)
-* `--step arg` Number of diffusion step ( default: 20)
+* `--step arg` Number of diffusion steps (default: 4)
* `-s, --seed arg` Number of random seed to generate latent (default: 42)
-* `--num arg` Number of image output(default: 1)
+* `--num arg` Number of image output (default: 1)
* `--height arg` Height of output image (default: 512)
* `--width arg` Width of output image (default: 512)
* `-c, --useCache` Use model caching
-* `-r, --readNPLatent` Read numpy generated latents from file
-* `-m, --modelPath arg` Specify path of SD model IR (default: ../scripts/SimianLuo/LCM_Dreamshaper_v7)
-* `-t, --type arg` Specify the type of SD model IR (FP16_static or FP16_dyn) (default: FP16_static)
-* `-l, --loraPath arg` Specify path of lora file. (*.safetensors). (default: )
-* `-a, --alpha arg` alpha for lora (default: 0.75)
+* `-r, --readNPLatent` Read numpy generated latents from file, only supported for one output image
+* `-m, --modelPath arg` Specify path to LCM model IRs (default: ./models/lcm_dreamshaper_v7)
+* `-t, --type arg` Specify the type of LCM model IRs (e.g., FP32, FP16 or INT8) (default: FP16)
+* `--dynamic` Specify the model input shape to use dynamic shape
+* `-l, --loraPath arg` Specify path to LoRA file (*.safetensors) (default: )
+* `-a, --alpha arg` Specify alpha for LoRA (default: 0.75)
* `-h, --help` Print usage

> [!NOTE]
@@ -89,15 +95,15 @@ Positive prompt: a beautiful pink unicorn

Read the numpy latent input and noise for the scheduler from files, instead of generating them with the C++ standard library, to stay aligned with the Python pipeline.
-* Generate image with random data generated by Python `./build/lcm_dreamshaper -r`
+* Generate image with random data generated by Python: `./build/lcm_dreamshaper -r`

![image](./python_random.bmp)

-* Generate image with C++ lib generated latent and noise : `./build/lcm_dreamshaper`
+* Generate image with C++ lib generated latent and noise: `./build/lcm_dreamshaper`

![image](./cpp_random.bmp)

-* Generate image with soulcard lora and C++ generated latent and noise `./stable_diffusion -r -l path/to/soulcard.safetensors`
+* Generate image with soulcard LoRA and C++ generated latent and noise: `./build/lcm_dreamshaper -r -l path/to/soulcard.safetensors`

![image](./lora_cpp_random.bmp)
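The `-r` examples above read pre-generated latents from the text files that this patch moves into `latents/`. As a rough sketch of the format those files are assumed to use (whitespace-separated floats), the standalone `read_floats` helper below is hypothetical and only mirrors what `read_vector_from_txt` in `scheduler_lcm.cpp` appears to do:

```cpp
// Hypothetical reader for files such as latents/np_latents_512x512.txt.
// Assumes plain whitespace-separated float values, e.g. a numpy.savetxt dump.
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>

std::vector<float> read_floats(const std::string& file_name) {
    std::ifstream file(file_name);
    if (!file.is_open())
        throw std::runtime_error("Cannot open " + file_name);
    std::vector<float> values;
    for (float value; file >> value;)
        values.push_back(value);
    return values;
}

int main() {
    // A 1x4x64x64 latent for a 512x512 image should yield 16384 values.
    std::vector<float> latents = read_floats("./latents/np_latents_512x512.txt");
    return latents.size() == 16384 ? 0 : 1;
}
```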
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/np_latents_512x512.txt b/image_generation/lcm_dreamshaper_v7/cpp/latents/np_latents_512x512.txt
similarity index 100%
rename from image_generation/lcm_dreamshaper_v7/cpp/scripts/np_latents_512x512.txt
rename to image_generation/lcm_dreamshaper_v7/cpp/latents/np_latents_512x512.txt
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_0.txt b/image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_0.txt
similarity index 100%
rename from image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_0.txt
rename to image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_0.txt
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_1.txt b/image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_1.txt
similarity index 100%
rename from image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_1.txt
rename to image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_1.txt
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_2.txt b/image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_2.txt
similarity index 100%
rename from image_generation/lcm_dreamshaper_v7/cpp/scripts/torch_noise_step_2.txt
rename to image_generation/lcm_dreamshaper_v7/cpp/latents/torch_noise_step_2.txt
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/requirements.txt b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
similarity index 100%
rename from image_generation/lcm_dreamshaper_v7/cpp/scripts/requirements.txt
rename to image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/scripts/convert_model.py b/image_generation/lcm_dreamshaper_v7/cpp/scripts/convert_model.py
deleted file mode 100644
index c55ec0ecc..000000000
--- a/image_generation/lcm_dreamshaper_v7/cpp/scripts/convert_model.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from pathlib import Path
-import argparse
-from optimum.intel.openvino import OVLatentConsistencyModelPipeline
-from transformers import AutoTokenizer
-from openvino_tokenizers import convert_tokenizer
-from openvino import Type, save_model
-
-
-def parse_args() -> argparse.Namespace:
-    """Parse and return command line arguments."""
-    parser = argparse.ArgumentParser(add_help=False)
-    args = parser.add_argument_group('Options')
-    args.add_argument('-h', '--help', action = 'help',
-                      help='Show this help message and exit.')
-    args.add_argument('-t', '--type', type = str, default = "FP32", required = True,
-                      help='Required. data type, FP32, FP16.')
-    args.add_argument('-lcm','--lcm_weights', type = str, default="SimianLuo/LCM_Dreamshaper_v7", required = True,
-                      help='Specify the path of lcm model')
-    return parser.parse_args()
-
-args = parse_args()
-output_path = Path(args.lcm_weights) / (args.type + "_static")
-
-###convert LCM model to IR
-
-model = OVLatentConsistencyModelPipeline.from_pretrained(args.lcm_weights, trust_remote_code=True, export=True, compile=False)
-if args.type == "FP16":
-    model.half()
-
-model.reshape(1, 512, 512, 1)
-
-model.compile()
-model.save_pretrained(output_path)
-
-# convert tokenizer
-
-tokenizer_path = output_path / "tokenizer"
-hf_tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
-ov_tokenizer_encoder = convert_tokenizer(hf_tokenizer, tokenizer_output_type=Type.i32)
-
-save_model(ov_tokenizer_encoder, tokenizer_path / "openvino_tokenizer.xml", compress_to_fp16=False)
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
index 1df11bee2..20e240fa8 100644
--- a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
+++ b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
@@ -21,11 +21,17 @@
 #include "lora.hpp"
 #include "imwrite.hpp"

+const size_t TOKENIZER_MODEL_MAX_LENGTH = 77; // 'model_max_length' parameter from 'tokenizer_config.json'
+const int64_t UNET_IN_CHANNELS = 4; // 'in_channels' parameter from 'unet/config.json'
+const int64_t UNET_TIME_COND_PROJ_DIM = 256; // 'time_cond_proj_dim' parameter from 'unet/config.json'
+const int64_t VAE_DECODER_LATENT_CHANNELS = 4; // 'latent_channels' parameter from 'vae_decoder/config.json'
+const size_t VAE_SCALE_FACTOR = 8;
+
 ov::Tensor randn_tensor(uint32_t height, uint32_t width, bool use_np_latents, uint32_t seed = 42) {
-    ov::Tensor noise(ov::element::f32, {1, 4, height / 8, width / 8});
+    ov::Tensor noise(ov::element::f32, {1, UNET_IN_CHANNELS, height / VAE_SCALE_FACTOR, width / VAE_SCALE_FACTOR});
     if (use_np_latents) {
         // read np generated latents with default seed 42
-        const char * latent_file_name = "../scripts/np_latents_512x512.txt";
+        const char * latent_file_name = "./latents/np_latents_512x512.txt";
         std::ifstream latent_copy_file(latent_file_name, std::ios::ate);
         OPENVINO_ASSERT(latent_copy_file.is_open(), "Cannot open ", latent_file_name);
@@ -60,13 +66,67 @@ void apply_lora(std::shared_ptr<ov::Model> model, InsertLoRA::LoRAMap& lora_map)
     }
 }

-StableDiffusionModels compile_models(const std::string& model_path, const std::string& device,
-                                     const std::string& lora_path, const float alpha, const bool use_cache) {
+void reshape_text_encoder(std::shared_ptr<ov::Model> model, size_t batch_size, size_t tokenizer_model_max_length) {
+    ov::PartialShape input_shape = model->input(0).get_partial_shape();
+    input_shape[0] = batch_size;
+    input_shape[1] = tokenizer_model_max_length;
+    std::map<size_t, ov::PartialShape> idx_to_shape{{0, input_shape}};
+    model->reshape(idx_to_shape);
+}
+
+void reshape_unet(std::shared_ptr<ov::Model> model,
+                  int64_t batch_size,
+                  int64_t height,
+                  int64_t width,
+                  int64_t tokenizer_model_max_length) {
+    height = height / VAE_SCALE_FACTOR;
+    width = width / VAE_SCALE_FACTOR;
+
+    std::map<std::string, ov::PartialShape> name_to_shape;
+
+    for (auto input : model->inputs()) {
+        std::string input_name = input.get_any_name();
+        name_to_shape[input_name] = input.get_partial_shape();
+        if (input_name == "timestep") {
+            name_to_shape[input_name][0] = 1;
+        } else if (input_name == "sample") {
+            name_to_shape[input_name] = {batch_size, UNET_IN_CHANNELS, height, width};
+        } else if (input_name == "time_ids") {
+            name_to_shape[input_name][0] = batch_size;
+        } else if (input_name == "timestep_cond") {
+            name_to_shape[input_name] = {batch_size, UNET_TIME_COND_PROJ_DIM};
+        } else {
+            name_to_shape[input_name][0] = batch_size;
+            name_to_shape[input_name][1] = TOKENIZER_MODEL_MAX_LENGTH;
+        }
+    }
+
+    model->reshape(name_to_shape);
+}
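Taken together, the new helpers freeze every network to the requested resolution before compilation. A minimal sketch of how they might combine for the default batch-1, 512x512 case, assuming the constants and helpers from this diff (including `reshape_vae_decoder`, defined just below) are in scope; the model directory is illustrative:

```cpp
#include <openvino/openvino.hpp>
#include <string>

// Sketch: static reshape of all three models prior to compile_model(), using
// the helpers and constants introduced by this patch. Not the full pipeline.
void prepare_static_models(ov::Core& core, const std::string& dir) {
    auto text_encoder = core.read_model(dir + "/text_encoder/openvino_model.xml");
    auto unet = core.read_model(dir + "/unet/openvino_model.xml");
    auto vae_decoder = core.read_model(dir + "/vae_decoder/openvino_model.xml");

    const size_t batch_size = 1, height = 512, width = 512;
    reshape_text_encoder(text_encoder, batch_size, TOKENIZER_MODEL_MAX_LENGTH); // input_ids -> [1, 77]
    reshape_unet(unet, batch_size, height, width, TOKENIZER_MODEL_MAX_LENGTH);  // sample -> [1, 4, 64, 64]
    reshape_vae_decoder(vae_decoder, height, width);                            // latents -> [1, 4, 64, 64]
}
```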
+
+void reshape_vae_decoder(std::shared_ptr<ov::Model> model, int64_t height, int64_t width) {
+    height = height / VAE_SCALE_FACTOR;
+    width = width / VAE_SCALE_FACTOR;
+
+    std::map<size_t, ov::PartialShape> idx_to_shape{{0, {1, VAE_DECODER_LATENT_CHANNELS, height, width}}};
+    model->reshape(idx_to_shape);
+}
+
+StableDiffusionModels compile_models(const std::string& model_path,
+                                     const std::string& device,
+                                     const std::string& lora_path,
+                                     const float alpha,
+                                     const bool use_cache,
+                                     const bool use_dynamic_shapes,
+                                     const size_t batch_size,
+                                     const size_t height,
+                                     const size_t width) {
     StableDiffusionModels models;
     ov::Core core;

     if (use_cache)
         core.set_property(ov::cache_dir("./cache_dir"));
+    core.add_extension(TOKENIZERS_LIBRARY_PATH);

     // read LoRA weights
@@ -78,6 +138,9 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
     // Text encoder
     {
         auto text_encoder_model = core.read_model(model_path + "/text_encoder/openvino_model.xml");
+        if (!use_dynamic_shapes) {
+            reshape_text_encoder(text_encoder_model, batch_size, TOKENIZER_MODEL_MAX_LENGTH);
+        }
         apply_lora(text_encoder_model, lora_weights["text_encoder"]);
         models.text_encoder = core.compile_model(text_encoder_model, device);
     }
@@ -85,6 +148,9 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
     // UNet
     {
         auto unet_model = core.read_model(model_path + "/unet/openvino_model.xml");
+        if (!use_dynamic_shapes) {
+            reshape_unet(unet_model, batch_size, height, width, TOKENIZER_MODEL_MAX_LENGTH);
+        }
         apply_lora(unet_model, lora_weights["unet"]);
         models.unet = core.compile_model(unet_model, device);
     }
@@ -92,6 +158,9 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
     // VAE decoder
     {
         auto vae_decoder_model = core.read_model(model_path + "/vae_decoder/openvino_model.xml");
+        if (!use_dynamic_shapes) {
+            reshape_vae_decoder(vae_decoder_model, height, width);
+        }
         ov::preprocess::PrePostProcessor ppp(vae_decoder_model);
         ppp.output().model().set_layout("NCHW");
         ppp.output().tensor().set_layout("NHWC");
@@ -108,15 +177,14 @@ StableDiffusionModels compile_models(const std::string& model_path, const std::s
 }

 ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt) {
-    const size_t MAX_LENGTH = 77; // 'model_max_length' from 'tokenizer_config.json'
     const size_t HIDDEN_SIZE = static_cast<size_t>(models.text_encoder.output(0).get_partial_shape()[2].get_length());
     const int32_t EOS_TOKEN_ID = 49407, PAD_TOKEN_ID = EOS_TOKEN_ID;
-    const ov::Shape input_ids_shape({1, MAX_LENGTH});
+    const ov::Shape input_ids_shape({1, TOKENIZER_MODEL_MAX_LENGTH});

     ov::InferRequest tokenizer_req = models.tokenizer.create_infer_request();
     ov::InferRequest text_encoder_req = models.text_encoder.create_infer_request();

-    ov::Tensor text_embeddings(ov::element::f32, {1, MAX_LENGTH, HIDDEN_SIZE});
+    ov::Tensor text_embeddings(ov::element::f32, {1, TOKENIZER_MODEL_MAX_LENGTH, HIDDEN_SIZE});
     ov::Tensor input_ids(ov::element::i32, input_ids_shape);
     std::fill_n(input_ids.data<int32_t>(), input_ids.get_size(), PAD_TOKEN_ID);
@@ -192,20 +260,23 @@ ov::Tensor postprocess_image(ov::Tensor decoded_image) {
 }

 int32_t main(int32_t argc, char* argv[]) try {
-    cxxopts::Options options("stable_diffusion", "Stable
Diffusion implementation in C++ using OpenVINO\n");
+    cxxopts::Options options("lcm_dreamshaper", "LCM_Dreamshaper_v7 implementation in C++ using OpenVINO\n");
     options.add_options()
-    ("p,posPrompt", "Initial positive prompt for LCM ", cxxopts::value<std::string>()->default_value("a beautiful pink unicorn"))
+    ("p,posPrompt", "Initial positive prompt for LCM", cxxopts::value<std::string>()->default_value("a beautiful pink unicorn"))
     ("d,device", "AUTO, CPU, or GPU.\nDoesn't apply to Tokenizer model, OpenVINO Tokenizers can be inferred on a CPU device only", cxxopts::value<std::string>()->default_value("CPU"))
     ("step", "Number of diffusion steps", cxxopts::value<uint32_t>()->default_value("4"))
     ("s,seed", "Number of random seed to generate latent for one image output", cxxopts::value<uint32_t>()->default_value("42"))
     ("num", "Number of image output", cxxopts::value<uint32_t>()->default_value("1"))
+    ("height", "Height of output image", cxxopts::value<uint32_t>()->default_value("512"))
+    ("width", "Width of output image", cxxopts::value<uint32_t>()->default_value("512"))
     ("c,useCache", "Use model caching", cxxopts::value<bool>()->default_value("false"))
     ("r,readNPLatent", "Read numpy generated latents from file, only supported for one output image", cxxopts::value<bool>()->default_value("false"))
-    ("m,modelPath", "Specify path of LCM model IRs", cxxopts::value<std::string>()->default_value("../scripts/SimianLuo/LCM_Dreamshaper_v7"))
-    ("t,type", "Specify the type of LCM model IRs (e.g., FP16_static or FP16_dyn)", cxxopts::value<std::string>()->default_value("FP16_static"))
+    ("m,modelPath", "Specify path to LCM model IRs", cxxopts::value<std::string>()->default_value("./models/lcm_dreamshaper_v7"))
+    ("t,type", "Specify the type of LCM model IRs (e.g., FP32, FP16 or INT8)", cxxopts::value<std::string>()->default_value("FP16"))
+    ("dynamic", "Specify the model input shape to use dynamic shape", cxxopts::value<bool>()->default_value("false"))
     ("l,loraPath", "Specify path of LoRA file. (*.safetensors).", cxxopts::value<std::string>()->default_value(""))
-    ("a,alpha", "alpha for LoRA", cxxopts::value<float>()->default_value("0.75"))
+    ("a,alpha", "Specify alpha for LoRA", cxxopts::value<float>()->default_value("0.75"))
     ("h,help", "Print usage");

     cxxopts::ParseResult result;
@@ -227,13 +298,15 @@ int32_t main(int32_t argc, char* argv[]) try {
     const uint32_t num_inference_steps = result["step"].as<uint32_t>();
     const uint32_t user_seed = result["seed"].as<uint32_t>();
     const uint32_t num_images = result["num"].as<uint32_t>();
+    const uint32_t height = result["height"].as<uint32_t>();
+    const uint32_t width = result["width"].as<uint32_t>();
     const bool use_cache = result["useCache"].as<bool>();
     const bool read_np_latent = result["readNPLatent"].as<bool>();
     const std::string model_base_path = result["modelPath"].as<std::string>();
     const std::string model_type = result["type"].as<std::string>();
+    const bool use_dynamic_shapes = result["dynamic"].as<bool>();
     const std::string lora_path = result["loraPath"].as<std::string>();
     const float alpha = result["alpha"].as<float>();
-    const uint32_t height = 512, width = 512;

     OPENVINO_ASSERT(!read_np_latent || (read_np_latent && (num_images == 1)),
         "\"readNPLatent\" option is only supported for one output image. Number of image output was set to: " + std::to_string(num_images));
@@ -248,14 +321,23 @@ int32_t main(int32_t argc, char* argv[]) try {
     std::cout << "OpenVINO version: " << ov::get_openvino_version() << std::endl;
     std::cout << "Running (may take some time) ..."
<< std::endl;

-    // Stable Diffusion pipeline
+    const std::string model_path = model_base_path + "/" + model_type;
+    if (!std::filesystem::exists(model_path)) {
+        std::cerr << "Model IRs for type " << model_type << " don't exist in directory " << model_path << "\n";
+        std::cerr << "Refer to README.md for instructions on exporting an OpenVINO model with a particular data type." << std::endl;
+        return EXIT_FAILURE;
+    }

-    StableDiffusionModels models = compile_models(model_base_path + "/" + model_type, device, lora_path, alpha, use_cache);
+    // Stable Diffusion pipeline
+    const size_t batch_size = 1;
+    StableDiffusionModels models =
+        compile_models(model_path, device, lora_path, alpha, use_cache, use_dynamic_shapes, batch_size, height, width);
     ov::InferRequest unet_infer_request = models.unet.create_infer_request();

     ov::PartialShape sample_shape = models.unet.input("sample").get_partial_shape();
-    OPENVINO_ASSERT(sample_shape.is_dynamic() || (sample_shape[2] * 8 == width && sample_shape[3] * 8 == height),
-        "UNet model has static shapes [1, 4, H/8, W/8] or dynamic shapes [?, 4, ?, ?]");
+    OPENVINO_ASSERT(sample_shape.is_dynamic() ||
+        (sample_shape[2] * VAE_SCALE_FACTOR == height && sample_shape[3] * VAE_SCALE_FACTOR == width),
+        "UNet model must have static shapes [1, 4, H/8, W/8] or dynamic shapes [?, 4, ?, ?]");

     // no negative prompt for LCM model:
     // https://huggingface.co/docs/diffusers/api/pipelines/latent_consistency_models#diffusers.LatentConsistencyModelPipeline
@@ -269,9 +351,9 @@ int32_t main(int32_t argc, char* argv[]) try {
     std::vector<int64_t> timesteps = scheduler->get_timesteps();
     float guidance_scale = 8.0;
-    ov::Tensor guidance_scale_embedding = get_w_embedding(guidance_scale, 256);
+    ov::Tensor guidance_scale_embedding = get_w_embedding(guidance_scale, UNET_TIME_COND_PROJ_DIM);

-    ov::Tensor denoised(ov::element::f32, {1, 4, height / 8, width / 8});
+    ov::Tensor denoised(ov::element::f32, {1, UNET_IN_CHANNELS, height / VAE_SCALE_FACTOR, width / VAE_SCALE_FACTOR});
     for (uint32_t n = 0; n < num_images; n++) {
         std::uint32_t seed = num_images == 1 ?
user_seed: user_seed + n; ov::Tensor latent_model_input = randn_tensor(height, width, read_np_latent, seed); diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers index 60d3ccc42..c55f8e256 160000 --- a/thirdparty/openvino_tokenizers +++ b/thirdparty/openvino_tokenizers @@ -1 +1 @@ -Subproject commit 60d3ccc426984acc623630a8e4d8c8878ec74eb7 +Subproject commit c55f8e2568fe0093f6558b9ef7b49c512a412c14 From 5b9195c51fadf7f2b57fcaf254f1a5b3d1d59062 Mon Sep 17 00:00:00 2001 From: Zlobin Vladimir Date: Thu, 9 May 2024 15:00:10 +0400 Subject: [PATCH 06/10] Remove convert_tokenizer (#425) --- .github/workflows/causal_lm_cpp.yml | 12 +----------- .github/workflows/lcm_dreamshaper_cpp.yml | 18 ++++++------------ .github/workflows/stable_diffusion_1_5_cpp.yml | 18 ++++++------------ .../lcm_dreamshaper_v7/cpp/README.md | 11 +++-------- .../lcm_dreamshaper_v7/cpp/requirements.txt | 2 +- .../lcm_dreamshaper_v7/cpp/src/main.cpp | 2 +- .../stable_diffusion_1_5/cpp/README.md | 11 +++-------- .../stable_diffusion_1_5/cpp/requirements.txt | 2 +- .../stable_diffusion_1_5/cpp/src/main.cpp | 2 +- text_generation/causal_lm/cpp/README.md | 6 ++---- text_generation/causal_lm/cpp/requirements.txt | 1 + thirdparty/openvino_tokenizers | 2 +- 12 files changed, 27 insertions(+), 60 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index cc69e414b..0a6e9dd5a 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -33,10 +33,9 @@ jobs: optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - - name: convert_tokenizer and run + - name: greedy_causal_lm run: | source ./ov/setupvars.sh - convert_tokenizer ./open_llama_3b_v2/ --output ./open_llama_3b_v2/ --with-detokenizer ./build/greedy_causal_lm ./open_llama_3b_v2/ "return 0" cpp-beam_search_causal_lm-ubuntu: @@ -64,7 +63,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt python -c " @@ -157,7 +155,6 @@ jobs: shell: cmd run: | call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64\setupvars.bat - convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer .\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt echo import transformers > ref.py @@ -197,7 +194,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer Qwen/Qwen-7B-Chat --output ./Qwen-7B-Chat/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/ 69 > ./pred.txt cpp-beam_search_causal_lm-Qwen1_5-7B-Chat: @@ -225,7 +221,6 @@ jobs: - name: Run run: | source ./ov/setupvars.sh - convert_tokenizer ./Qwen1.5-7B-Chat/ --output ./Qwen1.5-7B-Chat/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好!" 
> ./pred_qwen15.txt cpp-beam_search_causal_lm-Phi-2: @@ -253,7 +248,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./phi-2/ --output ./phi-2/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./phi-2/ 69 > ./pred.txt cpp-beam_search_causal_lm-notus-7b-v1: @@ -281,7 +275,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./notus-7b-v1/ --output ./notus-7b-v1/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/ 69 > ./pred.txt cpp-speculative_decoding_lm-ubuntu: @@ -305,8 +298,6 @@ jobs: python -m pip install ./thirdparty/openvino_tokenizers/[transformers] optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b - convert_tokenizer ./dolly-v2-3b/ --output ./dolly-v2-3b/ --with-detokenizer - convert_tokenizer ./dolly-v2-7b/ --output ./dolly-v2-7b/ --with-detokenizer cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - name: run and compare @@ -348,7 +339,6 @@ jobs: - name: Run Generation run: | source ./ov/setupvars.sh - convert_tokenizer ./phi-1_5/ --output ./phi-1_5/ --with-detokenizer --trust-remote-code timeout 50s ./build/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt timeout 50s ./build/beam_search_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_beam.txt - name: Compare diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index 131927d76..427fada3a 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -39,7 +39,7 @@ jobs: run: | conda activate openvino_lcm_cpp conda update -c conda-forge --all - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH - name: Install python dependencies @@ -53,9 +53,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - export MODEL_PATH="models/lcm_dreamshaper_v7/FP16" - optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH - convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -66,8 +64,7 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - ./build/lcm_dreamshaper + run: ./build/lcm_dreamshaper lcm_dreamshaper_v7_cpp-windows: runs-on: windows-latest @@ -87,7 +84,7 @@ jobs: run: | conda activate openvino_lcm_cpp conda update -c conda-forge --all - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH - name: Install python dependencies @@ -101,9 +98,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - $env:MODEL_PATH='models/lcm_dreamshaper_v7/FP16' - optimum-cli export openvino --model 
SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $env:MODEL_PATH - convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/ + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -114,5 +109,4 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - & "./build/Release/lcm_dreamshaper.exe" -r --dynamic + run: '& "./build/Release/lcm_dreamshaper.exe" -r --dynamic' diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index ad929ea06..0d0d3ea8b 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -38,7 +38,7 @@ jobs: - name: Install OpenVINO and other conda dependencies run: | conda activate openvino_sd_cpp - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH - name: Install python dependencies @@ -52,9 +52,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp - export MODEL_PATH="models/stable_diffusion_v1_5_ov/FP16" - optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH - convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ + optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -65,8 +63,7 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16 + run: ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16 stable_diffusion_1_5_cpp-windows: runs-on: windows-latest @@ -85,7 +82,7 @@ jobs: - name: Install OpenVINO and other conda dependencies run: | conda activate openvino_sd_cpp - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake - name: Install python dependencies working-directory: ${{ env.working_directory }} @@ -98,9 +95,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp - $env:MODEL_PATH='models/stable_diffusion_v1_5_ov/FP16' - optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $env:MODEL_PATH - convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/ + optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -111,5 +106,4 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - & "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic + run: '& "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic' diff --git a/image_generation/lcm_dreamshaper_v7/cpp/README.md 
b/image_generation/lcm_dreamshaper_v7/cpp/README.md
index d4a62fb27..c993f80c5 100644
--- a/image_generation/lcm_dreamshaper_v7/cpp/README.md
+++ b/image_generation/lcm_dreamshaper_v7/cpp/README.md
@@ -18,7 +18,7 @@ Prepare a python environment and install dependencies:
 conda create -n openvino_lcm_cpp python==3.10
 conda activate openvino_lcm_cpp
 conda update -c conda-forge --all
-conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
 # Ensure that Conda standard libraries are used
 conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
 ```
@@ -37,13 +37,8 @@ conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
 ```

 2. Download the model from Hugging Face and convert it to OpenVINO IR via the [optimum-intel CLI](https://github.com/huggingface/optimum-intel). Example command for downloading and exporting the FP16 model:
-    ```shell
-    export MODEL_PATH="models/lcm_dreamshaper_v7/FP16"
-    # Using optimum-cli for exporting model to OpenVINO format
-    optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH
-    # Converting tokenizer
-    convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
-    ```
+
+    `optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16`

 ### LoRA enabling with safetensors

diff --git a/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
index dbf28af22..7ffbb9213 100644
--- a/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
+++ b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
@@ -1,4 +1,4 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch==2.2.2+cpu
 diffusers==0.27.2
-optimum-intel[nncf,openvino]==1.16.0
+optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@0029e9165a2dad4cfcf787aa63181d9dc0cd49d5
diff --git a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
index 20e240fa8..546bd170b 100644
--- a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
+++ b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
@@ -192,7 +192,7 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt) {
     tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &pos_prompt});
     tokenizer_req.infer();
     ov::Tensor input_ids_token = tokenizer_req.get_tensor("input_ids");
-    std::copy_n(input_ids_token.data<int32_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());
+    std::copy_n(input_ids_token.data<int64_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());

     // text embeddings
     text_encoder_req.set_tensor("input_ids", input_ids);
diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md
index 2dfa32628..daf638443 100644
--- a/image_generation/stable_diffusion_1_5/cpp/README.md
+++ b/image_generation/stable_diffusion_1_5/cpp/README.md
@@ -18,7 +18,7 @@ Prepare a python environment and install dependencies:
 ```shell
 conda create -n openvino_sd_cpp python==3.10
 conda activate openvino_sd_cpp
-conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
 # Ensure that Conda standard libraries are used
 conda env config vars set
LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
 ```

@@ -40,13 +40,8 @@ python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
 - [dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) to run Stable Diffusion with LoRA adapters.

 Example command for downloading and exporting the FP16 model:
-    ```shell
-    export MODEL_PATH="models/dreamlike_anime_1_0_ov/FP16"
-    # Using optimum-cli for exporting model to OpenVINO format
-    optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH
-    # Converting tokenizer manually (`--convert-tokenizer` flag of `optimum-cli` results in "OpenVINO Tokenizer export for CLIPTokenizer is not supported.")
-    convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
-    ```
+
+    `optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 models/dreamlike_anime_1_0_ov/FP16`

 You can also choose another precision and export an FP32 or INT8 model.

diff --git a/image_generation/stable_diffusion_1_5/cpp/requirements.txt b/image_generation/stable_diffusion_1_5/cpp/requirements.txt
index 289149d13..5e6bfe037 100644
--- a/image_generation/stable_diffusion_1_5/cpp/requirements.txt
+++ b/image_generation/stable_diffusion_1_5/cpp/requirements.txt
@@ -2,5 +2,5 @@
 torch==2.2.2+cpu
 diffusers==0.27.2
 transformers==4.39.3
-optimum-intel[nncf,openvino]==1.16.0
+optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@0029e9165a2dad4cfcf787aa63181d9dc0cd49d5
 huggingface_hub[cli]==0.22.2
diff --git a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
index d1c24c32a..3d6c8a799 100644
--- a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
+++ b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp
@@ -216,7 +216,7 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt, s
     tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &prompt});
     tokenizer_req.infer();
     ov::Tensor input_ids_token = tokenizer_req.get_tensor("input_ids");
-    std::copy_n(input_ids_token.data<int32_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());
+    std::copy_n(input_ids_token.data<int64_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());

     // text embeddings
     text_encoder_req.set_tensor("input_ids", input_ids);
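Both `std::copy_n` hunks above are easy to misread because the element types were the whole change: with `convert_tokenizer` and its `--tokenizer-output-type i32` option gone, the exported tokenizer is assumed to emit `i64` token IDs, while the `input_ids` tensor fed to the text encoder stays `i32`, so the copy now narrows element by element. A minimal sketch of the same idea in isolation:

```cpp
// Sketch: narrowing copy from 64-bit tokenizer output into a 32-bit
// input_ids buffer pre-filled with PAD/EOS (49407 for CLIP), as in the
// text_encoder() functions patched above.
#include <algorithm>
#include <cstdint>
#include <vector>

int main() {
    std::vector<std::int64_t> tokenizer_ids{49406, 320, 1125, 49407}; // i64 IDs
    std::vector<std::int32_t> input_ids(77, 49407);                   // i32 buffer
    // Implicit narrowing per element; safe while IDs fit in int32_t.
    std::copy_n(tokenizer_ids.begin(), tokenizer_ids.size(), input_ids.begin());
    return 0;
}
```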
diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md
index a0b4a0a1b..d65c79bad 100644
--- a/text_generation/causal_lm/cpp/README.md
+++ b/text_generation/causal_lm/cpp/README.md
@@ -1,6 +1,6 @@
 # Text generation C++ samples that support most popular models like LLaMA 2

-These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `convert_tokenizer` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.
+These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options, to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `optimum-cli` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of an LLM-powered chatbot in Python.

 ## How it works

@@ -49,7 +49,7 @@ This approach reduces the need for multiple infer requests to the main model, en

 ## Install OpenVINO

-Install [OpenVINO Archives >= 2024.0](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for these branches early testing. `<INSTALL_DIR>` below refers to the extraction location.
+Install [OpenVINO Archives >= 2024.1](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not-yet-released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for early testing of these branches. `<INSTALL_DIR>` below refers to the extraction location.

 ## Build `greedy_causal_lm`, `beam_search_causal_lm` and `openvino_tokenizers`

@@ -81,7 +81,6 @@ python3 -m pip install --upgrade-strategy eager -r requirements.txt
 # Update openvino_tokenizers from the submodule
 python3 -m pip install ./../../../thirdparty/openvino_tokenizers/[transformers]
 optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
-convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer --trust-remote-code
 ```

 #### Windows

@@ -92,7 +91,6 @@
 python -m pip install --upgrade-strategy eager -r requirements.txt
 REM Update openvino_tokenizers from the submodule
 python -m pip install .\..\..\..\thirdparty\openvino_tokenizers\[transformers]
 optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
-convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer --trust-remote-code
 ```

 ## Run
diff --git a/text_generation/causal_lm/cpp/requirements.txt b/text_generation/causal_lm/cpp/requirements.txt
index 89c6a4b4d..019e172dd 100644
--- a/text_generation/causal_lm/cpp/requirements.txt
+++ b/text_generation/causal_lm/cpp/requirements.txt
@@ -1,4 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cpu
 optimum[openvino]==1.19.1
+optimum-intel[openvino] @ git+https://github.com/apaniukov/optimum-intel.git@0029e9165a2dad4cfcf787aa63181d9dc0cd49d5
 einops==0.7.0 # For Qwen
 transformers_stream_generator==0.0.4 # For Qwen
diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers
index c55f8e256..37d20ce20 160000
--- a/thirdparty/openvino_tokenizers
+++ b/thirdparty/openvino_tokenizers
@@ -1 +1 @@
-Subproject commit c55f8e2568fe0093f6558b9ef7b49c512a412c14
+Subproject commit 37d20ce209b120f6ffd450484e207ef71f8c8d03

From 6ac17bf5978a60bb6302195c200840c48886f4ef Mon Sep 17 00:00:00 2001
From: Wovchena
Date: Thu, 9 May 2024 15:26:28 +0400
Subject: [PATCH 07/10] upgrade openvino --- .github/workflows/causal_lm_cpp.yml | 2 +- thirdparty/openvino_tokenizers | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 49f91241d..dd22b88d0 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -453,7 +453,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.0/linux/l_openvino_toolkit_ubuntu20_2024.0.0.14509.34caeefd078_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers index 37d20ce20..0e4bb32ca 160000 --- a/thirdparty/openvino_tokenizers +++ b/thirdparty/openvino_tokenizers @@ -1 +1 @@ -Subproject commit 37d20ce209b120f6ffd450484e207ef71f8c8d03 +Subproject commit 0e4bb32ca3412f589e1d094faa8b0aad19ee47ca From 78d0914778ff665800b9e3c13e4bce8ee1a37013 Mon Sep 17 00:00:00 2001 From: Wovchena Date: Thu, 9 May 2024 15:36:25 +0400 Subject: [PATCH 08/10] update newer pipelines --- .github/workflows/causal_lm_cpp.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index dd22b88d0..22a6e6905 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -144,7 +144,7 @@ jobs: " echo "你好! 你好嗎?" passed - timeout 1m ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "Alan Turing was a" "return 0" "你好! 你好嗎?" > ./pred.txt + timeout 1m ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Alan Turing was a" "return 0" "你好! 你好嗎?" 
> ./pred.txt
 python -c "
 import transformers
 with open('pred.txt', 'r') as file:
@@ -369,12 +369,11 @@ jobs:
 - name: Download, convert and build
 run: |
 source ./ov/setupvars.sh
- python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt "transformers<4.38" ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu
- python ./llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir ./TinyLlama-1.1B-Chat-v1.0/ --precision FP16
- convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --output ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ --with-detokenizer
+ python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
+ python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
+ optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
 cmake --build ./build/ --config Release -j
- wait
 - name: run and compare
 run: |
 source ./ov/setupvars.sh
@@ -386,8 +385,8 @@

 Question: Can you please add 2 and 3
 A:' > ./prompt.txt

- ./build/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
- ./build/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "$(<prompt.txt)" > predictions_greedy.txt
+ ./build/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
+ ./build/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_greedy.txt
 python -c "
 with open('predictions_greedy.txt', 'r') as f:
 predicted_greedy = f.readline()
@@ -458,15 +457,16 @@
 - name: Download, convert and build
 run: |
 source ./ov/setupvars.sh
- python -m pip install --upgrade-strategy eager "optimum>=1.14" -r ./llm_bench/python/requirements.txt ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python ./llm_bench/python/convert.py --model_id ikala/redpajama-3b-chat --output_dir ./redpajama-3b-chat/ --precision FP16 &
+ python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
+ python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
+ optimum-cli export openvino --trust-remote-code --weight-format fp16 --model ikala/redpajama-3b-chat redpajama-3b-chat
 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
 cmake --build ./build/ --config Release -j
- wait
 - run: source ./ov/setupvars.sh && convert_tokenizer ./redpajama-3b-chat/pytorch/dldt/FP16/ --output ./redpajama-3b-chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
 - name: Run Generation
 run: |
 source ./ov/setupvars.sh
- timeout 50s ./build/greedy_causal_lm ./redpajama-3b-chat/pytorch/dldt/FP16/ "Alan Turing was a" > ./pred_greedy.txt
+ timeout 50s ./build/greedy_causal_lm ./redpajama-3b-chat/ "Alan Turing was a" > ./pred_greedy.txt
 - name: Compare
 run: |
 python -c "

From f2211a0feb83098c2e1650b089f4ab5804d8c86a Mon Sep 17 00:00:00 2001
From: Wovchena
Date: Thu, 9 May 2024 15:49:40 +0400
Subject: [PATCH 09/10] fix causal_lm_cpp workflows

---
 .github/workflows/causal_lm_cpp.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 22a6e6905..52f865634 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++
b/.github/workflows/causal_lm_cpp.yml @@ -364,7 +364,7 @@ jobs: - name: Install OpenVINO run: | mkdir ./ov/ - curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz + curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.1/linux/l_openvino_toolkit_ubuntu20_2024.1.0.15008.f4afc983258_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz sudo ./ov/install_dependencies/install_openvino_dependencies.sh - name: Download, convert and build run: | @@ -462,7 +462,7 @@ jobs: optimum-cli export openvino --trust-remote-code --weight-format fp16 --model ikala/redpajama-3b-chat redpajama-3b-chat cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - - run: source ./ov/setupvars.sh && convert_tokenizer ./redpajama-3b-chat/pytorch/dldt/FP16/ --output ./redpajama-3b-chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code + - run: source ./ov/setupvars.sh && convert_tokenizer ./redpajama-3b-chat/ --output ./redpajama-3b-chat/ --with-detokenizer --trust-remote-code - name: Run Generation run: | source ./ov/setupvars.sh From c9f56539d227cd1d82e3c08659ff0fd4299e7ec1 Mon Sep 17 00:00:00 2001 From: Wovchena Date: Thu, 9 May 2024 15:53:10 +0400 Subject: [PATCH 10/10] set openvino_tokenizers to releases/2024/1 --- thirdparty/openvino_tokenizers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers index 0e4bb32ca..37d20ce20 160000 --- a/thirdparty/openvino_tokenizers +++ b/thirdparty/openvino_tokenizers @@ -1 +1 @@ -Subproject commit 0e4bb32ca3412f589e1d094faa8b0aad19ee47ca +Subproject commit 37d20ce209b120f6ffd450484e207ef71f8c8d03
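Since the last three patches pin the workflows, archives, and `openvino_tokenizers` submodule to the 2024.1 release line, a quick way to confirm which runtime a local build actually links against is the same `ov::get_openvino_version()` call the samples already print — a minimal sketch:

```cpp
// Prints the linked OpenVINO runtime version; expect a 2024.1.* build number
// when the environment matches the workflows above.
#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Version version = ov::get_openvino_version();
    std::cout << "Description:  " << version.description << '\n';
    std::cout << "Build number: " << version.buildNumber << '\n';
    return 0;
}
```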