Remove convert_tokenizer #425

Merged

Changes from 6 commits
12 changes: 1 addition & 11 deletions .github/workflows/causal_lm_cpp.yml
@@ -33,10 +33,9 @@ jobs:
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2
cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
cmake --build ./build/ --config Release -j
- - name: convert_tokenizer and run
+ - name: greedy_causal_lm
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./open_llama_3b_v2/ --output ./open_llama_3b_v2/ --with-detokenizer
./build/greedy_causal_lm ./open_llama_3b_v2/ "return 0"

cpp-beam_search_causal_lm-ubuntu:
@@ -64,7 +63,6 @@ jobs:
- name: Compare
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer

timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt
python -c "
@@ -157,7 +155,6 @@ jobs:
shell: cmd
run: |
call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64\setupvars.bat
- convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer

.\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt
echo import transformers > ref.py
@@ -197,7 +194,6 @@ jobs:
- name: Compare
run: |
source ./ov/setupvars.sh
- convert_tokenizer Qwen/Qwen-7B-Chat --output ./Qwen-7B-Chat/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/ 69 > ./pred.txt

cpp-beam_search_causal_lm-Qwen1_5-7B-Chat:
@@ -225,7 +221,6 @@ jobs:
- name: Run
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./Qwen1.5-7B-Chat/ --output ./Qwen1.5-7B-Chat/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好!" > ./pred_qwen15.txt

cpp-beam_search_causal_lm-Phi-2:
@@ -253,7 +248,6 @@ jobs:
- name: Compare
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./phi-2/ --output ./phi-2/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./phi-2/ 69 > ./pred.txt

cpp-beam_search_causal_lm-notus-7b-v1:
@@ -281,7 +275,6 @@ jobs:
- name: Compare
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./notus-7b-v1/ --output ./notus-7b-v1/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/ 69 > ./pred.txt

cpp-speculative_decoding_lm-ubuntu:
@@ -305,8 +298,6 @@ jobs:
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b
- convert_tokenizer ./dolly-v2-3b/ --output ./dolly-v2-3b/ --with-detokenizer
- convert_tokenizer ./dolly-v2-7b/ --output ./dolly-v2-7b/ --with-detokenizer
cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
cmake --build ./build/ --config Release -j
- name: run and compare
@@ -348,7 +339,6 @@ jobs:
- name: Run Generation
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./phi-1_5/ --output ./phi-1_5/ --with-detokenizer --trust-remote-code
timeout 50s ./build/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt
timeout 50s ./build/beam_search_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_beam.txt
- name: Compare
18 changes: 6 additions & 12 deletions .github/workflows/lcm_dreamshaper_cpp.yml
@@ -39,7 +39,7 @@ jobs:
run: |
conda activate openvino_lcm_cpp
conda update -c conda-forge --all
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH

- name: Install python dependencies
@@ -53,9 +53,7 @@ jobs:
working-directory: ${{ env.working_directory }}
run: |
conda activate openvino_lcm_cpp
- export MODEL_PATH="models/lcm_dreamshaper_v7/FP16"
- optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH
- convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
+ optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16

- name: Build app
working-directory: ${{ env.working_directory }}
@@ -66,8 +64,7 @@ jobs:

- name: Run app
working-directory: ${{ env.working_directory }}
- run: |
- ./build/lcm_dreamshaper
+ run: ./build/lcm_dreamshaper

lcm_dreamshaper_v7_cpp-windows:
runs-on: windows-latest
@@ -87,7 +84,7 @@ jobs:
run: |
conda activate openvino_lcm_cpp
conda update -c conda-forge --all
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH

- name: Install python dependencies
@@ -101,9 +98,7 @@ jobs:
working-directory: ${{ env.working_directory }}
run: |
conda activate openvino_lcm_cpp
- $env:MODEL_PATH='models/lcm_dreamshaper_v7/FP16'
- optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $env:MODEL_PATH
- convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/
+ optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16

- name: Build app
working-directory: ${{ env.working_directory }}
@@ -114,5 +109,4 @@ jobs:

- name: Run app
working-directory: ${{ env.working_directory }}
- run: |
- & "./build/Release/lcm_dreamshaper.exe" -r --dynamic
+ run: '& "./build/Release/lcm_dreamshaper.exe" -r --dynamic'
18 changes: 6 additions & 12 deletions .github/workflows/stable_diffusion_1_5_cpp.yml
@@ -38,7 +38,7 @@ jobs:
- name: Install OpenVINO and other conda dependencies
run: |
conda activate openvino_sd_cpp
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH

- name: Install python dependencies
@@ -52,9 +52,7 @@ jobs:
working-directory: ${{ env.working_directory }}
run: |
conda activate openvino_sd_cpp
- export MODEL_PATH="models/stable_diffusion_v1_5_ov/FP16"
- optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH
- convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
+ optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16

- name: Build app
working-directory: ${{ env.working_directory }}
@@ -65,8 +63,7 @@ jobs:

- name: Run app
working-directory: ${{ env.working_directory }}
- run: |
- ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16
+ run: ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16

stable_diffusion_1_5_cpp-windows:
runs-on: windows-latest
@@ -85,7 +82,7 @@ jobs:
- name: Install OpenVINO and other conda dependencies
run: |
conda activate openvino_sd_cpp
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake

- name: Install python dependencies
working-directory: ${{ env.working_directory }}
@@ -98,9 +95,7 @@ jobs:
working-directory: ${{ env.working_directory }}
run: |
conda activate openvino_sd_cpp
- $env:MODEL_PATH='models/stable_diffusion_v1_5_ov/FP16'
- optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $env:MODEL_PATH
- convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/
+ optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16

- name: Build app
working-directory: ${{ env.working_directory }}
@@ -111,5 +106,4 @@ jobs:

- name: Run app
working-directory: ${{ env.working_directory }}
- run: |
- & "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic
+ run: '& "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic'
11 changes: 3 additions & 8 deletions image_generation/lcm_dreamshaper_v7/cpp/README.md
@@ -18,7 +18,7 @@ Prepare a python environment and install dependencies:
conda create -n openvino_lcm_cpp python==3.10
conda activate openvino_lcm_cpp
conda update -c conda-forge --all
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
# Ensure that Conda standard libraries are used
conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
```
@@ -37,13 +37,8 @@ conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
```

2. Download the model from Huggingface and convert it to OpenVINO IR via [optimum-intel CLI](https://github.com/huggingface/optimum-intel). Example command for downloading and exporting FP16 model:
- ```shell
- export MODEL_PATH="models/lcm_dreamshaper_v7/FP16"
- # Using optimum-cli for exporting model to OpenVINO format
- optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH
- # Converting tokenizer
- convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
- ```

+ `optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16`

### LoRA enabling with safetensors

2 changes: 1 addition & 1 deletion image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
@@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.2.2+cpu
diffusers==0.27.2
- optimum-intel[nncf,openvino]==1.16.0
+ optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers
2 changes: 1 addition & 1 deletion image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
@@ -192,7 +192,7 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt) {
tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &pos_prompt});
tokenizer_req.infer();
ov::Tensor input_ids_token = tokenizer_req.get_tensor("input_ids");
- std::copy_n(input_ids_token.data<std::int32_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());
+ std::copy_n(input_ids_token.data<std::int64_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());

// text embeddings
text_encoder_req.set_tensor("input_ids", input_ids);
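This one-line fix reflects the tokenizer IRs that `optimum-cli` now exports: they emit `input_ids` as `int64`, while this sample's text encoder takes an `int32` tensor, so reading the buffer as `int32` misinterpreted the data. A minimal sketch of the corrected element-wise narrowing copy (the standalone helper is illustrative, not code from the PR):

```cpp
#include <algorithm>
#include <cstdint>

#include <openvino/openvino.hpp>

// Copy int64 token IDs from the tokenizer output into the int32
// "input_ids" tensor the CLIP text encoder expects. std::copy_n
// converts element by element, so vocabulary-sized IDs survive the
// int64 -> int32 narrowing without loss.
void copy_token_ids(ov::Tensor tokenizer_out, ov::Tensor encoder_input) {
    std::copy_n(tokenizer_out.data<std::int64_t>(),
                tokenizer_out.get_size(),
                encoder_input.data<std::int32_t>());
}
```

The same correction appears in `stable_diffusion_1_5/cpp/src/main.cpp` below.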
11 changes: 3 additions & 8 deletions image_generation/stable_diffusion_1_5/cpp/README.md
@@ -18,7 +18,7 @@ Prepare a python environment and install dependencies:
```shell
conda create -n openvino_sd_cpp python==3.10
conda activate openvino_sd_cpp
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
# Ensure that Conda standard libraries are used
conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
```
@@ -40,13 +40,8 @@ python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
- [dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) to run Stable Diffusion with LoRA adapters.

Example command for downloading and exporting FP16 model:
- ```shell
- export MODEL_PATH="models/dreamlike_anime_1_0_ov/FP16"
- # Using optimum-cli for exporting model to OpenVINO format
- optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH
- # Converting tokenizer manually (`--convert-tokenizer` flag of `optimum-cli` results in "OpenVINO Tokenizer export for CLIPTokenizer is not supported.")
- convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
- ```

+ `optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 models/dreamlike_anime_1_0_ov/FP16`

You can also choose other precision and export FP32 or INT8 model.

2 changes: 1 addition & 1 deletion image_generation/stable_diffusion_1_5/cpp/requirements.txt
@@ -2,5 +2,5 @@
torch==2.2.2+cpu
diffusers==0.27.2
transformers==4.39.3
- optimum-intel[nncf,openvino]==1.16.0
+ optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers
huggingface_hub[cli]==0.22.2
2 changes: 1 addition & 1 deletion image_generation/stable_diffusion_1_5/cpp/src/main.cpp
@@ -216,7 +216,7 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt, s
tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &prompt});
tokenizer_req.infer();
ov::Tensor input_ids_token = tokenizer_req.get_tensor("input_ids");
- std::copy_n(input_ids_token.data<std::int32_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());
+ std::copy_n(input_ids_token.data<std::int64_t>(), input_ids_token.get_size(), input_ids.data<std::int32_t>());

// text embeddings
text_encoder_req.set_tensor("input_ids", input_ids);
6 changes: 2 additions & 4 deletions text_generation/causal_lm/cpp/README.md
@@ -1,6 +1,6 @@
# Text generation C++ samples that support most popular models like LLaMA 2

- These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `convert_tokenizer` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.
+ These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `optimum-cli` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.

## How it works
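The samples tokenize prompts by loading the openvino_tokenizers extension into `ov::Core` and running the exported tokenizer IR as a regular model, as the `tokenizer_req` calls in the image-generation diffs above show. A minimal sketch of that flow, assuming the default artifact names (the extension library, model path, and prompt below are illustrative assumptions, not taken from this PR):

```cpp
#include <string>

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Assumed library name: libopenvino_tokenizers.so on Linux,
    // openvino_tokenizers.dll on Windows.
    core.add_extension("libopenvino_tokenizers.so");
    // optimum-cli places openvino_tokenizer.xml (and openvino_detokenizer.xml)
    // next to the exported model IR.
    ov::InferRequest tokenizer_req =
        core.compile_model("TinyLlama-1.1B-Chat-v1.0/openvino_tokenizer.xml", "CPU")
            .create_infer_request();
    std::string prompt = "Alan Turing was a";
    tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &prompt});
    tokenizer_req.infer();
    // int64 token IDs, ready to feed the LLM's "input_ids" input.
    ov::Tensor input_ids = tokenizer_req.get_tensor("input_ids");
    return input_ids.get_size() ? 0 : 1;
}
```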

@@ -49,7 +49,7 @@ This approach reduces the need for multiple infer requests to the main model, en

## Install OpenVINO

- Install [OpenVINO Archives >= 2024.0](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for these branches early testing. `<INSTALL_DIR>` below refers to the extraction location.
+ Install [OpenVINO Archives >= 2024.1](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for these branches early testing. `<INSTALL_DIR>` below refers to the extraction location.

## Build `greedy_causal_lm`, `beam_search_causal_lm` and `openvino_tokenizers`

@@ -81,7 +81,6 @@ python3 -m pip install --upgrade-strategy eager -r requirements.txt
# Update openvino_tokenizers from the submodule
python3 -m pip install ./../../../thirdparty/openvino_tokenizers/[transformers]
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
- convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer --trust-remote-code
```

#### Windows
@@ -92,7 +91,6 @@ python -m pip install --upgrade-strategy eager -r requirements.txt
REM Update openvino_tokenizers from the submodule
python -m pip install .\..\..\..\thirdparty\openvino_tokenizers\[transformers]
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
- convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer --trust-remote-code
```

## Run
1 change: 1 addition & 0 deletions text_generation/causal_lm/cpp/requirements.txt
@@ -1,4 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cpu
optimum[openvino]==1.19.1
+ optimum-intel[openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers
einops==0.7.0 # For Qwen
transformers_stream_generator==0.0.4 # For Qwen
2 changes: 1 addition & 1 deletion thirdparty/openvino_tokenizers
(submodule commit pointer updated; diff not rendered)