From 3fb27c67c8c0e2a77cc5c4d4cbd43c073ba56066 Mon Sep 17 00:00:00 2001 From: Wovchena Date: Wed, 8 May 2024 20:17:33 +0400 Subject: [PATCH 1/7] Remove convert_tokenizer --- .github/workflows/causal_lm_cpp.yml | 12 +----------- .github/workflows/lcm_dreamshaper_cpp.yml | 14 ++++---------- .github/workflows/stable_diffusion_1_5_cpp.yml | 14 ++++---------- image_generation/lcm_dreamshaper_v7/cpp/README.md | 9 ++------- .../stable_diffusion_1_5/cpp/README.md | 9 ++------- text_generation/causal_lm/cpp/README.md | 6 ++---- 6 files changed, 15 insertions(+), 49 deletions(-) diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index cc69e414b..0a6e9dd5a 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -33,10 +33,9 @@ jobs: optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2 cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - - name: convert_tokenizer and run + - name: greedy_causal_lm run: | source ./ov/setupvars.sh - convert_tokenizer ./open_llama_3b_v2/ --output ./open_llama_3b_v2/ --with-detokenizer ./build/greedy_causal_lm ./open_llama_3b_v2/ "return 0" cpp-beam_search_causal_lm-ubuntu: @@ -64,7 +63,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt python -c " @@ -157,7 +155,6 @@ jobs: shell: cmd run: | call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64\setupvars.bat - convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer .\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt echo import transformers > ref.py @@ -197,7 +194,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer Qwen/Qwen-7B-Chat --output ./Qwen-7B-Chat/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/ 69 > ./pred.txt cpp-beam_search_causal_lm-Qwen1_5-7B-Chat: @@ -225,7 +221,6 @@ jobs: - name: Run run: | source ./ov/setupvars.sh - convert_tokenizer ./Qwen1.5-7B-Chat/ --output ./Qwen1.5-7B-Chat/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好!" 
> ./pred_qwen15.txt cpp-beam_search_causal_lm-Phi-2: @@ -253,7 +248,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./phi-2/ --output ./phi-2/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./phi-2/ 69 > ./pred.txt cpp-beam_search_causal_lm-notus-7b-v1: @@ -281,7 +275,6 @@ jobs: - name: Compare run: | source ./ov/setupvars.sh - convert_tokenizer ./notus-7b-v1/ --output ./notus-7b-v1/ --with-detokenizer --trust-remote-code timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/ 69 > ./pred.txt cpp-speculative_decoding_lm-ubuntu: @@ -305,8 +298,6 @@ jobs: python -m pip install ./thirdparty/openvino_tokenizers/[transformers] optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b - convert_tokenizer ./dolly-v2-3b/ --output ./dolly-v2-3b/ --with-detokenizer - convert_tokenizer ./dolly-v2-7b/ --output ./dolly-v2-7b/ --with-detokenizer cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/ cmake --build ./build/ --config Release -j - name: run and compare @@ -348,7 +339,6 @@ jobs: - name: Run Generation run: | source ./ov/setupvars.sh - convert_tokenizer ./phi-1_5/ --output ./phi-1_5/ --with-detokenizer --trust-remote-code timeout 50s ./build/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt timeout 50s ./build/beam_search_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_beam.txt - name: Compare diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index 131927d76..2009aa84b 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -53,9 +53,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - export MODEL_PATH="models/lcm_dreamshaper_v7/FP16" - optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH - convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --convert-tokenizer --weight-format fp16 models/lcm_dreamshaper_v7/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -66,8 +64,7 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - ./build/lcm_dreamshaper + run: ./build/lcm_dreamshaper lcm_dreamshaper_v7_cpp-windows: runs-on: windows-latest @@ -101,9 +98,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - $env:MODEL_PATH='models/lcm_dreamshaper_v7/FP16' - optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $env:MODEL_PATH - convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/ + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --convert-tokenizer --weight-format fp16 models/lcm_dreamshaper_v7/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -114,5 +109,4 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - & "./build/Release/lcm_dreamshaper.exe" -r --dynamic + run: '& "./build/Release/lcm_dreamshaper.exe" -r --dynamic' diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index ad929ea06..b4c1fe050 100644 --- 
a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -52,9 +52,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp - export MODEL_PATH="models/stable_diffusion_v1_5_ov/FP16" - optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH - convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ + optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -65,8 +63,7 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16 + run: ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16 stable_diffusion_1_5_cpp-windows: runs-on: windows-latest @@ -98,9 +95,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp - $env:MODEL_PATH='models/stable_diffusion_v1_5_ov/FP16' - optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $env:MODEL_PATH - convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/ + optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -111,5 +106,4 @@ jobs: - name: Run app working-directory: ${{ env.working_directory }} - run: | - & "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic + run: '& "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic' diff --git a/image_generation/lcm_dreamshaper_v7/cpp/README.md b/image_generation/lcm_dreamshaper_v7/cpp/README.md index d4a62fb27..ca3cb7750 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/README.md +++ b/image_generation/lcm_dreamshaper_v7/cpp/README.md @@ -37,13 +37,8 @@ conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH ``` 2. Download the model from Huggingface and convert it to OpenVINO IR via [optimum-intel CLI](https://github.com/huggingface/optimum-intel). 
Example command for downloading and exporting FP16 model: - ```shell - export MODEL_PATH="models/lcm_dreamshaper_v7/FP16" - # Using optimum-cli for exporting model to OpenVINO format - optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH - # Converting tokenizer - convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ - ``` + + `optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --convert-tokenizer --weight-format fp16 models/lcm_dreamshaper_v7/FP16` ### LoRA enabling with safetensors diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md index 2dfa32628..acd4505c0 100644 --- a/image_generation/stable_diffusion_1_5/cpp/README.md +++ b/image_generation/stable_diffusion_1_5/cpp/README.md @@ -40,13 +40,8 @@ python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] - [dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) to run Stable Diffusion with LoRA adapters. Example command for downloading and exporting FP16 model: - ```shell - export MODEL_PATH="models/dreamlike_anime_1_0_ov/FP16" - # Using optimum-cli for exporting model to OpenVINO format - optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH - # Converting tokenizer manually (`--convert-tokenizer` flag of `optimum-cli` results in "OpenVINO Tokenizer export for CLIPTokenizer is not supported.") - convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/ - ``` + + `optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --convert-tokenizer --weight-format fp16 models/dreamlike_anime_1_0_ov/FP16` You can also choose other precision and export FP32 or INT8 model. diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md index a0b4a0a1b..d65c79bad 100644 --- a/text_generation/causal_lm/cpp/README.md +++ b/text_generation/causal_lm/cpp/README.md @@ -1,6 +1,6 @@ # Text generation C++ samples that support most popular models like LLaMA 2 -These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `convert_tokenizer` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python. +These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `optimum-cli` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. 
There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python. ## How it works @@ -49,7 +49,7 @@ This approach reduces the need for multiple infer requests to the main model, en ## Install OpenVINO -Install [OpenVINO Archives >= 2024.0](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for these branches early testing. `` below refers to the extraction location. +Install [OpenVINO Archives >= 2024.1](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for these branches early testing. `` below refers to the extraction location. ## Build `greedy_causal_lm`, `beam_search_causal_lm` and `openvino_tokenizers` @@ -81,7 +81,6 @@ python3 -m pip install --upgrade-strategy eager -r requirements.txt # Update openvino_tokenizers from the submodule python3 -m pip install ./../../../thirdparty/openvino_tokenizers/[transformers] optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 -convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer --trust-remote-code ``` #### Windows @@ -92,7 +91,6 @@ python -m pip install --upgrade-strategy eager -r requirements.txt REM Update openvino_tokenizers from the submodule python -m pip install .\..\..\..\thirdparty\openvino_tokenizers\[transformers] optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 -convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer --trust-remote-code ``` ## Run From fbc55e1891ed0a2c4f6256734d50af86a4a65211 Mon Sep 17 00:00:00 2001 From: Wovchena Date: Thu, 9 May 2024 11:45:04 +0400 Subject: [PATCH 2/7] Apply https://github.com/huggingface/optimum-intel/pull/697 --- image_generation/lcm_dreamshaper_v7/cpp/requirements.txt | 2 +- image_generation/stable_diffusion_1_5/cpp/requirements.txt | 2 +- text_generation/causal_lm/cpp/requirements.txt | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt index dbf28af22..68ef4d0cc 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt +++ b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt @@ -1,4 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cpu torch==2.2.2+cpu diffusers==0.27.2 -optimum-intel[nncf,openvino]==1.16.0 +optimum-intel[openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers diff --git a/image_generation/stable_diffusion_1_5/cpp/requirements.txt b/image_generation/stable_diffusion_1_5/cpp/requirements.txt index 289149d13..15d3a5d60 100644 --- a/image_generation/stable_diffusion_1_5/cpp/requirements.txt +++ b/image_generation/stable_diffusion_1_5/cpp/requirements.txt @@ -2,5 +2,5 @@ torch==2.2.2+cpu diffusers==0.27.2 transformers==4.39.3 -optimum-intel[nncf,openvino]==1.16.0 +optimum-intel[openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers 
huggingface_hub[cli]==0.22.2 diff --git a/text_generation/causal_lm/cpp/requirements.txt b/text_generation/causal_lm/cpp/requirements.txt index 89c6a4b4d..a01383c8f 100644 --- a/text_generation/causal_lm/cpp/requirements.txt +++ b/text_generation/causal_lm/cpp/requirements.txt @@ -1,4 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cpu optimum[openvino]==1.19.1 +optimum-intel[openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers einops==0.7.0 # For Qwen transformers_stream_generator==0.0.4 # For Qwen From 9f61788e82eb21720860c1b73bc096cec8d3b761 Mon Sep 17 00:00:00 2001 From: Wovchena Date: Thu, 9 May 2024 12:06:36 +0400 Subject: [PATCH 3/7] Fix git and openvino_tokenizer path --- .github/workflows/lcm_dreamshaper_cpp.yml | 8 ++++---- .github/workflows/stable_diffusion_1_5_cpp.yml | 8 ++++---- image_generation/lcm_dreamshaper_v7/cpp/README.md | 4 ++-- image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp | 2 +- image_generation/stable_diffusion_1_5/cpp/README.md | 4 ++-- image_generation/stable_diffusion_1_5/cpp/src/main.cpp | 2 +- text_generation/causal_lm/cpp/beam_search_causal_lm.cpp | 4 ++-- text_generation/causal_lm/cpp/greedy_causal_lm.cpp | 4 ++-- text_generation/causal_lm/cpp/speculative_decoding_lm.cpp | 4 ++-- 9 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index 2009aa84b..427fada3a 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -39,7 +39,7 @@ jobs: run: | conda activate openvino_lcm_cpp conda update -c conda-forge --all - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH - name: Install python dependencies @@ -53,7 +53,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --convert-tokenizer --weight-format fp16 models/lcm_dreamshaper_v7/FP16 + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -84,7 +84,7 @@ jobs: run: | conda activate openvino_lcm_cpp conda update -c conda-forge --all - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH - name: Install python dependencies @@ -98,7 +98,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_lcm_cpp - optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --convert-tokenizer --weight-format fp16 models/lcm_dreamshaper_v7/FP16 + optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16 - name: Build app working-directory: ${{ env.working_directory }} diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index b4c1fe050..0d0d3ea8b 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -38,7 +38,7 @@ jobs: - name: Install OpenVINO and other conda dependencies run: | conda activate 
openvino_sd_cpp - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH - name: Install python dependencies @@ -52,7 +52,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp - optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16 + optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16 - name: Build app working-directory: ${{ env.working_directory }} @@ -82,7 +82,7 @@ jobs: - name: Install OpenVINO and other conda dependencies run: | conda activate openvino_sd_cpp - conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake + conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake - name: Install python dependencies working-directory: ${{ env.working_directory }} @@ -95,7 +95,7 @@ jobs: working-directory: ${{ env.working_directory }} run: | conda activate openvino_sd_cpp - optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16 + optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16 - name: Build app working-directory: ${{ env.working_directory }} diff --git a/image_generation/lcm_dreamshaper_v7/cpp/README.md b/image_generation/lcm_dreamshaper_v7/cpp/README.md index ca3cb7750..c993f80c5 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/README.md +++ b/image_generation/lcm_dreamshaper_v7/cpp/README.md @@ -18,7 +18,7 @@ Prepare a python environment and install dependencies: conda create -n openvino_lcm_cpp python==3.10 conda activate openvino_lcm_cpp conda update -c conda-forge --all -conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake +conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake # Ensure that Conda standard libraries are used conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH ``` @@ -38,7 +38,7 @@ conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH 2. Download the model from Huggingface and convert it to OpenVINO IR via [optimum-intel CLI](https://github.com/huggingface/optimum-intel). Example command for downloading and exporting FP16 model: - `optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --convert-tokenizer --weight-format fp16 models/lcm_dreamshaper_v7/FP16` + `optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16` ### LoRA enabling with safetensors diff --git a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp index 20e240fa8..df42d313b 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp +++ b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp @@ -170,7 +170,7 @@ StableDiffusionModels compile_models(const std::string& model_path, // Tokenizer { // Tokenizer model wil be loaded to CPU: OpenVINO Tokenizers can be inferred on a CPU device only. 
- models.tokenizer = core.compile_model(model_path + "/tokenizer/openvino_tokenizer.xml", "CPU"); + models.tokenizer = core.compile_model(model_path + "/openvino_tokenizer/openvino_tokenizer.xml", "CPU"); } return models; diff --git a/image_generation/stable_diffusion_1_5/cpp/README.md b/image_generation/stable_diffusion_1_5/cpp/README.md index acd4505c0..daf638443 100644 --- a/image_generation/stable_diffusion_1_5/cpp/README.md +++ b/image_generation/stable_diffusion_1_5/cpp/README.md @@ -18,7 +18,7 @@ Prepare a python environment and install dependencies: ```shell conda create -n openvino_sd_cpp python==3.10 conda activate openvino_sd_cpp -conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake +conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake # Ensure that Conda standard libraries are used conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH ``` @@ -41,7 +41,7 @@ python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers] Example command for downloading and exporting FP16 model: - `optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --convert-tokenizer --weight-format fp16 models/dreamlike_anime_1_0_ov/FP16` + `optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 models/dreamlike_anime_1_0_ov/FP16` You can also choose other precision and export FP32 or INT8 model. diff --git a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp index d1c24c32a..19254aca9 100644 --- a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp +++ b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp @@ -194,7 +194,7 @@ StableDiffusionModels compile_models(const std::string& model_path, { Timer t("Loading and compiling tokenizer"); // Tokenizer model wil be loaded to CPU: OpenVINO Tokenizers can be inferred on a CPU device only. 
- models.tokenizer = core.compile_model(model_path + "/tokenizer/openvino_tokenizer.xml", "CPU"); + models.tokenizer = core.compile_model(model_path + "/openvino_tokenizer/openvino_tokenizer.xml", "CPU"); } return models; diff --git a/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp b/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp index 218b0af9e..4ee34ae48 100644 --- a/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp +++ b/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp @@ -32,13 +32,13 @@ int main(int argc, char* argv[]) try { ov::Core core; core.add_extension(OPENVINO_TOKENIZERS_PATH); // OPENVINO_TOKENIZERS_PATH is defined in CMakeLists.txt //Read the tokenizer model information from the file to later get the runtime information - auto tokenizer_model = core.read_model(std::string{argv[1]} + "/openvino_tokenizer.xml"); + auto tokenizer_model = core.read_model(std::string{argv[1]} + "/openvino_tokenizer/openvino_tokenizer.xml"); // tokenizer and detokenizer work on CPU only ov::InferRequest tokenizer = core.compile_model( tokenizer_model, "CPU").create_infer_request(); auto [input_ids, attention_mask] = tokenize(tokenizer, argv[2]); ov::InferRequest detokenizer = core.compile_model( - std::string{argv[1]} + "/openvino_detokenizer.xml", "CPU").create_infer_request(); + std::string{argv[1]} + "/openvino_tokenizer/openvino_detokenizer.xml", "CPU").create_infer_request(); // The model can be compiled for GPU as well ov::InferRequest lm = core.compile_model( std::string{argv[1]} + "/openvino_model.xml", "CPU").create_infer_request(); diff --git a/text_generation/causal_lm/cpp/greedy_causal_lm.cpp b/text_generation/causal_lm/cpp/greedy_causal_lm.cpp index d75d32d0e..46a6f3bcb 100644 --- a/text_generation/causal_lm/cpp/greedy_causal_lm.cpp +++ b/text_generation/causal_lm/cpp/greedy_causal_lm.cpp @@ -62,13 +62,13 @@ int main(int argc, char* argv[]) try { ov::Core core; core.add_extension(OPENVINO_TOKENIZERS_PATH); // OPENVINO_TOKENIZERS_PATH is defined in CMakeLists.txt //Read the tokenizer model information from the file to later get the runtime information - auto tokenizer_model = core.read_model(std::string{argv[1]} + "/openvino_tokenizer.xml"); + auto tokenizer_model = core.read_model(std::string{argv[1]} + "/openvino_tokenizer/openvino_tokenizer.xml"); // tokenizer and detokenizer work on CPU only ov::InferRequest tokenizer = core.compile_model( tokenizer_model, "CPU").create_infer_request(); auto [input_ids, attention_mask] = tokenize(tokenizer, argv[2]); ov::InferRequest detokenizer = core.compile_model( - std::string{argv[1]} + "/openvino_detokenizer.xml", "CPU").create_infer_request(); + std::string{argv[1]} + "/openvino_tokenizer/openvino_detokenizer.xml", "CPU").create_infer_request(); // The model can be compiled for GPU as well ov::InferRequest lm = core.compile_model( std::string{argv[1]} + "/openvino_model.xml", "CPU").create_infer_request(); diff --git a/text_generation/causal_lm/cpp/speculative_decoding_lm.cpp b/text_generation/causal_lm/cpp/speculative_decoding_lm.cpp index f5e79ae8f..27ad14af7 100644 --- a/text_generation/causal_lm/cpp/speculative_decoding_lm.cpp +++ b/text_generation/causal_lm/cpp/speculative_decoding_lm.cpp @@ -119,10 +119,10 @@ int main(int argc, char* argv[]) try { core.add_extension(OPENVINO_TOKENIZERS_PATH); // OPENVINO_TOKENIZERS_PATH is defined in CMakeLists.txt // tokenizer and detokenizer work on CPU only ov::InferRequest tokenizer = core.compile_model( - std::string{argv[1]} + "/openvino_tokenizer.xml", 
"CPU").create_infer_request(); + std::string{argv[1]} + "/openvino_tokenizer/openvino_tokenizer.xml", "CPU").create_infer_request(); auto [draft_input_ids, draft_attention_mask] = tokenize(tokenizer, argv[3]); ov::InferRequest detokenizer = core.compile_model( - std::string{argv[1]} + "/openvino_detokenizer.xml", "CPU").create_infer_request(); + std::string{argv[1]} + "/openvino_tokenizer/openvino_detokenizer.xml", "CPU").create_infer_request(); TextStreamer text_streamer{std::move(detokenizer)}; // draft model From 2dfe44db9341eff8246bd1978ec9d32e52895d52 Mon Sep 17 00:00:00 2001 From: Wovchena Date: Thu, 9 May 2024 12:27:52 +0400 Subject: [PATCH 4/7] Put nncf and tokenizer back --- image_generation/lcm_dreamshaper_v7/cpp/requirements.txt | 2 +- image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp | 2 +- image_generation/stable_diffusion_1_5/cpp/requirements.txt | 2 +- image_generation/stable_diffusion_1_5/cpp/src/main.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt index 68ef4d0cc..4d0dc7e00 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt +++ b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt @@ -1,4 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cpu torch==2.2.2+cpu diffusers==0.27.2 -optimum-intel[openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers +optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers diff --git a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp index df42d313b..20e240fa8 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp +++ b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp @@ -170,7 +170,7 @@ StableDiffusionModels compile_models(const std::string& model_path, // Tokenizer { // Tokenizer model wil be loaded to CPU: OpenVINO Tokenizers can be inferred on a CPU device only. - models.tokenizer = core.compile_model(model_path + "/openvino_tokenizer/openvino_tokenizer.xml", "CPU"); + models.tokenizer = core.compile_model(model_path + "/tokenizer/openvino_tokenizer.xml", "CPU"); } return models; diff --git a/image_generation/stable_diffusion_1_5/cpp/requirements.txt b/image_generation/stable_diffusion_1_5/cpp/requirements.txt index 15d3a5d60..cea7a7dfa 100644 --- a/image_generation/stable_diffusion_1_5/cpp/requirements.txt +++ b/image_generation/stable_diffusion_1_5/cpp/requirements.txt @@ -2,5 +2,5 @@ torch==2.2.2+cpu diffusers==0.27.2 transformers==4.39.3 -optimum-intel[openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers +optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers huggingface_hub[cli]==0.22.2 diff --git a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp index 19254aca9..d1c24c32a 100644 --- a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp +++ b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp @@ -194,7 +194,7 @@ StableDiffusionModels compile_models(const std::string& model_path, { Timer t("Loading and compiling tokenizer"); // Tokenizer model wil be loaded to CPU: OpenVINO Tokenizers can be inferred on a CPU device only. 
- models.tokenizer = core.compile_model(model_path + "/openvino_tokenizer/openvino_tokenizer.xml", "CPU"); + models.tokenizer = core.compile_model(model_path + "/tokenizer/openvino_tokenizer.xml", "CPU"); } return models; From f1147737928c18a2a183c68c75ab6f3da09f12d9 Mon Sep 17 00:00:00 2001 From: Wovchena Date: Thu, 9 May 2024 12:42:41 +0400 Subject: [PATCH 5/7] int32- --- image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp | 2 +- image_generation/stable_diffusion_1_5/cpp/src/main.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp index 20e240fa8..546bd170b 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp +++ b/image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp @@ -192,7 +192,7 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt) { tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &pos_prompt}); tokenizer_req.infer(); ov::Tensor input_ids_token = tokenizer_req.get_tensor("input_ids"); - std::copy_n(input_ids_token.data(), input_ids_token.get_size(), input_ids.data()); + std::copy_n(input_ids_token.data(), input_ids_token.get_size(), input_ids.data()); // text embeddings text_encoder_req.set_tensor("input_ids", input_ids); diff --git a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp index d1c24c32a..3d6c8a799 100644 --- a/image_generation/stable_diffusion_1_5/cpp/src/main.cpp +++ b/image_generation/stable_diffusion_1_5/cpp/src/main.cpp @@ -216,7 +216,7 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt, s tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &prompt}); tokenizer_req.infer(); ov::Tensor input_ids_token = tokenizer_req.get_tensor("input_ids"); - std::copy_n(input_ids_token.data(), input_ids_token.get_size(), input_ids.data()); + std::copy_n(input_ids_token.data(), input_ids_token.get_size(), input_ids.data()); // text embeddings text_encoder_req.set_tensor("input_ids", input_ids); From 0a8c559f4d2e757b87a509e4f30ee89511827abc Mon Sep 17 00:00:00 2001 From: Wovchena Date: Thu, 9 May 2024 14:18:57 +0400 Subject: [PATCH 6/7] remove openvino_tokenizer folder --- text_generation/causal_lm/cpp/beam_search_causal_lm.cpp | 4 ++-- text_generation/causal_lm/cpp/greedy_causal_lm.cpp | 4 ++-- text_generation/causal_lm/cpp/speculative_decoding_lm.cpp | 4 ++-- thirdparty/openvino_tokenizers | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp b/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp index 4ee34ae48..218b0af9e 100644 --- a/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp +++ b/text_generation/causal_lm/cpp/beam_search_causal_lm.cpp @@ -32,13 +32,13 @@ int main(int argc, char* argv[]) try { ov::Core core; core.add_extension(OPENVINO_TOKENIZERS_PATH); // OPENVINO_TOKENIZERS_PATH is defined in CMakeLists.txt //Read the tokenizer model information from the file to later get the runtime information - auto tokenizer_model = core.read_model(std::string{argv[1]} + "/openvino_tokenizer/openvino_tokenizer.xml"); + auto tokenizer_model = core.read_model(std::string{argv[1]} + "/openvino_tokenizer.xml"); // tokenizer and detokenizer work on CPU only ov::InferRequest tokenizer = core.compile_model( tokenizer_model, "CPU").create_infer_request(); auto [input_ids, attention_mask] = tokenize(tokenizer, argv[2]); 
ov::InferRequest detokenizer = core.compile_model( - std::string{argv[1]} + "/openvino_tokenizer/openvino_detokenizer.xml", "CPU").create_infer_request(); + std::string{argv[1]} + "/openvino_detokenizer.xml", "CPU").create_infer_request(); // The model can be compiled for GPU as well ov::InferRequest lm = core.compile_model( std::string{argv[1]} + "/openvino_model.xml", "CPU").create_infer_request(); diff --git a/text_generation/causal_lm/cpp/greedy_causal_lm.cpp b/text_generation/causal_lm/cpp/greedy_causal_lm.cpp index 46a6f3bcb..d75d32d0e 100644 --- a/text_generation/causal_lm/cpp/greedy_causal_lm.cpp +++ b/text_generation/causal_lm/cpp/greedy_causal_lm.cpp @@ -62,13 +62,13 @@ int main(int argc, char* argv[]) try { ov::Core core; core.add_extension(OPENVINO_TOKENIZERS_PATH); // OPENVINO_TOKENIZERS_PATH is defined in CMakeLists.txt //Read the tokenizer model information from the file to later get the runtime information - auto tokenizer_model = core.read_model(std::string{argv[1]} + "/openvino_tokenizer/openvino_tokenizer.xml"); + auto tokenizer_model = core.read_model(std::string{argv[1]} + "/openvino_tokenizer.xml"); // tokenizer and detokenizer work on CPU only ov::InferRequest tokenizer = core.compile_model( tokenizer_model, "CPU").create_infer_request(); auto [input_ids, attention_mask] = tokenize(tokenizer, argv[2]); ov::InferRequest detokenizer = core.compile_model( - std::string{argv[1]} + "/openvino_tokenizer/openvino_detokenizer.xml", "CPU").create_infer_request(); + std::string{argv[1]} + "/openvino_detokenizer.xml", "CPU").create_infer_request(); // The model can be compiled for GPU as well ov::InferRequest lm = core.compile_model( std::string{argv[1]} + "/openvino_model.xml", "CPU").create_infer_request(); diff --git a/text_generation/causal_lm/cpp/speculative_decoding_lm.cpp b/text_generation/causal_lm/cpp/speculative_decoding_lm.cpp index 27ad14af7..f5e79ae8f 100644 --- a/text_generation/causal_lm/cpp/speculative_decoding_lm.cpp +++ b/text_generation/causal_lm/cpp/speculative_decoding_lm.cpp @@ -119,10 +119,10 @@ int main(int argc, char* argv[]) try { core.add_extension(OPENVINO_TOKENIZERS_PATH); // OPENVINO_TOKENIZERS_PATH is defined in CMakeLists.txt // tokenizer and detokenizer work on CPU only ov::InferRequest tokenizer = core.compile_model( - std::string{argv[1]} + "/openvino_tokenizer/openvino_tokenizer.xml", "CPU").create_infer_request(); + std::string{argv[1]} + "/openvino_tokenizer.xml", "CPU").create_infer_request(); auto [draft_input_ids, draft_attention_mask] = tokenize(tokenizer, argv[3]); ov::InferRequest detokenizer = core.compile_model( - std::string{argv[1]} + "/openvino_tokenizer/openvino_detokenizer.xml", "CPU").create_infer_request(); + std::string{argv[1]} + "/openvino_detokenizer.xml", "CPU").create_infer_request(); TextStreamer text_streamer{std::move(detokenizer)}; // draft model diff --git a/thirdparty/openvino_tokenizers b/thirdparty/openvino_tokenizers index c55f8e256..37d20ce20 160000 --- a/thirdparty/openvino_tokenizers +++ b/thirdparty/openvino_tokenizers @@ -1 +1 @@ -Subproject commit c55f8e2568fe0093f6558b9ef7b49c512a412c14 +Subproject commit 37d20ce209b120f6ffd450484e207ef71f8c8d03 From 0393e54c3d139c591177629ff365528c71c20a67 Mon Sep 17 00:00:00 2001 From: Wovchena Date: Thu, 9 May 2024 14:23:52 +0400 Subject: [PATCH 7/7] pin optimum-intel commit --- image_generation/lcm_dreamshaper_v7/cpp/requirements.txt | 2 +- image_generation/stable_diffusion_1_5/cpp/requirements.txt | 2 +- text_generation/causal_lm/cpp/requirements.txt | 2 +- 3 files 
changed, 3 insertions(+), 3 deletions(-) diff --git a/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt index 4d0dc7e00..7ffbb9213 100644 --- a/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt +++ b/image_generation/lcm_dreamshaper_v7/cpp/requirements.txt @@ -1,4 +1,4 @@ --extra-index-url https://download.pytorch.org/whl/cpu torch==2.2.2+cpu diffusers==0.27.2 -optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers +optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@0029e9165a2dad4cfcf787aa63181d9dc0cd49d5 diff --git a/image_generation/stable_diffusion_1_5/cpp/requirements.txt b/image_generation/stable_diffusion_1_5/cpp/requirements.txt index cea7a7dfa..5e6bfe037 100644 --- a/image_generation/stable_diffusion_1_5/cpp/requirements.txt +++ b/image_generation/stable_diffusion_1_5/cpp/requirements.txt @@ -2,5 +2,5 @@ torch==2.2.2+cpu diffusers==0.27.2 transformers==4.39.3 -optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers +optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@0029e9165a2dad4cfcf787aa63181d9dc0cd49d5 huggingface_hub[cli]==0.22.2 diff --git a/text_generation/causal_lm/cpp/requirements.txt b/text_generation/causal_lm/cpp/requirements.txt index a01383c8f..019e172dd 100644 --- a/text_generation/causal_lm/cpp/requirements.txt +++ b/text_generation/causal_lm/cpp/requirements.txt @@ -1,5 +1,5 @@ --extra-index-url https://download.pytorch.org/whl/cpu optimum[openvino]==1.19.1 -optimum-intel[openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers +optimum-intel[openvino] @ git+https://github.com/apaniukov/optimum-intel.git@0029e9165a2dad4cfcf787aa63181d9dc0cd49d5 einops==0.7.0 # For Qwen transformers_stream_generator==0.0.4 # For Qwen
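
Taken together, the series replaces the standalone `convert_tokenizer` step with a single `optimum-cli export openvino` invocation that also emits the tokenizer and detokenizer IRs, and (after patch 6) the samples load those IRs from the model directory itself. Below is a minimal smoke-test sketch of the resulting flow — not itself part of the patches — assuming the TinyLlama model and paths used by `causal_lm_cpp.yml`: OpenVINO archive extracted to `./ov/`, samples built into `./build/`, commands run from the repository root.

```shell
# Assumed layout, mirroring the cpp-beam_search_causal_lm-ubuntu job.
source ./ov/setupvars.sh
python3 -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
python3 -m pip install ./thirdparty/openvino_tokenizers/[transformers]
# A single export now produces openvino_tokenizer.xml and openvino_detokenizer.xml
# next to openvino_model.xml, so no separate convert_tokenizer call is needed.
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
# The sample reads the (de)tokenizer IRs from the same folder as the model IR
# (patch 6 reverted the short-lived openvino_tokenizer/ subdirectory lookup).
./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69
```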