Remove convert_tokenizer #425

Merged

Changes from 6 commits
12 changes: 1 addition & 11 deletions .github/workflows/causal_lm_cpp.yml
@@ -33,10 +33,9 @@ jobs:
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2
cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
cmake --build ./build/ --config Release -j
- - name: convert_tokenizer and run
+ - name: greedy_causal_lm
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./open_llama_3b_v2/ --output ./open_llama_3b_v2/ --with-detokenizer
./build/greedy_causal_lm ./open_llama_3b_v2/ "return 0"

cpp-beam_search_causal_lm-ubuntu:
@@ -64,7 +63,6 @@ jobs:
- name: Compare
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer

timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt
python -c "
@@ -157,7 +155,6 @@ jobs:
shell: cmd
run: |
call w_openvino_toolkit_windows_2024.1.0.15008.f4afc983258_x86_64\setupvars.bat
- convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer

.\build\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt
echo import transformers > ref.py
@@ -197,7 +194,6 @@ jobs:
- name: Compare
run: |
source ./ov/setupvars.sh
- convert_tokenizer Qwen/Qwen-7B-Chat --output ./Qwen-7B-Chat/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/ 69 > ./pred.txt

cpp-beam_search_causal_lm-Qwen1_5-7B-Chat:
@@ -225,7 +221,6 @@ jobs:
- name: Run
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./Qwen1.5-7B-Chat/ --output ./Qwen1.5-7B-Chat/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好!" > ./pred_qwen15.txt

cpp-beam_search_causal_lm-Phi-2:
@@ -253,7 +248,6 @@ jobs:
- name: Compare
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./phi-2/ --output ./phi-2/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./phi-2/ 69 > ./pred.txt

cpp-beam_search_causal_lm-notus-7b-v1:
@@ -281,7 +275,6 @@ jobs:
- name: Compare
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./notus-7b-v1/ --output ./notus-7b-v1/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/ 69 > ./pred.txt

cpp-speculative_decoding_lm-ubuntu:
@@ -305,8 +298,6 @@ jobs:
python -m pip install ./thirdparty/openvino_tokenizers/[transformers]
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b
- convert_tokenizer ./dolly-v2-3b/ --output ./dolly-v2-3b/ --with-detokenizer
- convert_tokenizer ./dolly-v2-7b/ --output ./dolly-v2-7b/ --with-detokenizer
cmake -DCMAKE_BUILD_TYPE=Release -S ./text_generation/causal_lm/cpp/ -B ./build/
cmake --build ./build/ --config Release -j
- name: run and compare
@@ -348,7 +339,6 @@ jobs:
- name: Run Generation
run: |
source ./ov/setupvars.sh
- convert_tokenizer ./phi-1_5/ --output ./phi-1_5/ --with-detokenizer --trust-remote-code
timeout 50s ./build/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt
timeout 50s ./build/beam_search_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_beam.txt
- name: Compare
18 changes: 6 additions & 12 deletions .github/workflows/lcm_dreamshaper_cpp.yml
@@ -39,7 +39,7 @@ jobs:
run: |
conda activate openvino_lcm_cpp
conda update -c conda-forge --all
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH

- name: Install python dependencies
@@ -53,9 +53,7 @@ jobs:
working-directory: ${{ env.working_directory }}
run: |
conda activate openvino_lcm_cpp
- export MODEL_PATH="models/lcm_dreamshaper_v7/FP16"
- optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH
- convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
+ optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16

- name: Build app
working-directory: ${{ env.working_directory }}
@@ -66,8 +64,7 @@ jobs:

- name: Run app
working-directory: ${{ env.working_directory }}
- run: |
- ./build/lcm_dreamshaper
+ run: ./build/lcm_dreamshaper

lcm_dreamshaper_v7_cpp-windows:
runs-on: windows-latest
@@ -87,7 +84,7 @@ jobs:
run: |
conda activate openvino_lcm_cpp
conda update -c conda-forge --all
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH

- name: Install python dependencies
@@ -101,9 +98,7 @@ jobs:
working-directory: ${{ env.working_directory }}
run: |
conda activate openvino_lcm_cpp
- $env:MODEL_PATH='models/lcm_dreamshaper_v7/FP16'
- optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $env:MODEL_PATH
- convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/
+ optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16

- name: Build app
working-directory: ${{ env.working_directory }}
@@ -114,5 +109,4 @@ jobs:

- name: Run app
working-directory: ${{ env.working_directory }}
- run: |
- & "./build/Release/lcm_dreamshaper.exe" -r --dynamic
+ run: '& "./build/Release/lcm_dreamshaper.exe" -r --dynamic'
18 changes: 6 additions & 12 deletions .github/workflows/stable_diffusion_1_5_cpp.yml
@@ -38,7 +38,7 @@ jobs:
- name: Install OpenVINO and other conda dependencies
run: |
conda activate openvino_sd_cpp
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH

- name: Install python dependencies
@@ -52,9 +52,7 @@ jobs:
working-directory: ${{ env.working_directory }}
run: |
conda activate openvino_sd_cpp
- export MODEL_PATH="models/stable_diffusion_v1_5_ov/FP16"
- optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH
- convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
+ optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16

- name: Build app
working-directory: ${{ env.working_directory }}
@@ -65,8 +63,7 @@ jobs:

- name: Run app
working-directory: ${{ env.working_directory }}
- run: |
- ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16
+ run: ./build/stable_diffusion -m ./models/stable_diffusion_v1_5_ov -t FP16

stable_diffusion_1_5_cpp-windows:
runs-on: windows-latest
@@ -85,7 +82,7 @@ jobs:
- name: Install OpenVINO and other conda dependencies
run: |
conda activate openvino_sd_cpp
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake

- name: Install python dependencies
working-directory: ${{ env.working_directory }}
@@ -98,9 +95,7 @@ jobs:
working-directory: ${{ env.working_directory }}
run: |
conda activate openvino_sd_cpp
- $env:MODEL_PATH='models/stable_diffusion_v1_5_ov/FP16'
- optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --convert-tokenizer --weight-format fp16 $env:MODEL_PATH
- convert_tokenizer $env:MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $env:MODEL_PATH/tokenizer/
+ optimum-cli export openvino --model runwayml/stable-diffusion-v1-5 --task stable-diffusion --weight-format fp16 models/stable_diffusion_v1_5_ov/FP16

- name: Build app
working-directory: ${{ env.working_directory }}
@@ -111,5 +106,4 @@ jobs:

- name: Run app
working-directory: ${{ env.working_directory }}
- run: |
- & "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic
+ run: '& "./build/Release/stable_diffusion.exe" -m ./models/stable_diffusion_v1_5_ov -t FP16 --dynamic'
11 changes: 3 additions & 8 deletions image_generation/lcm_dreamshaper_v7/cpp/README.md
@@ -18,7 +18,7 @@ Prepare a python environment and install dependencies:
conda create -n openvino_lcm_cpp python==3.10
conda activate openvino_lcm_cpp
conda update -c conda-forge --all
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
# Ensure that Conda standard libraries are used
conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
```
@@ -37,13 +37,8 @@ conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
```

2. Download the model from Huggingface and convert it to OpenVINO IR via [optimum-intel CLI](https://github.com/huggingface/optimum-intel). Example command for downloading and exporting FP16 model:
- ```shell
- export MODEL_PATH="models/lcm_dreamshaper_v7/FP16"
- # Using optimum-cli for exporting model to OpenVINO format
- optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 $MODEL_PATH
- # Converting tokenizer
- convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
- ```

+ `optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16`

### LoRA enabling with safetensors

2 changes: 1 addition & 1 deletion image_generation/lcm_dreamshaper_v7/cpp/requirements.txt
@@ -1,4 +1,4 @@
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.2.2+cpu
diffusers==0.27.2
- optimum-intel[nncf,openvino]==1.16.0
+ optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers
2 changes: 1 addition & 1 deletion image_generation/lcm_dreamshaper_v7/cpp/src/main.cpp
@@ -192,7 +192,7 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt) {
tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &pos_prompt});
tokenizer_req.infer();
ov::Tensor input_ids_token = tokenizer_req.get_tensor("input_ids");
- std::copy_n(input_ids_token.data<std::int32_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());
+ std::copy_n(input_ids_token.data<std::int64_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());

// text embeddings
text_encoder_req.set_tensor("input_ids", input_ids);
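This one-line fix reflects the tokenizer IRs that `optimum-cli` now exports: they emit `input_ids` as `int64`, while this sample's text encoder takes an `int32` tensor, so reading the buffer as `int32` misinterpreted the data. A minimal sketch of the corrected element-wise narrowing copy (the standalone helper is illustrative, not code from the PR):

```cpp
#include <algorithm>
#include <cstdint>

#include <openvino/openvino.hpp>

// Copy int64 token IDs from the tokenizer output into the int32
// "input_ids" tensor the CLIP text encoder expects. std::copy_n
// converts element by element, so vocabulary-sized IDs survive the
// int64 -> int32 narrowing without loss.
void copy_token_ids(ov::Tensor tokenizer_out, ov::Tensor encoder_input) {
    std::copy_n(tokenizer_out.data<std::int64_t>(),
                tokenizer_out.get_size(),
                encoder_input.data<std::int32_t>());
}
```

The same correction appears in `stable_diffusion_1_5/cpp/src/main.cpp` below.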
11 changes: 3 additions & 8 deletions image_generation/stable_diffusion_1_5/cpp/README.md
@@ -18,7 +18,7 @@ Prepare a python environment and install dependencies:
```shell
conda create -n openvino_sd_cpp python==3.10
conda activate openvino_sd_cpp
- conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler make cmake
+ conda install -c conda-forge openvino=2024.1.0 c-compiler cxx-compiler git make cmake
# Ensure that Conda standard libraries are used
conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
```
@@ -40,13 +40,8 @@ python -m pip install ../../../thirdparty/openvino_tokenizers/[transformers]
- [dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0) to run Stable Diffusion with LoRA adapters.

Example command for downloading and exporting FP16 model:
- ```shell
- export MODEL_PATH="models/dreamlike_anime_1_0_ov/FP16"
- # Using optimum-cli for exporting model to OpenVINO format
- optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --convert-tokenizer --weight-format fp16 $MODEL_PATH
- # Converting tokenizer manually (`--convert-tokenizer` flag of `optimum-cli` results in "OpenVINO Tokenizer export for CLIPTokenizer is not supported.")
- convert_tokenizer $MODEL_PATH/tokenizer/ --tokenizer-output-type i32 -o $MODEL_PATH/tokenizer/
- ```

+ `optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 models/dreamlike_anime_1_0_ov/FP16`

You can also choose other precision and export FP32 or INT8 model.

2 changes: 1 addition & 1 deletion image_generation/stable_diffusion_1_5/cpp/requirements.txt
@@ -2,5 +2,5 @@
torch==2.2.2+cpu
diffusers==0.27.2
transformers==4.39.3
- optimum-intel[nncf,openvino]==1.16.0
+ optimum-intel[nncf,openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers
huggingface_hub[cli]==0.22.2
2 changes: 1 addition & 1 deletion image_generation/stable_diffusion_1_5/cpp/src/main.cpp
@@ -216,7 +216,7 @@ ov::Tensor text_encoder(StableDiffusionModels models, std::string& pos_prompt, s
tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &prompt});
tokenizer_req.infer();
ov::Tensor input_ids_token = tokenizer_req.get_tensor("input_ids");
- std::copy_n(input_ids_token.data<std::int32_t>(), input_ids_token.get_size(), input_ids.data<int32_t>());
+ std::copy_n(input_ids_token.data<std::int64_t>(), input_ids_token.get_size(), input_ids.data<std::int32_t>());

// text embeddings
text_encoder_req.set_tensor("input_ids", input_ids);
6 changes: 2 additions & 4 deletions text_generation/causal_lm/cpp/README.md
@@ -1,6 +1,6 @@
# Text generation C++ samples that support most popular models like LLaMA 2

- These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `convert_tokenizer` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.
+ These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `optimum-cli` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.

## How it works
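The samples tokenize prompts by loading the openvino_tokenizers extension into `ov::Core` and running the exported tokenizer IR as a regular model, as the `tokenizer_req` calls in the image-generation diffs above show. A minimal sketch of that flow, assuming the default artifact names (the extension library, model path, and prompt below are illustrative assumptions, not taken from this PR):

```cpp
#include <string>

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Assumed library name: libopenvino_tokenizers.so on Linux,
    // openvino_tokenizers.dll on Windows.
    core.add_extension("libopenvino_tokenizers.so");
    // optimum-cli places openvino_tokenizer.xml (and openvino_detokenizer.xml)
    // next to the exported model IR.
    ov::InferRequest tokenizer_req =
        core.compile_model("TinyLlama-1.1B-Chat-v1.0/openvino_tokenizer.xml", "CPU")
            .create_infer_request();
    std::string prompt = "Alan Turing was a";
    tokenizer_req.set_input_tensor(ov::Tensor{ov::element::string, {1}, &prompt});
    tokenizer_req.infer();
    // int64 token IDs, ready to feed the LLM's "input_ids" input.
    ov::Tensor input_ids = tokenizer_req.get_tensor("input_ids");
    return input_ids.get_size() ? 0 : 1;
}
```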

@@ -49,7 +49,7 @@ This approach reduces the need for multiple infer requests to the main model, en

## Install OpenVINO

- Install [OpenVINO Archives >= 2024.0](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for these branches early testing. `<INSTALL_DIR>` below refers to the extraction location.
+ Install [OpenVINO Archives >= 2024.1](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for these branches early testing. `<INSTALL_DIR>` below refers to the extraction location.

## Build `greedy_causal_lm`, `beam_search_causal_lm` and `openvino_tokenizers`

@@ -81,7 +81,6 @@ python3 -m pip install --upgrade-strategy eager -r requirements.txt
# Update openvino_tokenizers from the submodule
python3 -m pip install ./../../../thirdparty/openvino_tokenizers/[transformers]
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
- convert_tokenizer ./TinyLlama-1.1B-Chat-v1.0/ --output ./TinyLlama-1.1B-Chat-v1.0/ --with-detokenizer --trust-remote-code
```

#### Windows
@@ -92,7 +91,6 @@ python -m pip install --upgrade-strategy eager -r requirements.txt
REM Update openvino_tokenizers from the submodule
python -m pip install .\..\..\..\thirdparty\openvino_tokenizers\[transformers]
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
- convert_tokenizer .\TinyLlama-1.1B-Chat-v1.0\ --output .\TinyLlama-1.1B-Chat-v1.0\ --with-detokenizer --trust-remote-code
```

## Run
1 change: 1 addition & 0 deletions text_generation/causal_lm/cpp/requirements.txt
@@ -1,4 +1,5 @@
--extra-index-url https://download.pytorch.org/whl/cpu
optimum[openvino]==1.19.1
+ optimum-intel[openvino] @ git+https://github.com/apaniukov/optimum-intel.git@ov-tokenizers-leftovers
einops==0.7.0 # For Qwen
transformers_stream_generator==0.0.4 # For Qwen
2 changes: 1 addition & 1 deletion thirdparty/openvino_tokenizers
(submodule commit pointer updated; diff not rendered)