Fix pytorch bugs of llm bench (openvinotoolkit#192)
1. Fix TypeError: not all arguments converted during string formatting
2. Fix the bug that PyTorch does not output the time consumption of the 1st
   and 2nd tokens to CSV
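
The first fix is a stdlib logging pitfall: log.info('Real base model=', real_base_model_name) passes the name as a %-style formatting argument, but the message contains no %s placeholder, so the logging module reports the TypeError above. A minimal reproduction of the failure and the fix (the model-name value below is a made-up example):

import logging as log

log.basicConfig(level=log.INFO)
real_base_model_name = "<class 'gptneoxforcausallm'>"  # example value only

# Broken: the extra argument is treated as a %-format parameter; with no
# placeholder in the message, logging prints "--- Logging error ---" with
# "TypeError: not all arguments converted during string formatting".
log.info('Real base model=', real_base_model_name)

# Fixed (as in this commit): pre-format the message with an f-string.
log.info(f'Real base model={real_base_model_name}')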
wgzintel committed Jan 30, 2024
1 parent f1f6225 commit 9e3864e
Showing 3 changed files with 19 additions and 19 deletions.
17 changes: 1 addition & 16 deletions llm_bench/python/benchmark.py
@@ -13,22 +13,17 @@
 import torch
 import numpy as np
 from openvino.runtime import get_version
-from utils.config_class import DEFAULT_MODEL_CLASSES
 import PIL
 import hashlib
 import utils.metrics_print
 import utils.output_csv
-import utils.hook_greedy_search
-import utils.hook_beam_search
 import traceback
 from transformers import set_seed
 from PIL import Image
 from utils.memory_profile import MemConsumption
 from utils.hook_forward import StableDiffusionHook
 import utils.output_json

-HOOK_BEAM_SEARCH_UTILS = {'pt': utils.hook_beam_search, 'ov': utils.hook_beam_search}
-HOOK_GREEDY_SEARCH_UTILS = {'pt': utils.hook_greedy_search, 'ov': utils.hook_greedy_search}
 FW_UTILS = {'pt': utils.pt_utils, 'ov': utils.ov_utils}

 DEFAULT_INFERENCE_STEPS = 20
@@ -168,17 +163,7 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,


 def run_text_generation_benchmark(model_path, framework, device, args, num_iters):
-    model, tokenizer, pretrain_time = FW_UTILS[framework].create_text_gen_model(model_path, device, **args)
-    # Override forward for statistic each forward time.
-    default_model_type = DEFAULT_MODEL_CLASSES[args['use_case']]
-    model_type = args.get('model_type', default_model_type)
-
-    if args['num_beams'] > 1:
-        bench_hook = HOOK_BEAM_SEARCH_UTILS[framework].BeamSearchHook()
-    else:
-        bench_hook = HOOK_GREEDY_SEARCH_UTILS[framework].GreedySearchHook()
-    bench_hook.new_forward(model, model_type)
-
+    model, tokenizer, pretrain_time, bench_hook = FW_UTILS[framework].create_text_gen_model(model_path, device, **args)
     iter_data_list = []
     input_text_list = utils.model_utils.get_prompts(args)
     if len(input_text_list) == 0:
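
The change above moves bench-hook construction out of benchmark.py and into each backend's create_text_gen_model (see ov_utils.py and pt_utils.py below), which both now return the hook alongside the model. For orientation, a rough sketch of what such a forward hook does, as a hypothetical simplification rather than the actual utils.hook_greedy_search or utils.hook_beam_search code:

import time
import types

class MinimalForwardHook:
    # Hypothetical stand-in for GreedySearchHook/BeamSearchHook: wrap
    # model.forward so each decoding step's latency is recorded.
    def __init__(self):
        self.time_list = []

    def new_forward(self, model, model_type=None):
        original_forward = model.forward  # already bound to the model

        def timed_forward(model_self, *args, **kwargs):
            start = time.perf_counter()
            result = original_forward(*args, **kwargs)
            self.time_list.append(time.perf_counter() - start)
            return result

        model.forward = types.MethodType(timed_forward, model)

    def get_time_list(self):
        # time_list[0] is the 1st-token latency and time_list[1] the 2nd:
        # the values the PyTorch path previously failed to write to CSV.
        return self.time_list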
9 changes: 8 additions & 1 deletion llm_bench/python/utils/ov_utils.py
@@ -9,6 +9,8 @@
 import torch
 import time
 import types
+import utils.hook_greedy_search
+import utils.hook_beam_search

 from utils.config_class import OV_MODEL_CLASSES_MAPPING, TOKENIZE_CLASSES_MAPPING, DEFAULT_MODEL_CLASSES
 from .ov_model_classes import register_normalized_configs
@@ -165,13 +167,18 @@ def create_text_gen_model(model_path, device, **kwargs):
     if not isinstance(ov_model, OV_MODEL_CLASSES_MAPPING['t5']):
         patch_inter_processing_and_compile(ov_model, **kwargs)
     end = time.perf_counter()
+    if kwargs['num_beams'] > 1:
+        bench_hook = utils.hook_beam_search.BeamSearchHook()
+    else:
+        bench_hook = utils.hook_greedy_search.GreedySearchHook()
+    bench_hook.new_forward(ov_model, model_type)
     from_pretrained_time = end - start
     log.info(f'From pretrained time: {from_pretrained_time:.2f}s')
     # load token
     tokenizer = token_class.from_pretrained(model_path, trust_remote_code=True)
     if kwargs.get("convert_tokenizer", False):
         tokenizer = build_ov_tokenizer(tokenizer)
-    return ov_model, tokenizer, from_pretrained_time
+    return ov_model, tokenizer, from_pretrained_time, bench_hook


 def create_image_gen_model(model_path, device, **kwargs):
12 changes: 10 additions & 2 deletions llm_bench/python/utils/pt_utils.py
@@ -8,6 +8,8 @@
 import time
 import logging as log
 import openvino.torch # noqa: F401
+import utils.hook_greedy_search
+import utils.hook_beam_search

 MAX_CONNECT_TIME = 50

@@ -74,7 +76,7 @@ def create_text_gen_model(model_path, device, **kwargs):
     gptneoxclm = 'transformers.models.gpt_neox.modeling_gpt_neox.GPTNeoXForCausalLM'
     chatglmfcg = 'transformers_modules.pytorch_original.modeling_chatglm.ChatGLMForConditionalGeneration'
     real_base_model_name = str(type(model)).lower()
-    log.info('Real base model=', real_base_model_name)
+    log.info(f'Real base model={real_base_model_name}')
     # bfclm will trigger generate crash.

     # If the device is set to GPU there's a need to substitute it with 'cuda' so it will be accepted by PyTorch
@@ -93,11 +95,17 @@
     else:
         raise RuntimeError('==Failure ==: no device to load')

+    if kwargs['num_beams'] > 1:
+        bench_hook = utils.hook_beam_search.BeamSearchHook()
+    else:
+        bench_hook = utils.hook_greedy_search.GreedySearchHook()
+    bench_hook.new_forward(model, model_type)
+
     if kwargs['torch_compile_backend']:
         backend = kwargs['torch_compile_backend']
         compiled_model = run_torch_compile(model, backend)
         model = compiled_model
-    return model, tokenizer, from_pretrain_time
+    return model, tokenizer, from_pretrain_time, bench_hook


 def create_image_gen_model(model_path, device, **kwargs):
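
Two practical notes on the new signature (a reading of the diff, not stated in the commit): create_text_gen_model now returns four values in both backends, so any out-of-tree caller still unpacking three will fail, and the per-token times come from the returned hook. A sketch, with get_time_list assumed per the hook outline above:

# Old unpacking now raises "ValueError: too many values to unpack (expected 3)":
# model, tokenizer, pretrain_time = create_text_gen_model(model_path, device, **args)

# New contract, identical for pt_utils and ov_utils:
model, tokenizer, pretrain_time, bench_hook = create_text_gen_model(model_path, device, **args)

tok_times = bench_hook.get_time_list()  # assumed accessor name
first_token_latency = tok_times[0] if len(tok_times) > 0 else None
second_token_latency = tok_times[1] if len(tok_times) > 1 else None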
