Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Model][VLM] Add Qwen2-VL model support #7905

Merged
merged 44 commits on Sep 11, 2024
Merged
Changes from 1 commit
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
0a648b2
Add support to Qwen2-VL.
fyabc Aug 23, 2024
320df57
Merge branch 'refs/heads/main' into add_qwen2_vl_new
fyabc Aug 26, 2024
7f96df8
Reformat
fyabc Aug 27, 2024
fbf2b8b
Merge branch 'refs/heads/main' into add_qwen2_vl_new
fyabc Aug 27, 2024
bcaff4f
Update transformers link.
fyabc Aug 27, 2024
f2185bf
Bugfix of mrope_input_positions in model_runner.py.
fyabc Aug 27, 2024
60448cb
Rename pixel_values_video to pixel_values_videos in qwen2_vl.py.
fyabc Aug 27, 2024
71a77b1
Fix the bug of MultiModalInputs.batch() when passing different modali…
fyabc Aug 27, 2024
60c4cbd
Fix the bug when running OpenAI-compatible API server.
fyabc Aug 27, 2024
e29ff54
Merge branch 'refs/heads/main' into add_qwen2_vl_new
fyabc Aug 29, 2024
ddb7138
Refactor qwen2_vl.py based on review comments.
fyabc Aug 29, 2024
14fe12a
reformat
fyabc Aug 29, 2024
89def23
reformat
fyabc Aug 29, 2024
e721e60
Fix the bug of model_is_mrope in model_runner.py.
fyabc Aug 29, 2024
d66d167
fix type hints in qwen2_vl.py
fyabc Aug 29, 2024
acd85ed
Update mm input processors according to new MultiModalInput.batch() i…
fyabc Aug 29, 2024
8d762c6
Merge branch 'refs/heads/main' into add_qwen2_vl_new
fyabc Aug 30, 2024
87ba5ed
Fix SamplerOutput.
fyabc Aug 30, 2024
cda300a
Fix bug of quantization.
fyabc Aug 30, 2024
da03a3f
Bugfix of type hints in qwen2_vl.py.
fyabc Aug 31, 2024
25fb189
reformat.
fyabc Aug 31, 2024
d01530d
Merge branch 'main' into add_qwen2_vl_new
ywang96 Sep 1, 2024
faebfe4
fix typo from resolving conflict
ywang96 Sep 1, 2024
e492e53
Merge branch 'refs/heads/main' into add_qwen2_vl_new
fyabc Sep 2, 2024
2e87db7
Bugfix in qwen2_vl.py.
fyabc Sep 2, 2024
39a1069
Adding xformers implementation
fyabc Sep 5, 2024
855c78b
Fix bug of attn_bias in xformers implementation
fyabc Sep 5, 2024
091983f
Fix bug in xformers implementation, and add backend check in vision a…
fyabc Sep 6, 2024
b406571
Merge branch 'refs/heads/main' into add_qwen2_vl_new
fyabc Sep 6, 2024
7739588
Bugfix in qwen2_vl.py.
fyabc Sep 6, 2024
5bab9ba
Bugfix in qwen2_vl.py.
fyabc Sep 6, 2024
4587346
reformat.
fyabc Sep 6, 2024
ffad79f
Refactor MRotaryEmbedding.
fyabc Sep 6, 2024
9e7a946
Merge branch 'refs/heads/main' into add_qwen2_vl_new
fyabc Sep 9, 2024
d527417
Add "video" into ModalityStr.
fyabc Sep 9, 2024
6f3116c
Add Qwen2-VL examples.
fyabc Sep 9, 2024
386f302
Optimizer Qwen2-VL input processor. Update document.
fyabc Sep 10, 2024
c64c217
Update model notes and requirements-common.txt.
fyabc Sep 10, 2024
6bdefd6
Update model notes.
fyabc Sep 10, 2024
33dd048
Skip loading model
DarkLight1337 Sep 11, 2024
369ce7d
Merge branch 'main' into add_qwen2_vl_new
DarkLight1337 Sep 11, 2024
282c66a
format
DarkLight1337 Sep 11, 2024
14ef94d
Increase `max_model_len` to fit the original image
DarkLight1337 Sep 11, 2024
09b7a4f
Merge branch 'main' into add_qwen2_vl_new
DarkLight1337 Sep 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'main' into add_qwen2_vl_new
  • Loading branch information
ywang96 committed Sep 1, 2024
commit d01530d946ee84b603589f1baa321c4a18c78f7c
41 changes: 6 additions & 35 deletions vllm/entrypoints/chat_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,18 @@ def add(self, modality: Literal["image", "audio"],
self._model_config.hf_config.image_token_index)
if model_type in ("chameleon", "internvl_chat"):
return "<image>"
if model_type == "qwen2_vl":
return "<|vision_start|><|image_pad|><|vision_end|>"

raise TypeError(f"Unknown model type: {model_type}")
elif modality == "audio":
if model_type == "ultravox":
return "<|reserved_special_token_0|>"
raise TypeError(f"Unknown model type: {model_type}")
elif modality == "video":
if model_type == "qwen2_vl":
return "<|vision_start|><|video_pad|><|vision_end|>"
raise TypeError(f"Unknown model type: {model_type}")
else:
raise TypeError(f"Unknown modality: {modality}")

Expand Down Expand Up @@ -193,41 +199,6 @@ def load_chat_template(
return resolved_chat_template


@lru_cache(maxsize=None)
def _mm_token_str(model_config: ModelConfig, tokenizer: AnyTokenizer,
modality: Literal["image", "audio"]) -> Optional[str]:
# TODO: Let user specify how to insert image tokens into prompt
# (similar to chat template)
model_type = model_config.hf_config.model_type
if modality == "image":
if model_type == "phi3_v":
# Workaround since this token is not defined in the tokenizer
return "<|image_1|>"
if model_type == "minicpmv":
return "(<image>./</image>)"
if model_type in ("blip-2", "chatglm", "fuyu", "paligemma"):
# These models do not use image tokens in the prompt
return None
if model_type.startswith("llava"):
return tokenizer.decode(model_config.hf_config.image_token_index)
if model_type in ("chameleon", "internvl_chat"):
return "<image>"
if model_type == "qwen2_vl":
return "<|vision_start|><|image_pad|><|vision_end|>"

raise TypeError(f"Unknown model type: {model_type}")
elif modality == "audio":
if model_type == "ultravox":
return "<|reserved_special_token_0|>"
raise TypeError(f"Unknown model type: {model_type}")
elif modality == "video":
if model_type == "qwen2_vl":
return "<|vision_start|><|video_pad|><|vision_end|>"
raise TypeError(f"Unknown model type: {model_type}")
else:
raise TypeError(f"Unknown modality: {modality}")


# TODO: Let user specify how to insert multimodal tokens into prompt
# (similar to chat template)
def _get_full_multimodal_text_prompt(placeholder_counts: Dict[str, int],
Expand Down
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.