InstructBlipVideo: Update docstring (#31886)
* update docs

* one more change
zucchini-nlp authored Jul 11, 2024
1 parent c54af4c commit d625294
Showing 2 changed files with 18 additions and 14 deletions.
@@ -158,7 +158,8 @@ def forward(
>>> from transformers import InstructBlipVideoProcessor, InstructBlipVideoForConditionalGeneration
>>> import torch
>>> from huggingface_hub import hf_hub_download
- >>> from av
+ >>> import av
+ >>> import numpy as np
>>> def read_video_pyav(container, indices):
... '''
@@ -180,20 +181,21 @@ def forward(
... frames.append(frame)
... return np.stack([x.to_ndarray(format="rgb24") for x in frames])
- >>> model = InstructBlipVideoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b", device_map="auto")
- >>> processor = InstructBlipVideoForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b")
+ >>> model = InstructBlipVideoForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b", device_map="auto")
+ >>> processor = InstructBlipVideoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
>>> file_path = hf_hub_download(
repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
)
>>> container = av.open(video_path)
... repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
... )
>>> container = av.open(file_path)
>>> # sample uniformly 4 frames from the video
>>> total_frames = container.streams.video[0].frames
>>> indices = np.arange(0, total_frames, total_frames / 4).astype(int)
>>> clip = read_video_pyav(container, indices)
>>> prompt = "What is happening in the video?"
- >>> inputs = processor(videos=clip, text=prompt, return_tensors="pt").to(device)
+ >>> inputs = processor(text=prompt, images=clip, return_tensors="pt").to(model.device)
>>> outputs = model.generate(
... **inputs,
@@ -1393,7 +1393,8 @@ def forward(
>>> from transformers import InstructBlipVideoProcessor, InstructBlipVideoForConditionalGeneration
>>> import torch
>>> from huggingface_hub import hf_hub_download
- >>> from av
+ >>> import av
+ >>> import numpy as np
>>> def read_video_pyav(container, indices):
... '''
@@ -1415,20 +1416,21 @@ def forward(
... frames.append(frame)
... return np.stack([x.to_ndarray(format="rgb24") for x in frames])
- >>> model = InstructBlipVideoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b", device_map="auto")
- >>> processor = InstructBlipVideoForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b")
+ >>> model = InstructBlipVideoForConditionalGeneration.from_pretrained("Salesforce/instructblip-vicuna-7b", device_map="auto")
+ >>> processor = InstructBlipVideoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")
>>> file_path = hf_hub_download(
repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
)
>>> container = av.open(video_path)
... repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
... )
>>> container = av.open(file_path)
>>> # sample uniformly 4 frames from the video
>>> total_frames = container.streams.video[0].frames
>>> indices = np.arange(0, total_frames, total_frames / 4).astype(int)
>>> clip = read_video_pyav(container, indices)
>>> prompt = "What is happening in the video?"
- >>> inputs = processor(videos=clip, text=prompt, return_tensors="pt").to(device)
+ >>> inputs = processor(text=prompt, images=clip, return_tensors="pt").to(model.device)
>>> outputs = model.generate(
... **inputs,
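For reference, the corrected docstring example from the diff above can be assembled into a runnable snippet roughly as follows. The body of read_video_pyav and the tail of the generate call are collapsed in the diff, so those parts are a sketch: the helper follows the PyAV frame-sampling pattern used in other Transformers video examples, and the generation arguments are illustrative assumptions, not the exact docstring values.

import av
import numpy as np
from huggingface_hub import hf_hub_download
from transformers import InstructBlipVideoForConditionalGeneration, InstructBlipVideoProcessor


def read_video_pyav(container, indices):
    # Decode only the frames whose indices are in `indices` and stack them
    # into an array of shape (num_frames, height, width, 3).
    # Reconstructed from the standard helper in the Transformers video docs;
    # the diff above only shows its last two lines.
    frames = []
    container.seek(0)
    start_index, end_index = indices[0], indices[-1]
    for i, frame in enumerate(container.decode(video=0)):
        if i > end_index:
            break
        if i >= start_index and i in indices:
            frames.append(frame)
    return np.stack([x.to_ndarray(format="rgb24") for x in frames])


# Load the model with the generation class and the processor with the processor class,
# as fixed in this commit.
model = InstructBlipVideoForConditionalGeneration.from_pretrained(
    "Salesforce/instructblip-vicuna-7b", device_map="auto"
)
processor = InstructBlipVideoProcessor.from_pretrained("Salesforce/instructblip-vicuna-7b")

file_path = hf_hub_download(
    repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
)
container = av.open(file_path)

# Sample 4 frames uniformly from the video.
total_frames = container.streams.video[0].frames
indices = np.arange(0, total_frames, total_frames / 4).astype(int)
clip = read_video_pyav(container, indices)

prompt = "What is happening in the video?"
inputs = processor(text=prompt, images=clip, return_tensors="pt").to(model.device)

# Generation settings below are illustrative; the docstring's exact kwargs are truncated in the diff.
outputs = model.generate(**inputs, do_sample=False, max_new_tokens=50)
generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0].strip()
print(generated_text)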
