Skip to content

Commit

Permalink
fix lazy load (#328)
Browse files (browse the repository at this point in the history)
(cherry picked from commit 326d9a8b3fab1b4da35b6c04bd7c27dbbe9cf14c)
  • Loading branch information
tastelikefeet committed Jan 19, 2024
1 parent 0ea9d24 commit a12f0f4
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 14 deletions.
15 changes: 1 addition & 14 deletions swift/llm/utils/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1121,20 +1121,7 @@ def load_dataset_from_local(
dataset = HfDataset.from_dict(df.to_dict(orient='list'))
dataset_list.append(preprocess_func(dataset))

dataset = concatenate_datasets(dataset_list)

def load_image(row):
from PIL import Image
import requests
if not os.path.exists(row['image']):
row['image'] = requests.get(row['image'], stream=True).raw
row['image'] = Image.open(row['image'])
return row

if 'image' in dataset.features and isinstance(dataset[0]['image'], str):
dataset = HfDataset.from_list(
dataset_map(dataset, load_image, num_proc=4).data)
return dataset
return concatenate_datasets(dataset_list)


def get_custom_dataset(_: str, train_subset_split_list: Union[str, List[str]],
Expand Down
8 changes: 8 additions & 0 deletions swift/llm/utils/template.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from copy import deepcopy
from typing import Any, Dict, List, Literal, Optional, Tuple, Union

Expand Down Expand Up @@ -554,6 +555,13 @@ def build_conversation_input_ids(

def encode(self, example: Dict[str,
Any]) -> Dict[str, Optional[List[int]]]:
if 'image' in example and isinstance(example['image'], str):
from PIL import Image
import requests
if not os.path.exists(example['image']):
example['image'] = requests.get(
example['image'], stream=True).raw
example['image'] = Image.open(example['image'])
return self.build_conversation_input_ids(
self.tokenizer,
query=example['query'],
Expand Down

0 comments on commit a12f0f4

Please sign in to comment.