Skip to content

Commit

Permalink
WIP Resolve excessive memory usage when exporting projects with multiple video tasks
Browse files Browse the repository at this point in the history
  • Loading branch information
SpecLad committed Jan 17, 2024
1 parent 8c2db1d commit f6c4a98
Show file tree
Hide file tree
Showing 23 changed files with 283 additions and 212 deletions.
202 changes: 125 additions & 77 deletions cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@
from cvat.apps.dataset_manager.formats.utils import get_label_color
from cvat.apps.dataset_manager.util import add_prefetch_fields
from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.models import (AttributeSpec, AttributeType, DimensionType, Job, JobType,
Label, LabelType, Project, SegmentType, ShapeType, Task)
from cvat.apps.engine.models import (AttributeSpec, AttributeType, Data, DimensionType, Job,
JobType, Label, LabelType, Project, SegmentType, ShapeType,
Task)
from cvat.apps.engine.models import Image as Img

from .annotation import AnnotationIR, AnnotationManager, TrackManager
Expand Down Expand Up @@ -1311,12 +1312,80 @@ def add_labels(self, labels: List[dict]):
def add_task(self, task, files):
self._project_annotation.add_task(task, files, self)

@attrs(frozen=True, auto_attribs=True)
class ImageSource:
    """Immutable description of one source that image providers read from."""

    # Data record backing the source; providers hand it to FrameProvider or
    # query its images / upload directory.
    db_data: Data

    # Selects the 2D loading strategy: decoded video frames when True,
    # encoded image bytes when False.
    is_video: bool

class ImageProvider:
    """Base class for objects that produce image data for dataset frames.

    Stores the mapping from a source id to the ImageSource describing it.
    Subclasses implement ``get_image_for_frame`` and may override ``unload``
    to release whatever they keep open.
    """

    def __init__(self, sources: Dict[int, "ImageSource"]) -> None:
        """Remember the available sources, keyed by source id."""
        self._sources = sources

    def get_image_for_frame(self, source_id: int, frame: int, **image_kwargs):
        """Return the image object for one frame of the given source.

        Declared here so that code holding a plain ``ImageProvider`` has a
        defined interface to call; concrete providers must override it.

        Args:
            source_id: key of the source in the mapping given to ``__init__``.
            frame: frame identifier; its exact meaning is provider-specific.
            **image_kwargs: extra image arguments interpreted by the provider.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError()

    def unload(self) -> None:
        """Release held resources. The base class holds none, so this is a no-op."""

class ImageProvider2D(ImageProvider):
    """Image provider for 2D sources that keeps at most one FrameProvider open.

    The images returned by ``get_image_for_frame`` carry a loader callback.
    The FrameProvider for a source is only created when such a callback is
    invoked, and switching to a different source unloads the previous one
    first.
    """

    def __init__(self, sources: Dict[int, ImageSource]) -> None:
        """Store the sources; no frame provider is opened yet."""
        super().__init__(sources)
        self._current_source_id = None
        self._frame_provider = None

    def unload(self) -> None:
        """Unload the currently opened frame provider, if any."""
        self._unload_source()

    def get_image_for_frame(self, source_id: int, frame_index: int, **image_kwargs):
        """Build a lazily loaded image for ``frame_index`` of ``source_id``.

        Args:
            source_id: key of the source in the mapping given to ``__init__``.
            frame_index: frame index passed to ``FrameProvider.get_frame``.
            **image_kwargs: forwarded to the ``dm.Image`` / ``dm.ByteImage``
                constructor.

        Returns:
            ``dm.Image`` for video sources, ``dm.ByteImage`` otherwise.

        Raises:
            KeyError: if ``source_id`` is not a known source.
        """
        source = self._sources[source_id]

        if source.is_video:
            def video_frame_loader(_):
                # optimization for videos: use numpy arrays instead of bytes
                # some formats or transforms can require image data
                return self._read_frame(
                    source_id, source, frame_index, FrameProvider.Type.NUMPY_ARRAY)
            return dm.Image(data=video_frame_loader, **image_kwargs)

        def image_loader(_):
            # for images use encoded data to avoid recoding
            return self._read_frame(
                source_id, source, frame_index, FrameProvider.Type.BUFFER).getvalue()
        return dm.ByteImage(data=image_loader, **image_kwargs)

    def _read_frame(self, source_id: int, source: ImageSource, frame_index: int, out_type):
        """Make ``source`` the active one and fetch a frame in original quality."""
        self._load_source(source_id, source)
        return self._frame_provider.get_frame(frame_index,
            quality=FrameProvider.Quality.ORIGINAL,
            out_type=out_type)[0]

    def _load_source(self, source_id: int, source: ImageSource) -> None:
        """Open a FrameProvider for ``source`` unless it is already the active one."""
        if self._current_source_id == source_id:
            return

        self._unload_source()
        self._frame_provider = FrameProvider(source.db_data)
        self._current_source_id = source_id

    def _unload_source(self) -> None:
        """Forget the active source and unload its frame provider."""
        frame_provider = self._frame_provider

        # Clear the cached state first so that a failing unload() cannot
        # leave a half-released provider registered as the active one.
        self._frame_provider = None
        self._current_source_id = None

        # Explicit None check: a provider object that evaluates as falsy
        # must still be unloaded.
        if frame_provider is not None:
            frame_provider.unload()

class CVATDataExtractorMixin:
def __init__(self, *,
convert_annotations: Callable = None
):
self.convert_annotations = convert_annotations or convert_cvat_anno_to_dm

self._image_provider: Optional[ImageProvider] = None

def __enter__(self):
return self

def __exit__(self, exc_type, exc_value, traceback) -> None:
if self._image_provider:
self._image_provider.unload()

def categories(self) -> dict:
    """Return the dataset categories; must be provided by a subclass.

    Raises:
        NotImplementedError: always, in this mixin.
    """
    raise NotImplementedError()

Expand Down Expand Up @@ -1368,6 +1437,34 @@ def map_label(name, parent=''): return label_cat.find(name, parent)[0]

return self.convert_annotations(cvat_frame_anno, label_attrs, map_label)

class ImageProvider3D(ImageProvider):
    """Image provider for 3D sources.

    All image records of every source are fetched in the constructor, with
    their ``related_files`` prefetched, and indexed by image id.
    """

    def __init__(self, sources: Dict[int, ImageSource]) -> None:
        """Index the image records of each source by image id."""
        super().__init__(sources)

        images_per_source = {}
        for source_id, source in sources.items():
            db_images = source.db_data.images.prefetch_related('related_files')
            images_per_source[source_id] = {
                db_image.id: db_image for db_image in db_images
            }
        self._images_per_source = images_per_source

    def get_image_for_frame(self, source_id: int, frame_id: int, **image_kwargs):
        """Return the point cloud path and related image paths for a frame.

        Args:
            source_id: key of the source in the mapping given to ``__init__``.
            frame_id: id of the image record within that source.
            **image_kwargs: must contain ``'path'``, which is joined onto the
                source's upload directory.

        Returns:
            A ``(point_cloud_path, related_images)`` tuple, where
            ``related_images`` lists the resolved paths of the related files
            that exist on disk.

        Raises:
            KeyError: if the source, ``'path'`` or the image id is unknown.
        """
        source = self._sources[source_id]

        upload_dir = source.db_data.get_upload_dirname()
        point_cloud_path = osp.join(upload_dir, image_kwargs['path'])

        image = self._images_per_source[source_id][frame_id]

        related_images = []
        for related_file in image.related_files.all():
            resolved_path = osp.realpath(str(related_file.path))
            # Keep only related files that are actually present on disk.
            if osp.isfile(resolved_path):
                related_images.append(resolved_path)

        return point_cloud_path, related_images

class CvatTaskOrJobDataExtractor(dm.SourceExtractor, CVATDataExtractorMixin):
def __init__(
Expand Down Expand Up @@ -1397,34 +1494,9 @@ def __init__(
ext = FrameProvider.VIDEO_FRAME_EXT

if dimension == DimensionType.DIM_3D:
def _make_image(image_id, **kwargs):
loader = osp.join(
instance_data.db_data.get_upload_dirname(), kwargs['path'])
related_images = []
image = Img.objects.get(id=image_id)
for i in image.related_files.all():
path = osp.realpath(str(i.path))
if osp.isfile(path):
related_images.append(path)
return loader, related_images

self._image_provider = ImageProvider3D({0: ImageSource(instance_data.db_data, False)})
elif include_images:
frame_provider = FrameProvider(instance_data.db_data)
if is_video:
# optimization for videos: use numpy arrays instead of bytes
# some formats or transforms can require image data
def _make_image(i, **kwargs):
loader = lambda _: frame_provider.get_frame(i,
quality=frame_provider.Quality.ORIGINAL,
out_type=frame_provider.Type.NUMPY_ARRAY)[0]
return dm.Image(data=loader, **kwargs)
else:
# for images use encoded data to avoid recoding
def _make_image(i, **kwargs):
loader = lambda _: frame_provider.get_frame(i,
quality=frame_provider.Quality.ORIGINAL,
out_type=frame_provider.Type.BUFFER)[0].getvalue()
return dm.ByteImage(data=loader, **kwargs)
self._image_provider = ImageProvider3D({0: ImageSource(instance_data.db_data, is_video)})

for frame_data in instance_data.group_by_frame(include_empty=True):
image_args = {
Expand All @@ -1433,9 +1505,9 @@ def _make_image(i, **kwargs):
}

if dimension == DimensionType.DIM_3D:
dm_image = _make_image(frame_data.id, **image_args)
dm_image = self._image_provider.get_image_for_frame(0, frame_data.id, **image_args)
elif include_images:
dm_image = _make_image(frame_data.idx, **image_args)
dm_image = self._image_provider.get_image_for_frame(0, frame_data.idx, **image_args)
else:
dm_image = dm.Image(**image_args)
dm_anno = self._read_cvat_anno(frame_data, instance_meta['labels'])
Expand Down Expand Up @@ -1501,61 +1573,37 @@ def __init__(

dm_items: List[dm.DatasetItem] = []

ext_per_task: Dict[int, str] = {}
image_maker_per_task: Dict[int, Callable] = {}
if self._dimension == DimensionType.DIM_3D:
self._image_provider = ImageProvider3D(
{task.id: ImageSource(task.data, False) for task in project_data.tasks}
)
elif include_images:
self._image_provider = ImageProvider2D(
{
task.id: ImageSource(task.data, is_video=task.mode == 'interpolation')
for task in project_data.tasks
}
)
else:
self._image_provider = None

for task in project_data.tasks:
is_video = task.mode == 'interpolation'
ext_per_task[task.id] = FrameProvider.VIDEO_FRAME_EXT if is_video else ''
if self._dimension == DimensionType.DIM_3D:
def image_maker_factory(task):
images_query = task.data.images.prefetch_related()
def _make_image(i, **kwargs):
loader = osp.join(
task.data.get_upload_dirname(), kwargs['path'],
)
related_images = []
image = images_query.get(id=i)
for i in image.related_files.all():
path = osp.realpath(str(i.path))
if osp.isfile(path):
related_images.append(path)
return loader, related_images
return _make_image
image_maker_per_task[task.id] = image_maker_factory(task)
elif include_images:
if is_video:
# optimization for videos: use numpy arrays instead of bytes
# some formats or transforms can require image data
def image_maker_factory(task):
frame_provider = FrameProvider(task.data)
def _make_image(i, **kwargs):
loader = lambda _: frame_provider.get_frame(i,
quality=frame_provider.Quality.ORIGINAL,
out_type=frame_provider.Type.NUMPY_ARRAY)[0]
return dm.Image(data=loader, **kwargs)
return _make_image
else:
# for images use encoded data to avoid recoding
def image_maker_factory(task):
frame_provider = FrameProvider(task.data)
def _make_image(i, **kwargs):
loader = lambda _: frame_provider.get_frame(i,
quality=frame_provider.Quality.ORIGINAL,
out_type=frame_provider.Type.BUFFER)[0].getvalue()
return dm.ByteImage(data=loader, **kwargs)
return _make_image
image_maker_per_task[task.id] = image_maker_factory(task)
ext_per_task: Dict[int, str] = {
task.id: FrameProvider.VIDEO_FRAME_EXT if is_video else ''
for task in project_data.tasks
for is_video in [task.mode == 'interpolation']
}

for frame_data in project_data.group_by_frame(include_empty=True):
image_args = {
'path': frame_data.name + ext_per_task[frame_data.task_id],
'size': (frame_data.height, frame_data.width),
}
if self._dimension == DimensionType.DIM_3D:
dm_image = image_maker_per_task[frame_data.task_id](frame_data.id, **image_args)
dm_image = self._image_provider.get_image_for_frame(
frame_data.task_id, frame_data.id, **image_args)
elif include_images:
dm_image = image_maker_per_task[frame_data.task_id](frame_data.idx, **image_args)
dm_image = self._image_provider.get_image_for_frame(
frame_data.task_id, frame_data.idx, **image_args)
else:
dm_image = dm.Image(**image_args)
dm_anno = self._read_cvat_anno(frame_data, project_data.meta[project_data.META_FIELD]['labels'])
Expand Down
22 changes: 11 additions & 11 deletions cvat/apps/dataset_manager/formats/camvid.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@

@exporter(name='CamVid', ext='ZIP', version='1.0')
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
dataset.transform(RotatedBoxesToPolygons)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
label_map = make_colormap(instance_data)

dataset.export(temp_dir, 'camvid',
save_images=save_images, apply_colormap=True,
label_map={label: label_map[label][0] for label in label_map})
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
dataset.transform(RotatedBoxesToPolygons)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')
label_map = make_colormap(instance_data)

dataset.export(temp_dir, 'camvid',
save_images=save_images, apply_colormap=True,
label_map={label: label_map[label][0] for label in label_map})

make_zip_archive(temp_dir, dst_file)

Expand Down
20 changes: 10 additions & 10 deletions cvat/apps/dataset_manager/formats/cityscapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@

@exporter(name='Cityscapes', ext='ZIP', version='1.0')
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
dataset.transform(RotatedBoxesToPolygons)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')

dataset.export(temp_dir, 'cityscapes', save_images=save_images,
apply_colormap=True, label_map={label: info[0]
for label, info in make_colormap(instance_data).items()})
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
dataset.transform(RotatedBoxesToPolygons)
dataset.transform('polygons_to_masks')
dataset.transform('boxes_to_masks')
dataset.transform('merge_instance_segments')

dataset.export(temp_dir, 'cityscapes', save_images=save_images,
apply_colormap=True, label_map={label: info[0]
for label, info in make_colormap(instance_data).items()})

make_zip_archive(temp_dir, dst_file)

Expand Down
16 changes: 8 additions & 8 deletions cvat/apps/dataset_manager/formats/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@

@exporter(name='COCO', ext='ZIP', version='1.0')
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
dataset.export(temp_dir, 'coco_instances', save_images=save_images,
merge_images=True)
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
dataset.export(temp_dir, 'coco_instances', save_images=save_images,
merge_images=True)

make_zip_archive(temp_dir, dst_file)

Expand All @@ -38,10 +38,10 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs

@exporter(name='COCO Keypoints', ext='ZIP', version='1.0')
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data, include_images=save_images), env=dm_env)
dataset.export(temp_dir, 'coco_person_keypoints', save_images=save_images,
merge_images=True)
with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
dataset.export(temp_dir, 'coco_person_keypoints', save_images=save_images,
merge_images=True)

make_zip_archive(temp_dir, dst_file)

Expand Down
22 changes: 12 additions & 10 deletions cvat/apps/dataset_manager/formats/datumaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ def transform_item(self, item):

@exporter(name="Datumaro", ext="ZIP", version="1.0")
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data=instance_data, include_images=save_images), env=dm_env)
if not save_images:
dataset.transform(DeleteImagePath)
dataset.export(temp_dir, 'datumaro', save_images=save_images)
with GetCVATDataExtractor(instance_data=instance_data, include_images=save_images) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)
if not save_images:
dataset.transform(DeleteImagePath)
dataset.export(temp_dir, 'datumaro', save_images=save_images)

make_zip_archive(temp_dir, dst_file)

Expand All @@ -45,13 +45,15 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs

@exporter(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D)
def _export(dst_file, temp_dir, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
with GetCVATDataExtractor(
instance_data=instance_data, include_images=save_images,
dimension=DimensionType.DIM_3D), env=dm_env)
dimension=DimensionType.DIM_3D,
) as extractor:
dataset = Dataset.from_extractors(extractor, env=dm_env)

if not save_images:
dataset.transform(DeleteImagePath)
dataset.export(temp_dir, 'datumaro', save_images=save_images)
if not save_images:
dataset.transform(DeleteImagePath)
dataset.export(temp_dir, 'datumaro', save_images=save_images)

make_zip_archive(temp_dir, dst_file)

Expand Down
Loading

0 comments on commit f6c4a98

Please sign in to comment.