Skip to content

Commit

Permalink
collectionviews: Use generators for collection tasks
Browse files Browse the repository at this point in the history
By using generators instead of functions returning lists, we enable a
small performance optimization when determining if a download is
required.

https://phabricator.endlessm.com/T35137
  • Loading branch information
dylanmccall committed Jan 4, 2024
1 parent 721c7d5 commit 0b773ee
Showing 1 changed file with 23 additions and 46 deletions.
69 changes: 23 additions & 46 deletions kolibri_explore_plugin/collectionviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,18 +151,17 @@ def get_channels_count(self):
def is_download_required(self):
    """Tell whether any foreground download task is still pending.

    Chains the channel import, content import and content thumbnail task
    iterators of this manifest and returns True as soon as one of them
    produces a truthy task, False when none does.
    """
    pending_tasks = itertools.chain(
        self.iter_channelimport_tasks(),
        self.iter_contentimport_tasks(),
        self.iter_contentthumbnail_tasks(),
    )
    # any() stops at the first truthy task, so later iterators are not
    # consumed once a pending task has been found.
    return any(pending_tasks)

def get_channelimport_tasks(self):
def iter_channelimport_tasks(self):
"""Return a serializable object to create channelimport tasks
For all the channels in this content manifest.
"""
tasks = []
for channel_id, channel_version in self.get_latest_channels():
metadata = get_channel_metadata(channel_id)
if metadata and metadata.version >= channel_version:
Expand All @@ -171,16 +170,14 @@ def get_channelimport_tasks(self):
"already present"
)
continue
tasks.append(get_remotechannelimport_task(channel_id))
return tasks
yield get_remotechannelimport_task(channel_id)

def get_extra_channelimport_tasks(self):
def iter_extra_channelimport_tasks(self):
"""Return a serializable object to create extra channelimport tasks
For all channels featured in Endless Key content manifests. In addition
to the channel metadata, all thumbnails are downloaded.
"""
tasks = []
for channel_id, channel_version in self.get_latest_extra_channels():
# Check if the channel metadata and thumbnails are already
# available.
Expand All @@ -202,21 +199,15 @@ def get_extra_channelimport_tasks(self):
)
continue

tasks.append(
get_remoteimport_task(
channel_id, node_ids=[], all_thumbnails=True
)
yield get_remoteimport_task(
channel_id, node_ids=[], all_thumbnails=True
)

return tasks

def get_contentimport_tasks(self):
def iter_contentimport_tasks(self):
"""Return a serializable object to create contentimport tasks
For all the channels in this content manifest.
"""
tasks = []

for channel_id in self.get_channel_ids():
channel_metadata = get_channel_metadata(channel_id)
node_ids = list(
Expand All @@ -236,38 +227,28 @@ def get_contentimport_tasks(self):
)
continue

tasks.append(
get_remotecontentimport_task(
channel_id, channel_metadata.name, node_ids
)
yield get_remotecontentimport_task(
channel_id, channel_metadata.name, node_ids
)

return tasks

def iter_applyexternaltags_tasks(self):
    """Yield serializable objects to create applyexternaltags tasks

    As defined in this content manifest metadata. Nothing is yielded when
    the metadata has no "tagged_node_ids" entry.
    """
    # A bare return is enough to end a generator; a returned list would be
    # discarded by callers that simply iterate.
    if "tagged_node_ids" not in self.metadata:
        return

    for tagged in self.metadata["tagged_node_ids"]:
        node_id = tagged["node_id"]
        tags = tagged["tags"]
        yield get_applyexternaltags_task(node_id, tags)

def get_contentthumbnail_tasks(self):
def iter_contentthumbnail_tasks(self):
"""Return a serializable object to create thumbnail contentimport tasks
For all the channels in this content manifest.
"""
tasks = []

for channel_id in self.get_channel_ids():
# Check if the desired thumbnail nodes are already available.
num_resources, _, _ = get_import_export_data(
Expand All @@ -283,14 +264,10 @@ def get_contentthumbnail_tasks(self):
)
continue

tasks.append(
get_remotecontentimport_task(
channel_id, node_ids=[], all_thumbnails=True
)
yield get_remotecontentimport_task(
channel_id, node_ids=[], all_thumbnails=True
)

return tasks

def _get_node_ids_for_channel(self, channel_metadata, channel_id):
"""Get node IDs regardless of the version
Expand Down Expand Up @@ -583,29 +560,29 @@ def _set_next_stage(self, user):
while not tasks and self._stage != DownloadStage.COMPLETED:
self._stage = DownloadStage(self._stage + 1)
if self._stage == DownloadStage.IMPORTING_CHANNELS:
tasks = self._content_manifest.get_channelimport_tasks()
tasks = self._content_manifest.iter_channelimport_tasks()
elif self._stage == DownloadStage.IMPORTING_CONTENT:
tasks = self._content_manifest.get_contentimport_tasks()
tasks = self._content_manifest.iter_contentimport_tasks()
elif self._stage == DownloadStage.APPLYING_EXTERNAL_TAGS:
tasks = self._content_manifest.get_applyexternaltags_tasks()
tasks = self._content_manifest.iter_applyexternaltags_tasks()

if self._stage == DownloadStage.COMPLETED:
logger.info("Download completed!")

# Download the manifest content thumbnails and the extra channels
# in the background.
thumbnail_tasks = (
self._content_manifest.get_contentthumbnail_tasks()
self._content_manifest.iter_contentthumbnail_tasks()
)
extra_channel_tasks = (
self._content_manifest.get_extra_channelimport_tasks()
self._content_manifest.iter_extra_channelimport_tasks()
)
for task in thumbnail_tasks + extra_channel_tasks:
for task in itertools.chain(thumbnail_tasks, extra_channel_tasks):
BackgroundTask.create_from_task_data(task)
logger.info("Starting background download tasks")
enqueue_next_background_task()

self._tasks_pending = tasks
self._tasks_pending = list(tasks)
self._tasks_previously_completed.extend(self._tasks_completed)
self._tasks_completed = []
logger.info(f"Started download stage: {self._stage.name}")
Expand Down

0 comments on commit 0b773ee

Please sign in to comment.