Merge pull request #4830 from voxel51/builtin-operators
Adding builtin operators for more of the FO interface
brimoor authored Sep 24, 2024
2 parents 75e8e87 + 160f7bd commit 77de164
Showing 6 changed files with 2,167 additions and 287 deletions.
2 changes: 1 addition & 1 deletion docs/source/brain.rst
@@ -5,7 +5,7 @@ FiftyOne Brain
 
 .. default-role:: code
 
-The `FiftyOne Brain <https://github.com/voxel51/fiftyone-brain>` provides
+The `FiftyOne Brain <https://github.com/voxel51/fiftyone-brain>`_ provides
 powerful machine learning techniques that are designed to transform how you
 curate your data from an art into a measurable science.
 
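The one-character fix matters in reStructuredText: an inline external link of the form `text <url>`_ requires the trailing underscore to become a hyperlink reference; without it, the construct renders as plain interpreted text instead of a link.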
63 changes: 44 additions & 19 deletions fiftyone/core/collections.py
@@ -643,33 +643,58 @@ def _sync_samples_last_modified_at(self):
 
     def _sync_dataset_last_modified_at(self):
         dataset = self._root_dataset
-        if self.media_type == fom.GROUP:
-            samples = self.select_group_slices(media_type=fom.VIDEO)
-        else:
-            samples = self
+        curr_lma = dataset.last_modified_at
+        lma = self._get_last_modified_at()
 
+        if lma is not None and (curr_lma is None or lma > curr_lma):
+            dataset._doc.last_modified_at = lma
+            dataset._doc.save(virtual=True)
+
+    def _get_last_modified_at(self, frames=False):
+        if frames and not self._contains_videos(any_slice=True):
+            return
+
-        results = samples._aggregate(
-            post_pipeline=[
+        if isinstance(self, fod.Dataset):
+            # pylint:disable=no-member
+            dataset = self
+            if frames:
+                coll = dataset._frame_collection
+            else:
+                coll = dataset._sample_collection
+
+            pipeline = [
                 {"$sort": {"last_modified_at": -1}},
                 {"$limit": 1},
                 {"$project": {"last_modified_at": True}},
             ]
-        )
 
-        try:
-            last_modified_at = next(iter(results))["last_modified_at"]
-        except:
-            last_modified_at = None
+            results = foo.aggregate(coll, pipeline)
+        else:
+            if self.media_type == fom.GROUP:
+                if frames:
+                    view = self.select_group_slices(media_type=fom.VIDEO)
+                else:
+                    view = self.select_group_slices(_allow_mixed=True)
+            else:
+                view = self
 
-        if last_modified_at is None:
-            return
+            pipeline = [
+                {
+                    "$group": {
+                        "_id": None,
+                        "last_modified_at": {"$max": "$last_modified_at"},
+                    }
+                }
+            ]
 
-        if (
-            dataset.last_modified_at is None
-            or last_modified_at > dataset.last_modified_at
-        ):
-            dataset._doc.last_modified_at = last_modified_at
-            dataset._doc.save(virtual=True)
+            results = view._aggregate(
+                frames_only=frames, post_pipeline=pipeline
+            )
+
+        try:
+            return next(iter(results))["last_modified_at"]
+        except:
+            return None
 
     def stats(
         self,
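The new `_get_last_modified_at()` helper chooses between two aggregation shapes: on a full dataset it sorts the backing collection by the indexed `last_modified_at` field and keeps the top document, while on a view it reduces whatever samples match to a single `$max`. A minimal pymongo sketch of both strategies, assuming a local MongoDB and illustrative database/collection names (not FiftyOne's actual internals):

from pymongo import MongoClient

client = MongoClient()
coll = client["fiftyone"]["samples"]

# Full dataset: sort on the indexed field, keep the single newest document
pipeline = [
    {"$sort": {"last_modified_at": -1}},
    {"$limit": 1},
    {"$project": {"last_modified_at": True}},
]
doc = next(coll.aggregate(pipeline), None)
print(doc["last_modified_at"] if doc else None)

# Filtered view: fold the matching documents into a single $max accumulator
pipeline = [
    {"$match": {"tags": "validation"}},  # stand-in for a view's pipeline
    {
        "$group": {
            "_id": None,
            "last_modified_at": {"$max": "$last_modified_at"},
        }
    },
]
doc = next(coll.aggregate(pipeline), None)
print(doc["last_modified_at"] if doc else None)

The sorted form can use the index on `last_modified_at` and stop after one document, which is why it is reserved for whole collections; a view must run its own pipeline first, so a `$group` fold is the natural fit there.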
62 changes: 32 additions & 30 deletions fiftyone/core/dataset.py
@@ -1716,9 +1716,9 @@ def create_summary_field(
                 given ``path``
             sidebar_group (None): the name of a
                 :ref:`App sidebar group <app-sidebar-groups>` to which to add
-                the summary field, if necessary. By default, all summary fields
-                are added to a ``"summaries"`` group. You can pass ``False`` to
-                skip sidebar group modification
+                the summary field. By default, all summary fields are added to
+                a ``"summaries"`` group. You can pass ``False`` to skip sidebar
+                group modification
             include_counts (False): whether to include per-value counts when
                 summarizing categorical fields
             group_by (None): an optional attribute to group by when ``path``
@@ -1752,27 +1752,8 @@ def create_summary_field(
                 f"undeclared field '{path}'"
             )
 
-        _path, is_frame_field, list_fields, _, _ = self._parse_field_name(path)
-
         if field_name is None:
-            _chunks = _path.split(".")
-
-            chunks = []
-            if is_frame_field:
-                chunks.append("frames")
-
-            found_list = False
-            for i, _chunk in enumerate(_chunks, 1):
-                if ".".join(_chunks[:i]) in list_fields:
-                    found_list = True
-                    break
-                else:
-                    chunks.append(_chunk)
-
-            if found_list:
-                chunks.append(_chunks[-1])
-
-            field_name = "_".join(chunks)
+            field_name = self._get_default_summary_field_name(path)
 
         index_fields = []
         summary_info = {"path": path, "field_type": field_type}
@@ -1891,6 +1872,27 @@ def create_summary_field(
 
         return field_name
 
+    def _get_default_summary_field_name(self, path):
+        _path, is_frame_field, list_fields, _, _ = self._parse_field_name(path)
+        _chunks = _path.split(".")
+
+        chunks = []
+        if is_frame_field:
+            chunks.append("frames")
+
+        found_list = False
+        for i, _chunk in enumerate(_chunks, 1):
+            if ".".join(_chunks[:i]) in list_fields:
+                found_list = True
+                break
+            else:
+                chunks.append(_chunk)
+
+        if found_list:
+            chunks.append(_chunks[-1])
+
+        return "_".join(chunks)
+
     def _populate_summary_field(self, field_name, summary_info):
         path = summary_info["path"]
         field_type = summary_info["field_type"]
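The naming rule that `_get_default_summary_field_name()` now encapsulates joins the non-list components of the path with underscores, appends the leaf name once a list component is crossed, and prefixes frame fields with `frames`. A self-contained sketch under stated assumptions: `default_summary_field_name` is a hypothetical standalone port, and the real method derives `is_frame_field` and `list_fields` via `_parse_field_name()`:

def default_summary_field_name(path, is_frame_field, list_fields):
    _chunks = path.split(".")

    chunks = []
    if is_frame_field:
        chunks.append("frames")

    # Walk path prefixes until one names a list field, collecting components
    found_list = False
    for i, _chunk in enumerate(_chunks, 1):
        if ".".join(_chunks[:i]) in list_fields:
            found_list = True
            break
        else:
            chunks.append(_chunk)

    # A list component is collapsed: only the leaf name is kept after it
    if found_list:
        chunks.append(_chunks[-1])

    return "_".join(chunks)

# e.g. a label attribute inside a detections list collapses to "<root>_<leaf>"
print(default_summary_field_name(
    "ground_truth.detections.label",
    is_frame_field=False,
    list_fields={"ground_truth.detections"},
))  # ground_truth_label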
@@ -2066,17 +2068,15 @@ def check_summary_fields(self):
                     update_indexes.append(path)
             elif self._is_frame_field(source_path):
                 if frames_last_modified_at is None:
-                    frames_last_modified_at, _ = self.bounds(
-                        "frames.last_modified_at"
+                    frames_last_modified_at = self._get_last_modified_at(
+                        frames=True
                     )
 
                 if frames_last_modified_at > last_modified_at:
                     update_indexes.append(path)
             else:
                 if samples_last_modified_at is None:
-                    _, samples_last_modified_at = self.bounds(
-                        "last_modified_at"
-                    )
+                    samples_last_modified_at = self._get_last_modified_at()
 
                 if samples_last_modified_at > last_modified_at:
                     update_indexes.append(path)
@@ -4324,7 +4324,8 @@ def save_view(
         self._doc.reload("saved_views")
 
         self._doc.saved_views.append(view_doc)
-        self.save()
+        self._doc.last_modified_at = now
+        self._doc.save(virtual=True)
 
     def get_saved_view_info(self, name):
         """Loads the editable information about the saved view with the given
@@ -4625,7 +4626,8 @@ def save_workspace(
         self._doc.reload("workspaces")
 
         self._doc.workspaces.append(workspace_doc)
-        self.save()
+        self._doc.last_modified_at = now
+        self._doc.save(virtual=True)
 
     def load_workspace(self, name):
         """Loads the saved workspace with the given name.
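Both `save_view()` and `save_workspace()` now stamp `last_modified_at` explicitly and persist it with a targeted `_doc.save(virtual=True)` rather than a full `self.save()`. A brief usage sketch of the public API this touches; the dataset and its `uniqueness` field follow the zoo quickstart, so treat those names as assumptions:

import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F

dataset = foz.load_zoo_dataset("quickstart")

# Saving a view now also updates the dataset's `last_modified_at`
view = dataset.match(F("uniqueness") > 0.5)
dataset.save_view("unique", view)
print(dataset.last_modified_at)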
123 changes: 89 additions & 34 deletions fiftyone/migrations/revisions/v1_0_0.py
@@ -5,62 +5,84 @@
 | `voxel51.com <https://voxel51.com/>`_
 |
 """
-import logging
-
-
-logger = logging.getLogger(__name__)
+from datetime import datetime
 
 
 def up(db, dataset_name):
     match_d = {"name": dataset_name}
     dataset_dict = db.datasets.find_one(match_d)
 
-    # Add `last_modified_at` property
-    if "last_modified_at" not in dataset_dict:
-        dataset_dict["last_modified_at"] = None
+    now = datetime.utcnow()
 
-    add_samples_created_at = False
+    # Populate `Dataset.last_modified_at`
+    if dataset_dict.get("last_modified_at", None) is None:
+        dataset_dict["last_modified_at"] = now
+
+    added_created_at_samples = False
+    added_last_modified_at_samples = False
     sample_fields = dataset_dict.get("sample_fields", [])
     if sample_fields:
-        add_samples_created_at = _up_fields(sample_fields)
+        (
+            added_created_at_samples,
+            added_last_modified_at_samples,
+        ) = _up_fields(dataset_name, sample_fields)
 
-    add_frames_created_at = False
+    added_created_at_frames = False
+    added_last_modified_at_frames = False
     frame_fields = dataset_dict.get("frame_fields", [])
     if frame_fields:
-        add_frames_created_at = _up_fields(frame_fields)
+        (
+            added_created_at_frames,
+            added_last_modified_at_frames,
+        ) = _up_fields(dataset_name, frame_fields)
 
     db.datasets.replace_one(match_d, dataset_dict)
 
-    # Populate `Sample.created_at` values
-    if add_samples_created_at:
-        sample_collection_name = dataset_dict.get(
-            "sample_collection_name", None
+    sample_collection_name = dataset_dict.get("sample_collection_name", None)
+    if sample_collection_name:
+        _up_field_values(
+            db,
+            dataset_name,
+            sample_collection_name,
+            added_created_at_samples,
+            added_last_modified_at_samples,
+            now,
         )
-        if sample_collection_name:
-            _add_created_at(db, dataset_name, sample_collection_name)
 
-    # Populate `Frame.created_at` values
-    if add_frames_created_at:
-        frame_collection_name = dataset_dict.get("frame_collection_name", None)
-        if frame_collection_name:
-            _add_created_at(db, dataset_name, frame_collection_name)
+    frame_collection_name = dataset_dict.get("frame_collection_name", None)
+    if frame_collection_name:
+        _up_field_values(
+            db,
+            dataset_name,
+            frame_collection_name,
+            added_created_at_frames,
+            added_last_modified_at_frames,
+            now,
+        )


 def down(db, dataset_name):
     pass
 
 
-def _up_fields(fields):
+def _up_fields(dataset_name, fields):
     found_created_at = False
     found_last_modified_at = False
 
     for field in fields:
         name = field.get("name", None)
-        found_created_at |= name == "created_at"
-        found_last_modified_at |= name == "last_modified_at"
-
-        # Add `read_only` property
-        if "read_only" not in field:
+        if name == "created_at":
+            # Existing 'created_at' field must be read-only DateTimeField
+            found_created_at = True
+            _up_read_only_datetime_field(dataset_name, field)
+        elif name == "last_modified_at":
+            # Existing 'last_modified_at' field must be read-only DateTimeField
+            found_last_modified_at = True
+            _up_read_only_datetime_field(dataset_name, field)
+        elif "read_only" not in field:
+            # Add `read_only` property
             field["read_only"] = False
 
     # Add `created_at` field
@@ -95,16 +117,49 @@ def _up_fields(fields):
             }
         )
 
-    return not found_created_at
+    added_created_at = not found_created_at
+    added_last_modified_at = not found_last_modified_at
+
+    return added_created_at, added_last_modified_at
 
 
+def _up_read_only_datetime_field(dataset_name, field):
+    field_name = field.get("name", None)
+    ftype = field.get("ftype", None)
+    expected_ftype = "fiftyone.core.fields.DateTimeField"
+
+    if ftype != expected_ftype:
+        raise ValueError(
+            f"Cannot migrate dataset '{dataset_name}' to v1.0.0 because it "
+            f"has an existing '{field_name}' field of type "
+            f"{ftype} != {expected_ftype}. Please rename or delete the field "
+            "and try again"
+        )
+
+    field["read_only"] = True
+
+
+def _up_field_values(
+    db,
+    dataset_name,
+    collection_name,
+    set_created_at,
+    set_last_modified_at,
+    now,
+):
+    set_expr = {}
+    if set_created_at:
+        set_expr["created_at"] = {"$toDate": "$_id"}
+    if set_last_modified_at:
+        set_expr["last_modified_at"] = now
+
+    if not set_expr:
+        return
+
-def _add_created_at(db, dataset_name, collection_name):
     try:
-        pipeline = [{"$set": {"created_at": {"$toDate": "$_id"}}}]
-        db[collection_name].update_many({}, pipeline)
+        db[collection_name].update_many({}, [{"$set": set_expr}])
    except Exception as e:
-        logger.warning(
-            "Failed to populate 'created_at' field for dataset %s. Reason: %s",
-            dataset_name,
-            e,
+        raise RuntimeError(
+            "Failed to populate 'created_at' and/or 'last_modified_at' fields "
+            f"for dataset '{dataset_name}'. Reason: {e}"
         )
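The consolidated `_up_field_values()` helper uses MongoDB's update-with-aggregation-pipeline form (MongoDB 4.2+): `{"$toDate": "$_id"}` recovers each document's creation time from the timestamp embedded in its ObjectId, and `last_modified_at` is stamped with a single migration-time value. A minimal pymongo sketch of the same backfill pattern, with illustrative connection and collection names:

from datetime import datetime

from bson import ObjectId
from pymongo import MongoClient

client = MongoClient()
coll = client["fiftyone"]["samples"]
now = datetime.utcnow()

# ObjectIds embed their creation time, so `$toDate: "$_id"` backfills
# `created_at` without any previously stored value
coll.update_many(
    {},
    [{"$set": {"created_at": {"$toDate": "$_id"}, "last_modified_at": now}}],
)

# The same timestamp is recoverable client-side from any ObjectId
print(ObjectId().generation_time)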
