From 581d3f93f4de93d9a5d047f60b0f8313ce62696d Mon Sep 17 00:00:00 2001 From: Stuart Wheaton Date: Mon, 26 Aug 2024 10:24:47 -0400 Subject: [PATCH 1/3] fix copied doc updates not insert --- fiftyone/core/dataset.py | 20 ++++++++------------ fiftyone/core/odm/document.py | 15 +++++++++++++++ tests/unittests/odm_tests.py | 25 +++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/fiftyone/core/dataset.py b/fiftyone/core/dataset.py index d45edfcaed..59070f907e 100644 --- a/fiftyone/core/dataset.py +++ b/fiftyone/core/dataset.py @@ -7773,7 +7773,12 @@ def _clone_dataset_or_view(dataset_or_view, name, persistent): dataset._reload() - _id = ObjectId() + # + # Clone dataset document + # + + dataset_doc = dataset._doc.copy_with_new_id() + _id = dataset_doc.id sample_collection_name = _make_sample_collection_name(_id) @@ -7784,13 +7789,6 @@ def _clone_dataset_or_view(dataset_or_view, name, persistent): else: frame_collection_name = None - # - # Clone dataset document - # - - dataset_doc = dataset._doc.copy() - - dataset_doc.id = _id dataset_doc.name = name dataset_doc.slug = slug dataset_doc.created_at = datetime.utcnow() @@ -8251,14 +8249,12 @@ def _clone_extras(src_dataset, dst_dataset): def _clone_reference_doc(ref_doc): - _ref_doc = ref_doc.copy() - _ref_doc.id = ObjectId() + _ref_doc = ref_doc.copy_with_new_id() return _ref_doc def _clone_run(run_doc): - _run_doc = run_doc.copy() - _run_doc.id = ObjectId() + _run_doc = run_doc.copy_with_new_id() _run_doc.results = None # Unfortunately the only way to copy GridFS files is to read-write them... diff --git a/fiftyone/core/odm/document.py b/fiftyone/core/odm/document.py index feada2e548..c550aae3d4 100644 --- a/fiftyone/core/odm/document.py +++ b/fiftyone/core/odm/document.py @@ -9,6 +9,7 @@ from copy import deepcopy import json +import bson from bson import json_util, ObjectId import mongoengine from pymongo import InsertOne, UpdateOne @@ -583,6 +584,20 @@ class Document(BaseDocument, mongoengine.Document): def _doc_name(cls): return "Document" + def copy_with_new_id(self): + """Returns a copy of this document with a new ID""" + doc_copy = self.copy() + new_id = bson.ObjectId() + + # pylint: disable=no-member + id_field = self._meta.get("id_field", "id") + doc_copy.set_field(id_field, new_id) + + # Setting _created explicitly as True because we know this is a new + # document + doc_copy._created = True + return doc_copy + def reload(self, *fields, **kwargs): """Reloads the document from the database. diff --git a/tests/unittests/odm_tests.py b/tests/unittests/odm_tests.py index 60c4eeb7a0..b2a9eaabc6 100644 --- a/tests/unittests/odm_tests.py +++ b/tests/unittests/odm_tests.py @@ -10,6 +10,7 @@ from bson import ObjectId import fiftyone as fo +import fiftyone.core.odm as foo class ColorSchemeTests(unittest.TestCase): @@ -29,3 +30,27 @@ def test_color_scheme_serialization(self): self.assertIsInstance(d["_id"], dict) assert color_scheme == also_color_scheme + + +class DocumentTests(unittest.TestCase): + def test_doc_copy_with_new_id(self): + dataset_doc = foo.DatasetDocument( + name="unique", + slug="unique", + sample_collection_name="samples.unique", + version="51.51", + ) + dataset_doc.save() + + # Copy with new ID -- ID should be new, _created should be True + doc_copy = dataset_doc.copy_with_new_id() + self.assertNotEqual( + dataset_doc.get_field("id"), doc_copy.get_field("id") + ) + self.assertTrue(doc_copy._created) + + # Now if we set ID to be same, the doc should be the same + doc_copy.set_field("id", dataset_doc.get_field("id")) + self.assertEqual(doc_copy, dataset_doc) + + dataset_doc.delete() From 63a5cc4a59cba41a5eb4d6566a44832f357a4496 Mon Sep 17 00:00:00 2001 From: Stuart Wheaton Date: Fri, 30 Aug 2024 10:17:59 -0400 Subject: [PATCH 2/3] pr comment: copy_with_new_id->copy(new_id=False) --- fiftyone/core/dataset.py | 6 +++--- fiftyone/core/odm/document.py | 27 +++++++++++++++++---------- tests/unittests/odm_tests.py | 25 ++++++++++++++----------- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/fiftyone/core/dataset.py b/fiftyone/core/dataset.py index 59070f907e..916ff0125a 100644 --- a/fiftyone/core/dataset.py +++ b/fiftyone/core/dataset.py @@ -7777,7 +7777,7 @@ def _clone_dataset_or_view(dataset_or_view, name, persistent): # Clone dataset document # - dataset_doc = dataset._doc.copy_with_new_id() + dataset_doc = dataset._doc.copy(new_id=True) _id = dataset_doc.id sample_collection_name = _make_sample_collection_name(_id) @@ -8249,12 +8249,12 @@ def _clone_extras(src_dataset, dst_dataset): def _clone_reference_doc(ref_doc): - _ref_doc = ref_doc.copy_with_new_id() + _ref_doc = ref_doc.copy(new_id=True) return _ref_doc def _clone_run(run_doc): - _run_doc = run_doc.copy_with_new_id() + _run_doc = run_doc.copy(new_id=True) _run_doc.results = None # Unfortunately the only way to copy GridFS files is to read-write them... diff --git a/fiftyone/core/odm/document.py b/fiftyone/core/odm/document.py index c550aae3d4..b263e5bacb 100644 --- a/fiftyone/core/odm/document.py +++ b/fiftyone/core/odm/document.py @@ -584,18 +584,25 @@ class Document(BaseDocument, mongoengine.Document): def _doc_name(cls): return "Document" - def copy_with_new_id(self): - """Returns a copy of this document with a new ID""" - doc_copy = self.copy() - new_id = bson.ObjectId() + def copy(self, new_id=False): + """Returns a deep copy of the document. - # pylint: disable=no-member - id_field = self._meta.get("id_field", "id") - doc_copy.set_field(id_field, new_id) + Args: + new_id (False): Whether to generate a new ID for the copied + document locally. By default, ID is set to None which causes + ID generation to be done on the server. + """ + doc_copy = super().copy() + if new_id: + new_id = bson.ObjectId() + + # pylint: disable=no-member + id_field = self._meta.get("id_field", "id") + doc_copy.set_field(id_field, new_id) - # Setting _created explicitly as True because we know this is a new - # document - doc_copy._created = True + # Setting _created explicitly as True because we know this is a new + # document + doc_copy._created = True return doc_copy def reload(self, *fields, **kwargs): diff --git a/tests/unittests/odm_tests.py b/tests/unittests/odm_tests.py index b2a9eaabc6..85e01ab4bc 100644 --- a/tests/unittests/odm_tests.py +++ b/tests/unittests/odm_tests.py @@ -40,17 +40,20 @@ def test_doc_copy_with_new_id(self): sample_collection_name="samples.unique", version="51.51", ) - dataset_doc.save() - # Copy with new ID -- ID should be new, _created should be True - doc_copy = dataset_doc.copy_with_new_id() - self.assertNotEqual( - dataset_doc.get_field("id"), doc_copy.get_field("id") - ) - self.assertTrue(doc_copy._created) + try: + dataset_doc.save() + + # Copy with new ID -- ID should be new, _created should be True + doc_copy = dataset_doc.copy(new_id=True) + self.assertNotEqual( + dataset_doc.get_field("id"), doc_copy.get_field("id") + ) + self.assertTrue(doc_copy._created) - # Now if we set ID to be same, the doc should be the same - doc_copy.set_field("id", dataset_doc.get_field("id")) - self.assertEqual(doc_copy, dataset_doc) + # Now if we set ID to be same, the doc should be the same + doc_copy.set_field("id", dataset_doc.get_field("id")) + self.assertEqual(doc_copy, dataset_doc) - dataset_doc.delete() + finally: + dataset_doc.delete() From 86aad064377babd45da1f258fb5838b56382d84f Mon Sep 17 00:00:00 2001 From: brimoor Date: Wed, 4 Sep 2024 00:09:21 -0400 Subject: [PATCH 3/3] tweak --- fiftyone/core/odm/document.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fiftyone/core/odm/document.py b/fiftyone/core/odm/document.py index b263e5bacb..c847fc8384 100644 --- a/fiftyone/core/odm/document.py +++ b/fiftyone/core/odm/document.py @@ -9,7 +9,6 @@ from copy import deepcopy import json -import bson from bson import json_util, ObjectId import mongoengine from pymongo import InsertOne, UpdateOne @@ -588,21 +587,19 @@ def copy(self, new_id=False): """Returns a deep copy of the document. Args: - new_id (False): Whether to generate a new ID for the copied - document locally. By default, ID is set to None which causes - ID generation to be done on the server. + new_id (False): whether to generate a new ID for the copied + document. By default, the ID is left as ``None`` and will be + automatically populated when the document is added to the + database """ doc_copy = super().copy() - if new_id: - new_id = bson.ObjectId() + if new_id: # pylint: disable=no-member id_field = self._meta.get("id_field", "id") - doc_copy.set_field(id_field, new_id) - - # Setting _created explicitly as True because we know this is a new - # document + doc_copy.set_field(id_field, ObjectId()) doc_copy._created = True + return doc_copy def reload(self, *fields, **kwargs):