diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d6497c92..eae5c757 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -44,4 +44,4 @@ repos: hooks: - id: isort name: Format imports - exclude: docs/ + exclude: docs/|nemo_curator/modules/__init__.py diff --git a/nemo_curator/modules/__init__.py b/nemo_curator/modules/__init__.py index 2e105a02..d845441f 100644 --- a/nemo_curator/modules/__init__.py +++ b/nemo_curator/modules/__init__.py @@ -13,7 +13,6 @@ # limitations under the License. from .add_id import AddId -from .distributed_data_classifier import DomainClassifier, QualityClassifier from .exact_dedup import ExactDuplicates from .filter import Filter, Score, ScoreFilter from .fuzzy_dedup import LSH, MinHash @@ -21,6 +20,11 @@ from .modify import Modify from .task import TaskDecontamination +# PyTorch-related imports must come after all imports that require cugraph, +# because of context cleanup issues between PyTorch and cugraph. +# See this issue: https://github.com/rapidsai/cugraph/issues/2718 +from .distributed_data_classifier import DomainClassifier, QualityClassifier + __all__ = [ "DomainClassifier", "ExactDuplicates",