From b41ec502574b076200eb379ee099f409cd52d720 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 15 Dec 2023 14:34:11 -0800 Subject: [PATCH 1/5] Use isinstance over is_foo_dtype --- python/cudf/cudf/_lib/column.pyx | 14 ++++---- python/cudf/cudf/_lib/groupby.pyx | 51 +++++++++++++++--------------- python/cudf/cudf/_lib/interop.pyx | 8 ++--- python/cudf/cudf/_lib/io/utils.pyx | 4 +-- python/cudf/cudf/_lib/json.pyx | 19 ++++------- python/cudf/cudf/_lib/orc.pyx | 5 ++- python/cudf/cudf/_lib/parquet.pyx | 19 ++++------- python/cudf/cudf/_lib/scalar.pyx | 11 ++----- python/cudf/cudf/_lib/types.pyx | 32 +++++++++---------- python/cudf/cudf/_lib/utils.pyx | 22 +++++-------- 10 files changed, 80 insertions(+), 105 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 0edf9f8aa95..bd9755c0914 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -5,13 +5,13 @@ from typing import Literal import cupy as cp import numpy as np +import pandas as pd import rmm import cudf import cudf._lib as libcudf from cudf._lib import pylibcudf -from cudf.api.types import is_categorical_dtype, is_datetime64tz_dtype from cudf.core.buffer import ( Buffer, ExposureTrackedBuffer, @@ -330,10 +330,10 @@ cdef class Column: ) cdef mutable_column_view mutable_view(self) except *: - if is_categorical_dtype(self.dtype): + if isinstance(self.dtype, cudf.CategoricalDtype): col = self.base_children[0] data_dtype = col.dtype - elif is_datetime64tz_dtype(self.dtype): + elif isinstance(self.dtype, pd.DatetimeTZDtype): col = self data_dtype = _get_base_dtype(col.dtype) else: @@ -393,10 +393,10 @@ cdef class Column: return self._view(c_null_count) cdef column_view _view(self, libcudf_types.size_type null_count) except *: - if is_categorical_dtype(self.dtype): + if isinstance(self.dtype, cudf.CategoricalDtype): col = self.base_children[0] data_dtype = col.dtype - elif
is_datetime64tz_dtype(self.dtype): + elif isinstance(self.dtype, pd.DatetimeTZDtype): col = self data_dtype = _get_base_dtype(col.dtype) else: @@ -468,7 +468,7 @@ cdef class Column: # categoricals because cudf supports ordered and unordered categoricals # while libcudf supports only unordered categoricals (see # https://github.com/rapidsai/cudf/pull/8567). - if is_categorical_dtype(self.dtype): + if isinstance(self.dtype, cudf.CategoricalDtype): col = self.base_children[0] else: col = self @@ -634,7 +634,7 @@ cdef class Column: """ column_owner = isinstance(owner, Column) mask_owner = owner - if column_owner and is_categorical_dtype(owner.dtype): + if column_owner and isinstance(owner.dtype, cudf.CategoricalDtype): owner = owner.base_children[0] size = cv.size() diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index b3778e45cde..37db4312bf6 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -2,15 +2,15 @@ from pandas.core.groupby.groupby import DataError -from cudf.api.types import ( - is_categorical_dtype, - is_decimal_dtype, - is_interval_dtype, - is_list_dtype, - is_string_dtype, - is_struct_dtype, -) +from cudf.api.types import is_string_dtype from cudf.core.buffer import acquire_spill_lock +from cudf.core.dtypes import ( + CategoricalDtype, + DecimalDtype, + IntervalDtype, + ListDtype, + StructDtype, +) from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -73,6 +73,21 @@ _DECIMAL_AGGS = { ctypedef const scalar constscalar +cdef get_valid_aggregation(object dtype): + if isinstance(dtype, ListDtype): + return _LIST_AGGS + elif is_string_dtype(dtype): + return _STRING_AGGS + elif isinstance(dtype, CategoricalDtype): + return _CATEGORICAL_AGGS + elif isinstance(dtype, StructDtype): + return _STRUCT_AGGS + elif isinstance(dtype, IntervalDtype): + return _INTERVAL_AGGS + elif isinstance(dtype, DecimalDtype): + return _DECIMAL_AGGS + return "ALL" + cdef _agg_result_from_columns( 
vector[libcudf_groupby.aggregation_result]& c_result_columns, set column_included, @@ -187,15 +202,7 @@ cdef class GroupBy: for i, (col, aggs) in enumerate(zip(values, aggregations)): dtype = col.dtype - valid_aggregations = ( - _LIST_AGGS if is_list_dtype(dtype) - else _STRING_AGGS if is_string_dtype(dtype) - else _CATEGORICAL_AGGS if is_categorical_dtype(dtype) - else _STRUCT_AGGS if is_struct_dtype(dtype) - else _INTERVAL_AGGS if is_interval_dtype(dtype) - else _DECIMAL_AGGS if is_decimal_dtype(dtype) - else "ALL" - ) + valid_aggregations = get_valid_aggregation(dtype) included_aggregations_i = [] c_agg_request = move(libcudf_groupby.aggregation_request()) @@ -258,15 +265,7 @@ cdef class GroupBy: for i, (col, aggs) in enumerate(zip(values, aggregations)): dtype = col.dtype - valid_aggregations = ( - _LIST_AGGS if is_list_dtype(dtype) - else _STRING_AGGS if is_string_dtype(dtype) - else _CATEGORICAL_AGGS if is_categorical_dtype(dtype) - else _STRUCT_AGGS if is_struct_dtype(dtype) - else _INTERVAL_AGGS if is_interval_dtype(dtype) - else _DECIMAL_AGGS if is_decimal_dtype(dtype) - else "ALL" - ) + valid_aggregations = get_valid_aggregation(dtype) included_aggregations_i = [] c_agg_request = move(libcudf_groupby.scan_request()) diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 8fd2a409d90..2b78ac353f9 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -18,8 +18,8 @@ from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns -from cudf.api.types import is_list_dtype, is_struct_dtype from cudf.core.buffer import acquire_spill_lock +from cudf.core.dtypes import ListDtype, StructDtype def from_dlpack(dlpack_capsule): @@ -98,7 +98,7 @@ cdef vector[column_metadata] gather_metadata(object cols_dtypes) except *: if cols_dtypes is not None: for idx, (col_name, col_dtype) in 
enumerate(cols_dtypes): cpp_metadata.push_back(column_metadata(col_name.encode())) - if is_struct_dtype(col_dtype) or is_list_dtype(col_dtype): + if isinstance(col_dtype, (ListDtype, StructDtype)): _set_col_children_metadata(col_dtype, cpp_metadata[idx]) else: raise TypeError( @@ -113,14 +113,14 @@ cdef _set_col_children_metadata(dtype, cdef column_metadata element_metadata - if is_struct_dtype(dtype): + if isinstance(dtype, StructDtype): for name, value in dtype.fields.items(): element_metadata = column_metadata(name.encode()) _set_col_children_metadata( value, element_metadata ) col_meta.children_meta.push_back(element_metadata) - elif is_list_dtype(dtype): + elif isinstance(dtype, ListDtype): col_meta.children_meta.reserve(2) # Offsets - child 0 col_meta.children_meta.push_back(column_metadata()) diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx index 9b027a4d275..2ed878edbab 100644 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ b/python/cudf/cudf/_lib/io/utils.pyx @@ -23,7 +23,7 @@ import errno import io import os -from cudf.api.types import is_struct_dtype +from cudf.core.dtypes import StructDtype # Converts the Python source input to libcudf IO source_info @@ -172,7 +172,7 @@ cdef Column update_column_struct_field_names( ) col.set_base_children(tuple(children)) - if is_struct_dtype(col): + if isinstance(col.dtype, StructDtype): field_names.reserve(len(col.base_children)) for i in range(info.children.size()): field_names.push_back(info.children[i].name) diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index 437c3ef6ec4..637af921251 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -17,6 +17,7 @@ from libcpp.utility cimport move from libcpp.vector cimport vector cimport cudf._lib.cpp.io.types as cudf_io_types +from cudf._lib.column cimport Column from cudf._lib.cpp.io.data_sink cimport data_sink from cudf._lib.cpp.io.json cimport ( json_reader_options, @@ -42,10 +43,6 
@@ from cudf._lib.io.utils cimport ( from cudf._lib.types cimport dtype_to_data_type from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table -from cudf.api.types import is_list_dtype, is_struct_dtype - -from cudf._lib.column cimport Column - cpdef read_json(object filepaths_or_buffers, object dtype, @@ -214,13 +211,12 @@ def write_json( cdef schema_element _get_cudf_schema_element_from_dtype(object dtype) except *: cdef schema_element s_element cdef data_type lib_type - if cudf.api.types.is_categorical_dtype(dtype): + dtype = cudf.dtype(dtype) + if isinstance(dtype, cudf.CategoricalDtype): raise NotImplementedError( "CategoricalDtype as dtype is not yet " "supported in JSON reader" ) - - dtype = cudf.dtype(dtype) lib_type = dtype_to_data_type(dtype) s_element.type = lib_type if isinstance(dtype, cudf.StructDtype): @@ -237,19 +233,18 @@ cdef schema_element _get_cudf_schema_element_from_dtype(object dtype) except *: cdef data_type _get_cudf_data_type_from_dtype(object dtype) except *: - if cudf.api.types.is_categorical_dtype(dtype): + dtype = cudf.dtype(dtype) + if isinstance(dtype, cudf.CategoricalDtype): raise NotImplementedError( "CategoricalDtype as dtype is not yet " "supported in JSON reader" ) - - dtype = cudf.dtype(dtype) return dtype_to_data_type(dtype) cdef _set_col_children_metadata(Column col, column_name_info& col_meta): cdef column_name_info child_info - if is_struct_dtype(col): + if isinstance(col.dtype, cudf.StructDtype): for i, (child_col, name) in enumerate( zip(col.children, list(col.dtype.fields)) ): @@ -258,7 +253,7 @@ cdef _set_col_children_metadata(Column col, _set_col_children_metadata( child_col, col_meta.children[i] ) - elif is_list_dtype(col): + elif isinstance(col.dtype, cudf.ListDtype): for i, child_col in enumerate(col.children): col_meta.children.push_back(child_info) _set_col_children_metadata( diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index 0ae039b14d2..5a0274d42bf 100644 --- 
a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -59,7 +59,6 @@ from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table from pyarrow.lib import NativeFile from cudf._lib.utils import _index_level_name, generate_pandas_metadata -from cudf.api.types import is_list_dtype, is_struct_dtype cpdef read_raw_orc_statistics(filepath_or_buffer): @@ -474,7 +473,7 @@ cdef class ORCWriter: cdef _set_col_children_metadata(Column col, column_in_metadata& col_meta, list_column_as_map=False): - if is_struct_dtype(col): + if isinstance(col.dtype, cudf.StructDtype): for i, (child_col, name) in enumerate( zip(col.children, list(col.dtype.fields)) ): @@ -482,7 +481,7 @@ cdef _set_col_children_metadata(Column col, _set_col_children_metadata( child_col, col_meta.child(i), list_column_as_map ) - elif is_list_dtype(col): + elif isinstance(col.dtype, cudf.ListDtype): if list_column_as_map: col_meta.set_list_column_as_map() _set_col_children_metadata( diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 4acb1ce10b1..10f635bba43 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -18,12 +18,7 @@ import numpy as np from cython.operator cimport dereference -from cudf.api.types import ( - is_decimal_dtype, - is_list_dtype, - is_list_like, - is_struct_dtype, -) +from cudf.api.types import is_list_like from cudf._lib.utils cimport data_from_unique_ptr @@ -220,7 +215,7 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None, # update the decimal precision of each column for col in names: - if is_decimal_dtype(df._data[col].dtype): + if isinstance(df._data[col].dtype, cudf.core.dtypes.DecimalDtype): df._data[col].dtype.precision = ( meta_data_per_column[col]["metadata"]["precision"] ) @@ -703,7 +698,7 @@ cdef _set_col_metadata( # is true. 
col_meta.set_nullability(True) - if is_struct_dtype(col): + if isinstance(col.dtype, cudf.StructDtype): for i, (child_col, name) in enumerate( zip(col.children, list(col.dtype.fields)) ): @@ -713,13 +708,11 @@ cdef _set_col_metadata( col_meta.child(i), force_nullable_schema ) - elif is_list_dtype(col): + elif isinstance(col.dtype, cudf.ListDtype): _set_col_metadata( col.children[1], col_meta.child(1), force_nullable_schema ) - else: - if is_decimal_dtype(col): - col_meta.set_decimal_precision(col.dtype.precision) - return + elif isinstance(df._data[col].dtype, cudf.core.dtypes.DecimalDtype): + col_meta.set_decimal_precision(col.dtype.precision) diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 0b64c75f7b6..445c2d8b0b2 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -14,12 +14,7 @@ from libcpp.utility cimport move import cudf from cudf._lib import pylibcudf from cudf._lib.types import LIBCUDF_TO_SUPPORTED_NUMPY_TYPES -from cudf.core.dtypes import ( - ListDtype, - StructDtype, - is_list_dtype, - is_struct_dtype, -) +from cudf.core.dtypes import ListDtype, StructDtype from cudf.core.missing import NA, NaT cimport cudf._lib.cpp.types as libcudf_types @@ -79,9 +74,9 @@ def gather_metadata(dtypes): out = [] for name, dtype in dtypes.items(): v = pylibcudf.interop.ColumnMetadata(name) - if is_struct_dtype(dtype): + if isinstance(dtype, cudf.StructDtype): v.children_meta = gather_metadata(dtype.fields) - elif is_list_dtype(dtype): + elif isinstance(dtype, cudf.ListDtype): # Offsets column is unnamed and has no children v.children_meta.append(pylibcudf.interop.ColumnMetadata("")) v.children_meta.extend( diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index d87104bf168..fb98f6bc366 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -238,15 +238,15 @@ cdef dtype_from_column_view(column_view cv): cdef libcudf_types.data_type 
dtype_to_data_type(dtype) except *: cdef libcudf_types.type_id tid - if cudf.api.types.is_list_dtype(dtype): + if isinstance(dtype, cudf.ListDtype): tid = libcudf_types.type_id.LIST - elif cudf.api.types.is_struct_dtype(dtype): + elif isinstance(dtype, cudf.StructDtype): tid = libcudf_types.type_id.STRUCT - elif cudf.api.types.is_decimal128_dtype(dtype): + elif isinstance(dtype, cudf.Decimal128Dtype): tid = libcudf_types.type_id.DECIMAL128 - elif cudf.api.types.is_decimal64_dtype(dtype): + elif isinstance(dtype, cudf.Decimal64Dtype): tid = libcudf_types.type_id.DECIMAL64 - elif cudf.api.types.is_decimal32_dtype(dtype): + elif isinstance(dtype, cudf.Decimal32Dtype): tid = libcudf_types.type_id.DECIMAL32 else: tid = ( @@ -259,21 +259,21 @@ cdef libcudf_types.data_type dtype_to_data_type(dtype) except *: return libcudf_types.data_type(tid) cpdef dtype_to_pylibcudf_type(dtype): - if cudf.api.types.is_list_dtype(dtype): + if isinstance(dtype, cudf.ListDtype): return pylibcudf.DataType(pylibcudf.TypeId.LIST) - elif cudf.api.types.is_struct_dtype(dtype): + elif isinstance(dtype, cudf.StructDtype): return pylibcudf.DataType(pylibcudf.TypeId.STRUCT) - elif cudf.api.types.is_decimal_dtype(dtype): - if cudf.api.types.is_decimal128_dtype(dtype): - tid = pylibcudf.TypeId.DECIMAL128 - elif cudf.api.types.is_decimal64_dtype(dtype): - tid = pylibcudf.TypeId.DECIMAL64 - else: - tid = pylibcudf.TypeId.DECIMAL32 + elif isinstance(dtype, cudf.Decimal128Dtype): + tid = pylibcudf.TypeId.DECIMAL128 + return pylibcudf.DataType(tid, -dtype.scale) + elif isinstance(dtype, cudf.Decimal64Dtype): + tid = pylibcudf.TypeId.DECIMAL64 + return pylibcudf.DataType(tid, -dtype.scale) + elif isinstance(dtype, cudf.Decimal32Dtype): + tid = pylibcudf.TypeId.DECIMAL32 return pylibcudf.DataType(tid, -dtype.scale) - # libcudf types don't support localization so convert to the base type - if isinstance(dtype, pd.DatetimeTZDtype): + elif isinstance(dtype, pd.DatetimeTZDtype): dtype = np.dtype(f" Date: Fri, 
15 Dec 2023 16:26:31 -0800 Subject: [PATCH 2/5] fix typo --- python/cudf/cudf/_lib/parquet.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 10f635bba43..8d0cb2931e5 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -714,5 +714,5 @@ cdef _set_col_metadata( col_meta.child(1), force_nullable_schema ) - elif isinstance(df._data[col].dtype, cudf.core.dtypes.DecimalDtype): + elif isinstance(col.dtype, cudf.core.dtypes.DecimalDtype): col_meta.set_decimal_precision(col.dtype.precision) From 2e4e3b6c65e758bc1f9b182b7cd25bdd40dbbbdf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 11 Jan 2024 11:02:29 -0800 Subject: [PATCH 3/5] Add copyright --- python/cudf/cudf/_lib/column.pyx | 2 +- python/cudf/cudf/_lib/groupby.pyx | 2 +- python/cudf/cudf/_lib/interop.pyx | 2 +- python/cudf/cudf/_lib/io/utils.pyx | 2 +- python/cudf/cudf/_lib/json.pyx | 2 +- python/cudf/cudf/_lib/orc.pyx | 2 +- python/cudf/cudf/_lib/parquet.pyx | 2 +- python/cudf/cudf/_lib/scalar.pyx | 2 +- python/cudf/cudf/_lib/string_casting.pyx | 2 +- python/cudf/cudf/_lib/types.pyx | 2 +- python/cudf/cudf/_lib/utils.pyx | 2 +- python/cudf/cudf/core/column/interval.py | 2 +- python/cudf/cudf/core/column/lists.py | 2 +- python/cudf/cudf/core/indexed_frame.py | 2 +- python/cudf/cudf/core/single_column_frame.py | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index bd9755c0914..907f5dd5476 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
from typing import Literal diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 37db4312bf6..8075f0b4504 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from pandas.core.groupby.groupby import DataError diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 2b78ac353f9..13c8ce43ea3 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from cpython cimport pycapsule from libcpp.memory cimport shared_ptr, unique_ptr diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx index 2ed878edbab..ae978d18813 100644 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ b/python/cudf/cudf/_lib/io/utils.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from cpython.buffer cimport PyBUF_READ from cpython.memoryview cimport PyMemoryView_FromMemory diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index 637af921251..c361a3f00c4 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # cython: boundscheck = False diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index 5a0274d42bf..c64296eb7da 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
import cudf from cudf.core.buffer import acquire_spill_lock diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 8d0cb2931e5..27efc5e1ecd 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # cython: boundscheck = False diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 6f29521b4f8..37708a4e3ba 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import copy diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx index 4b44ac83a70..8799c316139 100644 --- a/python/cudf/cudf/_lib/string_casting.pyx +++ b/python/cudf/cudf/_lib/string_casting.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from cudf._lib.column cimport Column diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index fb98f6bc366..1b4f4617e97 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from enum import IntEnum diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 2d6517bd60c..50a47b4f507 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
import numpy as np import pyarrow as pa diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index eed7bba3628..dd714864976 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. from typing import Optional import pandas as pd diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index a5653e66513..a60189a3002 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from functools import cached_property from typing import List, Optional, Sequence, Tuple, Union diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index ab089ceb103..433616d7d45 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. """Base class for Frame types that have an index.""" from __future__ import annotations diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 911e7ac905c..5305eeab8a6 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. 
"""Base class for Frame types that only have a single column.""" from __future__ import annotations From 023658823402091bc65677257881d5f65304c816 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 11 Jan 2024 11:09:44 -0800 Subject: [PATCH 4/5] Revert "Add copyright" This reverts commit 2e4e3b6c65e758bc1f9b182b7cd25bdd40dbbbdf. --- python/cudf/cudf/_lib/column.pyx | 2 +- python/cudf/cudf/_lib/groupby.pyx | 2 +- python/cudf/cudf/_lib/interop.pyx | 2 +- python/cudf/cudf/_lib/io/utils.pyx | 2 +- python/cudf/cudf/_lib/json.pyx | 2 +- python/cudf/cudf/_lib/orc.pyx | 2 +- python/cudf/cudf/_lib/parquet.pyx | 2 +- python/cudf/cudf/_lib/scalar.pyx | 2 +- python/cudf/cudf/_lib/string_casting.pyx | 2 +- python/cudf/cudf/_lib/types.pyx | 2 +- python/cudf/cudf/_lib/utils.pyx | 2 +- python/cudf/cudf/core/column/interval.py | 2 +- python/cudf/cudf/core/column/lists.py | 2 +- python/cudf/cudf/core/indexed_frame.py | 2 +- python/cudf/cudf/core/single_column_frame.py | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 907f5dd5476..bd9755c0914 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from typing import Literal diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 8075f0b4504..37db4312bf6 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
from pandas.core.groupby.groupby import DataError diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 13c8ce43ea3..2b78ac353f9 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from cpython cimport pycapsule from libcpp.memory cimport shared_ptr, unique_ptr diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx index ae978d18813..2ed878edbab 100644 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ b/python/cudf/cudf/_lib/io/utils.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from cpython.buffer cimport PyBUF_READ from cpython.memoryview cimport PyMemoryView_FromMemory diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index c361a3f00c4..637af921251 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # cython: boundscheck = False diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index c64296eb7da..5a0274d42bf 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. import cudf from cudf.core.buffer import acquire_spill_lock diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 27efc5e1ecd..8d0cb2931e5 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. 
# cython: boundscheck = False diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 37708a4e3ba..6f29521b4f8 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. import copy diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx index 8799c316139..4b44ac83a70 100644 --- a/python/cudf/cudf/_lib/string_casting.pyx +++ b/python/cudf/cudf/_lib/string_casting.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from cudf._lib.column cimport Column diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index 1b4f4617e97..fb98f6bc366 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from enum import IntEnum diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 50a47b4f507..2d6517bd60c 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. import numpy as np import pyarrow as pa diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index dd714864976..eed7bba3628 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. 
from typing import Optional import pandas as pd diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index a60189a3002..a5653e66513 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. from functools import cached_property from typing import List, Optional, Sequence, Tuple, Union diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 433616d7d45..ab089ceb103 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. """Base class for Frame types that have an index.""" from __future__ import annotations diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 5305eeab8a6..911e7ac905c 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. 
"""Base class for Frame types that only have a single column.""" from __future__ import annotations From 1b8f7212aba0f2ba3a8b17c0bf2495dd374191bf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 11 Jan 2024 12:55:14 -0800 Subject: [PATCH 5/5] Use single dispatch --- python/cudf/cudf/_lib/column.pyx | 2 +- python/cudf/cudf/_lib/groupby.pyx | 49 ++++++++++++++++++++++-------- python/cudf/cudf/_lib/interop.pyx | 2 +- python/cudf/cudf/_lib/io/utils.pyx | 2 +- python/cudf/cudf/_lib/json.pyx | 2 +- python/cudf/cudf/_lib/orc.pyx | 2 +- python/cudf/cudf/_lib/parquet.pyx | 2 +- python/cudf/cudf/_lib/scalar.pyx | 2 +- python/cudf/cudf/_lib/types.pyx | 2 +- python/cudf/cudf/_lib/utils.pyx | 2 +- 10 files changed, 45 insertions(+), 22 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index bd9755c0914..907f5dd5476 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from typing import Literal diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 37db4312bf6..beb2b92e013 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -1,4 +1,5 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
+from functools import singledispatch from pandas.core.groupby.groupby import DataError @@ -73,21 +74,43 @@ _DECIMAL_AGGS = { ctypedef const scalar constscalar -cdef get_valid_aggregation(object dtype): - if isinstance(dtype, ListDtype): - return _LIST_AGGS - elif is_string_dtype(dtype): +@singledispatch +def get_valid_aggregation(dtype): + if is_string_dtype(dtype): return _STRING_AGGS - elif isinstance(dtype, CategoricalDtype): - return _CATEGORICAL_AGGS - elif isinstance(dtype, StructDtype): - return _STRUCT_AGGS - elif isinstance(dtype, IntervalDtype): - return _INTERVAL_AGGS - elif isinstance(dtype, DecimalDtype): - return _DECIMAL_AGGS return "ALL" + +@get_valid_aggregation.register +def _(dtype: ListDtype): + return _LIST_AGGS + + +@get_valid_aggregation.register +def _(dtype: CategoricalDtype): + return _CATEGORICAL_AGGS + + +@get_valid_aggregation.register +def _(dtype: ListDtype): + return _LIST_AGGS + + +@get_valid_aggregation.register +def _(dtype: StructDtype): + return _STRUCT_AGGS + + +@get_valid_aggregation.register +def _(dtype: IntervalDtype): + return _INTERVAL_AGGS + + +@get_valid_aggregation.register +def _(dtype: DecimalDtype): + return _DECIMAL_AGGS + + cdef _agg_result_from_columns( vector[libcudf_groupby.aggregation_result]& c_result_columns, set column_included, diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 2b78ac353f9..13c8ce43ea3 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from cpython cimport pycapsule from libcpp.memory cimport shared_ptr, unique_ptr diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx index 2ed878edbab..ae978d18813 100644 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ b/python/cudf/cudf/_lib/io/utils.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. 
+# Copyright (c) 2020-2024, NVIDIA CORPORATION. from cpython.buffer cimport PyBUF_READ from cpython.memoryview cimport PyMemoryView_FromMemory diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index 637af921251..c361a3f00c4 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # cython: boundscheck = False diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index 5a0274d42bf..c64296eb7da 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import cudf from cudf.core.buffer import acquire_spill_lock diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 8d0cb2931e5..27efc5e1ecd 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # cython: boundscheck = False diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 6f29521b4f8..37708a4e3ba 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import copy diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx index fb98f6bc366..1b4f4617e97 100644 --- a/python/cudf/cudf/_lib/types.pyx +++ b/python/cudf/cudf/_lib/types.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
from enum import IntEnum diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 2d6517bd60c..50a47b4f507 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. import numpy as np import pyarrow as pa