rapidsai · rapids-bot · Aug 13, 2021 · Aug 4, 2021 · Aug 4, 2021 · Aug 4, 2021
@@ -8,42 +8,44 @@
 
 import rmm
 
+from cudf.api.types import dtype
 from cudf import core, datasets, testing
 from cudf._version import get_versions
 from cudf.api.extensions import (
     register_dataframe_accessor,
     register_index_accessor,
     register_series_accessor,
 )
-from cudf.core import (
+from cudf.core.scalar import (
     NA,
+    Scalar,
+)
+from cudf.core.index import (
     BaseIndex,
     CategoricalIndex,
-    DataFrame,
     DatetimeIndex,
     Float32Index,
     Float64Index,
     Index,
+    GenericIndex,
     Int8Index,
     Int16Index,
     Int32Index,
     Int64Index,
     IntervalIndex,
-    MultiIndex,
     RangeIndex,
     StringIndex,
-    Scalar,
-    Series,
     TimedeltaIndex,
     UInt8Index,
     UInt16Index,
     UInt32Index,
     UInt64Index,
-    cut,
-    from_pandas,
     interval_range,
-    merge,
 )
+from cudf.core.dataframe import DataFrame, from_pandas, merge
+from cudf.core.series import Series
+from cudf.core.multiindex import MultiIndex
+from cudf.core.cut import cut
 from cudf.core.algorithms import factorize
 from cudf.core.dtypes import (
     CategoricalDtype,

@@ -18,44 +18,44 @@
 ALL_POSSIBLE_VALUES = "ALL_POSSIBLE_VALUES"
 
 _PANDAS_TO_AVRO_SCHEMA_MAP = {
-    np.dtype("int8"): "int",
+    cudf.dtype("int8"): "int",
     pd.Int8Dtype(): ["int", "null"],
     pd.Int16Dtype(): ["int", "null"],
     pd.Int32Dtype(): ["int", "null"],
     pd.Int64Dtype(): ["long", "null"],
     pd.BooleanDtype(): ["boolean", "null"],
     pd.StringDtype(): ["string", "null"],
-    np.dtype("bool_"): "boolean",
-    np.dtype("int16"): "int",
-    np.dtype("int32"): "int",
-    np.dtype("int64"): "long",
-    np.dtype("O"): "string",
-    np.dtype("str"): "string",
-    np.dtype("float32"): "float",
-    np.dtype("float64"): "double",
-    np.dtype("<M8[ns]"): {"type": "long", "logicalType": "timestamp-millis"},
-    np.dtype("<M8[ms]"): {"type": "long", "logicalType": "timestamp-millis"},
-    np.dtype("<M8[us]"): {"type": "long", "logicalType": "timestamp-micros"},
+    cudf.dtype("bool_"): "boolean",
+    cudf.dtype("int16"): "int",
+    cudf.dtype("int32"): "int",
+    cudf.dtype("int64"): "long",
+    cudf.dtype("O"): "string",
+    cudf.dtype("str"): "string",
+    cudf.dtype("float32"): "float",
+    cudf.dtype("float64"): "double",
+    cudf.dtype("<M8[ns]"): {"type": "long", "logicalType": "timestamp-millis"},
+    cudf.dtype("<M8[ms]"): {"type": "long", "logicalType": "timestamp-millis"},
+    cudf.dtype("<M8[us]"): {"type": "long", "logicalType": "timestamp-micros"},
 }
 
 PANDAS_TO_ORC_TYPES = {
-    np.dtype("int8"): pyorc.TinyInt(),
+    cudf.dtype("int8"): pyorc.TinyInt(),
     pd.Int8Dtype(): pyorc.TinyInt(),
     pd.Int16Dtype(): pyorc.SmallInt(),
     pd.Int32Dtype(): pyorc.Int(),
     pd.Int64Dtype(): pyorc.BigInt(),
     pd.BooleanDtype(): pyorc.Boolean(),
-    np.dtype("bool_"): pyorc.Boolean(),
-    np.dtype("int16"): pyorc.SmallInt(),
-    np.dtype("int32"): pyorc.Int(),
-    np.dtype("int64"): pyorc.BigInt(),
-    np.dtype("O"): pyorc.String(),
+    cudf.dtype("bool_"): pyorc.Boolean(),
+    cudf.dtype("int16"): pyorc.SmallInt(),
+    cudf.dtype("int32"): pyorc.Int(),
+    cudf.dtype("int64"): pyorc.BigInt(),
+    cudf.dtype("O"): pyorc.String(),
     pd.StringDtype(): pyorc.String(),
-    np.dtype("float32"): pyorc.Float(),
-    np.dtype("float64"): pyorc.Double(),
-    np.dtype("<M8[ns]"): pyorc.Timestamp(),
-    np.dtype("<M8[ms]"): pyorc.Timestamp(),
-    np.dtype("<M8[us]"): pyorc.Timestamp(),
+    cudf.dtype("float32"): pyorc.Float(),
+    cudf.dtype("float64"): pyorc.Double(),
+    cudf.dtype("<M8[ns]"): pyorc.Timestamp(),
+    cudf.dtype("<M8[ms]"): pyorc.Timestamp(),
+    cudf.dtype("<M8[us]"): pyorc.Timestamp(),
 }
 
 ORC_TO_PANDAS_TYPES = {
@@ -64,10 +64,10 @@
     pyorc.Boolean().name: pd.BooleanDtype(),
     pyorc.SmallInt().name: pd.Int16Dtype(),
     pyorc.BigInt().name: pd.Int64Dtype(),
-    pyorc.String().name: np.dtype("O"),
-    pyorc.Float().name: np.dtype("float32"),
-    pyorc.Double().name: np.dtype("float64"),
-    pyorc.Timestamp().name: np.dtype("<M8[ns]"),
+    pyorc.String().name: cudf.dtype("O"),
+    pyorc.Float().name: cudf.dtype("float32"),
+    pyorc.Double().name: cudf.dtype("float64"),
+    pyorc.Timestamp().name: cudf.dtype("<M8[ns]"),
 }
 
 

@@ -15,6 +15,7 @@
     interop,
     join,
     json,
+    labeling,
     merge,
     null_mask,
     nvtext,

@@ -27,6 +27,8 @@ from cudf._lib.types import Interpolation
 cimport cudf._lib.cpp.aggregation as libcudf_aggregation
 cimport cudf._lib.cpp.types as libcudf_types
 
+import cudf
+
 
 class AggregationKind(Enum):
     SUM = libcudf_aggregation.aggregation.Kind.SUM
@@ -277,7 +279,7 @@ cdef class Aggregation:
         nb_type = numpy_support.from_dtype(kwargs['dtype'])
         type_signature = (nb_type[:],)
         compiled_op = cudautils.compile_udf(op, type_signature)
-        output_np_dtype = np.dtype(compiled_op[1])
+        output_np_dtype = cudf.dtype(compiled_op[1])
         cpp_str = compiled_op[0].encode('UTF-8')
         if output_np_dtype not in np_to_cudf_types:
             raise TypeError(
@@ -421,7 +423,7 @@ cdef class RollingAggregation:
         nb_type = numpy_support.from_dtype(kwargs['dtype'])
         type_signature = (nb_type[:],)
         compiled_op = cudautils.compile_udf(op, type_signature)
-        output_np_dtype = np.dtype(compiled_op[1])
+        output_np_dtype = cudf.dtype(compiled_op[1])
         cpp_str = compiled_op[0].encode('UTF-8')
         if output_np_dtype not in np_to_cudf_types:
             raise TypeError(

@@ -28,6 +28,7 @@ from cudf.utils.dtypes import is_scalar, is_string_dtype
 
 cimport cudf._lib.cpp.binaryop as cpp_binaryop
 from cudf._lib.cpp.binaryop cimport binary_operator
+import cudf
 
 
 class BinaryOperation(IntEnum):
@@ -211,7 +212,7 @@ def binaryop_udf(Column lhs, Column rhs, udf_ptx, dtype):
     cdef type_id tid = (
         <type_id> (
             <underlying_type_t_type_id> (
-                np_to_cudf_types[np.dtype(dtype)]
+                np_to_cudf_types[cudf.dtype(dtype)]
             )
         )
     )

@@ -787,12 +787,12 @@ cdef class _CPackedColumns:
         """
         Construct a ``PackedColumns`` object from a ``cudf.DataFrame``.
         """
-        from cudf.core import RangeIndex, dtypes
+        import cudf.core.dtypes
 
         cdef _CPackedColumns p = _CPackedColumns.__new__(_CPackedColumns)
 
         if keep_index and (
-            not isinstance(input_table.index, RangeIndex)
+            not isinstance(input_table.index, cudf.RangeIndex)
             or input_table.index.start != 0
             or input_table.index.stop != len(input_table)
             or input_table.index.step != 1
@@ -805,7 +805,7 @@ cdef class _CPackedColumns:
         p.column_names = input_table._column_names
         p.column_dtypes = {}
         for name, col in input_table._data.items():
-            if isinstance(col.dtype, dtypes._BaseDtype):
+            if isinstance(col.dtype, cudf.core.dtypes._BaseDtype):
                 p.column_dtypes[name] = col.dtype
 
         p.c_obj = move(cpp_copying.pack(input_table_view))

@@ -97,7 +97,7 @@ cpdef read_orc(object filepaths_or_buffers,
             if timestamp_type is None else
             <type_id>(
                 <underlying_type_t_type_id> (
-                    np_to_cudf_types[np.dtype(timestamp_type)]
+                    np_to_cudf_types[cudf.dtype(timestamp_type)]
                 )
             )
         ),

@@ -199,7 +199,7 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
             meta_dtype = cols_dtype_map.get(col, None)
             df._data[col] = cudf.core.column.column_empty(
                 row_count=0,
-                dtype=np.dtype(meta_dtype)
+                dtype=cudf.dtype(meta_dtype)
             )
 
     # Set the index column

@@ -35,6 +35,7 @@ from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
 
 from cudf._lib.interop import from_arrow, to_arrow
 
+cimport cudf._lib.cpp.types as libcudf_types
 from cudf._lib.cpp.scalar.scalar cimport (
     duration_scalar,
     fixed_point_scalar,
@@ -60,9 +61,7 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
 )
 from cudf._lib.utils cimport data_from_table_view
 
-from cudf.utils.dtypes import _decimal_to_int64, is_list_dtype, is_struct_dtype
-
-cimport cudf._lib.cpp.types as libcudf_types
+import cudf
 
 
 cdef class DeviceScalar:
@@ -81,7 +80,7 @@ cdef class DeviceScalar:
         dtype : dtype
             A NumPy dtype.
         """
-        self._dtype = dtype if dtype.kind != 'U' else np.dtype('object')
+        self._dtype = dtype if dtype.kind != 'U' else cudf.dtype('object')
         self._set_value(value, self._dtype)
 
     def _set_value(self, value, dtype):
@@ -120,9 +119,9 @@ cdef class DeviceScalar:
     def _to_host_scalar(self):
         if isinstance(self.dtype, cudf.Decimal64Dtype):
             result = _get_py_decimal_from_fixed_point(self.c_value)
-        elif is_struct_dtype(self.dtype):
+        elif cudf.api.types.is_struct_dtype(self.dtype):
             result = _get_py_dict_from_struct(self.c_value)
-        elif is_list_dtype(self.dtype):
+        elif cudf.api.types.is_list_dtype(self.dtype):
             result = _get_py_list_from_list(self.c_value)
         elif pd.api.types.is_string_dtype(self.dtype):
             result = _get_py_string_from_string(self.c_value)
@@ -309,7 +308,7 @@ cdef _set_decimal64_from_scalar(unique_ptr[scalar]& s,
                                 object value,
                                 object dtype,
                                 bool valid=True):
-    value = _decimal_to_int64(value) if valid else 0
+    value = cudf.utils.dtypes._decimal_to_int64(value) if valid else 0
     s.reset(
         new fixed_point_scalar[decimal64](
             <int64_t>np.int64(value), scale_type(-dtype.scale), valid
@@ -560,7 +559,7 @@ def _is_null_host_scalar(slr):
 def _create_proxy_nat_scalar(dtype):
     cdef DeviceScalar result = DeviceScalar.__new__(DeviceScalar)
 
-    dtype = np.dtype(dtype)
+    dtype = cudf.dtype(dtype)
     if dtype.char in 'mM':
         nat = dtype.type('NaT').astype(dtype)
         if dtype.type == np.datetime64: