From 9510ef6a97102782906423ff5fd132c0e4bb08fd Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani
Date: Mon, 28 Jun 2021 09:24:48 -1000
Subject: [PATCH 01/54] Change default datetime index resolution to ns to
 match pandas (#8611)

The default datetime resolution in cudf has been `ms` since we
introduced datetimes because we did not initially support different
resolutions. Now we can change this to match pandas.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - Michael Wang (https://github.com/isVoid)

URL: https://github.com/rapidsai/cudf/pull/8611
---
 python/cudf/cudf/core/index.py       | 11 ++++++++++-
 python/cudf/cudf/tests/test_index.py |  3 ---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index a307aab93dd..c89718e8f07 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2123,6 +2123,15 @@ def __init__(
         if yearfirst is not False:
             raise NotImplementedError("yearfirst == True is not yet supported")
 
+        valid_dtypes = tuple(
+            f"datetime64[{res}]" for res in ("s", "ms", "us", "ns")
+        )
+        if dtype is None:
+            # nanosecond default matches pandas
+            dtype = "datetime64[ns]"
+        elif dtype not in valid_dtypes:
+            raise TypeError("Invalid dtype")
+
         if copy:
             data = column.as_column(data).copy()
         kwargs = _setdefault_name(data, name=name)
@@ -2131,7 +2140,7 @@ def __init__(
         elif isinstance(data, pd.DatetimeIndex):
             data = column.as_column(data.values)
         elif isinstance(data, (list, tuple)):
-            data = column.as_column(np.array(data, dtype="datetime64[ms]"))
+            data = column.as_column(np.array(data, dtype=dtype))
 
         super().__init__(data, **kwargs)
 
     @property
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index 0d3380343f4..23e04831176 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -317,9 +317,6 @@ def test_index_copy_datetime(name, dtype, deep=True):
     pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
     cidx_copy = cidx.copy(name=name, deep=deep, dtype=dtype)
 
-    # By default, cudf.DatetimeIndex uses [ms] as base unit, pandas uses [ns]
-    if dtype == "int64":
-        cidx_copy = cidx_copy * 1000000
     assert_eq(pidx_copy, cidx_copy)
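A quick sketch of the new default (hedged: the index values are illustrative and
assume a build containing this patch):

```python
import cudf

# The list path above now routes through np.array(data, dtype=dtype),
# so a bare DatetimeIndex defaults to nanosecond resolution like pandas.
idx = cudf.DatetimeIndex(["2021-06-28"])
assert idx.dtype == "datetime64[ns]"

# Coarser resolutions remain opt-in via one of the valid dtypes.
idx_ms = cudf.DatetimeIndex(["2021-06-28"], dtype="datetime64[ms]")
```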
From ea82bf4f93427334f199c2e2cfd4d6e1feb6023b Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Mon, 28 Jun 2021 20:11:18 -0400
Subject: [PATCH 02/54] Propagate **kwargs through to as_*_column methods
 (#8618)

Fixes #8616

Authors:
  - Ashwin Srinath (https://github.com/shwina)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - https://github.com/brandon-b-miller

URL: https://github.com/rapidsai/cudf/pull/8618
---
 python/cudf/cudf/core/column/categorical.py |  6 +++---
 python/cudf/cudf/core/column/column.py      |  8 ++++----
 python/cudf/cudf/core/column/datetime.py    |  4 ++--
 python/cudf/cudf/core/column/decimal.py     |  6 +++---
 python/cudf/cudf/core/column/numerical.py   |  4 ++--
 python/cudf/cudf/core/column/string.py      |  6 ++++--
 python/cudf/cudf/core/column/timedelta.py   |  4 ++--
 python/cudf/cudf/tests/test_dataframe.py    | 11 +++++++++++
 8 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index e2aa20cc948..135fb6e6f30 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -32,10 +32,10 @@ from cudf.core.dtypes import CategoricalDtype
 from cudf.utils.dtypes import (
     is_categorical_dtype,
+    is_interval_dtype,
     is_mixed_with_object_dtype,
     min_signed_type,
     min_unsigned_type,
-    is_interval_dtype,
 )
 
 if TYPE_CHECKING:
@@ -1388,10 +1388,10 @@ def as_categorical_column(
             new_categories=dtype.categories, ordered=dtype.ordered
         )
 
-    def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
+    def as_numerical_column(self, dtype: Dtype, **kwargs) -> NumericalColumn:
         return self._get_decategorized_column().as_numerical_column(dtype)
 
-    def as_string_column(self, dtype, format=None) -> StringColumn:
+    def as_string_column(self, dtype, format=None, **kwargs) -> StringColumn:
         return self._get_decategorized_column().as_string_column(
             dtype, format=format
         )
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 79d97a3dbe1..50367651146 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -877,7 +877,7 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool:
 
     def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
         if _is_non_decimal_numeric_dtype(dtype):
-            return self.as_numerical_column(dtype)
+            return self.as_numerical_column(dtype, **kwargs)
         elif is_categorical_dtype(dtype):
             return self.as_categorical_column(dtype, **kwargs)
         elif pandas_dtype(dtype).type in {
@@ -901,7 +901,7 @@ def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
         elif np.issubdtype(dtype, np.timedelta64):
             return self.as_timedelta_column(dtype, **kwargs)
         else:
-            return self.as_numerical_column(dtype)
+            return self.as_numerical_column(dtype, **kwargs)
 
     def as_categorical_column(self, dtype, **kwargs) -> ColumnBase:
         if "ordered" in kwargs:
@@ -947,7 +947,7 @@ def as_categorical_column(self, dtype, **kwargs) -> ColumnBase:
         )
 
     def as_numerical_column(
-        self, dtype: Dtype
+        self, dtype: Dtype, **kwargs
     ) -> "cudf.core.column.NumericalColumn":
         raise NotImplementedError
 
@@ -967,7 +967,7 @@ def as_timedelta_column(
         raise NotImplementedError
 
     def as_string_column(
-        self, dtype: Dtype, format=None
+        self, dtype: Dtype, format=None, **kwargs
     ) -> "cudf.core.column.StringColumn":
         raise NotImplementedError
 
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index b96a49c2514..150ce2c48ec 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -241,14 +241,14 @@ def as_timedelta_column(
         )
 
     def as_numerical_column(
-        self, dtype: Dtype
+        self, dtype: Dtype, **kwargs
     ) -> "cudf.core.column.NumericalColumn":
         return cast(
             "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype)
         )
 
     def as_string_column(
-        self, dtype: Dtype, format=None
+        self, dtype: Dtype, format=None, **kwargs
     ) -> "cudf.core.column.StringColumn":
         if format is None:
             format = _dtype_to_format_conversion.get(
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index b6bd2f18144..2f0ddb78987 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -21,8 +21,8 @@ from cudf.utils.dtypes import is_scalar
 from cudf.utils.utils import pa_mask_buffer_to_mask
 
-from .numerical_base import NumericalBaseColumn
 from ...api.types import is_integer_dtype
+from .numerical_base import NumericalBaseColumn
 
 
 class DecimalColumn(NumericalBaseColumn):
@@ -161,12 +161,12 @@ def as_decimal_column(
         return libcudf.unary.cast(self, dtype)
 
     def as_numerical_column(
-        self, dtype: Dtype
+        self, dtype: Dtype, **kwargs
     ) -> "cudf.core.column.NumericalColumn":
         return libcudf.unary.cast(self, dtype)
 
     def as_string_column(
-        self, dtype: Dtype, format=None
+        self, dtype: Dtype, format=None, **kwargs
     ) -> "cudf.core.column.StringColumn":
         if len(self) > 0:
             return cpp_from_decimal(self)
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 17e0b6e454f..64a0780e9f9 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -208,7 +208,7 @@ def int2ip(self) -> "cudf.core.column.StringColumn":
         return libcudf.string_casting.int2ip(self)
 
     def as_string_column(
-        self, dtype: Dtype, format=None
+        self, dtype: Dtype, format=None, **kwargs
     ) -> "cudf.core.column.StringColumn":
         if len(self) > 0:
             return string._numeric_to_str_typecast_functions[
@@ -252,7 +252,7 @@ def as_decimal_column(
     ) -> "cudf.core.column.DecimalColumn":
         return libcudf.unary.cast(self, dtype)
 
-    def as_numerical_column(self, dtype: Dtype) -> NumericalColumn:
+    def as_numerical_column(self, dtype: Dtype, **kwargs) -> NumericalColumn:
         dtype = np.dtype(dtype)
         if dtype == self.dtype:
             return self
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index dd1c0c1e4ac..c1d98ac5600 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5118,7 +5118,7 @@ def str(self, parent: ParentType = None) -> StringMethods:
         return StringMethods(self, parent=parent)
 
     def as_numerical_column(
-        self, dtype: Dtype
+        self, dtype: Dtype, **kwargs
     ) -> "cudf.core.column.NumericalColumn":
         out_dtype = np.dtype(dtype)
 
@@ -5195,7 +5195,9 @@ def as_decimal_column(
     ) -> "cudf.core.column.DecimalColumn":
         return cpp_to_decimal(self, dtype)
 
-    def as_string_column(self, dtype: Dtype, format=None) -> StringColumn:
+    def as_string_column(
+        self, dtype: Dtype, format=None, **kwargs
+    ) -> StringColumn:
         return self
 
     @property
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index b202838662c..a27c20cc50c 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -322,7 +322,7 @@ def fillna(
         return super().fillna(method=method)
 
     def as_numerical_column(
-        self, dtype: Dtype
+        self, dtype: Dtype, **kwargs
     ) -> "cudf.core.column.NumericalColumn":
         return cast(
             "cudf.core.column.NumericalColumn", self.as_numerical.astype(dtype)
         )
@@ -336,7 +336,7 @@ def as_datetime_column(
         )
 
     def as_string_column(
-        self, dtype: Dtype, format=None
+        self, dtype: Dtype, format=None, **kwargs
     ) -> "cudf.core.column.StringColumn":
         if format is None:
             format = _dtype_to_format_conversion.get(
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 853cfe2e88e..a89b9b58e6e 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -736,6 +736,17 @@ def test_dataframe_astype(nelem):
     np.testing.assert_equal(df["a"].to_array(), df["b"].to_array())
 
 
+def test_astype_dict():
+    gdf = cudf.DataFrame({"a": [1, 2, 3], "b": ["1", "2", "3"]})
+    pdf = gdf.to_pandas()
+
+    assert_eq(pdf.astype({"a": "str"}), gdf.astype({"a": "str"}))
+    assert_eq(
+        pdf.astype({"a": "str", "b": np.int64}),
+        gdf.astype({"a": "str", "b": np.int64}),
+    )
+
+
 @pytest.mark.parametrize("nelem", [0, 100])
 def test_index_astype(nelem):
     df = cudf.DataFrame()
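The propagated `**kwargs` are what let the per-column casts in the new
`test_astype_dict` above flow all the way down to the `as_*_column` methods; a
minimal sketch of that usage (assumes a build containing this patch):

```python
import numpy as np
import cudf

gdf = cudf.DataFrame({"a": [1, 2, 3], "b": ["1", "2", "3"]})

# dict-based astype dispatches a different target dtype per column
out = gdf.astype({"a": "str", "b": np.int64})
assert out["b"].dtype == np.int64
```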
From 5ff663a38ad48bf9e0be1b66b0d631237b45468d Mon Sep 17 00:00:00 2001
From: shaneding
Date: Mon, 28 Jun 2021 20:38:03 -0400
Subject: [PATCH 03/54] Python changes for adding `__getitem__` for `struct`
 (#8577)

Implements the necessary `__getitem__` changes for the Python side.
Partly addresses #8558, merge after #8578.

Authors:
  - https://github.com/shaneding

Approvers:
  - https://github.com/brandon-b-miller
  - Ashwin Srinath (https://github.com/shwina)

URL: https://github.com/rapidsai/cudf/pull/8577
---
 python/cudf/cudf/_lib/cpp/scalar/scalar.pxd |  5 +++-
 python/cudf/cudf/_lib/scalar.pyx            | 32 +++++++++++++++++++--
 python/cudf/cudf/core/column/struct.py      |  9 ++++++
 python/cudf/cudf/core/indexing.py           |  2 +-
 python/cudf/cudf/tests/test_struct.py       | 23 +++++++++++++++
 5 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd
index 72dcd654232..d82bf3cde5f 100644
--- a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd
+++ b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd
@@ -10,7 +10,7 @@ from cudf._lib.cpp.types cimport data_type
 from cudf._lib.cpp.wrappers.decimals cimport scale_type
 
 from cudf._lib.cpp.column.column_view cimport column_view
-
+from cudf._lib.cpp.table.table_view cimport table_view
 
 cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil:
     cdef cppclass scalar:
@@ -67,3 +67,6 @@ cdef cppclass list_scalar(scalar):
     cdef cppclass list_scalar(scalar):
         list_scalar(column_view col) except +
         column_view view() except +
+
+    cdef cppclass struct_scalar(scalar):
+        table_view view() except +
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index c771afe568a..2759cc2999f 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -18,7 +18,7 @@ from libcpp.utility cimport move
 from libcpp cimport bool
 
 import cudf
-from cudf.core.dtypes import ListDtype
+from cudf.core.dtypes import ListDtype, StructDtype
 from cudf._lib.types import (
     cudf_to_np_types,
     duration_unit_map
@@ -28,6 +28,7 @@ from cudf._lib.types cimport underlying_type_t_type_id, dtype_from_column_view
 
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column_view cimport column_view
+from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.table cimport Table
 from cudf._lib.interop import to_arrow
 
@@ -52,8 +53,9 @@ from cudf._lib.cpp.scalar.scalar cimport (
     string_scalar,
     fixed_point_scalar,
     list_scalar,
+    struct_scalar
 )
-from cudf.utils.dtypes import _decimal_to_int64, is_list_dtype
+from cudf.utils.dtypes import _decimal_to_int64, is_list_dtype, is_struct_dtype
 cimport cudf._lib.cpp.types as libcudf_types
 
 cdef class DeviceScalar:
@@ -109,6 +111,8 @@ cdef class DeviceScalar:
     def _to_host_scalar(self):
         if isinstance(self.dtype, cudf.Decimal64Dtype):
             result = _get_py_decimal_from_fixed_point(self.c_value)
+        elif is_struct_dtype(self.dtype):
+            result = _get_py_dict_from_struct(self.c_value)
        elif is_list_dtype(self.dtype):
             result = _get_py_list_from_list(self.c_value)
         elif pd.api.types.is_string_dtype(self.dtype):
@@ -173,6 +177,12 @@ cdef class DeviceScalar:
             raise TypeError(
                 "Must pass a dtype when constructing from a fixed-point scalar"
             )
+        elif cdtype.id() == libcudf_types.STRUCT:
+            struct_table_view = (<struct_scalar*>s.get_raw_ptr())[0].view()
+            s._dtype = StructDtype({
+                str(i): dtype_from_column_view(struct_table_view.column(i))
+                for i in range(struct_table_view.num_columns())
+            })
         elif cdtype.id() == libcudf_types.LIST:
             if (
                 <list_scalar*>s.get_raw_ptr()
@@ -298,6 +308,23 @@ cdef _set_decimal64_from_scalar(unique_ptr[scalar]& s,
         )
     )
 
+cdef _get_py_dict_from_struct(unique_ptr[scalar]& s):
    if not s.get()[0].is_valid():
+        return cudf.NA
+
+    cdef table_view struct_table_view = (<struct_scalar*>s.get()).view()
+    columns = [str(i) for i in range(struct_table_view.num_columns())]
+
+    cdef Table to_arrow_table = Table.from_table_view(
+        struct_table_view,
+        None,
+        column_names=columns
+    )
+
+    python_dict = to_arrow(to_arrow_table, columns).to_pydict()
+
+    return {k: _nested_na_replace(python_dict[k])[0] for k in python_dict}
+
 cdef _set_list_from_pylist(unique_ptr[scalar]& s,
                            object value,
                            object dtype,
@@ -333,6 +360,7 @@ cdef _get_py_list_from_list(unique_ptr[scalar]& s):
     result = arrow_table['col'].to_pylist()
     return _nested_na_replace(result)
 
+
 cdef _get_py_string_from_string(unique_ptr[scalar]& s):
     if not s.get()[0].is_valid():
         return cudf.NA
diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py
index 03b1100c385..85c8293a91e 100644
--- a/python/cudf/cudf/core/column/struct.py
+++ b/python/cudf/cudf/core/column/struct.py
@@ -80,6 +80,15 @@ def to_arrow(self):
             pa_type, len(self), buffers, children=children
         )
 
+    def __getitem__(self, args):
+        result = super().__getitem__(args)
+        if isinstance(result, dict):
+            return {
+                field: value
+                for field, value in zip(self.dtype.fields, result.values())
+            }
+        return result
+
     def copy(self, deep=True):
         result = super().copy(deep=deep)
         if deep:
diff --git a/python/cudf/cudf/core/indexing.py b/python/cudf/cudf/core/indexing.py
index e6359efacd6..6711171612a 100755
--- a/python/cudf/cudf/core/indexing.py
+++ b/python/cudf/cudf/core/indexing.py
@@ -93,7 +93,7 @@ def __getitem__(self, arg):
         data = self._sr._column[arg]
 
         if (
-            isinstance(data, list)
+            isinstance(data, (dict, list))
             or _is_scalar_or_zero_d_array(data)
             or _is_null_host_scalar(data)
         ):
diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py
index 938d99ff48a..21542a6c415 100644
--- a/python/cudf/cudf/tests/test_struct.py
+++ b/python/cudf/cudf/tests/test_struct.py
@@ -75,3 +75,26 @@ def test_serialize_struct_dtype(fields):
     dtype = cudf.StructDtype(fields)
     recreated = dtype.__class__.deserialize(*dtype.serialize())
     assert recreated == dtype
+
+
+@pytest.mark.parametrize(
+    "series, expected",
+    [
+        (
+            [
+                {"a": "Hello world", "b": []},
+                {"a": "CUDF", "b": [1, 2, 3], "c": 1},
+                {},
+            ],
+            {"a": "Hello world", "b": [], "c": cudf.NA},
+        ),
+        ([{}], {}),
+        (
+            [{"b": True}, {"a": 1, "c": [1, 2, 3], "d": "1", "b": False}],
+            {"a": cudf.NA, "c": cudf.NA, "d": cudf.NA, "b": True},
+        ),
+    ],
+)
+def test_struct_getitem(series, expected):
+    sr = cudf.Series(series)
+    assert sr[0] == expected
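A condensed sketch of the new scalar access path that `test_struct_getitem`
above exercises (assumes a build containing this patch):

```python
import cudf

sr = cudf.Series(
    [{"a": "Hello world", "b": []}, {"a": "CUDF", "b": [1, 2, 3], "c": 1}, {}]
)

# struct rows now come back as host dicts, with cudf.NA filling in
# any fields that are missing from the selected row
assert sr[0] == {"a": "Hello world", "b": [], "c": cudf.NA}
```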
From e6a0fe31216e9435d920360e5b6049cf9a92e8e9 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 29 Jun 2021 08:32:40 -0400
Subject: [PATCH 04/54] Remove unneeded includes from cudf::string_view
 headers (#8594)

This PR removes some unnecessary include statements from
`string_view.hpp` and `string_view.cuh`. It also includes an `#ifdef`
to help re-use this class in the https://github.com/rapidsai/strings_udf
repo, to avoid maintaining a separate string-view class that can be
NVRTC compiled with https://github.com/NVIDIA/jitify.

Also removing the `VARIABLE_CHAR_WIDTH` constant, which is no longer
needed and should've been removed in a previous PR.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Devavret Makkar (https://github.com/devavret)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: https://github.com/rapidsai/cudf/pull/8594
---
 cpp/include/cudf/strings/string_view.cuh | 16 ++++++++++++++--
 cpp/include/cudf/strings/string_view.hpp |  9 +--------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh
index f5ab2046441..e3f284cdbf3 100644
--- a/cpp/include/cudf/strings/string_view.cuh
+++ b/cpp/include/cudf/strings/string_view.cuh
@@ -17,11 +17,16 @@
 #pragma once
 
 #include
+
+#ifndef __CUDA_ARCH__
 #include
+#endif
 
+// This is defined when including this header in a https://github.com/NVIDIA/jitify
+// or jitify2 source file. The jitify cannot include thrust headers at this time.
+#ifndef CUDF_JIT_UDF
 #include
-#include
-#include
+#endif
 
 // This file should only include device code logic.
 // Host-only or host/device code should be defined in the string_view.hpp header file.
@@ -41,8 +46,15 @@ __device__ inline size_type characters_in_string(const char* str, size_type byte
 {
   if ((str == 0) || (bytes == 0)) return 0;
   auto ptr = reinterpret_cast<uint8_t const*>(str);
+#ifndef CUDF_JIT_UDF
   return thrust::count_if(
     thrust::seq, ptr, ptr + bytes, [](uint8_t chr) { return is_begin_utf8_char(chr); });
+#else
+  size_type chars = 0;
+  auto const end = ptr + bytes;
+  while (ptr < end) { chars += is_begin_utf8_char(*ptr++); }
+  return chars;
+#endif
 }
 
 /**
diff --git a/cpp/include/cudf/strings/string_view.hpp b/cpp/include/cudf/strings/string_view.hpp
index 4b1a901d72f..5a3dbd5c1bc 100644
--- a/cpp/include/cudf/strings/string_view.hpp
+++ b/cpp/include/cudf/strings/string_view.hpp
@@ -15,9 +15,8 @@
  */
 #pragma once
 
-#include
-#include
 #include
+
 #include
 
 /**
@@ -36,12 +35,6 @@ using char_utf8 = uint32_t;  ///< UTF-8 characters are 1-4 bytes
  */
 constexpr cudf::size_type UNKNOWN_STRING_LENGTH{-1};
 
-/**
- * @brief This value is assigned to the _char_width member if the string
- * contains characters of different widths.
- */
-constexpr int8_t VARIABLE_CHAR_WIDTH{0};
-
 /**
 * @brief A non-owning, immutable view of device data that is a variable length
 * char array representing a UTF-8 string.
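A hedged sketch of how the new guard is intended to be used: the source below
would be compiled through jitify/NVRTC with `CUDF_JIT_UDF` defined, so the
thrust include and `thrust::count_if` path in `string_view.cuh` are skipped in
favor of the serial fallback (the macro name comes from the diff above; the
kernel itself is illustrative only, not part of this patch):

```cpp
// Illustrative jitify source: CUDF_JIT_UDF is assumed to be passed on the
// NVRTC command line (e.g. -DCUDF_JIT_UDF), selecting the thrust-free path.
#include <cudf/strings/string_view.cuh>

__global__ void count_chars_kernel(char const* str,
                                   cudf::size_type bytes,
                                   cudf::size_type* out)
{
  // under CUDF_JIT_UDF this uses the serial while-loop fallback added above
  *out = cudf::strings::detail::characters_in_string(str, bytes);
}
```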
From 2d9fd5fc38b32329bcc07046f4c522de88ec3b6d Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com>
Date: Tue, 29 Jun 2021 09:02:17 -0700
Subject: [PATCH 05/54] Expose a Decimal32Dtype in cuDF Python (#8438)

Fixes: #8218

Similarly to libcudf's 64-bit decimal type, this PR exposes the
`Decimal32Dtype` and its corresponding `Decimal32Column` type. Following
this implementation, users can create a series or dataframe with the
`decimal32` dtype.

Note: Only `to_arrow` and `from_arrow` methods are currently supported.

**Example:**
```
>>> import cudf
>>> s = cudf.Series([1,2,3,4], dtype=cudf.Decimal32Dtype(precision=8, scale=2))
>>> s
0    1.00
1    2.00
2    3.00
3    4.00
dtype: decimal32
```

Authors:
  - Sheilah Kirui (https://github.com/skirui-source)

Approvers:
  - Michael Wang (https://github.com/isVoid)

URL: https://github.com/rapidsai/cudf/pull/8438
---
 python/cudf/cudf/__init__.py                  |   1 +
 .../strings/convert/convert_fixed_point.pyx   |   4 +-
 python/cudf/cudf/_lib/types.pyx               |  40 +++--
 python/cudf/cudf/api/types.py                 |  10 +-
 python/cudf/cudf/core/column/__init__.py      |   5 +-
 python/cudf/cudf/core/column/column.py        |  68 +++++++--
 python/cudf/cudf/core/column/decimal.py       |  78 ++++++++--
 python/cudf/cudf/core/column/numerical.py     |   6 +-
 python/cudf/cudf/core/column/string.py        |   2 +-
 python/cudf/cudf/core/dtypes.py               | 144 ++++++++++++++++--
 python/cudf/cudf/core/frame.py                |   2 +-
 python/cudf/cudf/core/series.py               |   2 +-
 python/cudf/cudf/tests/test_decimal.py        |  80 +++++-----
 python/cudf/cudf/utils/dtypes.py              |  12 +-
 14 files changed, 360 insertions(+), 94 deletions(-)

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index b26d95e7951..84e612c1cbe 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -47,6 +47,7 @@ from cudf.core.dtypes import (
     CategoricalDtype,
     Decimal64Dtype,
+    Decimal32Dtype,
     IntervalDtype,
     ListDtype,
     StructDtype,
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
index 38d238b8266..e002d630fc3 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
@@ -28,7 +28,7 @@ from libcpp.string cimport string
 
 def from_decimal(Column input_col):
     """
-    Converts a `DecimalColumn` to a `StringColumn`.
+    Converts a `Decimal64Column` to a `StringColumn`.
 
     Parameters
     ----------
@@ -50,7 +50,7 @@ def to_decimal(Column input_col, object out_type):
     """
-    Returns a `DecimalColumn` from the provided `StringColumn`
+    Returns a `Decimal64Column` from the provided `StringColumn`
     using the scale in the `out_type`.
     Parameters
diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
index e9ed4f21ddd..43e5c213947 100644
--- a/python/cudf/cudf/_lib/types.pyx
+++ b/python/cudf/cudf/_lib/types.pyx
@@ -14,9 +14,19 @@ from cudf._lib.types cimport (
 )
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
-from cudf.core.dtypes import ListDtype, StructDtype, Decimal64Dtype
-from cudf.utils.dtypes import is_decimal_dtype, is_list_dtype, is_struct_dtype
-
+from cudf.core.dtypes import (
+    ListDtype,
+    StructDtype,
+    Decimal64Dtype,
+    Decimal32Dtype
+)
+from cudf.utils.dtypes import (
+    is_decimal_dtype,
+    is_list_dtype,
+    is_struct_dtype,
+    is_decimal64_dtype,
+    is_decimal32_dtype
+)
 cimport cudf._lib.cpp.types as libcudf_types
 
@@ -191,10 +201,6 @@ cdef dtype_from_structs_column_view(column_view cv):
     }
     return StructDtype(fields)
 
-cdef dtype_from_decimal_column_view(column_view cv):
-    scale = -cv.type().scale()
-    return Decimal64Dtype(precision=Decimal64Dtype.MAX_PRECISION, scale=scale)
-
 cdef dtype_from_column_view(column_view cv):
     cdef libcudf_types.type_id tid = cv.type().id()
     if tid == libcudf_types.type_id.LIST:
@@ -202,10 +208,15 @@ cdef dtype_from_column_view(column_view cv):
     elif tid == libcudf_types.type_id.STRUCT:
         return dtype_from_structs_column_view(cv)
     elif tid == libcudf_types.type_id.DECIMAL64:
-        return dtype_from_decimal_column_view(cv)
+        return Decimal64Dtype(
+            precision=Decimal64Dtype.MAX_PRECISION,
+            scale=-cv.type().scale()
+        )
     elif tid == libcudf_types.type_id.DECIMAL32:
-        raise NotImplementedError("decimal32 types are not supported yet. "
-                                  "Use decimal64 instead")
+        return Decimal32Dtype(
+            precision=Decimal32Dtype.MAX_PRECISION,
+            scale=-cv.type().scale()
+        )
     else:
         return cudf_to_np_types[<underlying_type_t_type_id>(tid)]
 
 cdef libcudf_types.data_type dtype_to_data_type(dtype) except *:
     if is_list_dtype(dtype):
         tid = libcudf_types.type_id.LIST
     elif is_struct_dtype(dtype):
         tid = libcudf_types.type_id.STRUCT
-    elif is_decimal_dtype(dtype):
+    elif is_decimal64_dtype(dtype):
         tid = libcudf_types.type_id.DECIMAL64
+    elif is_decimal32_dtype(dtype):
+        tid = libcudf_types.type_id.DECIMAL32
     else:
         tid = (
            <libcudf_types.type_id> (
                <underlying_type_t_type_id> np_to_cudf_types[np.dtype(dtype)]))
 
-    if tid == libcudf_types.type_id.DECIMAL64:
+    if tid in (
+        libcudf_types.type_id.DECIMAL64,
+        libcudf_types.type_id.DECIMAL32
+    ):
         return libcudf_types.data_type(tid, -dtype.scale)
     else:
         return libcudf_types.data_type(tid)
diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index a985efeca51..56398bd4f13 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -15,9 +15,11 @@
 import cudf
 from cudf._lib.scalar import DeviceScalar
-from cudf.core.dtypes import (
+from cudf.core.dtypes import (  # noqa: F401
     _BaseDtype,
     is_categorical_dtype,
+    is_decimal32_dtype,
+    is_decimal64_dtype,
     is_decimal_dtype,
     is_interval_dtype,
     is_list_dtype,
@@ -39,11 +41,15 @@ def is_numeric_dtype(obj):
        Whether or not the array or dtype is of a numeric dtype.
""" if isclass(obj): - if issubclass(obj, cudf.Decimal64Dtype): + if issubclass(obj, (cudf.Decimal32Dtype, cudf.Decimal64Dtype)): return True if issubclass(obj, _BaseDtype): return False else: + if isinstance(obj, cudf.Decimal32Dtype) or isinstance( + getattr(obj, "dtype", None), cudf.Decimal32Dtype + ): + return True if isinstance(obj, cudf.Decimal64Dtype) or isinstance( getattr(obj, "dtype", None), cudf.Decimal64Dtype ): diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py index 76d38e00790..18d48e16480 100644 --- a/python/cudf/cudf/core/column/__init__.py +++ b/python/cudf/cudf/core/column/__init__.py @@ -23,4 +23,7 @@ from cudf.core.column.struct import StructColumn # noqa: F401 from cudf.core.column.timedelta import TimeDeltaColumn # noqa: F401 from cudf.core.column.interval import IntervalColumn # noqa: F401 -from cudf.core.column.decimal import DecimalColumn # noqa: F401 +from cudf.core.column.decimal import ( # noqa: F401 + Decimal32Column, + Decimal64Column, +) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 50367651146..111b96c6da7 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -47,28 +47,30 @@ ) from cudf.utils import ioutils, utils from cudf.utils.dtypes import ( - _is_non_decimal_numeric_dtype, - _is_scalar_or_zero_d_array, check_cast_unsupported_dtype, cudf_dtype_from_pa_type, get_time_unit, - is_categorical_dtype, - is_decimal_dtype, - is_interval_dtype, - is_list_dtype, - is_scalar, - is_string_dtype, - is_struct_dtype, min_unsigned_type, np_to_pa_dtype, ) from cudf.utils.utils import mask_dtype from ...api.types import ( + _is_non_decimal_numeric_dtype, + _is_scalar_or_zero_d_array, infer_dtype, is_bool_dtype, + is_categorical_dtype, + is_decimal32_dtype, + is_decimal64_dtype, + is_decimal_dtype, is_dtype_equal, is_integer_dtype, + is_interval_dtype, + is_list_dtype, + is_scalar, + is_string_dtype, + is_struct_dtype, pandas_dtype, ) @@ -279,7 +281,7 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase: ): return cudf.core.column.IntervalColumn.from_arrow(array) elif isinstance(array.type, pa.Decimal128Type): - return cudf.core.column.DecimalColumn.from_arrow(array) + return cudf.core.column.Decimal64Column.from_arrow(array) result = libcudf.interop.from_arrow(data, data.column_names)._data[ "None" @@ -973,7 +975,19 @@ def as_string_column( def as_decimal_column( self, dtype: Dtype, **kwargs - ) -> "cudf.core.column.DecimalColumn": + ) -> Union[ + "cudf.core.column.Decimal32Column", "cudf.core.column.Decimal64Column" + ]: + raise NotImplementedError + + def as_decimal64_column( + self, dtype: Dtype, **kwargs + ) -> "cudf.core.column.Decimal64Column": + raise NotImplementedError + + def as_decimal32_column( + self, dtype: Dtype, **kwargs + ) -> "cudf.core.column.Decimal32Column": raise NotImplementedError def apply_boolean_mask(self, mask) -> ColumnBase: @@ -1468,10 +1482,22 @@ def build_column( null_count=null_count, children=children, ) - elif is_decimal_dtype(dtype): + elif is_decimal64_dtype(dtype): if size is None: raise TypeError("Must specify size") - return cudf.core.column.DecimalColumn( + return cudf.core.column.Decimal64Column( + data=data, + size=size, + offset=offset, + dtype=dtype, + mask=mask, + null_count=null_count, + children=children, + ) + elif is_decimal32_dtype(dtype): + if size is None: + raise TypeError("Must specify size") + return cudf.core.column.Decimal32Column( data=data, size=size, offset=offset, @@ 
@@ -2020,8 +2046,20 @@ def as_column(
                         precision=dtype.precision, scale=dtype.scale
                     ),
                 )
-                return cudf.core.column.DecimalColumn.from_arrow(data)
-            dtype = pandas_dtype(dtype)
+                return cudf.core.column.Decimal64Column.from_arrow(
+                    data
+                )
+            if isinstance(dtype, cudf.core.dtypes.Decimal32Dtype):
+                data = pa.array(
+                    arbitrary,
+                    type=pa.decimal128(
+                        precision=dtype.precision, scale=dtype.scale
+                    ),
+                )
+                return cudf.core.column.Decimal32Column.from_arrow(
+                    data
+                )
+            dtype = pd.api.types.pandas_dtype(dtype)
             if is_categorical_dtype(dtype) or is_interval_dtype(dtype):
                 raise TypeError
             else:
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 2f0ddb78987..acb8c02a220 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -16,8 +16,8 @@
 )
 from cudf._typing import Dtype
 from cudf.core.buffer import Buffer
-from cudf.core.column import ColumnBase, NumericalColumn, as_column
-from cudf.core.dtypes import Decimal64Dtype
+from cudf.core.column import ColumnBase, as_column
+from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype
 from cudf.utils.dtypes import is_scalar
 from cudf.utils.utils import pa_mask_buffer_to_mask
 
@@ -25,7 +25,60 @@
 from .numerical_base import NumericalBaseColumn
 
 
-class DecimalColumn(NumericalBaseColumn):
+class Decimal32Column(NumericalBaseColumn):
+    dtype: Decimal32Dtype
+
+    @classmethod
+    def from_arrow(cls, data: pa.Array):
+        dtype = Decimal32Dtype.from_arrow(data.type)
+        mask_buf = data.buffers()[0]
+        mask = (
+            mask_buf
+            if mask_buf is None
+            else pa_mask_buffer_to_mask(mask_buf, len(data))
+        )
+        data_128 = cp.array(np.frombuffer(data.buffers()[1]).view("int32"))
+        data_32 = data_128[::4].copy()
+        return cls(
+            data=Buffer(data_32.view("uint8")),
+            size=len(data),
+            dtype=dtype,
+            offset=data.offset,
+            mask=mask,
+        )
+
+    def to_arrow(self):
+        data_buf_32 = self.base_data.to_host_array().view("int32")
+        data_buf_128 = np.empty(len(data_buf_32) * 4, dtype="int32")
+
+        # use striding to set the first 32 bits of each 128-bit chunk:
+        data_buf_128[::4] = data_buf_32
+        # use striding again to set the remaining bits of each 128-bit chunk:
+        # 0 for non-negative values, -1 for negative values:
+        data_buf_128[1::4] = np.piecewise(
+            data_buf_32, [data_buf_32 < 0], [-1, 0]
+        )
+        data_buf_128[2::4] = np.piecewise(
+            data_buf_32, [data_buf_32 < 0], [-1, 0]
+        )
+        data_buf_128[3::4] = np.piecewise(
+            data_buf_32, [data_buf_32 < 0], [-1, 0]
+        )
+        data_buf = pa.py_buffer(data_buf_128)
+        mask_buf = (
+            self.base_mask
+            if self.base_mask is None
+            else pa.py_buffer(self.base_mask.to_host_array())
+        )
+        return pa.Array.from_buffers(
+            type=self.dtype.to_arrow(),
+            offset=self._offset,
+            length=self.size,
+            buffers=[mask_buf, data_buf],
+        )
+
+
+class Decimal64Column(NumericalBaseColumn):
     dtype: Decimal64Dtype
 
     def __truediv__(self, other):
@@ -61,6 +114,7 @@ def from_arrow(cls, data: pa.Array):
     def to_arrow(self):
         data_buf_64 = self.base_data.to_host_array().view("int64")
         data_buf_128 = np.empty(len(data_buf_64) * 2, dtype="int64")
+
         # use striding to set the first 64 bits of each 128-bit chunk:
         data_buf_128[::2] = data_buf_64
         # use striding again to set the remaining bits of each 128-bit chunk:
@@ -99,7 +153,11 @@ def binary_operator(self, op, other, reflect=False):
         elif op in ("eq", "ne", "lt", "gt", "le", "ge"):
             if not isinstance(
                 other,
-                (DecimalColumn, cudf.core.column.NumericalColumn, cudf.Scalar),
+                (
+                    Decimal64Column,
+                    cudf.core.column.NumericalColumn,
+                    cudf.Scalar,
+                ),
             ):
                raise TypeError(
                     f"Operator {op} not supported between"
@@ -146,7 +204,9 @@ def _decimal_quantile(
 
     def as_decimal_column(
         self, dtype: Dtype, **kwargs
-    ) -> "cudf.core.column.DecimalColumn":
+    ) -> Union[
+        "cudf.core.column.Decimal32Column", "cudf.core.column.Decimal64Column"
+    ]:
         if (
             isinstance(dtype, Decimal64Dtype)
             and dtype.scale < self.dtype.scale
@@ -185,8 +245,8 @@ def fillna(
         if isinstance(value, (int, Decimal)):
             value = cudf.Scalar(value, dtype=self.dtype)
         elif (
-            isinstance(value, DecimalColumn)
-            or isinstance(value, NumericalColumn)
+            isinstance(value, Decimal64Column)
+            or isinstance(value, cudf.core.column.NumericalColumn)
             and is_integer_dtype(value.dtype)
         ):
             value = value.astype(self.dtype)
@@ -220,8 +280,8 @@ def __cuda_array_interface__(self):
         )
 
     def _with_type_metadata(
-        self: "cudf.core.column.DecimalColumn", dtype: Dtype
-    ) -> "cudf.core.column.DecimalColumn":
+        self: "cudf.core.column.Decimal64Column", dtype: Dtype
+    ) -> "cudf.core.column.Decimal64Column":
         if isinstance(dtype, Decimal64Dtype):
             self.dtype.precision = dtype.precision
 
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 17e0b6e454f..cee9b693bdf 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -139,14 +139,14 @@ def binary_operator(
                 (
                     NumericalColumn,
                     cudf.Scalar,
-                    cudf.core.column.DecimalColumn,
+                    cudf.core.column.Decimal64Column,
                 ),
             )
             or np.isscalar(rhs)
         ):
             msg = "{!r} operator not supported between {} and {}"
             raise TypeError(msg.format(binop, type(self), type(rhs)))
-        if isinstance(rhs, cudf.core.column.DecimalColumn):
+        if isinstance(rhs, cudf.core.column.Decimal64Column):
             lhs: Union[ScalarLike, ColumnBase] = self.as_decimal_column(
                 Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0)
             )
@@ -249,7 +249,7 @@ def as_timedelta_column(
 
     def as_decimal_column(
         self, dtype: Dtype, **kwargs
-    ) -> "cudf.core.column.DecimalColumn":
+    ) -> "cudf.core.column.Decimal64Column":
         return libcudf.unary.cast(self, dtype)
 
     def as_numerical_column(self, dtype: Dtype, **kwargs) -> NumericalColumn:
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index dd1c0c1e4ac..af5b77124a1 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5192,7 +5192,7 @@ def as_timedelta_column(
 
     def as_decimal_column(
         self, dtype: Dtype, **kwargs
-    ) -> "cudf.core.column.DecimalColumn":
+    ) -> "cudf.core.column.Decimal64Column":
         return cpp_to_decimal(self, dtype)
 
     def as_string_column(
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index e63c538c108..6dbe55d0bb8 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -308,9 +308,117 @@ def deserialize(cls, header: dict, frames: list):
         return cls(fields)
 
 
+class Decimal32Dtype(_BaseDtype):
+
+    name = "decimal32"
+    _metadata = ("precision", "scale")
+    MAX_PRECISION = np.floor(np.log10(np.iinfo("int32").max))
+
+    def __init__(self, precision, scale=0):
+        """
+        Parameters
+        ----------
+        precision : int
+            The total number of digits in each value of this dtype
+        scale : int, optional
+            The scale of the Decimal32Dtype. See Notes below.
+
+        Notes
+        -----
+        When the scale is positive:
+            - numbers with fractional parts (e.g., 0.0042) can be represented
+            - the scale is the total number of digits to the right of the
+              decimal point
+        When the scale is negative:
+            - only multiples of powers of 10 (including 10**0) can be
+              represented (e.g., 1729, 4200, 1000000)
+            - the scale represents the number of trailing zeros in the value.
+        For example, 42 is representable with precision=2 and scale=0.
+        13.0051 is representable with precision=6 and scale=4,
+        and *not* representable with precision<6 or scale<4.
+        """
+        self._validate(precision, scale)
+        self._typ = pa.decimal128(precision, scale)
+
+    @property
+    def str(self):
+        return f"decimal32({self.precision}, {self.scale})"
+
+    @property
+    def precision(self):
+        return self._typ.precision
+
+    @precision.setter
+    def precision(self, value):
+        self._validate(value, self.scale)
+        self._typ = pa.decimal128(precision=value, scale=self.scale)
+
+    @property
+    def scale(self):
+        return self._typ.scale
+
+    @property
+    def type(self):
+        # might need to account for precision and scale here
+        return decimal.Decimal
+
+    def to_arrow(self):
+        return self._typ
+
+    @classmethod
+    def from_arrow(cls, typ):
+        return cls(typ.precision, typ.scale)
+
+    @property
+    def itemsize(self):
+        return 4
+
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}"
+            f"(precision={self.precision}, scale={self.scale})"
+        )
+
+    def __hash__(self):
+        return hash(self._typ)
+
+    @classmethod
+    def _validate(cls, precision, scale=0):
+        if precision > Decimal32Dtype.MAX_PRECISION:
+            raise ValueError(
+                f"Cannot construct a {cls.__name__}"
+                f" with precision > {cls.MAX_PRECISION}"
+            )
+        if abs(scale) > precision:
+            raise ValueError(f"scale={scale} exceeds precision={precision}")
+
+    @classmethod
+    def _from_decimal(cls, decimal):
+        """
+        Create a cudf.Decimal32Dtype from a decimal.Decimal object
+        """
+        metadata = decimal.as_tuple()
+        precision = max(len(metadata.digits), -metadata.exponent)
+        return cls(precision, -metadata.exponent)
+
+    def serialize(self) -> Tuple[dict, list]:
+        return (
+            {
+                "type-serialized": pickle.dumps(type(self)),
+                "precision": self.precision,
+                "scale": self.scale,
+            },
+            [],
+        )
+
+    @classmethod
+    def deserialize(cls, header: dict, frames: list):
+        return cls(header["precision"], header["scale"])
+
+
 class Decimal64Dtype(_BaseDtype):
 
-    name = "decimal"
+    name = "decimal64"
     _metadata = ("precision", "scale")
     MAX_PRECISION = np.floor(np.log10(np.iinfo("int64").max))
 
@@ -578,15 +686,7 @@ def is_decimal_dtype(obj):
     bool
         Whether or not the array-like or dtype is of the decimal dtype.
""" - return ( - type(obj) is cudf.core.dtypes.Decimal64Dtype - or obj is cudf.core.dtypes.Decimal64Dtype - or ( - isinstance(obj, str) - and obj == cudf.core.dtypes.Decimal64Dtype.name - ) - or (hasattr(obj, "dtype") and is_decimal_dtype(obj.dtype)) - ) + return is_decimal32_dtype(obj) or is_decimal64_dtype(obj) def is_interval_dtype(obj): @@ -613,3 +713,27 @@ def is_interval_dtype(obj): ) or (hasattr(obj, "dtype") and is_interval_dtype(obj.dtype)) ) + + +def is_decimal32_dtype(obj): + return ( + type(obj) is cudf.core.dtypes.Decimal32Dtype + or obj is cudf.core.dtypes.Decimal32Dtype + or ( + isinstance(obj, str) + and obj == cudf.core.dtypes.Decimal32Dtype.name + ) + or (hasattr(obj, "dtype") and is_decimal32_dtype(obj.dtype)) + ) + + +def is_decimal64_dtype(obj): + return ( + type(obj) is cudf.core.dtypes.Decimal64Dtype + or obj is cudf.core.dtypes.Decimal64Dtype + or ( + isinstance(obj, str) + and obj == cudf.core.dtypes.Decimal64Dtype.name + ) + or (hasattr(obj, "dtype") and is_decimal64_dtype(obj.dtype)) + ) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 5e4293f8f8b..3629358ee9f 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -506,7 +506,7 @@ def _concat( # Reassign precision for any decimal cols for name, col in out._data.items(): - if isinstance(col, cudf.core.column.DecimalColumn): + if isinstance(col, cudf.core.column.Decimal64Column): col = col._with_type_metadata(tables[0]._data[name].dtype) # Reassign index and column names diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 31ebf90b3c2..2dada48be4d 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2414,7 +2414,7 @@ def _concat(cls, objs, axis=0, index=True): col = _concat_columns([o._column for o in objs]) - if isinstance(col, cudf.core.column.DecimalColumn): + if isinstance(col, cudf.core.column.Decimal64Column): col = col._with_type_metadata(objs[0]._column.dtype) return cls(data=col, index=index, name=name) diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index bf1845b9315..4816094814a 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -8,7 +8,7 @@ import pytest import cudf -from cudf.core.column import DecimalColumn, NumericalColumn +from cudf.core.column import Decimal32Column, Decimal64Column, NumericalColumn from cudf.core.dtypes import Decimal64Dtype from cudf.tests.utils import ( FLOAT_TYPES, @@ -18,39 +18,51 @@ assert_eq, ) - -@pytest.mark.parametrize( - "data", - [ - [Decimal("1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], - [Decimal("-1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], - [1], - [-1], - [1, 2, 3, 4], - [42, 1729, 4104], - [1, 2, None, 4], - [None, None, None], - [], - ], -) -@pytest.mark.parametrize( - "typ", - [ - pa.decimal128(precision=4, scale=2), - pa.decimal128(precision=5, scale=3), - pa.decimal128(precision=6, scale=4), - ], -) -def test_round_trip_decimal_column(data, typ): - pa_arr = pa.array(data, type=typ) - col = DecimalColumn.from_arrow(pa_arr) - assert pa_arr.equals(col.to_arrow()) +data_ = [ + [Decimal("1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], + [Decimal("-1.1"), Decimal("2.2"), Decimal("3.3"), Decimal("4.4")], + [1], + [-1], + [1, 2, 3, 4], + [42, 1729, 4104], + [1, 2, None, 4], + [None, None, None], + [], +] +typ_ = [ + pa.decimal128(precision=4, scale=2), + pa.decimal128(precision=5, scale=3), + pa.decimal128(precision=6, 
+    pa.decimal128(precision=6, scale=4),
+]
+
+
+@pytest.mark.parametrize("data_", data_)
+@pytest.mark.parametrize("typ_", typ_)
+def test_round_trip_decimal64_column(data_, typ_):
+    pa_arr = pa.array(data_, type=typ_)
+    col_64 = Decimal64Column.from_arrow(pa_arr)
+    assert pa_arr.equals(col_64.to_arrow())
+
+
+@pytest.mark.parametrize("data_", data_)
+@pytest.mark.parametrize("typ_", typ_)
+def test_round_trip_decimal32_column(data_, typ_):
+    pa_arr = pa.array(data_, type=typ_)
+    col_32 = Decimal32Column.from_arrow(pa_arr)
+    assert pa_arr.equals(col_32.to_arrow())
+
+
+def test_from_arrow_max_precision_decimal64():
+    with pytest.raises(ValueError):
+        Decimal64Column.from_arrow(
+            pa.array([1, 2, 3], type=pa.decimal128(scale=0, precision=19))
+        )
 
 
-def test_from_arrow_max_precision():
+def test_from_arrow_max_precision_decimal32():
     with pytest.raises(ValueError):
-        DecimalColumn.from_arrow(
-            pa.array([1, 2, 3], type=pa.decimal128(scale=0, precision=19))
+        Decimal32Column.from_arrow(
+            pa.array([1, 2, 3], type=pa.decimal128(scale=0, precision=10))
         )
 
 
@@ -84,7 +96,7 @@ def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype):
     pa_arr = got.to_arrow().cast(
         pa.decimal128(to_dtype.precision, to_dtype.scale)
     )
-    expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))
+    expected = cudf.Series(Decimal64Column.from_arrow(pa_arr))
 
     got = got.astype(to_dtype)
 
@@ -124,7 +136,7 @@ def test_typecast_from_int_to_decimal(data, from_dtype, to_dtype):
         .cast("float64")
         .cast(pa.decimal128(to_dtype.precision, to_dtype.scale))
     )
-    expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))
+    expected = cudf.Series(Decimal64Column.from_arrow(pa_arr))
 
     got = got.astype(to_dtype)
 
@@ -164,7 +176,7 @@ def test_typecast_to_from_decimal(data, from_dtype, to_dtype):
     pa_arr = got.to_arrow().cast(
         pa.decimal128(to_dtype.precision, to_dtype.scale), safe=False
     )
-    expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))
+    expected = cudf.Series(Decimal64Column.from_arrow(pa_arr))
 
     got = got.astype(to_dtype)
 
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 4f0c02f5002..e1ae87e5089 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -14,8 +14,11 @@
 from cudf.api.types import (  # noqa: F401
     _is_non_decimal_numeric_dtype,
     _is_scalar_or_zero_d_array,
+    infer_dtype,
     is_categorical_dtype,
     is_datetime_dtype as is_datetime_dtype,
+    is_decimal32_dtype,
+    is_decimal64_dtype,
     is_decimal_dtype,
     is_integer,
     is_integer_dtype,
@@ -27,6 +30,7 @@
     is_string_dtype,
     is_struct_dtype,
     is_timedelta_dtype,
+    pandas_dtype,
 )
 from cudf.core._compat import PANDAS_GE_120
 
@@ -175,7 +179,9 @@ def cudf_dtype_from_pydata_dtype(dtype):
 
     if is_categorical_dtype(dtype):
         return cudf.core.dtypes.CategoricalDtype
-    elif is_decimal_dtype(dtype):
+    elif is_decimal32_dtype(dtype):
+        return cudf.core.dtypes.Decimal32Dtype
+    elif is_decimal64_dtype(dtype):
         return cudf.core.dtypes.Decimal64Dtype
     elif dtype in cudf._lib.types.np_to_cudf_types:
         return dtype.type
@@ -210,7 +216,7 @@ def cudf_dtype_from_pa_type(typ):
     elif pa.types.is_decimal(typ):
         return cudf.core.dtypes.Decimal64Dtype.from_arrow(typ)
     else:
-        return pd.api.types.pandas_dtype(typ.to_pandas_dtype())
+        return pandas_dtype(typ.to_pandas_dtype())
 
 
 def to_cudf_compatible_scalar(val, dtype=None):
@@ -250,7 +256,7 @@ def to_cudf_compatible_scalar(val, dtype=None):
     elif isinstance(val, pd.Timedelta):
         val = val.to_timedelta64()
 
-    val = pd.api.types.pandas_dtype(type(val)).type(val)
+    val = pandas_dtype(type(val)).type(val)
     if dtype is not None:
         val = val.astype(dtype)
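For reference, the 32-bit round trip that the new
`test_round_trip_decimal32_column` above exercises can be condensed to the
following sketch (assumes pyarrow and a build containing this patch):

```python
from decimal import Decimal

import pyarrow as pa

from cudf.core.column import Decimal32Column

# Arrow stores decimals as 128-bit values; from_arrow keeps only the low
# 32 bits of each element, and to_arrow sign-extends them back out.
pa_arr = pa.array([Decimal("1.1"), None], type=pa.decimal128(precision=4, scale=2))
col = Decimal32Column.from_arrow(pa_arr)
assert pa_arr.equals(col.to_arrow())
```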
From 0206fc9ad1c394dd694a3ddd8d9460f3c2442be0 Mon Sep 17 00:00:00 2001
From: Michael Wang
Date: Tue, 29 Jun 2021 11:28:43 -0700
Subject: [PATCH 06/54] Add column type tests (#8505)

Addresses column requests for #8357

This PR adds nested type checks for `cudf::column`.

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Robert Maynard (https://github.com/robertmaynard)

URL: https://github.com/rapidsai/cudf/pull/8505
---
 conda/recipes/libcudf/meta.yaml                  |   1 +
 cpp/CMakeLists.txt                               |   1 +
 cpp/include/cudf/lists/detail/scatter.cuh        |   3 +-
 cpp/include/cudf/lists/detail/scatter_helper.cuh |   5 -
 cpp/include/cudf/utilities/type_checks.hpp       |  38 +++
 cpp/src/lists/copying/scatter_helper.cu          |  10 -
 cpp/src/utilities/type_checks.cpp                |  72 ++++++
 cpp/tests/CMakeLists.txt                         |   3 +-
 cpp/tests/utilities_tests/type_check_tests.cpp   | 219 ++++++++++++++++++
 9 files changed, 335 insertions(+), 17 deletions(-)
 create mode 100644 cpp/include/cudf/utilities/type_checks.hpp
 create mode 100644 cpp/src/utilities/type_checks.cpp
 create mode 100644 cpp/tests/utilities_tests/type_check_tests.cpp

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 14b94dd2249..147c1685ecc 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -220,6 +220,7 @@ test:
     - test -f $PREFIX/include/cudf/utilities/error.hpp
     - test -f $PREFIX/include/cudf/utilities/traits.hpp
     - test -f $PREFIX/include/cudf/utilities/type_dispatcher.hpp
+    - test -f $PREFIX/include/cudf/utilities/type_checks.hpp
     - test -f $PREFIX/include/cudf/utilities/default_stream.hpp
     - test -f $PREFIX/include/cudf/wrappers/dictionary.hpp
     - test -f $PREFIX/include/cudf/wrappers/durations.hpp
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 678f202d106..03b1a6a9bfd 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -411,6 +411,7 @@ add_library(cudf
             src/unary/nan_ops.cu
             src/unary/null_ops.cu
             src/utilities/default_stream.cpp
+            src/utilities/type_checks.cpp
 )
 
 set_target_properties(cudf
diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh
index a440e456e25..94b0e830b15 100644
--- a/cpp/include/cudf/lists/detail/scatter.cuh
+++ b/cpp/include/cudf/lists/detail/scatter.cuh
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -89,7 +90,7 @@ std::unique_ptr<column> scatter_impl(
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
-  assert_same_data_type(source, target);
+  CUDF_EXPECTS(column_types_equal(source, target), "Mismatched column types.");
 
   auto const child_column_type = lists_column_view(target).child().type();
 
diff --git a/cpp/include/cudf/lists/detail/scatter_helper.cuh b/cpp/include/cudf/lists/detail/scatter_helper.cuh
index 76121bc35e9..7d0586ed6a6 100644
--- a/cpp/include/cudf/lists/detail/scatter_helper.cuh
+++ b/cpp/include/cudf/lists/detail/scatter_helper.cuh
@@ -129,11 +129,6 @@ struct unbound_list_view {
   size_type _size{};  // Number of elements in *this* list row.
 };
 
-/**
- * @brief Checks that the specified columns have matching schemas, all the way down.
- */
-void assert_same_data_type(column_view const& lhs, column_view const& rhs);
-
 std::unique_ptr<column> build_lists_child_column_recursive(
   data_type child_column_type,
   rmm::device_uvector<unbound_list_view> const& list_vector,
diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp
new file mode 100644
index 00000000000..8d57ab3aaa5
--- /dev/null
+++ b/cpp/include/cudf/utilities/type_checks.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+namespace cudf {
+
+/**
+ * @brief Compares the type of two `column_view`s
+ *
+ * This function returns true if the type of `lhs` equals that of `rhs`.
+ * - For fixed point types, the scale is compared.
+ * - For dictionary types, the type of the keys are compared if both are
+ *   non-empty columns.
+ * - For lists types, the type of child columns are compared recursively.
+ * - For struct types, the type of each field are compared in order.
+ * - For all other types, the `id` of `data_type` is compared.
+ *
+ * @param lhs The first `column_view` to compare
+ * @param rhs The second `column_view` to compare
+ * @return true if column types match
+ */
+bool column_types_equal(column_view const& lhs, column_view const& rhs);
+
+}  // namespace cudf
diff --git a/cpp/src/lists/copying/scatter_helper.cu b/cpp/src/lists/copying/scatter_helper.cu
index c57327569a4..d451540deb6 100644
--- a/cpp/src/lists/copying/scatter_helper.cu
+++ b/cpp/src/lists/copying/scatter_helper.cu
@@ -30,16 +30,6 @@
 namespace cudf {
 namespace lists {
 namespace detail {
 
-void assert_same_data_type(column_view const& lhs, column_view const& rhs)
-{
-  CUDF_EXPECTS(lhs.type().id() == rhs.type().id(), "Mismatched Data types.");
-  // Empty string column has no children
-  CUDF_EXPECTS(lhs.type().id() == type_id::STRING or lhs.num_children() == rhs.num_children(),
-               "Mismatched number of child columns.");
-
-  for (int i{0}; i < lhs.num_children(); ++i) { assert_same_data_type(lhs.child(i), rhs.child(i)); }
-}
-
 /**
  * @brief Constructs null mask for a scattered list's child column
 *
diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp
new file mode 100644
index 00000000000..d297148de45
--- /dev/null
+++ b/cpp/src/utilities/type_checks.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+namespace cudf {
+namespace {
+
+struct columns_equal_fn {
+  template <typename T>
+  bool operator()(column_view const&, column_view const&)
+  {
+    return true;
+  }
+};
+
+template <>
+bool columns_equal_fn::operator()<dictionary32>(column_view const& lhs, column_view const& rhs)
+{
+  auto const kidx = dictionary_column_view::keys_column_index;
+  return lhs.num_children() > 0 and rhs.num_children() > 0
+           ? lhs.child(kidx).type() == rhs.child(kidx).type()
+           : lhs.is_empty() and rhs.is_empty();
+}
+
+template <>
+bool columns_equal_fn::operator()<list_view>(column_view const& lhs, column_view const& rhs)
+{
+  auto const& ci = lists_column_view::child_column_index;
+  return column_types_equal(lhs.child(ci), rhs.child(ci));
+}
+
+template <>
+bool columns_equal_fn::operator()<struct_view>(column_view const& lhs, column_view const& rhs)
+{
+  return lhs.num_children() == rhs.num_children() and
+         std::all_of(thrust::make_counting_iterator(0),
+                     thrust::make_counting_iterator(lhs.num_children()),
+                     [&](auto i) { return column_types_equal(lhs.child(i), rhs.child(i)); });
+}
+
+};  // namespace
+
+// Implementation note: avoid using double dispatch for this function
+// as it increases code paths to NxN for N types.
+bool column_types_equal(column_view const& lhs, column_view const& rhs)
+{
+  if (lhs.type() != rhs.type()) { return false; }
+  return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs);
+}
+
+}  // namespace cudf
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 4360b418e95..f844f708562 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -233,7 +233,8 @@ ConfigureTest(UTILITIES_TEST
     utilities_tests/column_utilities_tests.cpp
     utilities_tests/column_wrapper_tests.cpp
     utilities_tests/lists_column_wrapper_tests.cpp
-    utilities_tests/default_stream_tests.cpp)
+    utilities_tests/default_stream_tests.cpp
+    utilities_tests/type_check_tests.cpp)
 
 ###################################################################################################
 # - span tests -------------------------------------------------------------------------------
diff --git a/cpp/tests/utilities_tests/type_check_tests.cpp b/cpp/tests/utilities_tests/type_check_tests.cpp
new file mode 100644
index 00000000000..bd94f724776
--- /dev/null
+++ b/cpp/tests/utilities_tests/type_check_tests.cpp
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+namespace cudf {
+namespace test {
+
+template <typename T>
+struct ColumnTypeCheckTestTyped : public cudf::test::BaseFixture {
+};
+
+struct ColumnTypeCheckTest : public cudf::test::BaseFixture {
+};
+
+TYPED_TEST_CASE(ColumnTypeCheckTestTyped, cudf::test::FixedWidthTypes);
+
+TYPED_TEST(ColumnTypeCheckTestTyped, SameFixedWidth)
+{
+  fixed_width_column_wrapper<TypeParam> lhs{1, 1}, rhs{2};
+  EXPECT_TRUE(column_types_equal(lhs, rhs));
+}
+
+TEST_F(ColumnTypeCheckTest, SameString)
+{
+  strings_column_wrapper lhs{{'a', 'a'}}, rhs{{'b'}};
+  EXPECT_TRUE(column_types_equal(lhs, rhs));
+
+  strings_column_wrapper lhs2{}, rhs2{{'b'}};
+  EXPECT_TRUE(column_types_equal(lhs2, rhs2));
+
+  strings_column_wrapper lhs3{}, rhs3{};
+  EXPECT_TRUE(column_types_equal(lhs3, rhs3));
+}
+
+TEST_F(ColumnTypeCheckTest, SameList)
+{
+  using LCW = lists_column_wrapper;
+
+  LCW lhs{}, rhs{};
+  EXPECT_TRUE(column_types_equal(lhs, rhs));
+
+  LCW lhs2{{1, 2, 3}}, rhs2{{4, 5}};
+  EXPECT_TRUE(column_types_equal(lhs2, rhs2));
+
+  LCW lhs3{{LCW{1}, LCW{2, 3}}}, rhs3{{LCW{4, 5}}};
+  EXPECT_TRUE(column_types_equal(lhs3, rhs3));
+
+  LCW lhs4{{LCW{1}, LCW{}, LCW{2, 3}}}, rhs4{{LCW{4, 5}, LCW{}}};
+  EXPECT_TRUE(column_types_equal(lhs4, rhs4));
+}
+
+TYPED_TEST(ColumnTypeCheckTestTyped, SameDictionary)
+{
+  using DCW = dictionary_column_wrapper<TypeParam>;
+  DCW lhs{1, 1, 2, 3}, rhs{5, 5};
+  EXPECT_TRUE(column_types_equal(lhs, rhs));
+
+  DCW lhs2{}, rhs2{};
+  EXPECT_TRUE(column_types_equal(lhs2, rhs2));
+}
+
+TEST_F(ColumnTypeCheckTest, SameStruct)
+{
+  using SCW      = structs_column_wrapper;
+  using FCW      = fixed_width_column_wrapper;
+  using StringCW = strings_column_wrapper;
+  using LCW      = lists_column_wrapper;
+  using DCW      = dictionary_column_wrapper;
+
+  FCW lf1{1, 2, 3}, rf1{0, 1};
+  StringCW lf2{"a", "bb", ""}, rf2{"cc", "d"};
+  LCW lf3{LCW{1, 2}, LCW{}, LCW{4}}, rf3{LCW{1}, LCW{2}};
+  DCW lf4{5, 5, 5}, rf4{9, 9};
+
+  SCW lhs{lf1, lf2, lf3, lf4}, rhs{rf1, rf2, rf3, rf4};
+  EXPECT_TRUE(column_types_equal(lhs, rhs));
+}
+
+TEST_F(ColumnTypeCheckTest, DifferentBasics)
+{
+  fixed_width_column_wrapper lhs1{1, 1};
+  strings_column_wrapper rhs1{"a", "bb"};
+
+  EXPECT_FALSE(column_types_equal(lhs1, rhs1));
+
+  lists_column_wrapper lhs2{{"hello"}, {"world", "!"}};
+  strings_column_wrapper rhs2{"", "kk"};
+
+  EXPECT_FALSE(column_types_equal(lhs2, rhs2));
+
+  fixed_width_column_wrapper lhs3{1, 1};
+  dictionary_column_wrapper rhs3{2, 2};
+
+  EXPECT_FALSE(column_types_equal(lhs3, rhs3));
+
+  lists_column_wrapper lhs4{{8, 8, 8}, {10, 10}};
+  structs_column_wrapper rhs4{rhs2, rhs3};
+
+  EXPECT_FALSE(column_types_equal(lhs4, rhs4));
+}
+
+TEST_F(ColumnTypeCheckTest, DifferentFixedWidth)
+{
+  fixed_width_column_wrapper lhs1{1, 1};
+  fixed_width_column_wrapper rhs1{2};
+
+  EXPECT_FALSE(column_types_equal(lhs1, rhs1));
+
+  fixed_width_column_wrapper lhs2{1, 1};
+  fixed_width_column_wrapper rhs2{2};
+
+  EXPECT_FALSE(column_types_equal(lhs2, rhs2));
+
+  fixed_width_column_wrapper lhs3{1, 1};
+  fixed_width_column_wrapper rhs3{2};
+
+  EXPECT_FALSE(column_types_equal(lhs3, rhs3));
+
+  fixed_width_column_wrapper lhs4{};
+  fixed_width_column_wrapper rhs4{42};
+
+  EXPECT_FALSE(column_types_equal(lhs4, rhs4));
+
+  // Same rep, different scale
+  fixed_point_column_wrapper lhs5({10000}, numeric::scale_type{-3});
+  fixed_point_column_wrapper rhs5({10000}, numeric::scale_type{0});
+
+  EXPECT_FALSE(column_types_equal(lhs5, rhs5));
+
+  // Different rep, same scale
+  fixed_point_column_wrapper<int64_t> rhs6({4200}, numeric::scale_type{-1});
+
+  EXPECT_FALSE(column_types_equal(lhs6, rhs6));
+}
+
+TEST_F(ColumnTypeCheckTest, DifferentDictionary)
+{
+  dictionary_column_wrapper<int32_t> lhs1{1, 1, 1, 2, 2, 3};
+  dictionary_column_wrapper<int64_t> rhs1{0, 0, 42, 42};
+
+  EXPECT_FALSE(column_types_equal(lhs1, rhs1));
+
+  dictionary_column_wrapper<double> lhs2{3.14, 3.14, 5.00};
+  dictionary_column_wrapper<int32_t> rhs2{0, 0, 42, 42};
+
+  EXPECT_FALSE(column_types_equal(lhs2, rhs2));
+
+  dictionary_column_wrapper<int32_t> lhs3{1, 1, 1, 2, 2, 3};
+  dictionary_column_wrapper<uint32_t> rhs3{8, 8};
+
+  EXPECT_FALSE(column_types_equal(lhs3, rhs3));
+
+  dictionary_column_wrapper<int32_t> lhs4{1, 1, 2, 3}, rhs4{};
+  EXPECT_FALSE(column_types_equal(lhs4, rhs4));
+}
+
+TEST_F(ColumnTypeCheckTest, DifferentLists)
+{
+  using LCW_i = lists_column_wrapper<int32_t>;
+  using LCW_f = lists_column_wrapper<float>;
+
+  // Different nested level
+  LCW_i lhs1{LCW_i{1, 1, 2, 3}, LCW_i{}, LCW_i{42, 42}};
+  LCW_i rhs1{LCW_i{LCW_i{8, 8, 8}, LCW_i{9, 9}}, LCW_i{LCW_i{42, 42}}};
+
+  EXPECT_FALSE(column_types_equal(lhs1, rhs1));
+
+  // Different base column type
+  LCW_i lhs2{LCW_i{1, 1, 2, 3}, LCW_i{}, LCW_i{42, 42}};
+  LCW_f rhs2{LCW_f{9.0, 9.1}, LCW_f{3.14}, LCW_f{}};
+
+  EXPECT_FALSE(column_types_equal(lhs2, rhs2));
+}
+
+TEST_F(ColumnTypeCheckTest, DifferentStructs)
+{
+  fixed_width_column_wrapper<int32_t> lf1{1, 1, 1};
+  fixed_width_column_wrapper<int64_t> rf1{2, 2};
+
+  structs_column_wrapper lhs1{lf1};
+  structs_column_wrapper rhs1{rf1};
+
+  EXPECT_FALSE(column_types_equal(lhs1, rhs1));
+
+  fixed_width_column_wrapper<int32_t> lf2{1, 1, 1};
+  fixed_width_column_wrapper<int32_t> rf2{2, 2};
+
+  strings_column_wrapper lf3{"a", "b", "c"};
+
+  structs_column_wrapper lhs2{lf2, lf3};
+  structs_column_wrapper rhs2{rf2};
+
+  EXPECT_FALSE(column_types_equal(lhs2, rhs2));
+}
+
+}  // namespace test
+}  // namespace cudf

From 1e53776c0e17da1809fb9d12a8b3ab6ffbc669c3 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR
Date: Tue, 29 Jun 2021 18:40:02 -0500
Subject: [PATCH 07/54] Upgrade arrow to 4.0.1 (#7495)

Fixes: https://github.com/rapidsai/cudf/issues/7224

This PR:
- [x] Adds support for arrow 4.0.1 in cudf.
- [x] Moves testing-related utilities to the `cudf.testing` module (see the import sketch below).
- [x] Fixes miscellaneous errors related to arrow upgrade.
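For downstream code, the utilities move is a one-line import change. A
minimal sketch, assuming a hypothetical downstream test module (the
`test_from_pandas_roundtrip` function and its data are illustrative, not
part of this PR):

```python
# The helper previously lived at cudf.tests.utils; after this PR it is
# importable from cudf.testing._utils. Everything else stays the same.
import pandas as pd

import cudf
from cudf.testing._utils import assert_eq  # was: from cudf.tests.utils import assert_eq


def test_from_pandas_roundtrip():
    pdf = pd.DataFrame({"a": [1, 2, 3]})
    gdf = cudf.DataFrame.from_pandas(pdf)
    # assert_eq normalizes cudf/pandas differences before comparing
    assert_eq(pdf, gdf)
```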
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) - Paul Taylor (https://github.com/trxcllnt) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu) - Jeremy Dyer (https://github.com/jdye64) - Paul Taylor (https://github.com/trxcllnt) - Dillon Cullinan (https://github.com/dillon-cullinan) - Devavret Makkar (https://github.com/devavret) - Keith Kraus (https://github.com/kkraus14) - Michael Wang (https://github.com/isVoid) - Dante Gama Dessavre (https://github.com/dantegd) URL: https://github.com/rapidsai/cudf/pull/7495 --- ci/gpu/build.sh | 2 +- conda/environments/cudf_dev_cuda11.0.yml | 4 +- conda/environments/cudf_dev_cuda11.2.yml | 4 +- conda/recipes/cudf/meta.yaml | 2 +- conda/recipes/libcudf/meta.yaml | 2 +- conda/recipes/libcudf_kafka/meta.yaml | 4 +- cpp/cmake/thirdparty/CUDF_GetArrow.cmake | 2 +- python/cudf/cudf/_fuzz_testing/avro.py | 2 +- python/cudf/cudf/_fuzz_testing/csv.py | 2 +- python/cudf/cudf/_fuzz_testing/json.py | 2 +- python/cudf/cudf/_fuzz_testing/orc.py | 2 +- python/cudf/cudf/_fuzz_testing/parquet.py | 2 +- .../cudf/_fuzz_testing/tests/fuzz_test_csv.py | 2 +- .../_fuzz_testing/tests/fuzz_test_json.py | 2 +- python/cudf/cudf/_fuzz_testing/utils.py | 2 +- python/cudf/cudf/_lib/gpuarrow.pyx | 4 +- python/cudf/cudf/_lib/utils.pyx | 16 +- python/cudf/cudf/core/dataframe.py | 11 +- .../{tests/utils.py => testing/_utils.py} | 0 .../{tests => testing}/dataset_generator.py | 0 python/cudf/cudf/tests/__init__.py | 0 python/cudf/cudf/tests/test_apply_rows.py | 2 +- python/cudf/cudf/tests/test_applymap.py | 2 +- python/cudf/cudf/tests/test_array_function.py | 2 +- python/cudf/cudf/tests/test_array_ufunc.py | 7 +- .../test_avro_reader_fastavro_integration.py | 2 +- python/cudf/cudf/tests/test_binops.py | 20 +- python/cudf/cudf/tests/test_categorical.py | 6 +- python/cudf/cudf/tests/test_column.py | 8 +- .../cudf/cudf/tests/test_column_accessor.py | 2 +- python/cudf/cudf/tests/test_concat.py | 6 +- python/cudf/cudf/tests/test_contains.py | 2 +- python/cudf/cudf/tests/test_copying.py | 2 +- python/cudf/cudf/tests/test_csv.py | 2 +- python/cudf/cudf/tests/test_cuda_apply.py | 2 +- .../cudf/tests/test_cuda_array_interface.py | 5 +- .../cudf/cudf/tests/test_custom_accessor.py | 4 +- python/cudf/cudf/tests/test_cut.py | 7 +- python/cudf/cudf/tests/test_dataframe.py | 4 +- python/cudf/cudf/tests/test_dataframe_copy.py | 2 +- python/cudf/cudf/tests/test_datasets.py | 2 +- python/cudf/cudf/tests/test_datetime.py | 2 +- python/cudf/cudf/tests/test_decimal.py | 2 +- python/cudf/cudf/tests/test_dlpack.py | 2 +- python/cudf/cudf/tests/test_dropna.py | 2 +- python/cudf/cudf/tests/test_dtypes.py | 2 +- python/cudf/cudf/tests/test_duplicates.py | 2 +- python/cudf/cudf/tests/test_factorize.py | 2 +- python/cudf/cudf/tests/test_feather.py | 2 +- python/cudf/cudf/tests/test_fill.py | 2 +- python/cudf/cudf/tests/test_gcs.py | 2 +- .../cudf/cudf/tests/test_gpu_arrow_parser.py | 5 +- python/cudf/cudf/tests/test_groupby.py | 4 +- python/cudf/cudf/tests/test_hdf.py | 2 +- python/cudf/cudf/tests/test_hdfs.py | 2 +- python/cudf/cudf/tests/test_index.py | 2 +- python/cudf/cudf/tests/test_indexing.py | 8 +- python/cudf/cudf/tests/test_interval.py | 2 +- python/cudf/cudf/tests/test_joining.py | 194 +++++++++++++++++- python/cudf/cudf/tests/test_json.py | 2 +- python/cudf/cudf/tests/test_list.py | 2 +- python/cudf/cudf/tests/test_monotonic.py | 2 +- python/cudf/cudf/tests/test_multiindex.py | 2 +- 
python/cudf/cudf/tests/test_numerical.py | 2 +- python/cudf/cudf/tests/test_numpy_interop.py | 2 +- python/cudf/cudf/tests/test_onehot.py | 2 +- python/cudf/cudf/tests/test_ops.py | 2 +- python/cudf/cudf/tests/test_orc.py | 6 +- python/cudf/cudf/tests/test_pandas_interop.py | 2 +- python/cudf/cudf/tests/test_parquet.py | 4 +- python/cudf/cudf/tests/test_pickling.py | 2 +- python/cudf/cudf/tests/test_quantiles.py | 2 +- python/cudf/cudf/tests/test_query.py | 2 +- python/cudf/cudf/tests/test_query_mask.py | 2 +- python/cudf/cudf/tests/test_rank.py | 2 +- python/cudf/cudf/tests/test_reductions.py | 4 +- python/cudf/cudf/tests/test_replace.py | 4 +- python/cudf/cudf/tests/test_repr.py | 2 +- python/cudf/cudf/tests/test_reshape.py | 2 +- python/cudf/cudf/tests/test_rolling.py | 2 +- python/cudf/cudf/tests/test_s3.py | 2 +- python/cudf/cudf/tests/test_scalar.py | 2 +- python/cudf/cudf/tests/test_scan.py | 7 +- python/cudf/cudf/tests/test_search.py | 2 +- python/cudf/cudf/tests/test_serialize.py | 4 +- python/cudf/cudf/tests/test_series.py | 2 +- python/cudf/cudf/tests/test_seriesmap.py | 4 +- python/cudf/cudf/tests/test_setitem.py | 2 +- python/cudf/cudf/tests/test_sorting.py | 2 +- python/cudf/cudf/tests/test_sparse_df.py | 2 +- python/cudf/cudf/tests/test_stats.py | 2 +- python/cudf/cudf/tests/test_string.py | 193 +---------------- python/cudf/cudf/tests/test_struct.py | 2 +- python/cudf/cudf/tests/test_testing.py | 2 +- python/cudf/cudf/tests/test_text.py | 2 +- python/cudf/cudf/tests/test_timedelta.py | 4 +- python/cudf/cudf/tests/test_transform.py | 2 +- python/cudf/cudf/tests/test_unaops.py | 2 +- .../custreamz/custreamz/tests/test_kafka.py | 2 +- .../dask_cudf/tests/test_accessor.py | 6 +- python/dask_cudf/dask_cudf/tests/test_core.py | 2 +- .../dask_cudf/tests/test_distributed.py | 2 +- 102 files changed, 359 insertions(+), 338 deletions(-) rename python/cudf/cudf/{tests/utils.py => testing/_utils.py} (100%) rename python/cudf/cudf/{tests => testing}/dataset_generator.py (100%) delete mode 100644 python/cudf/cudf/tests/__init__.py diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 1d0154aedc7..c854e67fbdf 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -217,7 +217,7 @@ fi cd "$WORKSPACE/python/cudf" gpuci_logger "Python py.test for cuDF" -py.test -n 6 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term +py.test -n 6 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term cd "$WORKSPACE/python/dask_cudf" gpuci_logger "Python py.test for dask-cudf" diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 1568327f88c..5561a573609 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -17,7 +17,7 @@ dependencies: - numba>=0.53.1 - numpy - pandas>=1.0,<1.3.0dev0 - - pyarrow=1.0.1 + - pyarrow=4.0.1 - fastavro>=0.22.9 - notebook>=0.5.0 - cython>=0.29,<0.30 @@ -44,8 +44,8 @@ dependencies: - dask>=2021.6.0 - distributed>=2021.6.0 - streamz + - arrow-cpp=4.0.1 - dlpack>=0.5,<0.6.0a0 - - arrow-cpp=1.0.1 - arrow-cpp-proc * cuda - double-conversion - rapidjson diff --git a/conda/environments/cudf_dev_cuda11.2.yml 
b/conda/environments/cudf_dev_cuda11.2.yml index 9d520ada253..6c8ae4cb9b0 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -17,7 +17,7 @@ dependencies: - numba>=0.53.1 - numpy - pandas>=1.0,<1.3.0dev0 - - pyarrow=1.0.1 + - pyarrow=4.0.1 - fastavro>=0.22.9 - notebook>=0.5.0 - cython>=0.29,<0.30 @@ -44,8 +44,8 @@ dependencies: - dask>=2021.6.0 - distributed>=2021.6.0 - streamz + - arrow-cpp=4.0.1 - dlpack>=0.5,<0.6.0a0 - - arrow-cpp=1.0.1 - arrow-cpp-proc * cuda - double-conversion - rapidjson diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index d1aaf924555..3da7c63857d 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -30,7 +30,7 @@ requirements: - setuptools - numba >=0.53.1 - dlpack>=0.5,<0.6.0a0 - - pyarrow 1.0.1 + - pyarrow 4.0.1 - libcudf {{ version }} - rmm {{ minor_version }} - cudatoolkit {{ cuda_version }} diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 147c1685ecc..6464013d646 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -37,7 +37,7 @@ requirements: host: - librmm {{ minor_version }}.* - cudatoolkit {{ cuda_version }}.* - - arrow-cpp 1.0.1 + - arrow-cpp 4.0.1 - arrow-cpp-proc * cuda - dlpack>=0.5,<0.6.0a0 run: diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index f1ec813a17f..6b15890e7c7 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -25,8 +25,8 @@ requirements: build: - cmake >=3.20.1 host: - - libcudf {{ version }} - - librdkafka >=1.5.0,<1.5.3 + - libcudf {{version}} + - librdkafka >=1.6.0,<1.7.0a0 run: - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not diff --git a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake index 0eee5abd2f3..e15f3f7e16d 100644 --- a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake @@ -127,6 +127,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3) endfunction() -set(CUDF_VERSION_Arrow 1.0.1) +set(CUDF_VERSION_Arrow 4.0.1) find_and_configure_arrow(${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3}) diff --git a/python/cudf/cudf/_fuzz_testing/avro.py b/python/cudf/cudf/_fuzz_testing/avro.py index a07e3acf416..4c167ac627f 100644 --- a/python/cudf/cudf/_fuzz_testing/avro.py +++ b/python/cudf/cudf/_fuzz_testing/avro.py @@ -15,7 +15,7 @@ pandas_to_avro, pyarrow_to_pandas, ) -from cudf.tests import dataset_generator as dg +from cudf.testing import dataset_generator as dg logging.basicConfig( format="%(asctime)s %(levelname)-8s %(message)s", diff --git a/python/cudf/cudf/_fuzz_testing/csv.py b/python/cudf/cudf/_fuzz_testing/csv.py index 84346ed61ad..0acb9c8a471 100644 --- a/python/cudf/cudf/_fuzz_testing/csv.py +++ b/python/cudf/cudf/_fuzz_testing/csv.py @@ -12,7 +12,7 @@ _generate_rand_meta, pyarrow_to_pandas, ) -from cudf.tests import dataset_generator as dg +from cudf.testing import dataset_generator as dg from cudf.utils.dtypes import pandas_dtypes_to_cudf_dtypes logging.basicConfig( diff --git a/python/cudf/cudf/_fuzz_testing/json.py b/python/cudf/cudf/_fuzz_testing/json.py index 5ecb27f7665..df9226cf059 100644 --- a/python/cudf/cudf/_fuzz_testing/json.py +++ b/python/cudf/cudf/_fuzz_testing/json.py @@ -13,7 +13,7 @@ _generate_rand_meta, pyarrow_to_pandas, ) -from cudf.tests import 
dataset_generator as dg +from cudf.testing import dataset_generator as dg from cudf.utils.dtypes import pandas_dtypes_to_cudf_dtypes logging.basicConfig( diff --git a/python/cudf/cudf/_fuzz_testing/orc.py b/python/cudf/cudf/_fuzz_testing/orc.py index 607294a49c9..2aa01eb3967 100644 --- a/python/cudf/cudf/_fuzz_testing/orc.py +++ b/python/cudf/cudf/_fuzz_testing/orc.py @@ -16,7 +16,7 @@ pandas_to_orc, pyarrow_to_pandas, ) -from cudf.tests import dataset_generator as dg +from cudf.testing import dataset_generator as dg logging.basicConfig( format="%(asctime)s %(levelname)-8s %(message)s", diff --git a/python/cudf/cudf/_fuzz_testing/parquet.py b/python/cudf/cudf/_fuzz_testing/parquet.py index 8c63b12d972..5b00f96d88d 100644 --- a/python/cudf/cudf/_fuzz_testing/parquet.py +++ b/python/cudf/cudf/_fuzz_testing/parquet.py @@ -12,7 +12,7 @@ _generate_rand_meta, pyarrow_to_pandas, ) -from cudf.tests import dataset_generator as dg +from cudf.testing import dataset_generator as dg logging.basicConfig( format="%(asctime)s %(levelname)-8s %(message)s", diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py index e6a5d081980..9b6abeb1276 100644 --- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py +++ b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py @@ -13,7 +13,7 @@ compare_content, run_test, ) -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq @pythonfuzz(data_handle=CSVReader) diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py index f3da03f447b..2f5e6204f7c 100644 --- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py +++ b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py @@ -9,7 +9,7 @@ from cudf._fuzz_testing.json import JSONReader, JSONWriter from cudf._fuzz_testing.main import pythonfuzz from cudf._fuzz_testing.utils import ALL_POSSIBLE_VALUES, run_test -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq @pythonfuzz(data_handle=JSONReader) diff --git a/python/cudf/cudf/_fuzz_testing/utils.py b/python/cudf/cudf/_fuzz_testing/utils.py index 71b5a35a225..fe9ed4d4934 100644 --- a/python/cudf/cudf/_fuzz_testing/utils.py +++ b/python/cudf/cudf/_fuzz_testing/utils.py @@ -9,7 +9,7 @@ import pyorc import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq from cudf.utils.dtypes import ( pandas_dtypes_to_cudf_dtypes, pyarrow_dtypes_to_pandas_dtypes, diff --git a/python/cudf/cudf/_lib/gpuarrow.pyx b/python/cudf/cudf/_lib/gpuarrow.pyx index 6513cd59424..a7da22637b9 100644 --- a/python/cudf/cudf/_lib/gpuarrow.pyx +++ b/python/cudf/cudf/_lib/gpuarrow.pyx @@ -15,7 +15,7 @@ from pyarrow.includes.libarrow cimport ( CRecordBatchStreamReader ) from pyarrow.lib cimport ( - _CRecordBatchReader, + RecordBatchReader, Buffer, Schema, pyarrow_wrap_schema @@ -23,7 +23,7 @@ from pyarrow.lib cimport ( import pyarrow as pa -cdef class CudaRecordBatchStreamReader(_CRecordBatchReader): +cdef class CudaRecordBatchStreamReader(RecordBatchReader): cdef: CIpcReadOptions options diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 13eedb34c18..e5dfb5a5c35 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -138,12 +138,16 @@ cpdef generate_pandas_metadata(Table table, index): index_descriptors.append(descr) metadata = pa.pandas_compat.construct_metadata( - table, - col_names, - index_levels, - 
index_descriptors, - index, - types, + columns_to_convert=[ + col + for col in table._columns + ], + df=table, + column_names=col_names, + index_levels=index_levels, + index_descriptors=index_descriptors, + preserve_index=index, + types=types, ) md_dict = json.loads(metadata[b"pandas"]) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 436f14cf6e3..0901334396a 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -5652,11 +5652,12 @@ def to_arrow(self, preserve_index=True): out = super(DataFrame, data).to_arrow() metadata = pa.pandas_compat.construct_metadata( - self, - out.schema.names, - [self.index], - index_descr, - preserve_index, + columns_to_convert=[self[col] for col in self._data.names], + df=self, + column_names=out.schema.names, + index_levels=[self.index], + index_descriptors=index_descr, + preserve_index=preserve_index, types=out.schema.types, ) diff --git a/python/cudf/cudf/tests/utils.py b/python/cudf/cudf/testing/_utils.py similarity index 100% rename from python/cudf/cudf/tests/utils.py rename to python/cudf/cudf/testing/_utils.py diff --git a/python/cudf/cudf/tests/dataset_generator.py b/python/cudf/cudf/testing/dataset_generator.py similarity index 100% rename from python/cudf/cudf/tests/dataset_generator.py rename to python/cudf/cudf/testing/dataset_generator.py diff --git a/python/cudf/cudf/tests/__init__.py b/python/cudf/cudf/tests/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/tests/test_apply_rows.py b/python/cudf/cudf/tests/test_apply_rows.py index 0ba80278fca..f025549971f 100644 --- a/python/cudf/cudf/tests/test_apply_rows.py +++ b/python/cudf/cudf/tests/test_apply_rows.py @@ -2,7 +2,7 @@ import cudf from cudf.core.column import column -from cudf.tests.utils import assert_eq, gen_rand_series +from cudf.testing._utils import assert_eq, gen_rand_series def _kernel_multiply(a, b, out): diff --git a/python/cudf/cudf/tests/test_applymap.py b/python/cudf/cudf/tests/test_applymap.py index 1f35bc93c78..fa3c88a3551 100644 --- a/python/cudf/cudf/tests/test_applymap.py +++ b/python/cudf/cudf/tests/test_applymap.py @@ -7,7 +7,7 @@ import pytest from cudf import Series -from cudf.tests import utils +from cudf.testing import _utils as utils @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_array_function.py b/python/cudf/cudf/tests/test_array_function.py index 03f9cf1d7e5..cd4dd28f179 100644 --- a/python/cudf/cudf/tests/test_array_function.py +++ b/python/cudf/cudf/tests/test_array_function.py @@ -4,7 +4,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq from cudf.utils.utils import IS_NEP18_ACTIVE missing_arrfunc_cond = not IS_NEP18_ACTIVE diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py index f9e0bb2ce8a..8cfcf4d2b6d 100644 --- a/python/cudf/cudf/tests/test_array_ufunc.py +++ b/python/cudf/cudf/tests/test_array_ufunc.py @@ -1,9 +1,10 @@ -import cudf -import numpy as np import cupy as cp +import numpy as np import pandas as pd import pytest -from cudf.tests.utils import assert_eq + +import cudf +from cudf.testing._utils import assert_eq @pytest.fixture diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py index a52ee937574..48e3b0ec42c 100644 --- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py +++ 
b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py @@ -18,7 +18,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq def cudf_from_avro_util(schema, records): diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 090e03c9403..1c97cbb10ff 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -14,7 +14,7 @@ import cudf from cudf.core import Series from cudf.core.index import as_index -from cudf.tests import utils +from cudf.testing import _utils as utils from cudf.utils.dtypes import ( BOOL_TYPES, DATETIME_TYPES, @@ -1742,12 +1742,6 @@ def test_binops_with_NA_consistent(dtype, op): assert result._column.null_count == len(data) -def _decimal_series(input, dtype): - return cudf.Series( - [x if x is None else decimal.Decimal(x) for x in input], dtype=dtype, - ) - - @pytest.mark.parametrize( "args", [ @@ -2080,10 +2074,10 @@ def _decimal_series(input, dtype): def test_binops_decimal(args): op, lhs, l_dtype, rhs, r_dtype, expect, expect_dtype = args - a = _decimal_series(lhs, l_dtype) - b = _decimal_series(rhs, r_dtype) + a = utils._decimal_series(lhs, l_dtype) + b = utils._decimal_series(rhs, r_dtype) expect = ( - _decimal_series(expect, expect_dtype) + utils._decimal_series(expect, expect_dtype) if isinstance(expect_dtype, cudf.Decimal64Dtype) else cudf.Series(expect, dtype=expect_dtype) ) @@ -2242,7 +2236,7 @@ def test_binops_decimal(args): ), ], ) -@pytest.mark.parametrize("integer_dtype", cudf.tests.utils.INTEGER_TYPES) +@pytest.mark.parametrize("integer_dtype", utils.INTEGER_TYPES) @pytest.mark.parametrize("reflected", [True, False]) def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): """ @@ -2258,7 +2252,7 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected): else: op, ldata, ldtype, rdata, _, expected = args - lhs = _decimal_series(ldata, ldtype) + lhs = utils._decimal_series(ldata, ldtype) rhs = cudf.Series(rdata, dtype=integer_dtype) if reflected: @@ -2746,7 +2740,7 @@ def test_binops_decimal_scalar_compare(args, reflected): else: op, ldata, ldtype, rdata, _, expected = args - lhs = _decimal_series(ldata, ldtype) + lhs = utils._decimal_series(ldata, ldtype) rhs = rdata if reflected: diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py index 7b1aea174c8..6a23f568348 100644 --- a/python/cudf/cudf/tests/test_categorical.py +++ b/python/cudf/cudf/tests/test_categorical.py @@ -9,7 +9,11 @@ import cudf from cudf.core._compat import PANDAS_GE_110 -from cudf.tests.utils import NUMERIC_TYPES, assert_eq, assert_exceptions_equal +from cudf.testing._utils import ( + NUMERIC_TYPES, + assert_eq, + assert_exceptions_equal, +) @pytest.fixture diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py index 3ac6cc0bb44..f3387b3d27d 100644 --- a/python/cudf/cudf/tests/test_column.py +++ b/python/cudf/cudf/tests/test_column.py @@ -8,7 +8,7 @@ import cudf from cudf._lib.transform import mask_to_bools from cudf.core.column.column import as_column -from cudf.tests.utils import assert_eq, assert_exceptions_equal +from cudf.testing._utils import assert_eq, assert_exceptions_equal from cudf.utils import dtypes as dtypeutils dtypes = sorted( @@ -140,8 +140,8 @@ def test_column_series_multi_dim(data): @pytest.mark.parametrize( ("data", "error"), [ - ([1, "1.0", "2", -3], TypeError), - ([np.nan, 0, "null", cp.nan], TypeError), 
+ ([1, "1.0", "2", -3], pa.lib.ArrowInvalid), + ([np.nan, 0, "null", cp.nan], pa.lib.ArrowInvalid), ( [np.int32(4), np.float64(1.5), np.float32(1.290994), np.int8(0)], None, @@ -152,7 +152,7 @@ def test_column_mixed_dtype(data, error): if error is None: cudf.Series(data) else: - with pytest.raises(TypeError): + with pytest.raises(error): cudf.Series(data) diff --git a/python/cudf/cudf/tests/test_column_accessor.py b/python/cudf/cudf/tests/test_column_accessor.py index 86a7927dcac..99d4bdd9910 100644 --- a/python/cudf/cudf/tests/test_column_accessor.py +++ b/python/cudf/cudf/tests/test_column_accessor.py @@ -6,7 +6,7 @@ import cudf from cudf.core.column_accessor import ColumnAccessor -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq simple_test_data = [ {}, diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 5c4c121db4d..2578cb13bff 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -1,16 +1,16 @@ # Copyright (c) 2018-2021, NVIDIA CORPORATION. import re +from decimal import Decimal import numpy as np import pandas as pd import pytest -from decimal import Decimal import cudf as gd -from cudf.tests.utils import assert_eq, assert_exceptions_equal -from cudf.utils.dtypes import is_categorical_dtype from cudf.core.dtypes import Decimal64Dtype +from cudf.testing._utils import assert_eq, assert_exceptions_equal +from cudf.utils.dtypes import is_categorical_dtype def make_frames(index=None, nulls="none"): diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py index b669c40022e..b6650600261 100644 --- a/python/cudf/cudf/tests/test_contains.py +++ b/python/cudf/cudf/tests/test_contains.py @@ -6,7 +6,7 @@ from cudf import Series from cudf.core.index import RangeIndex, as_index -from cudf.tests.utils import ( +from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES, diff --git a/python/cudf/cudf/tests/test_copying.py b/python/cudf/cudf/tests/test_copying.py index ed6a1169a2a..0965b5298a4 100644 --- a/python/cudf/cudf/tests/test_copying.py +++ b/python/cudf/cudf/tests/test_copying.py @@ -6,7 +6,7 @@ import cudf from cudf.core import Series -from cudf.tests.utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq +from cudf.testing._utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq @pytest.mark.parametrize("dtype", NUMERIC_TYPES + OTHER_TYPES) diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 925369048cb..c19fde8b5d6 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -14,7 +14,7 @@ import cudf from cudf import read_csv -from cudf.tests.utils import assert_eq, assert_exceptions_equal +from cudf.testing._utils import assert_eq, assert_exceptions_equal def make_numeric_dataframe(nrows, dtype): diff --git a/python/cudf/cudf/tests/test_cuda_apply.py b/python/cudf/cudf/tests/test_cuda_apply.py index fa880da6804..2604030097b 100644 --- a/python/cudf/cudf/tests/test_cuda_apply.py +++ b/python/cudf/cudf/tests/test_cuda_apply.py @@ -9,7 +9,7 @@ from numba import cuda from cudf import DataFrame -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq @pytest.mark.parametrize("nelem", [1, 2, 64, 128, 129]) diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py index 42e5ab38f50..ecf961f133b 100644 --- a/python/cudf/cudf/tests/test_cuda_array_interface.py +++ 
b/python/cudf/cudf/tests/test_cuda_array_interface.py @@ -10,7 +10,7 @@ from numba import cuda import cudf -from cudf.tests.utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq +from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq @pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES) @@ -171,6 +171,9 @@ def test_column_from_ephemeral_cupy_try_lose_reference(): def test_cuda_array_interface_pytorch(): torch = pytest.importorskip("torch") + if not torch.cuda.is_available(): + pytest.skip("need gpu version of pytorch to be installed") + series = cudf.Series([1, -1, 10, -56]) tensor = torch.tensor(series) got = cudf.Series(tensor) diff --git a/python/cudf/cudf/tests/test_custom_accessor.py b/python/cudf/cudf/tests/test_custom_accessor.py index d72b5875677..16e5b345ce2 100644 --- a/python/cudf/cudf/tests/test_custom_accessor.py +++ b/python/cudf/cudf/tests/test_custom_accessor.py @@ -2,9 +2,9 @@ import pandas as pd import pytest -import cudf as gd -from cudf.tests.utils import assert_eq +import cudf as gd +from cudf.testing._utils import assert_eq @gd.api.extensions.register_dataframe_accessor("point") diff --git a/python/cudf/cudf/tests/test_cut.py b/python/cudf/cudf/tests/test_cut.py index 926826ac188..710df78e36b 100644 --- a/python/cudf/cudf/tests/test_cut.py +++ b/python/cudf/cudf/tests/test_cut.py @@ -4,11 +4,12 @@ Test related to Cut """ -import pandas as pd import numpy as np -from cudf.core.cut import cut +import pandas as pd import pytest -from cudf.tests.utils import assert_eq + +from cudf.core.cut import cut +from cudf.testing._utils import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index a89b9b58e6e..2b32471c30c 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -20,8 +20,8 @@ import cudf from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_120 from cudf.core.column import column -from cudf.tests import utils -from cudf.tests.utils import ( +from cudf.testing import _utils as utils +from cudf.testing._utils import ( ALL_TYPES, DATETIME_TYPES, NUMERIC_TYPES, diff --git a/python/cudf/cudf/tests/test_dataframe_copy.py b/python/cudf/cudf/tests/test_dataframe_copy.py index 35788e660ea..5b258c760b3 100644 --- a/python/cudf/cudf/tests/test_dataframe_copy.py +++ b/python/cudf/cudf/tests/test_dataframe_copy.py @@ -7,7 +7,7 @@ from numba import cuda from cudf.core.dataframe import DataFrame -from cudf.tests.utils import ALL_TYPES, assert_eq +from cudf.testing._utils import ALL_TYPES, assert_eq """ DataFrame copy expectations diff --git a/python/cudf/cudf/tests/test_datasets.py b/python/cudf/cudf/tests/test_datasets.py index a603a6b4658..b7bc89f008d 100644 --- a/python/cudf/cudf/tests/test_datasets.py +++ b/python/cudf/cudf/tests/test_datasets.py @@ -1,7 +1,7 @@ import numpy as np import cudf as gd -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq def test_dataset_timeseries(): diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 647ff5250ba..653ee8389fa 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -14,7 +14,7 @@ import cudf from cudf.core import DataFrame, Series from cudf.core.index import DatetimeIndex -from cudf.tests.utils import ( +from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, assert_eq, diff --git a/python/cudf/cudf/tests/test_decimal.py 
b/python/cudf/cudf/tests/test_decimal.py index 4816094814a..d2de44b0c8f 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -10,7 +10,7 @@ import cudf from cudf.core.column import Decimal32Column, Decimal64Column, NumericalColumn from cudf.core.dtypes import Decimal64Dtype -from cudf.tests.utils import ( +from cudf.testing._utils import ( FLOAT_TYPES, INTEGER_TYPES, NUMERIC_TYPES, diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/test_dlpack.py index b8175d05137..4b2fca0d12d 100644 --- a/python/cudf/cudf/tests/test_dlpack.py +++ b/python/cudf/cudf/tests/test_dlpack.py @@ -8,7 +8,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq nelems = [0, 3, 10] dtype = [np.uint16, np.int32, np.float64] diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py index d01627309d6..e1d0c38c760 100644 --- a/python/cudf/cudf/tests/test_dropna.py +++ b/python/cudf/cudf/tests/test_dropna.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py index a5895caf49f..41d7f5d215e 100644 --- a/python/cudf/cudf/tests/test_dtypes.py +++ b/python/cudf/cudf/tests/test_dtypes.py @@ -14,7 +14,7 @@ ListDtype, StructDtype, ) -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq from cudf.utils.dtypes import np_to_pa_dtype diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index f721b7a28e5..f464ac1a6c2 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -9,7 +9,7 @@ import cudf from cudf import concat -from cudf.tests.utils import assert_eq, assert_exceptions_equal +from cudf.testing._utils import assert_eq, assert_exceptions_equal # TODO: PANDAS 1.0 support # Revisit drop_duplicates() tests to update parameters like ignore_index. 
diff --git a/python/cudf/cudf/tests/test_factorize.py b/python/cudf/cudf/tests/test_factorize.py index 48ffef4a11c..3df0031745e 100644 --- a/python/cudf/cudf/tests/test_factorize.py +++ b/python/cudf/cudf/tests/test_factorize.py @@ -7,7 +7,7 @@ import cudf from cudf.core import DataFrame, Index -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq @pytest.mark.parametrize("ncats,nelem", [(2, 2), (2, 10), (10, 100)]) diff --git a/python/cudf/cudf/tests/test_feather.py b/python/cudf/cudf/tests/test_feather.py index 525b88fc7ff..6c83ee3c458 100644 --- a/python/cudf/cudf/tests/test_feather.py +++ b/python/cudf/cudf/tests/test_feather.py @@ -10,7 +10,7 @@ import pytest import cudf -from cudf.tests.utils import NUMERIC_TYPES, assert_eq +from cudf.testing._utils import NUMERIC_TYPES, assert_eq if LooseVersion(pd.__version__) < LooseVersion("0.24"): try: diff --git a/python/cudf/cudf/tests/test_fill.py b/python/cudf/cudf/tests/test_fill.py index 83d15b36e64..efbe2834486 100644 --- a/python/cudf/cudf/tests/test_fill.py +++ b/python/cudf/cudf/tests/test_fill.py @@ -2,7 +2,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py index 5d287a57df8..99d79e41520 100644 --- a/python/cudf/cudf/tests/test_gcs.py +++ b/python/cudf/cudf/tests/test_gcs.py @@ -10,7 +10,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq gcsfs = pytest.importorskip("gcsfs") diff --git a/python/cudf/cudf/tests/test_gpu_arrow_parser.py b/python/cudf/cudf/tests/test_gpu_arrow_parser.py index e3c8e69695d..a088ae9f923 100644 --- a/python/cudf/cudf/tests/test_gpu_arrow_parser.py +++ b/python/cudf/cudf/tests/test_gpu_arrow_parser.py @@ -1,4 +1,5 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
+ import logging import numpy as np @@ -8,7 +9,7 @@ import cudf from cudf.comm.gpuarrow import GpuArrowReader -from cudf.tests.utils import INTEGER_TYPES +from cudf.testing._utils import INTEGER_TYPES def make_gpu_parse_arrow_data_batch(): diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index f346edb4304..e423a64fe4d 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -15,14 +15,14 @@ import cudf from cudf.core import DataFrame, Series from cudf.core._compat import PANDAS_GE_110 -from cudf.tests.dataset_generator import rand_dataframe -from cudf.tests.utils import ( +from cudf.testing._utils import ( DATETIME_TYPES, SIGNED_TYPES, TIMEDELTA_TYPES, assert_eq, assert_exceptions_equal, ) +from cudf.testing.dataset_generator import rand_dataframe _now = np.datetime64("now") _tomorrow = _now + np.timedelta64(1, "D") diff --git a/python/cudf/cudf/tests/test_hdf.py b/python/cudf/cudf/tests/test_hdf.py index f908d5f51f5..1bf91a52c2f 100644 --- a/python/cudf/cudf/tests/test_hdf.py +++ b/python/cudf/cudf/tests/test_hdf.py @@ -8,7 +8,7 @@ import pytest import cudf -from cudf.tests.utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq +from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq try: import tables # noqa F401 diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/test_hdfs.py index e3867c620fe..24554f113bb 100644 --- a/python/cudf/cudf/tests/test_hdfs.py +++ b/python/cudf/cudf/tests/test_hdfs.py @@ -11,7 +11,7 @@ from pyarrow import orc as orc import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq if not os.environ.get("RUN_HDFS_TESTS"): pytestmark = pytest.mark.skip("Env not configured to run HDFS tests") diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 23e04831176..f03454c479a 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -21,7 +21,7 @@ RangeIndex, as_index, ) -from cudf.tests.utils import ( +from cudf.testing._utils import ( FLOAT_TYPES, NUMERIC_TYPES, OTHER_TYPES, diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 1d34f7636da..58d39ff35a6 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -9,8 +9,12 @@ import cudf from cudf.core._compat import PANDAS_GE_110, PANDAS_GE_120 -from cudf.tests import utils -from cudf.tests.utils import INTEGER_TYPES, assert_eq, assert_exceptions_equal +from cudf.testing import _utils as utils +from cudf.testing._utils import ( + INTEGER_TYPES, + assert_eq, + assert_exceptions_equal, +) index_dtypes = INTEGER_TYPES diff --git a/python/cudf/cudf/tests/test_interval.py b/python/cudf/cudf/tests/test_interval.py index c7eafedd409..fc193441113 100644 --- a/python/cudf/cudf/tests/test_interval.py +++ b/python/cudf/cudf/tests/test_interval.py @@ -4,7 +4,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index 2d8f451abb9..7b56f864272 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -7,7 +7,7 @@ import cudf from cudf.core._compat import PANDAS_GE_120 from cudf.core.dtypes import CategoricalDtype, Decimal64Dtype -from cudf.tests.utils import ( +from cudf.testing._utils import ( 
INTEGER_TYPES, NUMERIC_TYPES, assert_eq, @@ -96,7 +96,7 @@ def assert_join_results_equal(expect, got, how, **kwargs): got.sort_values(got.columns.to_list()).reset_index(drop=True), **kwargs, ) - elif isinstance(expect, (pd.Index, cudf.BaseIndex)): + elif isinstance(expect, (pd.Index, cudf.Index)): return assert_eq(expect.sort_values(), got.sort_values(), **kwargs) else: raise ValueError(f"Not a join result: {type(expect).__name__}") @@ -1922,3 +1922,193 @@ def test_join_merge_invalid_keys(on, how): with pytest.raises(KeyError): pd_left.merge(pd_right, on=on) gd_left.merge(gd_right, on=on) + + +@pytest.mark.parametrize( + "str_data", + [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]], +) +@pytest.mark.parametrize("num_keys", [1, 2, 3]) +@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"]) +def test_string_join_key(str_data, num_keys, how): + other_data = [1, 2, 3, 4, 5][: len(str_data)] + + pdf = pd.DataFrame() + gdf = cudf.DataFrame() + for i in range(num_keys): + pdf[i] = pd.Series(str_data, dtype="str") + gdf[i] = cudf.Series(str_data, dtype="str") + pdf["a"] = other_data + gdf["a"] = other_data + + pdf2 = pdf.copy() + gdf2 = gdf.copy() + + expect = pdf.merge(pdf2, on=list(range(num_keys)), how=how) + got = gdf.merge(gdf2, on=list(range(num_keys)), how=how) + + if len(expect) == 0 and len(got) == 0: + expect = expect.reset_index(drop=True) + got = got[expect.columns] # reorder columns + + if how == "right": + got = got[expect.columns] # reorder columns + + assert_join_results_equal(expect, got, how=how) + + +@pytest.mark.parametrize( + "str_data_nulls", + [ + ["a", "b", "c"], + ["a", "b", "f", "g"], + ["f", "g", "h", "i", "j"], + ["f", "g", "h"], + [None, None, None, None, None], + [], + ], +) +def test_string_join_key_nulls(str_data_nulls): + str_data = ["a", "b", "c", "d", "e"] + other_data = [1, 2, 3, 4, 5] + + other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)] + + pdf = pd.DataFrame() + gdf = cudf.DataFrame() + pdf["key"] = pd.Series(str_data, dtype="str") + gdf["key"] = cudf.Series(str_data, dtype="str") + pdf["vals"] = other_data + gdf["vals"] = other_data + + pdf2 = pd.DataFrame() + gdf2 = cudf.DataFrame() + pdf2["key"] = pd.Series(str_data_nulls, dtype="str") + gdf2["key"] = cudf.Series(str_data_nulls, dtype="str") + pdf2["vals"] = pd.Series(other_data_nulls, dtype="int64") + gdf2["vals"] = cudf.Series(other_data_nulls, dtype="int64") + + expect = pdf.merge(pdf2, on="key", how="left") + got = gdf.merge(gdf2, on="key", how="left") + got["vals_y"] = got["vals_y"].fillna(-1) + + if len(expect) == 0 and len(got) == 0: + expect = expect.reset_index(drop=True) + got = got[expect.columns] + + expect["vals_y"] = expect["vals_y"].fillna(-1).astype("int64") + + assert_join_results_equal(expect, got, how="left") + + +@pytest.mark.parametrize( + "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]] +) +@pytest.mark.parametrize("num_cols", [1, 2, 3]) +@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"]) +def test_string_join_non_key(str_data, num_cols, how): + other_data = [1, 2, 3, 4, 5][: len(str_data)] + + pdf = pd.DataFrame() + gdf = cudf.DataFrame() + for i in range(num_cols): + pdf[i] = pd.Series(str_data, dtype="str") + gdf[i] = cudf.Series(str_data, dtype="str") + pdf["a"] = other_data + gdf["a"] = other_data + + pdf2 = pdf.copy() + gdf2 = gdf.copy() + + expect = pdf.merge(pdf2, on=["a"], how=how) + got = gdf.merge(gdf2, on=["a"], how=how) + + if len(expect) == 0 and len(got) == 0: + expect = 
expect.reset_index(drop=True) + got = got[expect.columns] + + if how == "right": + got = got[expect.columns] # reorder columns + + assert_join_results_equal(expect, got, how=how) + + +@pytest.mark.parametrize( + "str_data_nulls", + [ + ["a", "b", "c"], + ["a", "b", "f", "g"], + ["f", "g", "h", "i", "j"], + ["f", "g", "h"], + [None, None, None, None, None], + [], + ], +) +def test_string_join_non_key_nulls(str_data_nulls): + str_data = ["a", "b", "c", "d", "e"] + other_data = [1, 2, 3, 4, 5] + + other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)] + + pdf = pd.DataFrame() + gdf = cudf.DataFrame() + pdf["vals"] = pd.Series(str_data, dtype="str") + gdf["vals"] = cudf.Series(str_data, dtype="str") + pdf["key"] = other_data + gdf["key"] = other_data + + pdf2 = pd.DataFrame() + gdf2 = cudf.DataFrame() + pdf2["vals"] = pd.Series(str_data_nulls, dtype="str") + gdf2["vals"] = cudf.Series(str_data_nulls, dtype="str") + pdf2["key"] = pd.Series(other_data_nulls, dtype="int64") + gdf2["key"] = cudf.Series(other_data_nulls, dtype="int64") + + expect = pdf.merge(pdf2, on="key", how="left") + got = gdf.merge(gdf2, on="key", how="left") + + if len(expect) == 0 and len(got) == 0: + expect = expect.reset_index(drop=True) + got = got[expect.columns] + + assert_join_results_equal(expect, got, how="left") + + +def test_string_join_values_nulls(): + left_dict = [ + {"b": "MATCH 1", "a": 1.0}, + {"b": "MATCH 1", "a": 1.0}, + {"b": "LEFT NO MATCH 1", "a": -1.0}, + {"b": "MATCH 2", "a": 2.0}, + {"b": "MATCH 2", "a": 2.0}, + {"b": "MATCH 1", "a": 1.0}, + {"b": "MATCH 1", "a": 1.0}, + {"b": "MATCH 2", "a": 2.0}, + {"b": "MATCH 2", "a": 2.0}, + {"b": "LEFT NO MATCH 2", "a": -2.0}, + {"b": "MATCH 3", "a": 3.0}, + {"b": "MATCH 3", "a": 3.0}, + ] + + right_dict = [ + {"b": "RIGHT NO MATCH 1", "c": -1.0}, + {"b": "MATCH 3", "c": 3.0}, + {"b": "MATCH 2", "c": 2.0}, + {"b": "RIGHT NO MATCH 2", "c": -2.0}, + {"b": "RIGHT NO MATCH 3", "c": -3.0}, + {"b": "MATCH 1", "c": 1.0}, + ] + + left_pdf = pd.DataFrame(left_dict) + right_pdf = pd.DataFrame(right_dict) + + left_gdf = cudf.DataFrame.from_pandas(left_pdf) + right_gdf = cudf.DataFrame.from_pandas(right_pdf) + + expect = left_pdf.merge(right_pdf, how="left", on="b") + got = left_gdf.merge(right_gdf, how="left", on="b") + + expect = expect.sort_values(by=["a", "b", "c"]).reset_index(drop=True) + got = got.sort_values(by=["a", "b", "c"]).reset_index(drop=True) + + assert_join_results_equal(expect, got, how="left") diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index 2da2cea164f..0b138f446ae 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -12,7 +12,7 @@ import cudf from cudf.core._compat import PANDAS_GE_110 -from cudf.tests.utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq +from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq def make_numeric_dataframe(nrows, dtype): diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 42541f1e8b1..a6a9ba97ef5 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -9,7 +9,7 @@ import cudf from cudf import NA from cudf._lib.copying import get_element -from cudf.tests.utils import ( +from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES, diff --git a/python/cudf/cudf/tests/test_monotonic.py b/python/cudf/cudf/tests/test_monotonic.py index b26887ad6ae..e9c828ec0f5 100644 --- a/python/cudf/cudf/tests/test_monotonic.py +++ 
b/python/cudf/cudf/tests/test_monotonic.py @@ -16,7 +16,7 @@ RangeIndex, StringIndex, ) -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq @pytest.mark.parametrize("testrange", [(10, 20, 1), (0, -10, -1), (5, 5, 1)]) diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py index bd78612d6c7..c8e5a9f071b 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -15,7 +15,7 @@ import cudf from cudf.core.column import as_column from cudf.core.index import as_index -from cudf.tests.utils import assert_eq, assert_exceptions_equal, assert_neq +from cudf.testing._utils import assert_eq, assert_exceptions_equal, assert_neq def test_multiindex_levels_codes_validation(): diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py index 12b17447268..7a766a49a62 100644 --- a/python/cudf/cudf/tests/test_numerical.py +++ b/python/cudf/cudf/tests/test_numerical.py @@ -6,7 +6,7 @@ import cudf from cudf.core._compat import PANDAS_GE_100 -from cudf.tests.utils import NUMERIC_TYPES, assert_eq +from cudf.testing._utils import NUMERIC_TYPES, assert_eq from cudf.utils.dtypes import cudf_dtypes_to_pandas_dtypes diff --git a/python/cudf/cudf/tests/test_numpy_interop.py b/python/cudf/cudf/tests/test_numpy_interop.py index 521840f8a8a..e5efe2f027d 100644 --- a/python/cudf/cudf/tests/test_numpy_interop.py +++ b/python/cudf/cudf/tests/test_numpy_interop.py @@ -2,7 +2,7 @@ import pytest from cudf.core import DataFrame, Series -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq def test_to_records_noindex(): diff --git a/python/cudf/cudf/tests/test_onehot.py b/python/cudf/cudf/tests/test_onehot.py index 61195faa4d0..bbec4594e15 100644 --- a/python/cudf/cudf/tests/test_onehot.py +++ b/python/cudf/cudf/tests/test_onehot.py @@ -8,7 +8,7 @@ import cudf from cudf.core import DataFrame, GenericIndex, Series -from cudf.tests import utils +from cudf.testing import _utils as utils def test_onehot_simple(): diff --git a/python/cudf/cudf/tests/test_ops.py b/python/cudf/cudf/tests/test_ops.py index 8cdef19d9ba..ac3f784ecd4 100644 --- a/python/cudf/cudf/tests/test_ops.py +++ b/python/cudf/cudf/tests/test_ops.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq, gen_rand +from cudf.testing._utils import assert_eq, gen_rand def test_sqrt_float(): diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index bd8131d4673..213b7bf39d7 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -15,7 +15,11 @@ import cudf from cudf.core.dtypes import Decimal64Dtype from cudf.io.orc import ORCWriter -from cudf.tests.utils import assert_eq, gen_rand_series, supported_numpy_dtypes +from cudf.testing._utils import ( + assert_eq, + gen_rand_series, + supported_numpy_dtypes, +) @pytest.fixture(scope="module") diff --git a/python/cudf/cudf/tests/test_pandas_interop.py b/python/cudf/cudf/tests/test_pandas_interop.py index 24c60f12a2f..a8a45fc3c28 100644 --- a/python/cudf/cudf/tests/test_pandas_interop.py +++ b/python/cudf/cudf/tests/test_pandas_interop.py @@ -5,7 +5,7 @@ import cudf from cudf.core import DataFrame -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq def test_to_pandas(): diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 54bf17e4c2b..2d0a4006f44 100644 --- 
a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -18,8 +18,8 @@ import cudf from cudf.io.parquet import ParquetWriter, merge_parquet_filemetadata -from cudf.tests import dataset_generator as dg -from cudf.tests.utils import ( +from cudf.testing import dataset_generator as dg +from cudf.testing._utils import ( TIMEDELTA_TYPES, assert_eq, assert_exceptions_equal, diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py index ca819c7f59b..48a25fcfadb 100644 --- a/python/cudf/cudf/tests/test_pickling.py +++ b/python/cudf/cudf/tests/test_pickling.py @@ -8,7 +8,7 @@ from cudf.core import DataFrame, GenericIndex, Series from cudf.core.buffer import Buffer -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq if sys.version_info < (3, 8): try: diff --git a/python/cudf/cudf/tests/test_quantiles.py b/python/cudf/cudf/tests/test_quantiles.py index 49a2603b9a3..4055485c49a 100644 --- a/python/cudf/cudf/tests/test_quantiles.py +++ b/python/cudf/cudf/tests/test_quantiles.py @@ -1,7 +1,7 @@ import pandas as pd import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq def test_single_q(): diff --git a/python/cudf/cudf/tests/test_query.py b/python/cudf/cudf/tests/test_query.py index b6915a63947..8dc5df2dd7c 100644 --- a/python/cudf/cudf/tests/test_query.py +++ b/python/cudf/cudf/tests/test_query.py @@ -12,7 +12,7 @@ import cudf from cudf.core import DataFrame -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq from cudf.utils import queryutils _params_query_parser = [] diff --git a/python/cudf/cudf/tests/test_query_mask.py b/python/cudf/cudf/tests/test_query_mask.py index 35479f8308c..ab1c085c6c0 100644 --- a/python/cudf/cudf/tests/test_query_mask.py +++ b/python/cudf/cudf/tests/test_query_mask.py @@ -3,7 +3,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq _data = [ {"a": [0, 1.0, 2.0, None, np.nan, None, 3, 5]}, diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py index c86b2c61aa5..3c98496def3 100644 --- a/python/cudf/cudf/tests/test_rank.py +++ b/python/cudf/cudf/tests/test_rank.py @@ -7,7 +7,7 @@ import pytest from cudf.core import DataFrame -from cudf.tests.utils import assert_eq, assert_exceptions_equal +from cudf.testing._utils import assert_eq, assert_exceptions_equal @pytest.fixture diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py index 0fa09bc5df7..7cbc56f943c 100644 --- a/python/cudf/cudf/tests/test_reductions.py +++ b/python/cudf/cudf/tests/test_reductions.py @@ -13,8 +13,8 @@ import cudf from cudf.core import Series from cudf.core.dtypes import Decimal64Dtype -from cudf.tests import utils -from cudf.tests.utils import NUMERIC_TYPES, assert_eq, gen_rand +from cudf.testing import _utils as utils +from cudf.testing._utils import NUMERIC_TYPES, assert_eq, gen_rand params_dtype = NUMERIC_TYPES diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py index 6dca539b8d5..b59428779c1 100644 --- a/python/cudf/cudf/tests/test_replace.py +++ b/python/cudf/cudf/tests/test_replace.py @@ -1,15 +1,15 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. 
import re +from decimal import Decimal import numpy as np import pandas as pd import pytest -from decimal import Decimal import cudf from cudf.core.dtypes import Decimal64Dtype -from cudf.tests.utils import ( +from cudf.testing._utils import ( INTEGER_TYPES, NUMERIC_TYPES, assert_eq, diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 093be41275a..4906349ecba 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -10,7 +10,7 @@ import cudf from cudf.core._compat import PANDAS_GE_110 -from cudf.tests import utils +from cudf.testing import _utils as utils from cudf.utils.dtypes import cudf_dtypes_to_pandas_dtypes repr_categories = utils.NUMERIC_TYPES + ["str", "category", "datetime64[ns]"] diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index b030924779d..0c4313eb47c 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -9,7 +9,7 @@ import cudf from cudf import melt as cudf_melt from cudf.core._compat import PANDAS_GE_120 -from cudf.tests.utils import ( +from cudf.testing._utils import ( ALL_TYPES, DATETIME_TYPES, NUMERIC_TYPES, diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py index fcc5591adda..07e7f43c992 100644 --- a/python/cudf/cudf/tests/test_rolling.py +++ b/python/cudf/cudf/tests/test_rolling.py @@ -8,7 +8,7 @@ import cudf from cudf.core._compat import PANDAS_GE_110 -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py index 2eefcfef7d2..133597b8f19 100644 --- a/python/cudf/cudf/tests/test_s3.py +++ b/python/cudf/cudf/tests/test_s3.py @@ -14,7 +14,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq moto = pytest.importorskip("moto", minversion="1.3.14") boto3 = pytest.importorskip("boto3") diff --git a/python/cudf/cudf/tests/test_scalar.py b/python/cudf/cudf/tests/test_scalar.py index 01e6b52f526..605005f41fc 100644 --- a/python/cudf/cudf/tests/test_scalar.py +++ b/python/cudf/cudf/tests/test_scalar.py @@ -11,7 +11,7 @@ import cudf from cudf import Scalar as pycudf_scalar from cudf._lib.copying import get_element -from cudf.tests.utils import ( +from cudf.testing._utils import ( ALL_TYPES, DATETIME_TYPES, NUMERIC_TYPES, diff --git a/python/cudf/cudf/tests/test_scan.py b/python/cudf/cudf/tests/test_scan.py index f7e8c5a8563..0ef7b89a606 100644 --- a/python/cudf/cudf/tests/test_scan.py +++ b/python/cudf/cudf/tests/test_scan.py @@ -5,8 +5,13 @@ import pytest import cudf -from cudf.tests.utils import INTEGER_TYPES, NUMERIC_TYPES, assert_eq, gen_rand from cudf.core.dtypes import Decimal64Dtype +from cudf.testing._utils import ( + INTEGER_TYPES, + NUMERIC_TYPES, + assert_eq, + gen_rand, +) params_sizes = [0, 1, 2, 5] diff --git a/python/cudf/cudf/tests/test_search.py b/python/cudf/cudf/tests/test_search.py index 4c42e2cb50f..c16c6486cd4 100644 --- a/python/cudf/cudf/tests/test_search.py +++ b/python/cudf/cudf/tests/test_search.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq, gen_rand, random_bitmask +from cudf.testing._utils import assert_eq, gen_rand, random_bitmask @pytest.mark.parametrize("side", ["left", "right"]) diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py index 4be5adf84de..b436825cf69 
100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/test_serialize.py @@ -8,8 +8,8 @@ import pytest import cudf -from cudf.tests import utils -from cudf.tests.utils import assert_eq +from cudf.testing import _utils as utils +from cudf.testing._utils import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index d400a9ce8a9..f3da4275aea 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -9,7 +9,7 @@ import pytest import cudf -from cudf.tests.utils import ( +from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES, diff --git a/python/cudf/cudf/tests/test_seriesmap.py b/python/cudf/cudf/tests/test_seriesmap.py index 324074b6021..d4ef3ba235d 100644 --- a/python/cudf/cudf/tests/test_seriesmap.py +++ b/python/cudf/cudf/tests/test_seriesmap.py @@ -4,12 +4,12 @@ from math import floor import numpy as np -import cudf import pandas as pd import pytest +import cudf from cudf import Series -from cudf.tests.utils import assert_eq, assert_exceptions_equal +from cudf.testing._utils import assert_eq, assert_exceptions_equal def test_series_map_basic(): diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 28cb2568908..c7429f3c246 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -6,7 +6,7 @@ import cudf from cudf.core._compat import PANDAS_GE_120, PANDAS_LE_122 -from cudf.tests.utils import assert_eq, assert_exceptions_equal +from cudf.testing._utils import assert_eq, assert_exceptions_equal @pytest.mark.parametrize("df", [pd.DataFrame({"a": [1, 2, 3]})]) diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index b90aebc33dc..95942045654 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -9,7 +9,7 @@ from cudf.core import DataFrame, Series from cudf.core.column import NumericalColumn -from cudf.tests.utils import ( +from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, assert_eq, diff --git a/python/cudf/cudf/tests/test_sparse_df.py b/python/cudf/cudf/tests/test_sparse_df.py index 4551f48845f..50c8f3f41a8 100644 --- a/python/cudf/cudf/tests/test_sparse_df.py +++ b/python/cudf/cudf/tests/test_sparse_df.py @@ -8,7 +8,7 @@ from cudf.comm.gpuarrow import GpuArrowReader from cudf.core import DataFrame, Series -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq def read_data(): diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index 4e07c974280..d4e944848c9 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -9,7 +9,7 @@ import cudf from cudf.datasets import randomdata -from cudf.tests.utils import assert_eq, assert_exceptions_equal +from cudf.testing._utils import assert_eq, assert_exceptions_equal params_dtypes = [np.int32, np.uint32, np.float32, np.float64] methods = ["min", "max", "sum", "mean", "var", "std"] diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 58b3996ab5c..3c153a16a13 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -18,8 +18,7 @@ from cudf.core._compat import PANDAS_GE_110 from cudf.core.column.string import StringColumn from cudf.core.index import StringIndex, as_index -from cudf.tests.test_joining import 
assert_join_results_equal -from cudf.tests.utils import ( +from cudf.testing._utils import ( DATETIME_TYPES, NUMERIC_TYPES, assert_eq, @@ -919,196 +918,6 @@ def test_string_split(data, pat, n, expand): assert_eq(expect, got) -@pytest.mark.parametrize( - "str_data", - [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]], -) -@pytest.mark.parametrize("num_keys", [1, 2, 3]) -@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"]) -def test_string_join_key(str_data, num_keys, how): - other_data = [1, 2, 3, 4, 5][: len(str_data)] - - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - for i in range(num_keys): - pdf[i] = pd.Series(str_data, dtype="str") - gdf[i] = cudf.Series(str_data, dtype="str") - pdf["a"] = other_data - gdf["a"] = other_data - - pdf2 = pdf.copy() - gdf2 = gdf.copy() - - expect = pdf.merge(pdf2, on=list(range(num_keys)), how=how) - got = gdf.merge(gdf2, on=list(range(num_keys)), how=how) - - if len(expect) == 0 and len(got) == 0: - expect = expect.reset_index(drop=True) - got = got[expect.columns] # reorder columns - - if how == "right": - got = got[expect.columns] # reorder columns - - assert_join_results_equal(expect, got, how=how) - - -@pytest.mark.parametrize( - "str_data_nulls", - [ - ["a", "b", "c"], - ["a", "b", "f", "g"], - ["f", "g", "h", "i", "j"], - ["f", "g", "h"], - [None, None, None, None, None], - [], - ], -) -def test_string_join_key_nulls(str_data_nulls): - str_data = ["a", "b", "c", "d", "e"] - other_data = [1, 2, 3, 4, 5] - - other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)] - - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - pdf["key"] = pd.Series(str_data, dtype="str") - gdf["key"] = cudf.Series(str_data, dtype="str") - pdf["vals"] = other_data - gdf["vals"] = other_data - - pdf2 = pd.DataFrame() - gdf2 = cudf.DataFrame() - pdf2["key"] = pd.Series(str_data_nulls, dtype="str") - gdf2["key"] = cudf.Series(str_data_nulls, dtype="str") - pdf2["vals"] = pd.Series(other_data_nulls, dtype="int64") - gdf2["vals"] = cudf.Series(other_data_nulls, dtype="int64") - - expect = pdf.merge(pdf2, on="key", how="left") - got = gdf.merge(gdf2, on="key", how="left") - got["vals_y"] = got["vals_y"].fillna(-1) - - if len(expect) == 0 and len(got) == 0: - expect = expect.reset_index(drop=True) - got = got[expect.columns] - - expect["vals_y"] = expect["vals_y"].fillna(-1).astype("int64") - - assert_join_results_equal(expect, got, how="left") - - -@pytest.mark.parametrize( - "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]] -) -@pytest.mark.parametrize("num_cols", [1, 2, 3]) -@pytest.mark.parametrize("how", ["left", "right", "inner", "outer"]) -def test_string_join_non_key(str_data, num_cols, how): - other_data = [1, 2, 3, 4, 5][: len(str_data)] - - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - for i in range(num_cols): - pdf[i] = pd.Series(str_data, dtype="str") - gdf[i] = cudf.Series(str_data, dtype="str") - pdf["a"] = other_data - gdf["a"] = other_data - - pdf2 = pdf.copy() - gdf2 = gdf.copy() - - expect = pdf.merge(pdf2, on=["a"], how=how) - got = gdf.merge(gdf2, on=["a"], how=how) - - if len(expect) == 0 and len(got) == 0: - expect = expect.reset_index(drop=True) - got = got[expect.columns] - - if how == "right": - got = got[expect.columns] # reorder columns - - assert_join_results_equal(expect, got, how=how) - - -@pytest.mark.parametrize( - "str_data_nulls", - [ - ["a", "b", "c"], - ["a", "b", "f", "g"], - ["f", "g", "h", "i", "j"], - ["f", "g", "h"], - [None, None, None, None, None], - [], - ], -) -def 
test_string_join_non_key_nulls(str_data_nulls): - str_data = ["a", "b", "c", "d", "e"] - other_data = [1, 2, 3, 4, 5] - - other_data_nulls = [6, 7, 8, 9, 10][: len(str_data_nulls)] - - pdf = pd.DataFrame() - gdf = cudf.DataFrame() - pdf["vals"] = pd.Series(str_data, dtype="str") - gdf["vals"] = cudf.Series(str_data, dtype="str") - pdf["key"] = other_data - gdf["key"] = other_data - - pdf2 = pd.DataFrame() - gdf2 = cudf.DataFrame() - pdf2["vals"] = pd.Series(str_data_nulls, dtype="str") - gdf2["vals"] = cudf.Series(str_data_nulls, dtype="str") - pdf2["key"] = pd.Series(other_data_nulls, dtype="int64") - gdf2["key"] = cudf.Series(other_data_nulls, dtype="int64") - - expect = pdf.merge(pdf2, on="key", how="left") - got = gdf.merge(gdf2, on="key", how="left") - - if len(expect) == 0 and len(got) == 0: - expect = expect.reset_index(drop=True) - got = got[expect.columns] - - assert_join_results_equal(expect, got, how="left") - - -def test_string_join_values_nulls(): - left_dict = [ - {"b": "MATCH 1", "a": 1.0}, - {"b": "MATCH 1", "a": 1.0}, - {"b": "LEFT NO MATCH 1", "a": -1.0}, - {"b": "MATCH 2", "a": 2.0}, - {"b": "MATCH 2", "a": 2.0}, - {"b": "MATCH 1", "a": 1.0}, - {"b": "MATCH 1", "a": 1.0}, - {"b": "MATCH 2", "a": 2.0}, - {"b": "MATCH 2", "a": 2.0}, - {"b": "LEFT NO MATCH 2", "a": -2.0}, - {"b": "MATCH 3", "a": 3.0}, - {"b": "MATCH 3", "a": 3.0}, - ] - - right_dict = [ - {"b": "RIGHT NO MATCH 1", "c": -1.0}, - {"b": "MATCH 3", "c": 3.0}, - {"b": "MATCH 2", "c": 2.0}, - {"b": "RIGHT NO MATCH 2", "c": -2.0}, - {"b": "RIGHT NO MATCH 3", "c": -3.0}, - {"b": "MATCH 1", "c": 1.0}, - ] - - left_pdf = pd.DataFrame(left_dict) - right_pdf = pd.DataFrame(right_dict) - - left_gdf = cudf.DataFrame.from_pandas(left_pdf) - right_gdf = cudf.DataFrame.from_pandas(right_pdf) - - expect = left_pdf.merge(right_pdf, how="left", on="b") - got = left_gdf.merge(right_gdf, how="left", on="b") - - expect = expect.sort_values(by=["a", "b", "c"]).reset_index(drop=True) - got = got.sort_values(by=["a", "b", "c"]).reset_index(drop=True) - - assert_join_results_equal(expect, got, how="left") - - @pytest.mark.parametrize( "str_data", [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]] ) diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py index 21542a6c415..da2af1469c0 100644 --- a/python/cudf/cudf/tests/test_struct.py +++ b/python/cudf/cudf/tests/test_struct.py @@ -6,7 +6,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_testing.py b/python/cudf/cudf/tests/test_testing.py index eee7078433d..b2e5ea70ddc 100644 --- a/python/cudf/cudf/tests/test_testing.py +++ b/python/cudf/cudf/tests/test_testing.py @@ -10,7 +10,7 @@ assert_index_equal, assert_series_equal, ) -from cudf.tests.utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq +from cudf.testing._utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq @pytest.mark.parametrize("rdata", [[1, 2, 5], [1, 2, 6], [1, 2, 5, 6]]) diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py index 072fc23abba..6c3fdd4640a 100644 --- a/python/cudf/cudf/tests/test_text.py +++ b/python/cudf/cudf/tests/test_text.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq def test_tokenize(): diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py index 3efc30af01e..a65fdeeb0dd 
100644 --- a/python/cudf/cudf/tests/test_timedelta.py +++ b/python/cudf/cudf/tests/test_timedelta.py @@ -11,8 +11,8 @@ import cudf from cudf.core._compat import PANDAS_GE_120 -from cudf.tests import utils as utils -from cudf.tests.utils import assert_eq, assert_exceptions_equal +from cudf.testing import _utils as utils +from cudf.testing._utils import assert_eq, assert_exceptions_equal _TIMEDELTA_DATA = [ [1000000, 200000, 3000000], diff --git a/python/cudf/cudf/tests/test_transform.py b/python/cudf/cudf/tests/test_transform.py index ed409de196e..582d5a43edf 100644 --- a/python/cudf/cudf/tests/test_transform.py +++ b/python/cudf/cudf/tests/test_transform.py @@ -6,7 +6,7 @@ import pytest from cudf.core import Series -from cudf.tests.utils import NUMERIC_TYPES +from cudf.testing._utils import NUMERIC_TYPES supported_types = NUMERIC_TYPES diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py index f132271cfd8..2089f764724 100644 --- a/python/cudf/cudf/tests/test_unaops.py +++ b/python/cudf/cudf/tests/test_unaops.py @@ -10,7 +10,7 @@ import cudf from cudf.core import Series -from cudf.tests import utils +from cudf.testing import _utils as utils _unaops = [operator.abs, operator.invert, operator.neg, np.ceil, np.floor] diff --git a/python/custreamz/custreamz/tests/test_kafka.py b/python/custreamz/custreamz/tests/test_kafka.py index 059655d4ca0..d29ebf8db8b 100644 --- a/python/custreamz/custreamz/tests/test_kafka.py +++ b/python/custreamz/custreamz/tests/test_kafka.py @@ -2,7 +2,7 @@ import confluent_kafka as ck import pytest -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq @pytest.mark.parametrize("commit_offset", [-1, 0, 1, 1000]) diff --git a/python/dask_cudf/dask_cudf/tests/test_accessor.py b/python/dask_cudf/dask_cudf/tests/test_accessor.py index 48e0d022a52..94e0169bdf9 100644 --- a/python/dask_cudf/dask_cudf/tests/test_accessor.py +++ b/python/dask_cudf/dask_cudf/tests/test_accessor.py @@ -5,11 +5,11 @@ from dask import dataframe as dd -from cudf import DataFrame, Series -from cudf.tests.utils import assert_eq, does_not_raise - import dask_cudf as dgd +from cudf import DataFrame, Series +from cudf.testing._utils import assert_eq, does_not_raise + ############################################################################# # Datetime Accessor # ############################################################################# diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py index 2f73534b45a..cf5203a22e5 100644 --- a/python/dask_cudf/dask_cudf/tests/test_core.py +++ b/python/dask_cudf/dask_cudf/tests/test_core.py @@ -706,7 +706,7 @@ def test_dataframe_set_index(): pddf = dd.from_pandas(pdf, npartitions=4) pddf = pddf.set_index("str") - from cudf.tests.utils import assert_eq + from cudf.testing._utils import assert_eq assert_eq(ddf.compute(), pddf.compute()) diff --git a/python/dask_cudf/dask_cudf/tests/test_distributed.py b/python/dask_cudf/dask_cudf/tests/test_distributed.py index 85354704902..876a66f78d7 100644 --- a/python/dask_cudf/dask_cudf/tests/test_distributed.py +++ b/python/dask_cudf/dask_cudf/tests/test_distributed.py @@ -7,7 +7,7 @@ from distributed.utils_test import loop # noqa: F401 import cudf -from cudf.tests.utils import assert_eq +from cudf.testing._utils import assert_eq import dask_cudf From c8e8526931443eb6121059c2a239d25b69741d37 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 30 Jun 2021 08:25:10 -0500 Subject: [PATCH 08/54] Use 
absolute imports in `cudf` (#8631) This PR removes usages of relative imports in `cudf` and replaces them with absolute imports. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - https://github.com/brandon-b-miller URL: https://github.com/rapidsai/cudf/pull/8631 --- python/cudf/cudf/core/column/column.py | 37 +++++++++++------------ python/cudf/cudf/core/column/decimal.py | 2 +- python/cudf/cudf/core/column/numerical.py | 2 +- python/cudf/cudf/core/column/string.py | 3 +- python/cudf/cudf/core/window/rolling.py | 3 +- 5 files changed, 22 insertions(+), 25 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 111b96c6da7..b03465bf8d0 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -37,25 +37,7 @@ from cudf._lib.stream_compaction import distinct_count as cpp_distinct_count from cudf._lib.transform import bools_to_mask from cudf._typing import BinaryOperand, ColumnLike, Dtype, ScalarLike -from cudf.core.abc import Serializable -from cudf.core.buffer import Buffer -from cudf.core.dtypes import ( - CategoricalDtype, - IntervalDtype, - ListDtype, - StructDtype, -) -from cudf.utils import ioutils, utils -from cudf.utils.dtypes import ( - check_cast_unsupported_dtype, - cudf_dtype_from_pa_type, - get_time_unit, - min_unsigned_type, - np_to_pa_dtype, -) -from cudf.utils.utils import mask_dtype - -from ...api.types import ( +from cudf.api.types import ( _is_non_decimal_numeric_dtype, _is_scalar_or_zero_d_array, infer_dtype, @@ -73,6 +55,23 @@ is_struct_dtype, pandas_dtype, ) +from cudf.core.abc import Serializable +from cudf.core.buffer import Buffer +from cudf.core.dtypes import ( + CategoricalDtype, + IntervalDtype, + ListDtype, + StructDtype, +) +from cudf.utils import ioutils, utils +from cudf.utils.dtypes import ( + check_cast_unsupported_dtype, + cudf_dtype_from_pa_type, + get_time_unit, + min_unsigned_type, + np_to_pa_dtype, +) +from cudf.utils.utils import mask_dtype T = TypeVar("T", bound="ColumnBase") diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index acb8c02a220..c667799c7c2 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -15,13 +15,13 @@ from_decimal as cpp_from_decimal, ) from cudf._typing import Dtype +from cudf.api.types import is_integer_dtype from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype from cudf.utils.dtypes import is_scalar from cudf.utils.utils import pa_mask_buffer_to_mask -from ...api.types import is_integer_dtype from .numerical_base import NumericalBaseColumn diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index cee9b693bdf..267f6082f96 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -12,6 +12,7 @@ import cudf from cudf import _lib as libcudf from cudf._typing import BinaryOperand, ColumnLike, Dtype, DtypeObj, ScalarLike +from cudf.api.types import is_integer_dtype, is_number from cudf.core.buffer import Buffer from cudf.core.column import ( ColumnBase, @@ -31,7 +32,6 @@ to_cudf_compatible_scalar, ) -from ...api.types import is_integer_dtype, is_number from .numerical_base import NumericalBaseColumn diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index af5b77124a1..a6a9de2e77b 100644 
--- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -158,6 +158,7 @@ ) from cudf._lib.strings.wrap import wrap as cpp_wrap from cudf._typing import ColumnLike, Dtype, ScalarLike +from cudf.api.types import is_integer from cudf.core.buffer import Buffer from cudf.core.column import column, datetime from cudf.core.column.methods import ColumnMethodsMixin @@ -170,8 +171,6 @@ is_string_dtype, ) -from ...api.types import is_integer - _str_to_numeric_typecast_functions = { np.dtype("int8"): str_cast.stoi8, np.dtype("int16"): str_cast.stoi16, diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index 1a17d941da7..d9a2fd89165 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -7,13 +7,12 @@ import cudf from cudf import _lib as libcudf +from cudf.api.types import is_integer, is_number from cudf.core import column from cudf.core.column.column import as_column from cudf.utils import cudautils from cudf.utils.utils import GetAttrGetItemMixin -from ...api.types import is_integer, is_number - class Rolling(GetAttrGetItemMixin): """ From 0e2a448e15eec4b6d5d20f469a6f79be2f31b923 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 30 Jun 2021 06:26:51 -0700 Subject: [PATCH 09/54] Fix bug where columns are only initialized once when `columns` and `index` are specified in dataframe ctor (#8628) Closes #8621 This PR fixes a bug where, when a dataframe is initialized with both the `columns` and `index` parameters, the empty column is only constructed once, leaving all columns pointing to the same object. Authors: - Michael Wang (https://github.com/isVoid) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/8628 --- python/cudf/cudf/core/dataframe.py | 10 +++++----- python/cudf/cudf/tests/test_dataframe.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 0901334396a..c02bf3d11a4 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -239,12 +239,12 @@ def __init__(self, data=None, index=None, columns=None, dtype=None): self._index = as_index(index) if columns is not None: self._data = ColumnAccessor( - dict.fromkeys( - columns, - column.column_empty( len(self), dtype="object", masked=True - ), - ) + { + k: column.column_empty( len(self), dtype="object", masked=True + ) + for k in columns + } ) elif hasattr(data, "__cuda_array_interface__"): arr_interface = data.__cuda_array_interface__ diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 2b32471c30c..951062f2b61 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -8164,6 +8164,16 @@ def assert_local_eq(actual, df, expected, host_columns): assert_local_eq(actual, df, expected, host_columns) +def test_dataframe_constructor_column_index_only(): + columns = ["a", "b", "c"] + index = ["r1", "r2", "r3"] + + gdf = cudf.DataFrame(index=index, columns=columns) + assert not id(gdf["a"]._column) == id(gdf["b"]._column) and not id( gdf["b"]._column ) == id(gdf["c"]._column) + + @pytest.mark.parametrize( "data", [ From 7f2cc4c21fc621cfc3f02a78fc16a24441d61652 Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Wed, 30 Jun 2021 09:28:08 -0400 Subject: [PATCH 10/54] Add dayofyear and day_of_year to Series, DatetimeColumn, and DatetimeIndex (#8626) MIME-Version: 1.0 Content-Type: text/plain;
charset=UTF-8 Content-Transfer-Encoding: 8bit This PR: - [x] Adds `[Series/DatetimeColumn/DatetimeIndex].dt.dayofyear` and `day_of_year` - [x] Updates the existing pytests to include dayofyear/day_of_year - [x] Includes docstrings in new methods ```python import cudf import pandas as pd s = pd.Series(["2021-01-08", "2021-06-28", "2020-03-09", "2021-06-30"], dtype="datetime64[ms]") s = s.repeat(25000) # 100K elements gs = cudf.from_pandas(s) %timeit gs.dt.dayofyear %timeit s.dt.dayofyear 39 µs ± 169 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each) 6.49 ms ± 39.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) ``` This closes #8625 Authors: - Nick Becker (https://github.com/beckernick) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/8626 --- python/cudf/cudf/_lib/cpp/datetime.pxd | 1 + python/cudf/cudf/_lib/datetime.pyx | 2 + python/cudf/cudf/core/column/datetime.py | 8 +++ python/cudf/cudf/core/index.py | 44 ++++++++++++++ python/cudf/cudf/core/series.py | 74 ++++++++++++++++++++++++ python/cudf/cudf/tests/test_datetime.py | 2 + 6 files changed, 131 insertions(+) diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd index 20fdd2e842a..f662bfb93f2 100644 --- a/python/cudf/cudf/_lib/cpp/datetime.pxd +++ b/python/cudf/cudf/_lib/cpp/datetime.pxd @@ -16,3 +16,4 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: const column_view& timestamps, const column_view& months ) except + + cdef unique_ptr[column] day_of_year(const column_view& column) except + diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index 3e40cb62f9c..09be55abe9d 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -46,6 +46,8 @@ def extract_datetime_component(Column col, object field): c_result = move(libcudf_datetime.extract_minute(col_view)) elif field == "second": c_result = move(libcudf_datetime.extract_second(col_view)) + elif field == "day_of_year": + c_result = move(libcudf_datetime.day_of_year(col_view)) else: raise ValueError(f"Invalid datetime field: '{field}'") diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 150ce2c48ec..f3d1880b290 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -133,6 +133,14 @@ def second(self) -> ColumnBase: def weekday(self) -> ColumnBase: return self.get_dt_field("weekday") + @property + def dayofyear(self) -> ColumnBase: + return self.get_dt_field("day_of_year") + + @property + def day_of_year(self) -> ColumnBase: + return self.get_dt_field("day_of_year") + def to_pandas( self, index: pd.Index = None, nullable: bool = False, **kwargs ) -> "cudf.Series": diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index c89718e8f07..13ea1755803 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2299,6 +2299,50 @@ def dayofweek(self): """ return self._get_dt_field("weekday") + @property + def dayofyear(self): + """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_index = cudf.Index(pd.date_range("2016-12-31", + ...
"2017-01-08", freq="D")) + >>> datetime_index + DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03', + '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', + '2017-01-08'], + dtype='datetime64[ns]') + >>> datetime_index.dayofyear + Int16Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16') + """ + return self._get_dt_field("day_of_year") + + @property + def day_of_year(self): + """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_index = cudf.Index(pd.date_range("2016-12-31", + ... "2017-01-08", freq="D")) + >>> datetime_index + DatetimeIndex(['2016-12-31', '2017-01-01', '2017-01-02', '2017-01-03', + '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07', + '2017-01-08'], + dtype='datetime64[ns]') + >>> datetime_index.day_of_year + Int16Index([366, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int16') + """ + return self._get_dt_field("day_of_year") + def to_pandas(self): nanos = self._values.astype("datetime64[ns]") return pd.DatetimeIndex(nanos.to_pandas(), name=self.name) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 2dada48be4d..77640db6a1d 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -6292,6 +6292,80 @@ def dayofweek(self): """ return self._get_dt_field("weekday") + @property + def dayofyear(self): + """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_series = cudf.Series(pd.date_range('2016-12-31', + ... '2017-01-08', freq='D')) + >>> datetime_series + 0 2016-12-31 + 1 2017-01-01 + 2 2017-01-02 + 3 2017-01-03 + 4 2017-01-04 + 5 2017-01-05 + 6 2017-01-06 + 7 2017-01-07 + 8 2017-01-08 + dtype: datetime64[ns] + >>> datetime_series.dt.dayofyear + 0 366 + 1 1 + 2 2 + 3 3 + 4 4 + 5 5 + 6 6 + 7 7 + 8 8 + dtype: int16 + """ + return self._get_dt_field("day_of_year") + + @property + def day_of_year(self): + """ + The day of the year, from 1-365 in non-leap years and + from 1-366 in leap years. + + Examples + -------- + >>> import pandas as pd + >>> import cudf + >>> datetime_series = cudf.Series(pd.date_range('2016-12-31', + ... '2017-01-08', freq='D')) + >>> datetime_series + 0 2016-12-31 + 1 2017-01-01 + 2 2017-01-02 + 3 2017-01-03 + 4 2017-01-04 + 5 2017-01-05 + 6 2017-01-06 + 7 2017-01-07 + 8 2017-01-08 + dtype: datetime64[ns] + >>> datetime_series.dt.day_of_year + 0 366 + 1 1 + 2 2 + 3 3 + 4 4 + 5 5 + 6 6 + 7 7 + 8 8 + dtype: int16 + """ + return self._get_dt_field("day_of_year") + def _get_dt_field(self, field): out_column = self.series._column.get_dt_field(field) return Series( diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 653ee8389fa..12e169e699d 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -82,6 +82,8 @@ def numerical_data(): "second", "weekday", "dayofweek", + "dayofyear", + "day_of_year", ] From 25f3987cf18da3641881f88c0534eceb2ae90939 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Wed, 30 Jun 2021 09:33:19 -0500 Subject: [PATCH 11/54] Fix usage of deprecated arrow ipc API (#8632) After cudf upgraded to a new version of arrow one of the APIs we were using was deprecated. This updates the JNI code to use the new version of the API. 
Authors: - Robert (Bobby) Evans (https://github.com/revans2) Approvers: - Gera Shegalov (https://github.com/gerashegalov) - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/8632 --- java/src/main/native/src/TableJni.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 018dd211139..790403d7594 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -237,7 +237,7 @@ class native_arrow_ipc_writer_handle final { } // There is an option to have a file writer too, with metadata - auto tmp_writer = arrow::ipc::NewStreamWriter(sink.get(), arrow_tab->schema()); + auto tmp_writer = arrow::ipc::MakeStreamWriter(sink, arrow_tab->schema()); if (!tmp_writer.ok()) { throw std::runtime_error(tmp_writer.status().message()); } From fa50b7d0461713979b13ba682eb2cee1d2414d06 Mon Sep 17 00:00:00 2001 From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> Date: Wed, 30 Jun 2021 10:21:10 -0500 Subject: [PATCH 12/54] Fix for strings containing special JSON characters in get_json_object(). (#8556) Fixes https://github.com/rapidsai/cudf/issues/8387 Also adds proper handling for valid JSON escape sequences in strings. Authors: - https://github.com/nvdbaranec Approvers: - Jake Hemstad (https://github.com/jrhemstad) - Christopher Harris (https://github.com/cwharris) URL: https://github.com/rapidsai/cudf/pull/8556 --- cpp/src/strings/json/json_path.cu | 71 +++++++++++++++--- cpp/tests/strings/json_tests.cpp | 120 ++++++++++++++++++++++++++---- 2 files changed, 166 insertions(+), 25 deletions(-) diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index dfdd3226844..0cf08892adc 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -99,6 +99,44 @@ class parser { return false; } + CUDA_HOST_DEVICE_CALLABLE bool is_hex_digit(char c) + { + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); + } + + CUDA_HOST_DEVICE_CALLABLE int64_t chars_left() { return input_len - ((pos - input) + 1); } + + /** + * @brief Parse an escape sequence. + * + * Must be a valid sequence as specified by the JSON format + * https://www.json.org/json-en.html + * + * @returns True on success or false on fail. + */ + CUDA_HOST_DEVICE_CALLABLE bool parse_escape_seq() + { + if (*pos != '\\') { return false; } + char c = *++pos; + + // simple case + if (c == '\"' || c == '\\' || c == '/' || c == 'b' || c == 'f' || c == 'n' || c == 'r' || + c == 't') { + pos++; + return true; + } + + // hex digits: must be of the form uXXXX where each X is a valid hex digit + if (c == 'u' && chars_left() >= 4 && is_hex_digit(pos[1]) && is_hex_digit(pos[2]) && + is_hex_digit(pos[3]) && is_hex_digit(pos[4])) { + pos += 5; + return true; + } + + // an illegal escape sequence. + return false; + } + /** * @brief Parse a quote-enclosed JSON string. * @@ -123,12 +161,16 @@ class parser { const char* start = ++pos; while (!eof()) { - if (*pos == quote) { + // handle escaped characters + if (*pos == '\\') { + if (!parse_escape_seq()) { return parse_result::ERROR; } + } else if (*pos == quote) { str = string_view(start, pos - start); pos++; return parse_result::SUCCESS; + } else { + pos++; } - pos++; } } } @@ -230,15 +272,22 @@ class json_state : private parser { int arr_count = 0; while (!eof(end)) { - // could do some additional checks here. 
we know our current - // element type, so we could be more strict on what kinds of - // characters we expect to see. - switch (*end++) { - case '{': obj_count++; break; - case '}': obj_count--; break; - case '[': arr_count++; break; - case ']': arr_count--; break; - default: break; + // parse strings explicitly so we handle all interesting corner cases (such as strings + // containing {, }, [ or ] + if (is_quote(*end)) { + string_view str; + pos = end; + if (parse_string(str, false, *end) == parse_result::ERROR) { return parse_result::ERROR; } + end = pos; + } else { + char const c = *end++; + switch (c) { + case '{': obj_count++; break; + case '}': obj_count--; break; + case '[': arr_count++; break; + case ']': arr_count--; break; + default: break; + } } if (obj_count == 0 && arr_count == 0) { break; } } diff --git a/cpp/tests/strings/json_tests.cpp b/cpp/tests/strings/json_tests.cpp index 5c81057b6d7..dfcc646a8f6 100644 --- a/cpp/tests/strings/json_tests.cpp +++ b/cpp/tests/strings/json_tests.cpp @@ -76,10 +76,10 @@ std::unique_ptr drop_whitespace(cudf::column_view const& col) return cudf::strings::replace(strings, targets, replacements); } -struct JsonTests : public cudf::test::BaseFixture { +struct JsonPathTests : public cudf::test::BaseFixture { }; -TEST_F(JsonTests, GetJsonObjectRootOp) +TEST_F(JsonPathTests, GetJsonObjectRootOp) { // root cudf::test::strings_column_wrapper input{json_string}; @@ -92,7 +92,7 @@ TEST_F(JsonTests, GetJsonObjectRootOp) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected); } -TEST_F(JsonTests, GetJsonObjectChildOp) +TEST_F(JsonPathTests, GetJsonObjectChildOp) { { cudf::test::strings_column_wrapper input{json_string}; @@ -187,7 +187,7 @@ TEST_F(JsonTests, GetJsonObjectChildOp) } } -TEST_F(JsonTests, GetJsonObjectWildcardOp) +TEST_F(JsonPathTests, GetJsonObjectWildcardOp) { { cudf::test::strings_column_wrapper input{json_string}; @@ -291,7 +291,7 @@ TEST_F(JsonTests, GetJsonObjectWildcardOp) } } -TEST_F(JsonTests, GetJsonObjectSubscriptOp) +TEST_F(JsonPathTests, GetJsonObjectSubscriptOp) { { cudf::test::strings_column_wrapper input{json_string}; @@ -378,7 +378,7 @@ TEST_F(JsonTests, GetJsonObjectSubscriptOp) } } -TEST_F(JsonTests, GetJsonObjectFilter) +TEST_F(JsonPathTests, GetJsonObjectFilter) { // queries that result in filtering/collating results (mostly meaning - generates new // json instead of just returning parts of the existing string @@ -449,7 +449,7 @@ TEST_F(JsonTests, GetJsonObjectFilter) } } -TEST_F(JsonTests, GetJsonObjectNullInputs) +TEST_F(JsonPathTests, GetJsonObjectNullInputs) { { std::string str("{\"a\" : \"b\"}"); @@ -466,7 +466,7 @@ TEST_F(JsonTests, GetJsonObjectNullInputs) } } -TEST_F(JsonTests, GetJsonObjectEmptyQuery) +TEST_F(JsonPathTests, GetJsonObjectEmptyQuery) { // empty query -> null { @@ -480,7 +480,7 @@ TEST_F(JsonTests, GetJsonObjectEmptyQuery) } } -TEST_F(JsonTests, GetJsonObjectEmptyInputsAndOutputs) +TEST_F(JsonPathTests, GetJsonObjectEmptyInputsAndOutputs) { // empty input -> null { @@ -508,7 +508,7 @@ TEST_F(JsonTests, GetJsonObjectEmptyInputsAndOutputs) } // badly formed JSONpath strings -TEST_F(JsonTests, GetJsonObjectIllegalQuery) +TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) { // can't have more than one root operator, or a root operator anywhere other // than the beginning @@ -581,7 +581,7 @@ TEST_F(JsonTests, GetJsonObjectIllegalQuery) } // queries that are legal, but reference invalid parts of the input -TEST_F(JsonTests, GetJsonObjectInvalidQuery) +TEST_F(JsonPathTests, GetJsonObjectInvalidQuery) { // 
non-existent field { @@ -628,7 +628,7 @@ TEST_F(JsonTests, GetJsonObjectInvalidQuery) } } -TEST_F(JsonTests, MixedOutput) +TEST_F(JsonPathTests, MixedOutput) { // various queries on: // clang-format off @@ -760,7 +760,7 @@ TEST_F(JsonTests, MixedOutput) } } -TEST_F(JsonTests, StripQuotes) +TEST_F(JsonPathTests, StripQuotes) { // we normally expect our outputs here to be // b (no quotes) @@ -801,7 +801,7 @@ TEST_F(JsonTests, StripQuotes) } } -TEST_F(JsonTests, AllowSingleQuotes) +TEST_F(JsonPathTests, AllowSingleQuotes) { // Tests allowing single quotes for strings. // Note: this flag allows a mix of single and double quotes. it doesn't explicitly require @@ -876,3 +876,95 @@ TEST_F(JsonTests, AllowSingleQuotes) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); } } + +TEST_F(JsonPathTests, StringsWithSpecialChars) +{ + // make sure we properly handle strings containing special characters + // like { } [ ], etc + // various queries on: + + { + std::vector input_strings{ + // clang-format off + "{\"item\" : [{\"key\" : \"value[\"}]}", + // clang-format on + }; + + cudf::test::strings_column_wrapper input(input_strings.begin(), input_strings.end()); + { + std::string json_path("$.item"); + + cudf::strings::get_json_object_options options; + options.set_allow_single_quotes(true); + + auto result = + cudf::strings::get_json_object(cudf::strings_column_view(input), json_path, options); + + // clang-format off + cudf::test::strings_column_wrapper expected({ + "[{\"key\" : \"value[\"}]", + }); + // clang-format on + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + } + } + + { + std::vector input_strings{ + // clang-format off + "{\"a\" : \"[}{}][][{[\\\"}}[\\\"]\"}", + // clang-format on + }; + + cudf::test::strings_column_wrapper input(input_strings.begin(), input_strings.end()); + { + std::string json_path("$.a"); + + cudf::strings::get_json_object_options options; + options.set_allow_single_quotes(true); + + auto result = + cudf::strings::get_json_object(cudf::strings_column_view(input), json_path, options); + + // clang-format off + cudf::test::strings_column_wrapper expected({ + "[}{}][][{[\\\"}}[\\\"]", + }); + // clang-format on + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + } + } +} + +TEST_F(JsonPathTests, EscapeSequences) +{ + // valid escape sequences in JSON include + // \" \\ \/ \b \f \n \r \t + // \uXXXX where X is a valid hex digit + + std::vector input_strings{ + // clang-format off + "{\"a\" : \"\\\" \\\\ \\/ \\b \\f \\n \\r \\t\"}", + "{\"a\" : \"\\u1248 \\uacdf \\uACDF \\u10EF\"}" + // clang-format on + }; + + cudf::test::strings_column_wrapper input(input_strings.begin(), input_strings.end()); + { + std::string json_path("$.a"); + + cudf::strings::get_json_object_options options; + options.set_allow_single_quotes(true); + + auto result = + cudf::strings::get_json_object(cudf::strings_column_view(input), json_path, options); + + // clang-format off + cudf::test::strings_column_wrapper expected({ + "\\\" \\\\ \\/ \\b \\f \\n \\r \\t", + "\\u1248 \\uacdf \\uACDF \\u10EF" + }); + // clang-format on + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected); + } +} \ No newline at end of file From 96af10e64632798c2bea59a7772952a2a7378c0c Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 30 Jun 2021 14:46:12 -0400 Subject: [PATCH 13/54] Add NVBench in CMake (#8619) This PR added NVBench in cudf's CMake config file. 
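With `nvbench::main` linked into the benchmark target (see the `cpp/benchmarks/CMakeLists.txt` change below), benchmarks can be written against NVBench's C++ interface. A minimal sketch, assuming NVBench's documented `nvbench::state` / `NVBENCH_BENCH` API; the no-op benchmark body is purely illustrative and not part of this PR:

```cpp
#include <nvbench/nvbench.cuh>

void noop_bench(nvbench::state& state)
{
  // NVBench drives warmup, iteration, timing, and reporting around this
  // lambda; real GPU work would be enqueued on launch.get_stream().
  state.exec([](nvbench::launch& launch) { (void)launch; });
}

// Registers the benchmark; linking against the nvbench::main target
// supplies the main() entry point, so none is written here.
NVBENCH_BENCH(noop_bench);
```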
Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Jake Hemstad (https://github.com/jrhemstad) URL: https://github.com/rapidsai/cudf/pull/8619 --- cpp/CMakeLists.txt | 6 ++-- cpp/benchmarks/CMakeLists.txt | 1 + cpp/cmake/thirdparty/CUDF_GetNVBench.cmake | 34 ++++++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 cpp/cmake/thirdparty/CUDF_GetNVBench.cmake diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 03b1a6a9bfd..d0eabd1e5cd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -40,7 +40,7 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) option(USE_NVTX "Build with NVTX support" ON) option(BUILD_TESTS "Configure CMake to build tests" ON) -option(BUILD_BENCHMARKS "Configure CMake to build (google) benchmarks" OFF) +option(BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks" OFF) option(BUILD_SHARED_LIBS "Build cuDF shared libraries" ON) option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON) option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF) @@ -54,7 +54,7 @@ option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}") message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}") -message(VERBOSE "CUDF: Configure CMake to build (google) benchmarks: ${BUILD_BENCHMARKS}") +message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}") message(VERBOSE "CUDF: Build cuDF shared libraries: ${BUILD_SHARED_LIBS}") message(VERBOSE "CUDF: Use a file cache for JIT compiled kernels: ${JITIFY_USE_CACHE}") message(VERBOSE "CUDF: Build and statically link Arrow libraries: ${CUDF_USE_ARROW_STATIC}") @@ -576,6 +576,8 @@ if(CUDF_BUILD_BENCHMARKS) GIT_SHALLOW TRUE OPTIONS "BENCHMARK_ENABLE_TESTING OFF" "BENCHMARK_ENABLE_INSTALL OFF") + # Find or install NVBench + include(cmake/thirdparty/CUDF_GetNVBench.cmake) add_subdirectory(benchmarks) endif() diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index e8ccb24f44c..f8107d526c1 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -29,6 +29,7 @@ target_link_libraries(cudf_datagen GTest::gmock_main GTest::gtest_main benchmark::benchmark + nvbench::main Threads::Threads cudf) diff --git a/cpp/cmake/thirdparty/CUDF_GetNVBench.cmake b/cpp/cmake/thirdparty/CUDF_GetNVBench.cmake new file mode 100644 index 00000000000..09ceffb284f --- /dev/null +++ b/cpp/cmake/thirdparty/CUDF_GetNVBench.cmake @@ -0,0 +1,34 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +# NVBench doesn't have a public release yet + +function(find_and_configure_nvbench) + + if(TARGET nvbench::main) + return() + endif() + + CPMFindPackage(NAME nvbench + GIT_REPOSITORY https://github.com/NVIDIA/nvbench.git + GIT_TAG main + GIT_SHALLOW TRUE + OPTIONS "NVBench_ENABLE_EXAMPLES OFF" + "NVBench_ENABLE_TESTING OFF") + +endfunction() + +find_and_configure_nvbench() From dab8a629d94ec5f137c3abbb70cec47ff237a525 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Wed, 30 Jun 2021 14:49:02 -0400 Subject: [PATCH 14/54] Expose pack/unpack API to Python (#8153) Closes #7601 Adds a Python API for `pack`/`unpack`, so that we might be able to pack/unpack DataFrames in serialization: - `PackedColumns` is a Python representation of the `cudf::packed_columns` struct containing the struct itself along with some Python metadata for the DataFrame being packed; supports Dask/pickle serialization - `pack()` takes in a `Table` and returns a `PackedColumns` - `unpack()` takes in a `PackedColumns` and returns a `Table` cc @brandon-b-miller Authors: - Charles Blackmon-Luca (https://github.com/charlesbluca) Approvers: - Devavret Makkar (https://github.com/devavret) - https://github.com/brandon-b-miller - Karthikeyan (https://github.com/karthikeyann) - https://github.com/jakirkham URL: https://github.com/rapidsai/cudf/pull/8153 --- cpp/include/cudf/copying.hpp | 2 +- python/cudf/cudf/_lib/copying.pxd | 14 ++ python/cudf/cudf/_lib/copying.pyx | 169 +++++++++++++- python/cudf/cudf/_lib/cpp/copying.pxd | 16 +- python/cudf/cudf/tests/test_pack.py | 318 ++++++++++++++++++++++++++ 5 files changed, 516 insertions(+), 3 deletions(-) create mode 100644 python/cudf/cudf/_lib/copying.pxd create mode 100644 python/cudf/cudf/tests/test_pack.py diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 477c53535de..6be865ea993 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -629,7 +629,7 @@ packed_columns pack(cudf::table_view const& input, * guaranteeing that that all of the columns in the table point into `contiguous_buffer`. * * @param input View of the table to pack - * @param contgiuous_buffer A contiguous buffer of device memory which contains the data referenced + * @param contiguous_buffer A contiguous buffer of device memory which contains the data referenced * by the columns in `table` * @param buffer_size The size of `contiguous_buffer`. * @return Vector of bytes representing the metadata used to `unpack` a packed_columns struct. diff --git a/python/cudf/cudf/_lib/copying.pxd b/python/cudf/cudf/_lib/copying.pxd new file mode 100644 index 00000000000..1668ef05f3f --- /dev/null +++ b/python/cudf/cudf/_lib/copying.pxd @@ -0,0 +1,14 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. + +from cudf._lib.table cimport Table + +from cudf._lib.cpp.copying cimport packed_columns + +cdef class _CPackedColumns: + cdef packed_columns c_obj + cdef object column_names + cdef object column_dtypes + cdef object index_names + + @staticmethod + cdef _CPackedColumns from_py_table(Table input_table, keep_index=*) diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 463082f0687..9ad552a0acb 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -1,12 +1,16 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. 
+import pickle + import pandas as pd from libcpp cimport bool from libcpp.memory cimport make_unique, unique_ptr, shared_ptr, make_shared from libcpp.vector cimport vector from libcpp.utility cimport move -from libc.stdint cimport int32_t, int64_t +from libc.stdint cimport int32_t, int64_t, uint8_t, uintptr_t + +from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer from cudf._lib.column cimport Column from cudf._lib.scalar import as_device_scalar @@ -14,6 +18,8 @@ from cudf._lib.scalar cimport DeviceScalar from cudf._lib.table cimport Table from cudf._lib.reduce import minmax +from cudf.core.abc import Serializable + from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport ( column_view, @@ -776,3 +782,164 @@ def segmented_gather(Column source_column, Column gather_map): result = Column.from_unique_ptr(move(c_result)) return result + + +cdef class _CPackedColumns: + + @staticmethod + cdef _CPackedColumns from_py_table(Table input_table, keep_index=True): + """ + Construct a ``PackedColumns`` object from a ``cudf.DataFrame``. + """ + from cudf.core import RangeIndex, dtypes + + cdef _CPackedColumns p = _CPackedColumns.__new__(_CPackedColumns) + + if keep_index and not input_table.index.equals( + RangeIndex(start=0, stop=len(input_table), step=1) + ): + input_table_view = input_table.view() + p.index_names = input_table._index_names + else: + input_table_view = input_table.data_view() + + p.column_names = input_table._column_names + p.column_dtypes = {} + for name, col in input_table._data.items(): + if isinstance(col.dtype, dtypes._BaseDtype): + p.column_dtypes[name] = col.dtype + + p.c_obj = move(cpp_copying.pack(input_table_view)) + + return p + + @property + def gpu_data_ptr(self): + return int(self.c_obj.gpu_data.get()[0].data()) + + @property + def gpu_data_size(self): + return int(self.c_obj.gpu_data.get()[0].size()) + + def serialize(self): + header = {} + frames = [] + + header["column-names"] = self.column_names + header["index-names"] = self.index_names + header["gpu-data-ptr"] = self.gpu_data_ptr + header["gpu-data-size"] = self.gpu_data_size + header["metadata"] = list( + + self.c_obj.metadata_.get()[0].data() + ) + + column_dtypes = {} + for name, dtype in self.column_dtypes.items(): + dtype_header, dtype_frames = dtype.serialize() + column_dtypes[name] = ( + dtype_header, + (len(frames), len(frames) + len(dtype_frames)), + ) + frames.extend(dtype_frames) + header["column-dtypes"] = column_dtypes + + return header, frames + + @staticmethod + def deserialize(header, frames): + cdef _CPackedColumns p = _CPackedColumns.__new__(_CPackedColumns) + + dbuf = DeviceBuffer( + ptr=header["gpu-data-ptr"], + size=header["gpu-data-size"] + ) + + cdef cpp_copying.packed_columns data + data.metadata_ = move( + make_unique[cpp_copying.metadata]( + move(header["metadata"]) + ) + ) + data.gpu_data = move(dbuf.c_obj) + + p.c_obj = move(data) + p.column_names = header["column-names"] + p.index_names = header["index-names"] + + column_dtypes = {} + for name, dtype in header["column-dtypes"].items(): + dtype_header, (start, stop) = dtype + column_dtypes[name] = pickle.loads( + dtype_header["type-serialized"] + ).deserialize(dtype_header, frames[start:stop]) + p.column_dtypes = column_dtypes + + return p + + def unpack(self): + output_table = Table.from_table_view( + cpp_copying.unpack(self.c_obj), + self, + self.column_names, + self.index_names + ) + + for name, dtype in self.column_dtypes.items(): + output_table._data[name] = ( + 
output_table._data[name]._with_type_metadata(dtype) + ) + + return output_table + + +class PackedColumns(Serializable): + """ + A packed representation of a ``cudf.Table``, with all columns residing + in a single GPU memory buffer. + """ + + def __init__(self, data): + self._data = data + + def __reduce__(self): + return self.deserialize, self.serialize() + + @property + def __cuda_array_interface__(self): + return { + "data": (self._data.gpu_data_ptr, False), + "shape": (self._data.gpu_data_size,), + "strides": None, + "typestr": "|u1", + "version": 0 + } + + def serialize(self): + return self._data.serialize() + + @classmethod + def deserialize(cls, header, frames): + return cls(_CPackedColumns.deserialize(header, frames)) + + @classmethod + def from_py_table(cls, input_table, keep_index=True): + return cls(_CPackedColumns.from_py_table(input_table, keep_index)) + + def unpack(self): + return self._data.unpack() + + +def pack(input_table, keep_index=True): + """ + Pack the columns of a ``cudf.Table`` into a single GPU memory buffer. + """ + return PackedColumns.from_py_table(input_table, keep_index) + + +def unpack(packed): + """ + Unpack the results of packing a ``cudf.Table``, returning a new + ``Table`` in the process. + """ + return packed.unpack() diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd index c32eb13d908..1f24f51e9a9 100644 --- a/python/cudf/cudf/_lib/cpp/copying.pxd +++ b/python/cudf/cudf/_lib/cpp/copying.pxd @@ -3,7 +3,7 @@ from rmm._lib.device_buffer cimport device_buffer from libcpp cimport bool -from libc.stdint cimport int32_t, int64_t +from libc.stdint cimport int32_t, int64_t, uint8_t from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector @@ -20,6 +20,12 @@ from cudf._lib.cpp.types cimport size_type ctypedef const scalar constscalar +cdef extern from "cudf/copying.hpp" namespace "cudf::packed_columns" nogil: + cdef struct metadata: + metadata(vector[uint8_t]&& v) + const uint8_t* data () except + + size_type size () except + + cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: ctypedef enum out_of_bounds_policy: NULLIFY 'cudf::out_of_bounds_policy::NULLIFY' @@ -119,6 +125,10 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: vector[size_type] splits ) except + + cdef struct packed_columns: + unique_ptr[metadata] metadata_ + unique_ptr[device_buffer] gpu_data + cdef struct contiguous_split_result: table_view table vector[device_buffer] all_data @@ -128,6 +138,10 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: vector[size_type] splits ) except + + cdef packed_columns pack (const table_view& input) except + + + cdef table_view unpack (const packed_columns& input) except + + cdef unique_ptr[column] copy_if_else ( column_view lhs, column_view rhs, diff --git a/python/cudf/cudf/tests/test_pack.py b/python/cudf/cudf/tests/test_pack.py new file mode 100644 index 00000000000..dab74050437 --- /dev/null +++ b/python/cudf/cudf/tests/test_pack.py @@ -0,0 +1,318 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import pickle +import sys + +import numpy as np +import pandas as pd + +from cudf._lib.copying import pack, unpack +from cudf.core import DataFrame, GenericIndex, Series +from cudf.testing._utils import assert_eq + + +def test_sizeof_packed_dataframe(): + np.random.seed(0) + df = DataFrame() + nelem = 1000 + df["keys"] = hkeys = np.arange(nelem, dtype=np.float64) + df["vals"] = hvals = np.random.random(nelem) + packed = pack(df) + + nbytes = hkeys.nbytes + hvals.nbytes + sizeof = sys.getsizeof(packed) + assert sizeof < nbytes + + serialized_nbytes = len( + pickle.dumps(packed, protocol=pickle.HIGHEST_PROTOCOL) + ) + + # assert at least sizeof bytes were serialized + assert serialized_nbytes >= sizeof + + +def check_packed_equality(df): + # basic + assert_packed_frame_equality(df) + # sliced + assert_packed_frame_equality(df[:-1]) + assert_packed_frame_equality(df[1:]) + assert_packed_frame_equality(df[2:-2]) + # sorted + sortvaldf = df.sort_values("vals") + assert isinstance(sortvaldf.index, GenericIndex) + assert_packed_frame_equality(sortvaldf) + + +def assert_packed_frame_equality(df): + pdf = df.to_pandas() + + packed = pack(df) + del df + unpacked = DataFrame._from_table(unpack(packed)) + + assert_eq(unpacked, pdf) + + +def test_packed_dataframe_equality_numeric(): + np.random.seed(0) + + df = DataFrame() + nelem = 10 + df["keys"] = np.arange(nelem, dtype=np.float64) + df["vals"] = np.random.random(nelem) + + check_packed_equality(df) + + +def test_packed_dataframe_equality_categorical(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = pd.Categorical( + ["a", "a", "a", "b", "a", "b", "a", "b", "a", "c"] + ) + df["vals"] = np.random.random(len(df)) + + check_packed_equality(df) + + +def test_packed_dataframe_equality_list(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = Series(list([i, i + 1, i + 2] for i in range(10))) + df["vals"] = np.random.random(len(df)) + + check_packed_equality(df) + + +def test_packed_dataframe_equality_struct(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = Series( + list({"0": i, "1": i + 1, "2": i + 2} for i in range(10)) + ) + df["vals"] = np.random.random(len(df)) + + check_packed_equality(df) + + +def check_packed_unique_pointers(df): + # basic + assert_packed_frame_unique_pointers(df) + # sliced + assert_packed_frame_unique_pointers(df[:-1]) + assert_packed_frame_unique_pointers(df[1:]) + assert_packed_frame_unique_pointers(df[2:-2]) + # sorted + sortvaldf = df.sort_values("vals") + assert isinstance(sortvaldf.index, GenericIndex) + assert_packed_frame_unique_pointers(sortvaldf) + + +def assert_packed_frame_unique_pointers(df): + unpacked = unpack(pack(df)) + + for col in df: + if df._data[col].data: + assert df._data[col].data.ptr != unpacked._data[col].data.ptr + + +def test_packed_dataframe_unique_pointers_numeric(): + np.random.seed(0) + + df = DataFrame() + nelem = 10 + df["keys"] = np.arange(nelem, dtype=np.float64) + df["vals"] = np.random.random(nelem) + + check_packed_unique_pointers(df) + + +def test_packed_dataframe_unique_pointers_categorical(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = pd.Categorical( + ["a", "a", "a", "b", "a", "b", "a", "b", "a", "c"] + ) + df["vals"] = np.random.random(len(df)) + + check_packed_unique_pointers(df) + + +def test_packed_dataframe_unique_pointers_list(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = Series(list([i, i + 1, i + 2] for i in range(10))) 
+ df["vals"] = np.random.random(len(df)) + + check_packed_unique_pointers(df) + + +def test_packed_dataframe_unique_pointers_struct(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = Series( + list({"0": i, "1": i + 1, "2": i + 2} for i in range(10)) + ) + df["vals"] = np.random.random(len(df)) + + check_packed_unique_pointers(df) + + +def check_packed_pickled_equality(df): + # basic + assert_packed_frame_picklable(df) + # sliced + assert_packed_frame_picklable(df[:-1]) + assert_packed_frame_picklable(df[1:]) + assert_packed_frame_picklable(df[2:-2]) + # sorted + sortvaldf = df.sort_values("vals") + assert isinstance(sortvaldf.index, GenericIndex) + assert_packed_frame_picklable(sortvaldf) + # out-of-band + if pickle.HIGHEST_PROTOCOL >= 5: + buffers = [] + serialbytes = pickle.dumps( + pack(df), protocol=5, buffer_callback=buffers.append + ) + for b in buffers: + assert isinstance(b, pickle.PickleBuffer) + loaded = DataFrame._from_table( + unpack(pickle.loads(serialbytes, buffers=buffers)) + ) + assert_eq(loaded, df) + + +def assert_packed_frame_picklable(df): + serialbytes = pickle.dumps(pack(df)) + loaded = DataFrame._from_table(unpack(pickle.loads(serialbytes))) + assert_eq(loaded, df) + + +def test_pickle_packed_dataframe_numeric(): + np.random.seed(0) + + df = DataFrame() + nelem = 10 + df["keys"] = np.arange(nelem, dtype=np.float64) + df["vals"] = np.random.random(nelem) + + check_packed_pickled_equality(df) + + +def test_pickle_packed_dataframe_categorical(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = pd.Categorical( + ["a", "a", "a", "b", "a", "b", "a", "b", "a", "c"] + ) + df["vals"] = np.random.random(len(df)) + + check_packed_pickled_equality(df) + + +def test_pickle_packed_dataframe_list(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = Series(list([i, i + 1, i + 2] for i in range(10))) + df["vals"] = np.random.random(len(df)) + + check_packed_pickled_equality(df) + + +def test_pickle_packed_dataframe_struct(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = Series( + list({"0": i, "1": i + 1, "2": i + 2} for i in range(10)) + ) + df["vals"] = np.random.random(len(df)) + + check_packed_pickled_equality(df) + + +def check_packed_serialized_equality(df): + # basic + assert_packed_frame_serializable(df) + # sliced + assert_packed_frame_serializable(df[:-1]) + assert_packed_frame_serializable(df[1:]) + assert_packed_frame_serializable(df[2:-2]) + # sorted + sortvaldf = df.sort_values("vals") + assert isinstance(sortvaldf.index, GenericIndex) + assert_packed_frame_serializable(sortvaldf) + + +def assert_packed_frame_serializable(df): + packed = pack(df) + header, frames = packed.serialize() + loaded = DataFrame._from_table(unpack(packed.deserialize(header, frames))) + assert_eq(loaded, df) + + +def test_serialize_packed_dataframe_numeric(): + np.random.seed(0) + + df = DataFrame() + nelem = 10 + df["keys"] = np.arange(nelem, dtype=np.float64) + df["vals"] = np.random.random(nelem) + + check_packed_serialized_equality(df) + + +def test_serialize_packed_dataframe_categorical(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = pd.Categorical( + ["a", "a", "a", "b", "a", "b", "a", "b", "a", "c"] + ) + df["vals"] = np.random.random(len(df)) + + check_packed_serialized_equality(df) + + +def test_serialize_packed_dataframe_list(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = Series(list([i, i + 1, i + 2] for i in range(10))) + df["vals"] = np.random.random(len(df)) + + check_packed_serialized_equality(df) + + +def 
test_serialize_packed_dataframe_struct(): + np.random.seed(0) + + df = DataFrame() + df["keys"] = Series( + list({"0": i, "1": i + 1, "2": i + 2} for i in range(10)) + ) + df["vals"] = np.random.random(len(df)) + + check_packed_serialized_equality(df) From 5884b95e7de7d528c82fb74c8be24e444a0dfcf8 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 30 Jun 2021 15:26:50 -0500 Subject: [PATCH 15/54] ListColumn `__setitem__` (#8606) Closes https://github.com/rapidsai/cudf/issues/8553 based on https://github.com/rapidsai/cudf/pull/8459 Authors: - https://github.com/brandon-b-miller Approvers: - Ashwin Srinath (https://github.com/shwina) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/8606 --- python/cudf/cudf/_lib/cpp/scalar/scalar.pxd | 1 + python/cudf/cudf/_lib/scalar.pyx | 34 +++++------ python/cudf/cudf/core/column/lists.py | 12 ++++ python/cudf/cudf/core/indexing.py | 5 +- python/cudf/cudf/core/scalar.py | 2 +- python/cudf/cudf/tests/test_list.py | 64 +++++++++++++++++++++ 6 files changed, 97 insertions(+), 21 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd index d82bf3cde5f..f0b6ea0b606 100644 --- a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd +++ b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd @@ -66,6 +66,7 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: cdef cppclass list_scalar(scalar): list_scalar(column_view col) except + + list_scalar(column_view col, bool is_valid) except + column_view view() except + cdef cppclass struct_scalar(scalar): diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 2759cc2999f..9429ab0ee57 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -333,20 +333,18 @@ cdef _set_list_from_pylist(unique_ptr[scalar]& s, value = value if valid else [cudf.NA] cdef Column col if isinstance(dtype.element_type, ListDtype): - col = cudf.core.column.as_column( - pa.array( - value, from_pandas=True, type=dtype.element_type.to_arrow() - ) - ) + pa_type = dtype.element_type.to_arrow() else: - col = cudf.core.column.as_column( - pa.array(value, from_pandas=True) - ) + pa_type = dtype.to_arrow().value_type + col = cudf.core.column.as_column( + pa.array(value, from_pandas=True, type=pa_type) + ) cdef column_view col_view = col.view() s.reset( - new list_scalar(col_view) + new list_scalar(col_view, valid) ) + cdef _get_py_list_from_list(unique_ptr[scalar]& s): if not s.get()[0].is_valid(): @@ -497,18 +495,16 @@ cdef _get_np_scalar_from_timedelta64(unique_ptr[scalar]& s): def as_device_scalar(val, dtype=None): - if dtype: - if isinstance(val, (cudf.Scalar, DeviceScalar)) and dtype != val.dtype: - raise TypeError("Can't update dtype of existing GPU scalar") + if isinstance(val, (cudf.Scalar, DeviceScalar)): + if dtype == val.dtype or dtype is None: + if isinstance(val, DeviceScalar): + return val + else: + return val.device_value else: - return cudf.Scalar(value=val, dtype=dtype).device_value + raise TypeError("Can't update dtype of existing GPU scalar") else: - if isinstance(val, DeviceScalar): - return val - if isinstance(val, cudf.Scalar): - return val.device_value - else: - return cudf.Scalar(val).device_value + return cudf.Scalar(val, dtype=dtype).device_value def _is_null_host_scalar(slr): diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 
8257e8aa6d0..ff63d8c5aaa 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -74,6 +74,18 @@ def __sizeof__(self): return self._cached_sizeof + def __setitem__(self, key, value): + if isinstance(value, list): + value = cudf.Scalar(value) + if isinstance(value, cudf.Scalar): + if value.dtype != self.dtype: + raise TypeError("list nesting level mismatch") + elif value is cudf.NA: + value = cudf.Scalar(value, dtype=self.dtype) + else: + raise ValueError(f"Can not set {value} into ListColumn") + super().__setitem__(key, value) + @property def base_size(self): # in some cases, libcudf will return an empty ListColumn with no diff --git a/python/cudf/cudf/core/indexing.py b/python/cudf/cudf/core/indexing.py index 6711171612a..933fd768d7c 100755 --- a/python/cudf/cudf/core/indexing.py +++ b/python/cudf/cudf/core/indexing.py @@ -110,7 +110,10 @@ def __setitem__(self, key, value): # coerce value into a scalar or column if is_scalar(value): value = to_cudf_compatible_scalar(value) - else: + elif not ( + isinstance(value, list) + and isinstance(self._sr._column.dtype, cudf.ListDtype) + ): value = column.as_column(value) if ( not isinstance( diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index a17d1dad9b3..ad39642cf60 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -127,7 +127,7 @@ def _preprocess_host_value(self, value, dtype): ) return value, dtype elif isinstance(dtype, ListDtype): - if value is not None: + if value not in {None, NA}: raise ValueError(f"Can not coerce {value} to ListDtype") else: return NA, dtype diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index a6a9ba97ef5..0e93ff8a232 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -457,3 +457,67 @@ def test_serialize_list_columns(data): df = cudf.DataFrame(data) recreated = df.__class__.deserialize(*df.serialize()) assert_eq(recreated, df) + + +@pytest.mark.parametrize( + "data,item", + [ + ( + # basic list into a list column + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [0, 0, 0], + ), + ( + # nested list into nested list column + [ + [[1, 2, 3], [4, 5, 6]], + [[1, 2, 3], [4, 5, 6]], + [[1, 2, 3], [4, 5, 6]], + ], + [[0, 0, 0], [0, 0, 0]], + ), + ( + # NA into a list column + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + NA, + ), + ( + # NA into nested list column + [ + [[1, 2, 3], [4, 5, 6]], + [[1, 2, 3], [4, 5, 6]], + [[1, 2, 3], [4, 5, 6]], + ], + NA, + ), + ], +) +def test_listcol_setitem(data, item): + sr = cudf.Series(data) + + sr[1] = item + data[1] = item + expect = cudf.Series(data) + + assert_eq(expect, sr) + + +@pytest.mark.parametrize( + "data,item,error", + [ + ( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + [[1, 2, 3], [4, 5, 6]], + "list nesting level mismatch", + ), + ( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + 0, + "Can not set 0 into ListColumn", + ), + ], +) +def test_listcol_setitem_error_cases(data, item, error): + sr = cudf.Series(data) + with pytest.raises(BaseException, match=error): + sr[1] = item From df45976b5e27f565d9a4d6a435a74a59b2b4a9d6 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Wed, 30 Jun 2021 19:12:23 -0400 Subject: [PATCH 16/54] Add Python bindings for `lists::concatenate_list_elements` and expose them as `.list.concat()` (#8006) Adds a method to concatenate the lists in a nested list Series: ```python In [15]: s Out[15]: 0 [[1, 2], [3, 4]] dtype: list In [16]: s.list.concat() 
Out[16]: 0 [1, 2, 3, 4] dtype: list ``` Authors: - Ashwin Srinath (https://github.com/shwina) Approvers: - Nghia Truong (https://github.com/ttnghia) - GALI PREM SAGAR (https://github.com/galipremsagar) - Charles Blackmon-Luca (https://github.com/charlesbluca) URL: https://github.com/rapidsai/cudf/pull/8006 --- python/cudf/cudf/_lib/cpp/lists/combine.pxd | 16 +++ python/cudf/cudf/_lib/lists.pyx | 22 +++- python/cudf/cudf/core/column/categorical.py | 6 +- python/cudf/cudf/core/column/lists.py | 111 +++++++++++++++----- python/cudf/cudf/core/column/methods.py | 12 ++- python/cudf/cudf/core/column/string.py | 5 +- python/cudf/cudf/tests/test_list.py | 38 +++++++ 7 files changed, 170 insertions(+), 40 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/lists/combine.pxd b/python/cudf/cudf/_lib/cpp/lists/combine.pxd index ea9ade178e2..164253e39b5 100644 --- a/python/cudf/cudf/_lib/cpp/lists/combine.pxd +++ b/python/cudf/cudf/_lib/cpp/lists/combine.pxd @@ -3,10 +3,26 @@ from libcpp.memory cimport unique_ptr from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.table.table_view cimport table_view cdef extern from "cudf/lists/combine.hpp" namespace \ "cudf::lists" nogil: + + ctypedef enum concatenate_null_policy: + IGNORE "cudf::lists::concatenate_null_policy::IGNORE" + NULLIFY_OUTPUT_ROW \ + "cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW" + cdef unique_ptr[column] concatenate_rows( const table_view input_table ) except + + + cdef unique_ptr[column] concatenate_list_elements( + const table_view input_table, + ) except + + + cdef unique_ptr[column] concatenate_list_elements( + const column_view input_table, + concatenate_null_policy null_policy + ) except + diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 7d8909610dc..9fd7d7611ae 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -17,8 +17,11 @@ from cudf._lib.cpp.lists.sorting cimport ( sort_lists as cpp_sort_lists ) from cudf._lib.cpp.lists.combine cimport ( - concatenate_rows as cpp_concatenate_rows + concatenate_rows as cpp_concatenate_rows, + concatenate_null_policy, + concatenate_list_elements as cpp_concatenate_list_elements ) + from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.column.column cimport column @@ -181,3 +184,20 @@ def concatenate_rows(Table tbl): result = Column.from_unique_ptr(move(c_result)) return result + + +def concatenate_list_elements(Column input_column, dropna=False): + cdef concatenate_null_policy policy = ( + concatenate_null_policy.IGNORE if dropna + else concatenate_null_policy.NULLIFY_OUTPUT_ROW + ) + cdef column_view c_input = input_column.view() + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(cpp_concatenate_list_elements( + c_input, + policy + )) + + return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 135fb6e6f30..cbcc30d38a7 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -12,7 +12,6 @@ Optional, Sequence, Tuple, - Union, cast, ) @@ -28,7 +27,7 @@ from cudf._typing import ColumnLike, Dtype, ScalarLike from cudf.core.buffer import Buffer from cudf.core.column import column -from cudf.core.column.methods import ColumnMethodsMixin +from cudf.core.column.methods import 
ColumnMethodsMixin, ParentType from cudf.core.dtypes import CategoricalDtype from cudf.utils.dtypes import ( is_categorical_dtype, @@ -48,9 +47,6 @@ ) -ParentType = Union["cudf.Series", "cudf.Index"] - - class CategoricalAccessor(ColumnMethodsMixin): _column: CategoricalColumn diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index ff63d8c5aaa..843190f38aa 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -8,6 +8,7 @@ import cudf from cudf._lib.copying import segmented_gather from cudf._lib.lists import ( + concatenate_list_elements, concatenate_rows, contains_scalar, count_elements, @@ -16,15 +17,17 @@ sort_lists, ) from cudf._lib.table import Table -from cudf._typing import BinaryOperand, Dtype +from cudf._typing import BinaryOperand, ColumnLike, Dtype, ScalarLike from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column, column -from cudf.core.column.methods import ColumnMethodsMixin +from cudf.core.column.methods import ColumnMethodsMixin, ParentType from cudf.core.dtypes import ListDtype from cudf.utils.dtypes import _is_non_decimal_numeric_dtype, is_list_dtype class ListColumn(ColumnBase): + dtype: ListDtype + def __init__( self, size, dtype, mask=None, offset=0, null_count=None, children=(), ): @@ -278,14 +281,16 @@ class ListMethods(ColumnMethodsMixin): List methods for Series """ - def __init__(self, column, parent=None): + _column: ListColumn + + def __init__(self, column: ListColumn, parent: ParentType = None): if not is_list_dtype(column.dtype): raise AttributeError( "Can only use .list accessor with a 'list' dtype" ) super().__init__(column=column, parent=parent) - def get(self, index): + def get(self, index: int) -> ParentType: """ Extract element at the given index from each component @@ -317,10 +322,10 @@ def get(self, index): else: raise IndexError("list index out of range") - def contains(self, search_key): + def contains(self, search_key: ScalarLike) -> ParentType: """ - Creates a column of bool values indicating whether the specified scalar - is an element of each row of a list column. + Returns boolean values indicating whether the specified scalar + is an element of each row. Parameters ---------- @@ -329,7 +334,7 @@ def contains(self, search_key): Returns ------- - Column + Series or Index Examples -------- @@ -357,14 +362,14 @@ def contains(self, search_key): return res @property - def leaves(self): + def leaves(self) -> ParentType: """ From a Series of (possibly nested) lists, obtain the elements from the innermost lists as a flat Series (one value per row). Returns ------- - Series + Series or Index Examples -------- @@ -385,7 +390,7 @@ def leaves(self): self._column.elements, retain_index=False ) - def len(self): + def len(self) -> ParentType: """ Computes the length of each element in the Series/Index. @@ -409,18 +414,18 @@ def len(self): """ return self._return_or_inplace(count_elements(self._column)) - def take(self, lists_indices): + def take(self, lists_indices: ColumnLike) -> ParentType: """ Collect list elements based on given indices. 
Parameters ---------- - lists_indices: List type arrays + lists_indices: Series-like of lists Specifies what to collect from each row Returns ------- - ListColumn + Series or Index Examples -------- @@ -464,14 +469,14 @@ def take(self, lists_indices): else: return res - def unique(self): + def unique(self) -> ParentType: """ - Returns unique element for each list in the column, order for each - unique element is not guaranteed. + Returns the unique elements in each list. + The ordering of elements is not guaranteed. Returns ------- - ListColumn + Series or Index Examples -------- @@ -501,12 +506,12 @@ def unique(self): def sort_values( self, - ascending=True, - inplace=False, - kind="quicksort", - na_position="last", - ignore_index=False, - ): + ascending: bool = True, + inplace: bool = False, + kind: str = "quicksort", + na_position: str = "last", + ignore_index: bool = False, + ) -> ParentType: """ Sort each list by the values. @@ -523,7 +528,7 @@ def sort_values( Returns ------- - ListColumn with each list sorted + Series or Index with each list sorted Notes ----- @@ -552,3 +557,59 @@ def sort_values( sort_lists(self._column, ascending, na_position), retain_index=not ignore_index, ) + + def concat(self, dropna=True) -> ParentType: + """ + For a column with at least one level of nesting, concatenate the + lists in each row. + + Parameters + ---------- + dropna: bool, optional + If True (default), ignores top-level null elements in each row. + If False, and top-level null elements are present, the resulting + row in the output is null. + + Returns + ------- + Series or Index + + Examples + -------- + >>> s1 + 0 [[1.0, 2.0], [3.0, 4.0, 5.0]] + 1 [[6.0, None], [7.0], [8.0, 9.0]] + dtype: list + >>> s1.list.concat() + 0 [1.0, 2.0, 3.0, 4.0, 5.0] + 1 [6.0, None, 7.0, 8.0, 9.0] + dtype: list + + Null values at the top-level in each row are dropped by default: + + >>> s2 + 0 [[1.0, 2.0], None, [3.0, 4.0, 5.0]] + 1 [[6.0, None], [7.0], [8.0, 9.0]] + dtype: list + >>> s2.list.concat() + 0 [1.0, 2.0, 3.0, 4.0, 5.0] + 1 [6.0, None, 7.0, 8.0, 9.0] + dtype: list + + Use ``dropna=False`` to produce a null instead: + + >>> s2.list.concat(dropna=False) + 0 None + 1 [6.0, nan, 7.0, 8.0, 9.0] + dtype: list + """ + try: + result = concatenate_list_elements(self._column, dropna=dropna) + except RuntimeError as e: + if "Rows of the input column must be lists." in str(e): + raise ValueError( + "list.concat() can only be called on " + "list columns with at least one level " + "of nesting" + ) + return self._return_or_inplace(result) diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index d7b416d06c9..4b448e27a53 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -11,15 +11,17 @@ if TYPE_CHECKING: from cudf.core.column import ColumnBase +ParentType = Union["cudf.Series", "cudf.BaseIndex"] + class ColumnMethodsMixin: _column: ColumnBase - _parent: Optional[Union["cudf.Series", "cudf.Index"]] + _parent: Optional[Union["cudf.Series", "cudf.BaseIndex"]] def __init__( self, column: ColumnBase, - parent: Union["cudf.Series", "cudf.Index"] = None, + parent: Union["cudf.Series", "cudf.BaseIndex"] = None, ): self._column = column self._parent = parent @@ -27,13 +29,13 @@ def __init__( @overload def _return_or_inplace( self, new_col, inplace: Literal[False], expand=False, retain_index=True - ) -> Union["cudf.Series", "cudf.Index"]: + ) -> Union["cudf.Series", "cudf.BaseIndex"]: ... 
@overload def _return_or_inplace( self, new_col, expand: bool = False, retain_index: bool = True - ) -> Union["cudf.Series", "cudf.Index"]: + ) -> Union["cudf.Series", "cudf.BaseIndex"]: ... @overload @@ -49,7 +51,7 @@ def _return_or_inplace( inplace: bool = False, expand: bool = False, retain_index: bool = True, - ) -> Optional[Union["cudf.Series", "cudf.Index"]]: + ) -> Optional[Union["cudf.Series", "cudf.BaseIndex"]]: ... def _return_or_inplace( diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index a6a9de2e77b..11051b63920 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -161,7 +161,7 @@ from cudf.api.types import is_integer from cudf.core.buffer import Buffer from cudf.core.column import column, datetime -from cudf.core.column.methods import ColumnMethodsMixin +from cudf.core.column.methods import ColumnMethodsMixin, ParentType from cudf.utils import utils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( @@ -216,9 +216,6 @@ } -ParentType = Union["cudf.Series", "cudf.core.index.BaseIndex"] - - class StringMethods(ColumnMethodsMixin): def __init__(self, column, parent=None): """ diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 0e93ff8a232..abd24ddd0fd 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -1,5 +1,6 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. import functools +import operator import numpy as np import pandas as pd @@ -324,6 +325,43 @@ def test_contains_null_search_key(data, expect): assert_eq(expect, got) +@pytest.mark.parametrize( + "row", + [ + [[]], + [[1]], + [[1, 2]], + [[1, 2], [3, 4, 5]], + [[1, 2], [], [3, 4, 5]], + [[1, 2, None], [3, 4, 5]], + [[1, 2, None], None, [3, 4, 5]], + [[1, 2, None], None, [], [3, 4, 5]], + [[[1, 2], [3, 4]], [[5, 6, 7], [8, 9]]], + [[["a", "c", "de", None], None, ["fg"]], [["abc", "de"], None]], + ], +) +@pytest.mark.parametrize("dropna", [True, False]) +def test_concat_elements(row, dropna): + if any(x is None for x in row): + if dropna: + row = [x for x in row if x is not None] + result = functools.reduce(operator.add, row) + else: + result = None + else: + result = functools.reduce(operator.add, row) + + expect = pd.Series([result]) + got = cudf.Series([row]).list.concat(dropna=dropna) + assert_eq(expect, got) + + +def test_concat_elements_raise(): + s = cudf.Series([[1, 2, 3]]) # no nesting + with pytest.raises(ValueError): + s.list.concat() + + def test_concatenate_rows_of_lists(): pdf = pd.DataFrame({"val": [["a", "a"], ["b"], ["c"]]}) gdf = cudf.from_pandas(pdf) From 51b1c23b1b02e4578299fd5af13c326e5daca17e Mon Sep 17 00:00:00 2001 From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com> Date: Wed, 30 Jun 2021 22:21:32 -0700 Subject: [PATCH 17/54] String-to-boolean conversion is different from Pandas (#8549) Fixes: #7875 Previously: Pandas treats all non-empty strings as true values when it converts strings to booleans, whereas cuDF accepts only those that match with the true string (which is `True` by default). 
This PR resolves the mismatch by introducing the `str_to_boolean` method, which filters a string column by checking whether len(StringColumn) > 0 for each row and replaces `NaN` values with `False` to mimic Pandas behavior.

**Example:**
```
>>> import pandas as pd
>>> import cudf
>>> gs = cudf.Series(["True", None, "", "True", "False", "False"])
>>> gs
0     True
1
2
3     True
4    False
5    False
dtype: object
>>> gs.astype(bool)
0     True
1    False
2    False
3     True
4     True
5     True
dtype: bool
>>> gs.to_pandas().astype(bool)
0     True
1    False
2    False
3     True
4     True
5     True
dtype: bool
```

Authors:
  - Sheilah Kirui (https://github.com/skirui-source)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Michael Wang (https://github.com/isVoid)

URL: https://github.com/rapidsai/cudf/pull/8549
---
 python/cudf/cudf/core/column/string.py | 8 +++++++-
 python/cudf/cudf/tests/test_string.py  | 6 +-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 11051b63920..0902167be8b 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -171,6 +171,12 @@
     is_string_dtype,
 )
 
+
+def str_to_boolean(column: StringColumn):
+    """Takes in string column and returns boolean column """
+    return (column.str().len() > cudf.Scalar(0, dtype="int8")).fillna(False)
+
+
 _str_to_numeric_typecast_functions = {
     np.dtype("int8"): str_cast.stoi8,
     np.dtype("int16"): str_cast.stoi16,
@@ -182,7 +188,7 @@
     np.dtype("uint64"): str_cast.stoul,
     np.dtype("float32"): str_cast.stof,
     np.dtype("float64"): str_cast.stod,
-    np.dtype("bool"): str_cast.to_booleans,
+    np.dtype("bool"): str_to_boolean,
 }
 
 _numeric_to_str_typecast_functions = {
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index 3c153a16a13..a8c00ce031e 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -200,11 +200,7 @@ def test_string_astype(dtype):
     ps = pd.Series(data)
     gs = cudf.Series(data)
 
-    # Pandas str --> bool typecasting always returns True if there's a string
-    if dtype.startswith("bool"):
-        expect = ps == "True"
-    else:
-        expect = ps.astype(dtype)
+    expect = ps.astype(dtype)
     got = gs.astype(dtype)
 
     assert_eq(expect, got)

From a0b0eab8e0730a86a638d86aeef11088f12c935c Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 1 Jul 2021 10:59:55 -0400
Subject: [PATCH 18/54] Add delimiter parameter to cudf::strings::capitalize() (#8620)

Closes #8597
Add a parameter to the `cudf::strings::capitalize()` function to support capitalizing characters after a specified delimiter. This should meet the requirements of #8597 by passing a single ' ' character string as follows:
```
auto results = cudf::strings::capitalize(strings_view, std::string(" "));
```
The new parameter defaults to an empty string, which keeps the current behavior, so no updates are required to the python/cython layer.
The new parameter makes this PR a breaking change.
The source code for `title()` and `capitalize()` was further refactored using the CRTP pattern since much of the main internal code logic is the same.
This PR also includes additional gtests for this new parameter as well as adding some missing tests for empty columns.
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/8620 --- cpp/include/cudf/strings/capitalize.hpp | 23 ++++- cpp/src/strings/capitalize.cu | 123 +++++++++++++----------- cpp/tests/strings/case_tests.cpp | 78 ++++++++++----- 3 files changed, 137 insertions(+), 87 deletions(-) diff --git a/cpp/include/cudf/strings/capitalize.hpp b/cpp/include/cudf/strings/capitalize.hpp index 372d9faf13f..604756b5d09 100644 --- a/cpp/include/cudf/strings/capitalize.hpp +++ b/cpp/include/cudf/strings/capitalize.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -30,21 +31,33 @@ namespace strings { /** * @brief Returns a column of capitalized strings. * - * Any null string entries return corresponding null output column entries. + * If the `delimiters` is an empty string, then only the first character of each + * row is capitalized. Otherwise, a non-delimiter character is capitalized after + * any delimiter character is found. * * @code{.pseudo} * Example: - * input = ["tesT1", "a Test", "Another Test"]; + * input = ["tesT1", "a Test", "Another Test", "a\tb"]; * output = capitalize(input) - * output is ["Test1", "A test", "Another test"] + * output is ["Test1", "A test", "Another test", "A\tb"] + * output = capitalize(input, " ") + * output is ["Test1", "A Test", "Another Test", "A\tb"] + * output = capitalize(input, " \t") + * output is ["Test1", "A Test", "Another Test", "A\tB"] * @endcode * - * @param[in] input String column. - * @param[in] mr Device memory resource used to allocate the returned column's device memory + * Any null string entries return corresponding null output column entries. + * + * @throw cudf::logic_error if `delimiter.is_valid()` is `false`. + * + * @param input String column. + * @param delimiters Characters for identifying words to capitalize. + * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of strings capitalized from the input column. */ std::unique_ptr capitalize( strings_column_view const& input, + string_scalar const& delimiters = string_scalar(""), rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** diff --git a/cpp/src/strings/capitalize.cu b/cpp/src/strings/capitalize.cu index c1e341217ab..93b0edc1855 100644 --- a/cpp/src/strings/capitalize.cu +++ b/cpp/src/strings/capitalize.cu @@ -38,12 +38,24 @@ namespace { * @brief Base class for capitalize and title functors. * * Utility functions here manage access to the character case and flags tables. + * Any derived class must supply a `capitalize_next` member function. + * + * @tparam Derived class uses the CRTP pattern to reuse code logic. */ +template struct base_fn { character_flags_table_type const* d_flags; character_cases_table_type const* d_case_table; + column_device_view const d_column; + offset_type* d_offsets{}; + char* d_chars{}; - base_fn() : d_flags(get_character_flags_table()), d_case_table(get_character_cases_table()) {} + base_fn(column_device_view const& d_column) + : d_flags(get_character_flags_table()), + d_case_table(get_character_cases_table()), + d_column(d_column) + { + } using char_info = thrust::pair; @@ -58,35 +70,31 @@ struct base_fn { { return codepoint_to_utf8(d_case_table[info.first]); } -}; - -/** - * @brief Capitalize functor. - * - * This capitalizes the first letter of the string. 
- * Also lower-case any characters after the first letter. - */ -struct capitalize_fn : base_fn { - column_device_view const d_column; - offset_type* d_offsets{}; - char* d_chars{}; - - capitalize_fn(column_device_view const& d_column) : base_fn(), d_column(d_column) {} + /** + * @brief Operator called for each row in `d_column`. + * + * This logic is shared by capitalize() and title() functions. + * The derived class must supply a `capitalize_next` member function. + */ __device__ void operator()(size_type idx) { if (d_column.is_null(idx)) { if (!d_chars) d_offsets[idx] = 0; } + Derived& derived = static_cast(*this); auto const d_str = d_column.element(idx); offset_type bytes = 0; auto d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr; + bool capitalize = true; for (auto itr = d_str.begin(); itr != d_str.end(); ++itr) { auto const info = get_char_info(*itr); auto const flag = info.second; - auto const change_case = (itr == d_str.begin()) ? IS_LOWER(flag) : IS_UPPER(flag); + auto const change_case = capitalize ? IS_LOWER(flag) : IS_UPPER(flag); auto const new_char = change_case ? convert_char(info) : *itr; + // capitalize the next char if this one is a delimiter + capitalize = derived.capitalize_next(*itr, flag); if (d_buffer) d_buffer += detail::from_char_utf8(new_char, d_buffer); @@ -97,51 +105,48 @@ struct capitalize_fn : base_fn { } }; +/** + * @brief Capitalize functor. + * + * This capitalizes the first character of the string and lower-cases + * the remaining characters. + * If a delimiter is specified, capitalization continues within the string + * on the first eligible character after any delimiter. + */ +struct capitalize_fn : base_fn { + string_view const d_delimiters; + + capitalize_fn(column_device_view const& d_column, string_view const& d_delimiters) + : base_fn(d_column), d_delimiters(d_delimiters) + { + } + + __device__ bool capitalize_next(char_utf8 const chr, character_flags_table_type const) + { + return !d_delimiters.empty() && (d_delimiters.find(chr) >= 0); + } +}; + /** * @brief Title functor. * * This capitalizes the first letter of each word. - * The beginning of a word is identified as the first alphabetic - * character after a non-alphabetic character. - * Also, lower-case all other alpabetic characters. + * The beginning of a word is identified as the first sequence_type + * character after a non-sequence_type character. + * Also, lower-case all other alphabetic characters. */ -struct title_fn : base_fn { - column_device_view const d_column; +struct title_fn : base_fn { string_character_types sequence_type; - offset_type* d_offsets{}; - char* d_chars{}; title_fn(column_device_view const& d_column, string_character_types sequence_type) - : base_fn(), d_column(d_column), sequence_type(sequence_type) + : base_fn(d_column), sequence_type(sequence_type) { } - __device__ void operator()(size_type idx) + __device__ bool capitalize_next(char_utf8 const, character_flags_table_type const flag) { - if (d_column.is_null(idx)) { - if (!d_chars) d_offsets[idx] = 0; - } - - auto const d_str = d_column.element(idx); - offset_type bytes = 0; - auto d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr; - bool capitalize = true; - for (auto itr = d_str.begin(); itr != d_str.end(); ++itr) { - auto const info = get_char_info(*itr); - auto const flag = info.second; - auto const change_case = - (flag & sequence_type) && (capitalize ? IS_LOWER(flag) : IS_UPPER(flag)); - auto const new_char = change_case ? 
convert_char(info) : *itr; - // capitalize the next char if this one is not a sequence_type - capitalize = (flag & sequence_type) == 0; - - if (d_buffer) - d_buffer += detail::from_char_utf8(new_char, d_buffer); - else - bytes += detail::bytes_in_char_utf8(new_char); - } - if (!d_chars) d_offsets[idx] = bytes; - } + return (flag & sequence_type) == 0; + }; }; /** @@ -154,10 +159,10 @@ struct title_fn : base_fn { * @param mr Device memory resource used for allocating the new device_buffer */ template -std::unique_ptr capitalize_utility(CapitalFn cfn, - strings_column_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr capitalizer(CapitalFn cfn, + strings_column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { auto children = cudf::strings::detail::make_strings_children(cfn, input.size(), stream, mr); @@ -173,12 +178,15 @@ std::unique_ptr capitalize_utility(CapitalFn cfn, } // namespace std::unique_ptr capitalize(strings_column_view const& input, + string_scalar const& delimiters, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS(delimiters.is_valid(stream), "Delimiter must be a valid string"); if (input.is_empty()) return make_empty_column(data_type{type_id::STRING}); - auto d_column = column_device_view::create(input.parent(), stream); - return capitalize_utility(capitalize_fn{*d_column}, input, stream, mr); + auto const d_column = column_device_view::create(input.parent(), stream); + auto const d_delimiters = delimiters.value(stream); + return capitalizer(capitalize_fn{*d_column, d_delimiters}, input, stream, mr); } std::unique_ptr title(strings_column_view const& input, @@ -188,16 +196,17 @@ std::unique_ptr title(strings_column_view const& input, { if (input.is_empty()) return make_empty_column(data_type{type_id::STRING}); auto d_column = column_device_view::create(input.parent(), stream); - return capitalize_utility(title_fn{*d_column, sequence_type}, input, stream, mr); + return capitalizer(title_fn{*d_column, sequence_type}, input, stream, mr); } } // namespace detail std::unique_ptr capitalize(strings_column_view const& input, + string_scalar const& delimiter, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::capitalize(input, rmm::cuda_stream_default, mr); + return detail::capitalize(input, delimiter, rmm::cuda_stream_default, mr); } std::unique_ptr title(strings_column_view const& input, diff --git a/cpp/tests/strings/case_tests.cpp b/cpp/tests/strings/case_tests.cpp index f04905282df..ae6e1e8db69 100644 --- a/cpp/tests/strings/case_tests.cpp +++ b/cpp/tests/strings/case_tests.cpp @@ -97,36 +97,34 @@ TEST_F(StringsCaseTest, Swapcase) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } -TEST_F(StringsCaseTest, EmptyStringsColumn) -{ - cudf::column_view zero_size_strings_column( - cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); - auto strings_view = cudf::strings_column_view(zero_size_strings_column); - auto results = cudf::strings::to_lower(strings_view); - auto view = results->view(); - cudf::test::expect_strings_empty(results->view()); -} - TEST_F(StringsCaseTest, Capitalize) { - std::vector h_strings{ - "SȺȺnich xyZ", "Examples aBc", "thesé", nullptr, "ARE THE", "tést strings", ""}; - std::vector h_expected{ - "Sⱥⱥnich xyz", "Examples abc", "Thesé", nullptr, "Are the", "Tést strings", ""}; - cudf::test::strings_column_wrapper strings( - h_strings.begin(), - h_strings.end(), - 
thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); + {"SȺȺnich xyZ", "Examples aBc", "thesé", "", "ARE\tTHE", "tést\tstrings", ""}, + {1, 1, 1, 0, 1, 1, 1}); auto strings_view = cudf::strings_column_view(strings); - auto results = cudf::strings::capitalize(strings_view); - - cudf::test::strings_column_wrapper expected( - h_expected.begin(), - h_expected.end(), - thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + { + auto results = cudf::strings::capitalize(strings_view); + cudf::test::strings_column_wrapper expected( + {"Sⱥⱥnich xyz", "Examples abc", "Thesé", "", "Are\tthe", "Tést\tstrings", ""}, + {1, 1, 1, 0, 1, 1, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + } + { + auto results = cudf::strings::capitalize(strings_view, std::string(" ")); + cudf::test::strings_column_wrapper expected( + {"Sⱥⱥnich Xyz", "Examples Abc", "Thesé", "", "Are\tthe", "Tést\tstrings", ""}, + {1, 1, 1, 0, 1, 1, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + } + { + auto results = cudf::strings::capitalize(strings_view, std::string(" \t")); + cudf::test::strings_column_wrapper expected( + {"Sⱥⱥnich Xyz", "Examples Abc", "Thesé", "", "Are\tThe", "Tést\tStrings", ""}, + {1, 1, 1, 0, 1, 1, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + } } TEST_F(StringsCaseTest, Title) @@ -174,3 +172,33 @@ TEST_F(StringsCaseTest, MultiCharLower) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } + +TEST_F(StringsCaseTest, EmptyStringsColumn) +{ + cudf::column_view zero_size_strings_column( + cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); + auto strings_view = cudf::strings_column_view(zero_size_strings_column); + + auto results = cudf::strings::to_lower(strings_view); + cudf::test::expect_strings_empty(results->view()); + + results = cudf::strings::to_upper(strings_view); + cudf::test::expect_strings_empty(results->view()); + + results = cudf::strings::swapcase(strings_view); + cudf::test::expect_strings_empty(results->view()); + + results = cudf::strings::capitalize(strings_view); + cudf::test::expect_strings_empty(results->view()); + + results = cudf::strings::title(strings_view); + cudf::test::expect_strings_empty(results->view()); +} + +TEST_F(StringsCaseTest, ErrorTest) +{ + cudf::test::strings_column_wrapper input{"the column intentionally left blank"}; + auto view = cudf::strings_column_view(input); + + EXPECT_THROW(cudf::strings::capitalize(view, cudf::string_scalar("", false)), cudf::logic_error); +} From fba09e66d822ee55654d6a362abb5190dd2c1521 Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Fri, 2 Jul 2021 11:16:52 +0800 Subject: [PATCH 19/54] JNI support for capitalize (#8624) Add JNI for the string operator `capitalize`. 
Signed-off-by: Firestarman Authors: - Liangcai Li (https://github.com/firestarman) Approvers: - Nghia Truong (https://github.com/ttnghia) - Jason Lowe (https://github.com/jlowe) - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/8624 --- .../main/java/ai/rapids/cudf/ColumnView.java | 29 +++++++++++++++++++ java/src/main/native/src/ColumnViewJni.cpp | 17 +++++++++++ .../java/ai/rapids/cudf/ColumnVectorTest.java | 25 ++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 7912a525597..7299a6a716b 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -1497,6 +1497,33 @@ public final ColumnVector toTitle() { assert type.equals(DType.STRING); return new ColumnVector(title(getNativeView())); } + + /** + * Returns a column of capitalized strings. + * + * If the `delimiters` is an empty string, then only the first character of each + * row is capitalized. Otherwise, a non-delimiter character is capitalized after + * any delimiter character is found. + * + * Example: + * input = ["tesT1", "a Test", "Another Test", "a\tb"]; + * delimiters = "" + * output is ["Test1", "A test", "Another test", "A\tb"] + * delimiters = " " + * output is ["Test1", "A Test", "Another Test", "A\tb"] + * + * Any null string entries return corresponding null output column entries. + * + * @param delimiters Used if identifying words to capitalize. Should not be null. + * @return a column of capitalized strings. Users should close the returned column. + */ + public final ColumnVector capitalize(Scalar delimiters) { + if (DType.STRING.equals(type) && DType.STRING.equals(delimiters.getType())) { + return new ColumnVector(capitalize(getNativeView(), delimiters.getScalarHandle())); + } + throw new IllegalArgumentException("Both input column and delimiters scalar should be" + + " string type. 
But got column: " + type + ", scalar: " + delimiters.getType()); + } ///////////////////////////////////////////////////////////////////////////// // TYPE CAST ///////////////////////////////////////////////////////////////////////////// @@ -3322,6 +3349,8 @@ private static native long clamper(long nativeView, long loScalarHandle, long lo protected static native long title(long handle); + private static native long capitalize(long strsColHandle, long delimitersHandle); + private static native long makeStructView(long[] handles, long rowCount); private static native long isTimestamp(long nativeView, String format); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 44ac3a91c77..38c38d853ac 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1793,6 +1793,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_title(JNIEnv *env, jobjec CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_capitalize(JNIEnv *env, jobject j_object, + jlong strs_handle, + jlong delimiters_handle) { + + JNI_NULL_CHECK(env, strs_handle, "native view handle is null", 0) + JNI_NULL_CHECK(env, delimiters_handle, "delimiters scalar handle is null", 0) + + try { + cudf::jni::auto_set_device(env); + cudf::column_view *view = reinterpret_cast(strs_handle); + cudf::string_scalar *deli = reinterpret_cast(delimiters_handle); + std::unique_ptr result = cudf::strings::capitalize(*view, *deli); + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_makeStructView(JNIEnv *env, jobject j_object, jlongArray handles, jlong row_count) { diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index a121309d8aa..753deceb59d 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4458,6 +4458,31 @@ void testStringTitlize() { } } + @Test + void testStringCapitalize() { + try (ColumnVector cv = ColumnVector.fromStrings("s Park", "S\nqL", "lower \tcase", + null, "", "UPPER\rCASE")) { + try (Scalar deli = Scalar.fromString(""); + ColumnVector result = cv.capitalize(deli); + ColumnVector expected = ColumnVector.fromStrings("S park", "S\nql", "Lower \tcase", + null, "", "Upper\rcase")) { + assertColumnsAreEqual(expected, result); + } + try (Scalar deli = Scalar.fromString(" "); + ColumnVector result = cv.capitalize(deli); + ColumnVector expected = ColumnVector.fromStrings("S Park", "S\nql", "Lower \tcase", + null, "", "Upper\rcase")) { + assertColumnsAreEqual(expected, result); + } + try (Scalar deli = Scalar.fromString(" \t\n"); + ColumnVector result = cv.capitalize(deli); + ColumnVector expected = ColumnVector.fromStrings("S Park", "S\nQl", "Lower \tCase", + null, "", "Upper\rcase")) { + assertColumnsAreEqual(expected, result); + } + } + } + @Test void testNansToNulls() { Float[] floats = new Float[]{1.2f, Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY, null, From 5632f2fea1aa043908bd3fd3f6c2db065d8dd7bc Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Fri, 2 Jul 2021 08:57:41 -0700 Subject: [PATCH 20/54] Temporarily disable libcudf example build tests (#8642) Currently when CI build libcudf examples, it rebuilds the entire libcudf library. Until we figure out how to reuse the built libcudf artifacts in CI, we should disable the build to save CI build times. 
Looking to be re-enabled by #8638 Authors: - Michael Wang (https://github.com/isVoid) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/8642 --- ci/gpu/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index c854e67fbdf..355b18f4543 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -201,8 +201,8 @@ fi ################################################################################ # If examples grows too large to build, should move to cpu side -gpuci_logger "Building libcudf examples" -$WORKSPACE/cpp/examples/build.sh +# gpuci_logger "Building libcudf examples" +# $WORKSPACE/cpp/examples/build.sh # set environment variable for numpy 1.16 # will be enabled for later versions by default From 167c2b75dbda9b9752a2e3c6edd2e59b5d5ac12b Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Fri, 2 Jul 2021 14:01:49 -0400 Subject: [PATCH 21/54] Ensure dev environment uses Arrow GPU packages (#8637) With #7495 merged, it seems like the dev environment files create an environment with the CPU packages for `pyarrow` and `arrow-cpp`; this results in failure when trying to compile libcudf or `import cudf`: ```python ModuleNotFoundError: No module named 'pyarrow._cuda' from cudf import rmm File "/opt/conda/lib/python3.8/site-packages/cudf/__init__.py", line 11, in from cudf import core, datasets, testing File "/opt/conda/lib/python3.8/site-packages/cudf/core/__init__.py", line 3, in from cudf.core import _internals, buffer, column, column_accessor, common File "/opt/conda/lib/python3.8/site-packages/cudf/core/_internals/__init__.py", line 3, in from cudf.core._internals.where import where File "/opt/conda/lib/python3.8/site-packages/cudf/core/_internals/where.py", line 11, in from cudf.core.column import ColumnBase File "/opt/conda/lib/python3.8/site-packages/cudf/core/column/__init__.py", line 3, in from cudf.core.column.categorical import CategoricalColumn File "/opt/conda/lib/python3.8/site-packages/cudf/core/column/categorical.py", line 25, in from cudf import _lib as libcudf File "/opt/conda/lib/python3.8/site-packages/cudf/_lib/__init__.py", line 4, in from . import ( ImportError: libarrow_cuda.so.400: cannot open shared object file: No such file or directory ``` This updates the dev environments and recipe to ensure that the GPU package of `pyarrow` (and `arrow-cpp` accordingly) are used. 
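A quick way to confirm the right build is installed is to import the CUDA submodule, which only ships with the GPU package (an illustrative check, not part of this PR's diff):

```python
# Hedged sanity check: the GPU build of pyarrow provides pyarrow.cuda
# (backed by the pyarrow._cuda extension); the CPU-only package does not.
import pyarrow

print(pyarrow.__version__)  # expected to be 4.0.1 in these environments

import pyarrow.cuda  # raises ModuleNotFoundError on a CPU-only build
```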
Authors: - Charles Blackmon-Luca (https://github.com/charlesbluca) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/8637 --- conda/environments/cudf_dev_cuda11.0.yml | 2 +- conda/environments/cudf_dev_cuda11.2.yml | 2 +- conda/recipes/cudf/meta.yaml | 2 +- conda/recipes/libcudf/meta.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 5561a573609..c2a7f3d9b94 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -17,7 +17,7 @@ dependencies: - numba>=0.53.1 - numpy - pandas>=1.0,<1.3.0dev0 - - pyarrow=4.0.1 + - pyarrow=4.0.1=*cuda - fastavro>=0.22.9 - notebook>=0.5.0 - cython>=0.29,<0.30 diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index 6c8ae4cb9b0..ad2b8cd5403 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -17,7 +17,7 @@ dependencies: - numba>=0.53.1 - numpy - pandas>=1.0,<1.3.0dev0 - - pyarrow=4.0.1 + - pyarrow=4.0.1=*cuda - fastavro>=0.22.9 - notebook>=0.5.0 - cython>=0.29,<0.30 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 3da7c63857d..c0636d11ee8 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -30,7 +30,7 @@ requirements: - setuptools - numba >=0.53.1 - dlpack>=0.5,<0.6.0a0 - - pyarrow 4.0.1 + - pyarrow 4.0.1 *cuda - libcudf {{ version }} - rmm {{ minor_version }} - cudatoolkit {{ cuda_version }} diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 6464013d646..b69c0aa8169 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -37,7 +37,7 @@ requirements: host: - librmm {{ minor_version }}.* - cudatoolkit {{ cuda_version }}.* - - arrow-cpp 4.0.1 + - arrow-cpp 4.0.1 *cuda - arrow-cpp-proc * cuda - dlpack>=0.5,<0.6.0a0 run: From e25180361e27a37223606580bd2bf72131a27776 Mon Sep 17 00:00:00 2001 From: Dillon Cullinan Date: Fri, 2 Jul 2021 16:17:33 -0400 Subject: [PATCH 22/54] ENH Add Java CI build script (#8627) Adds a build/test script for the Java component of cudf. Authors: - Dillon Cullinan (https://github.com/dillon-cullinan) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cudf/pull/8627 --- ci/gpu/java.sh | 145 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100755 ci/gpu/java.sh diff --git a/ci/gpu/java.sh b/ci/gpu/java.sh new file mode 100755 index 00000000000..8c4b597d12d --- /dev/null +++ b/ci/gpu/java.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# Copyright (c) 2018-2020, NVIDIA CORPORATION. 
+############################################## +# cuDF GPU build and test script for CI # +############################################## +set -e +NUMARGS=$# +ARGS=$* + +# Arg parsing function +function hasArg { + (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") +} + +# Set path and build parallel level +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} + +# Set home to the job's workspace +export HOME="$WORKSPACE" + +# Switch to project root; also root of repo checkout +cd "$WORKSPACE" + +# Determine CUDA release version +export CUDA_REL=${CUDA_VERSION%.*} +export CONDA_ARTIFACT_PATH="$WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/" + +# Parse git describe +export GIT_DESCRIBE_TAG=`git describe --tags` +export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` + +################################################################################ +# TRAP - Setup trap for removing jitify cache +################################################################################ + +# Set `LIBCUDF_KERNEL_CACHE_PATH` environment variable to $HOME/.jitify-cache +# because it's local to the container's virtual file system, and not shared with +# other CI jobs like `/tmp` is +export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" + +function remove_libcudf_kernel_cache_dir { + EXITCODE=$? + gpuci_logger "TRAP: Removing kernel cache dir: $LIBCUDF_KERNEL_CACHE_PATH" + rm -rf "$LIBCUDF_KERNEL_CACHE_PATH" \ + || gpuci_logger "[ERROR] TRAP: Could not rm -rf $LIBCUDF_KERNEL_CACHE_PATH" + exit $EXITCODE +} + +# Set trap to run on exit +gpuci_logger "TRAP: Set trap to remove jitify cache on exit" +trap remove_libcudf_kernel_cache_dir EXIT + +mkdir -p "$LIBCUDF_KERNEL_CACHE_PATH" \ + || gpuci_logger "[ERROR] TRAP: Could not mkdir -p $LIBCUDF_KERNEL_CACHE_PATH" + +################################################################################ +# SETUP - Check environment +################################################################################ + +gpuci_logger "Check environment variables" +env + +gpuci_logger "Check GPU usage" +nvidia-smi + +gpuci_logger "Activate conda env" +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids + +gpuci_logger "Check conda environment" +conda info +conda config --show-sources +conda list --show-channel-urls + +gpuci_logger "Install dependencies" +gpuci_conda_retry install -y \ + "cudatoolkit=$CUDA_REL" \ + "rapids-build-env=$MINOR_VERSION.*" \ + "rapids-notebook-env=$MINOR_VERSION.*" \ + "dask-cuda=${MINOR_VERSION}" \ + "rmm=$MINOR_VERSION.*" \ + "ucx-py=0.21.*" \ + "openjdk=8.*" \ + "maven" + +# https://docs.rapids.ai/maintainers/depmgmt/ +# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env +# gpuci_conda_retry install -y "your-pkg=1.0.0" + + +gpuci_logger "Check compiler versions" +python --version +$CC --version +$CXX --version + +gpuci_logger "Check conda environment" +conda info +conda config --show-sources +conda list --show-channel-urls + +function install_dask { + # Install the main version of dask, distributed, and streamz + gpuci_logger "Install the main version of dask, distributed, and streamz" + set -x + pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps + pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps + # Need to uninstall streamz that is already in the env. 
+ pip uninstall -y streamz + pip install "git+https://github.com/python-streamz/streamz.git@master" --upgrade --no-deps + set +x +} + +################################################################################ +# INSTALL - Install libcudf artifacts +################################################################################ + +export LIB_BUILD_DIR="$WORKSPACE/ci/artifacts/cudf/cpu/libcudf_work/cpp/build" +export CUDF_ROOT=${LIB_BUILD_DIR} +export LD_LIBRARY_PATH="$LIB_BUILD_DIR:$CONDA_PREFIX/lib:$LD_LIBRARY_PATH" + +CUDF_CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcudf-*.tar.bz2"` +CUDF_CONDA_FILE=`basename "$CUDF_CONDA_FILE" .tar.bz2` #get filename without extension +CUDF_CONDA_FILE=${CUDF_CONDA_FILE//-/=} #convert to conda install +KAFKA_CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcudf_kafka-*.tar.bz2"` +KAFKA_CONDA_FILE=`basename "$KAFKA_CONDA_FILE" .tar.bz2` #get filename without extension +KAFKA_CONDA_FILE=${KAFKA_CONDA_FILE//-/=} #convert to conda install + +gpuci_logger "Installing $CUDF_CONDA_FILE & $KAFKA_CONDA_FILE" +conda install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE" + +install_dask + +################################################################################ +# TEST - Run java tests +################################################################################ + +gpuci_logger "Check GPU usage" +nvidia-smi + +gpuci_logger "Running Java Tests" +cd ${WORKSPACE}/java +mvn test -B -DCUDF_JNI_ARROW_STATIC=OFF + +return ${EXITCODE} From c4c960755545127b1347e9ca19b3af6074ff1e2f Mon Sep 17 00:00:00 2001 From: pxLi Date: Mon, 5 Jul 2021 11:22:06 +0800 Subject: [PATCH 23/54] Update GDS lib version to 1.0.0 (#8654) Signed-off-by: Peixin Li use released GDS 1.0.0 lib Authors: - pxLi (https://github.com/pxLi) Approvers: - https://github.com/NvTimLiu URL: https://github.com/rapidsai/cudf/pull/8654 --- java/ci/Dockerfile.centos7 | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/java/ci/Dockerfile.centos7 b/java/ci/Dockerfile.centos7 index 79f18632416..a6264a84696 100644 --- a/java/ci/Dockerfile.centos7 +++ b/java/ci/Dockerfile.centos7 @@ -31,14 +31,16 @@ RUN yum install -y git zlib-devel maven tar wget patch ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && chmod 777 /rapids -RUN cd /usr/local/ && wget --quiet https://github.com/Kitware/CMake/releases/download/v3.20.5/cmake-3.20.5-linux-x86_64.tar.gz && \ - tar zxf cmake-3.20.5-linux-x86_64.tar.gz && \ - rm cmake-3.20.5-linux-x86_64.tar.gz -ENV PATH /usr/local/cmake-3.20.5-linux-x86_64/bin:$PATH +ARG CMAKE_VERSION=3.20.5 +RUN cd /usr/local/ && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \ + tar zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \ + rm cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz +ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-x86_64/bin:$PATH # get GDS user-space lib -RUN cd /tmp/ && wget https://developer.download.nvidia.com/gds/redist/rel-0.95.1/gds-redistrib-0.95.1.tgz && \ - tar zxf gds-redistrib-0.95.1.tgz && \ - cp -R ./gds-redistrib-0.95.1/targets/x86_64-linux/lib/* /usr/local/cuda/targets/x86_64-linux/lib && \ - cp -R ./gds-redistrib-0.95.1/targets/x86_64-linux/include/* /usr/local/cuda/targets/x86_64-linux/include && \ - rm -rf gds-redistrib-0.95.1* +ARG GDS_VERSION=1.0.0 +RUN cd /tmp/ && wget 
https://developer.download.nvidia.com/gds/redist/rel-${GDS_VERSION}/gds-redistrib-${GDS_VERSION}.tgz && \
+    tar zxf gds-redistrib-${GDS_VERSION}.tgz && \
+    cp -R ./gds-redistrib-${GDS_VERSION}/targets/x86_64-linux/lib/* /usr/local/cuda/targets/x86_64-linux/lib && \
+    cp -R ./gds-redistrib-${GDS_VERSION}/targets/x86_64-linux/include/* /usr/local/cuda/targets/x86_64-linux/include && \
+    rm -rf gds-redistrib-*

From de8d8d042ed1e70a127e10785d118722fc931c4d Mon Sep 17 00:00:00 2001
From: jakirkham
Date: Mon, 5 Jul 2021 10:02:12 -0700
Subject: [PATCH 24/54] Pin `*arrow` to use `*cuda` in `run` (#8651)

Follow-up to PR (https://github.com/rapidsai/cudf/pull/8637)

Even though we pinned `*cuda` in `host`, this didn't carry over to the `run` dependencies. To fix this, tack on `*cuda` to `pyarrow` and `arrow-cpp` in `run`. This should fix installation errors where the CPU builds were getting pulled in unintentionally.

cc @ajschmidt8 @charlesbluca @pentschev

Authors:
  - https://github.com/jakirkham

Approvers:
  - Jordan Jacobelli (https://github.com/Ethyling)

URL: https://github.com/rapidsai/cudf/pull/8651
---
 conda/recipes/cudf/meta.yaml    | 2 +-
 conda/recipes/libcudf/meta.yaml | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index c0636d11ee8..9023e89c2f5 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -42,7 +42,7 @@ requirements:
     - cupy >7.1.0,<10.0.0a0
     - numba >=0.53.1
     - numpy
-    - {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
+    - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} *cuda
     - fastavro >=0.22.0
     - {{ pin_compatible('rmm', max_pin='x.x') }}
     - fsspec>=0.6.0
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index b69c0aa8169..6c4175a2539 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -42,6 +42,7 @@ requirements:
   run:
     - {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
+    - arrow-cpp * *cuda
     - arrow-cpp-proc * cuda
     - {{ pin_compatible('dlpack', max_pin='x.x') }}

From 3ee264c4c38a3d8682812ecc7b28d8d3249c2e62 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 6 Jul 2021 01:29:08 -0400
Subject: [PATCH 25/54] Support multi-char case conversion in capitalize function (#8647)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #8644
The multi-character case conversion support added for the strings `to_upper` and `to_lower` functions is now reused by the `capitalize` and `title` functions. For example, converting the single character `ʼn` to its upper-case equivalent actually produces two distinct characters, `'N` (apostrophe and capital N). This is different from converting a single multi-byte character into another single multi-byte character with a different byte length. Here a single character is converted into two characters.
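To make the one-to-two expansion concrete, plain Python's full Unicode case mapping shows the same behavior, and the cudf string API exercises the new code path (a hedged sketch, not taken from the PR's own tests; assumes a GPU and a build containing this change):

```python
# U+0149 (ʼn) upper-cases to two characters: U+02BC followed by "N".
assert "\u0149".upper() == "\u02bc\u004e"

# Sketch with cudf: capitalize and title now route through the same
# multi-character conversion used by upper/lower.
import cudf

s = cudf.Series(["\u0149test"])
print(s.str.upper())       # the row becomes "ʼNTEST"
print(s.str.capitalize())  # the row becomes "ʼNtest"
```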
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/8647 --- cpp/src/strings/capitalize.cu | 51 +++++++++++++++++++++++++------- cpp/tests/strings/case_tests.cpp | 9 ++++-- 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/cpp/src/strings/capitalize.cu b/cpp/src/strings/capitalize.cu index 93b0edc1855..29ff7b242e6 100644 --- a/cpp/src/strings/capitalize.cu +++ b/cpp/src/strings/capitalize.cu @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -46,6 +47,7 @@ template struct base_fn { character_flags_table_type const* d_flags; character_cases_table_type const* d_case_table; + special_case_mapping const* d_special_case_mapping; column_device_view const d_column; offset_type* d_offsets{}; char* d_chars{}; @@ -53,6 +55,7 @@ struct base_fn { base_fn(column_device_view const& d_column) : d_flags(get_character_flags_table()), d_case_table(get_character_cases_table()), + d_special_case_mapping(get_special_case_mapping_table()), d_column(d_column) { } @@ -66,9 +69,28 @@ struct base_fn { return char_info{code_point, flag}; } - __device__ char_utf8 convert_char(char_info const& info) const + __device__ int32_t convert_char(char_info const& info, char* d_buffer) const { - return codepoint_to_utf8(d_case_table[info.first]); + auto const code_point = info.first; + auto const flag = info.second; + + if (!IS_SPECIAL(flag)) { + auto const new_char = codepoint_to_utf8(d_case_table[code_point]); + return d_buffer ? detail::from_char_utf8(new_char, d_buffer) + : detail::bytes_in_char_utf8(new_char); + } + + special_case_mapping m = d_special_case_mapping[get_special_case_hash_index(code_point)]; + + auto const count = IS_LOWER(flag) ? m.num_upper_chars : m.num_lower_chars; + auto const* chars = IS_LOWER(flag) ? m.upper : m.lower; + size_type bytes = 0; + for (uint16_t idx = 0; idx < count; idx++) { + bytes += d_buffer + ? detail::from_char_utf8(detail::codepoint_to_utf8(chars[idx]), d_buffer + bytes) + : detail::bytes_in_char_utf8(detail::codepoint_to_utf8(chars[idx])); + } + return bytes; } /** @@ -88,18 +110,25 @@ struct base_fn { offset_type bytes = 0; auto d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr; bool capitalize = true; - for (auto itr = d_str.begin(); itr != d_str.end(); ++itr) { - auto const info = get_char_info(*itr); + for (auto const chr : d_str) { + auto const info = get_char_info(chr); auto const flag = info.second; auto const change_case = capitalize ? IS_LOWER(flag) : IS_UPPER(flag); - auto const new_char = change_case ? convert_char(info) : *itr; - // capitalize the next char if this one is a delimiter - capitalize = derived.capitalize_next(*itr, flag); - if (d_buffer) - d_buffer += detail::from_char_utf8(new_char, d_buffer); - else - bytes += detail::bytes_in_char_utf8(new_char); + if (change_case) { + auto const char_bytes = convert_char(info, d_buffer); + bytes += char_bytes; + d_buffer += d_buffer ? 
char_bytes : 0; + } else { + if (d_buffer) { + d_buffer += detail::from_char_utf8(chr, d_buffer); + } else { + bytes += detail::bytes_in_char_utf8(chr); + } + } + + // capitalize the next char if this one is a delimiter + capitalize = derived.capitalize_next(chr, flag); } if (!d_chars) d_offsets[idx] = bytes; } diff --git a/cpp/tests/strings/case_tests.cpp b/cpp/tests/strings/case_tests.cpp index ae6e1e8db69..da55e967266 100644 --- a/cpp/tests/strings/case_tests.cpp +++ b/cpp/tests/strings/case_tests.cpp @@ -151,13 +151,18 @@ TEST_F(StringsCaseTest, Title) TEST_F(StringsCaseTest, MultiCharUpper) { - cudf::test::strings_column_wrapper strings{"\u1f52", "\u1f83", "\u1e98", "\ufb05", "\u0149"}; + cudf::test::strings_column_wrapper strings{"\u1f52 \u1f83", "\u1e98 \ufb05", "\u0149"}; cudf::test::strings_column_wrapper expected{ - "\u03a5\u0313\u0300", "\u1f0b\u0399", "\u0057\u030a", "\u0053\u0054", "\u02bc\u004e"}; + "\u03a5\u0313\u0300 \u1f0b\u0399", "\u0057\u030a \u0053\u0054", "\u02bc\u004e"}; auto strings_view = cudf::strings_column_view(strings); auto results = cudf::strings::to_upper(strings_view); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + + results = cudf::strings::capitalize(strings_view, std::string(" ")); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + results = cudf::strings::title(strings_view); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } From c54346e717d1b7ce6508931c67c6edac40d7f16e Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Tue, 6 Jul 2021 12:42:40 -0400 Subject: [PATCH 26/54] Add collect list to dask-cudf groupby aggregations (#8045) Closes #7812 Adds support for cuDF's `collect` aggregation in dask-cuDF. Authors: - Charles Blackmon-Luca (https://github.com/charlesbluca) Approvers: - Richard (Rick) Zamora (https://github.com/rjzamora) URL: https://github.com/rapidsai/cudf/pull/8045 --- python/dask_cudf/dask_cudf/groupby.py | 50 ++++++++++++++++--- .../dask_cudf/dask_cudf/tests/test_groupby.py | 27 ++++++++++ 2 files changed, 71 insertions(+), 6 deletions(-) diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py index 73fe1bd2196..336fdaf009c 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/groupby.py @@ -62,7 +62,16 @@ def aggregate(self, arg, split_every=None, split_out=1): return self.size() arg = _redirect_aggs(arg) - _supported = {"count", "mean", "std", "var", "sum", "min", "max"} + _supported = { + "count", + "mean", + "std", + "var", + "sum", + "min", + "max", + "collect", + } if ( isinstance(self.obj, DaskDataFrame) and isinstance(self.index, (str, list)) @@ -109,7 +118,16 @@ def aggregate(self, arg, split_every=None, split_out=1): return self.size() arg = _redirect_aggs(arg) - _supported = {"count", "mean", "std", "var", "sum", "min", "max"} + _supported = { + "count", + "mean", + "std", + "var", + "sum", + "min", + "max", + "collect", + } if ( isinstance(self.obj, DaskDataFrame) and isinstance(self.index, (str, list)) @@ -147,7 +165,7 @@ def groupby_agg( This aggregation algorithm only supports the following options: - {"count", "mean", "std", "var", "sum", "min", "max"} + {"count", "mean", "std", "var", "sum", "min", "max", "collect"} This "optimized" approach is more performant than the algorithm in `dask.dataframe`, because it allows the cudf backend to @@ -173,7 +191,7 @@ def groupby_agg( # strings (no lists) str_cols_out = True for col in aggs: - if isinstance(aggs[col], str): + if 
isinstance(aggs[col], str) or callable(aggs[col]):
             aggs[col] = [aggs[col]]
         else:
             str_cols_out = False
@@ -181,7 +199,16 @@
             columns.append(col)

     # Assert that aggregations are supported
-    _supported = {"count", "mean", "std", "var", "sum", "min", "max"}
+    _supported = {
+        "count",
+        "mean",
+        "std",
+        "var",
+        "sum",
+        "min",
+        "max",
+        "collect",
+    }
     if not _is_supported(aggs, _supported):
         raise ValueError(
             f"Supported aggs include {_supported} for groupby_agg API. "
@@ -282,7 +309,13 @@ def groupby_agg(

 def _redirect_aggs(arg):
     """ Redirect aggregations to their corresponding name in cuDF """
-    redirects = {sum: "sum", max: "max", min: "min"}
+    redirects = {
+        sum: "sum",
+        max: "max",
+        min: "min",
+        list: "collect",
+        "list": "collect",
+    }
     if isinstance(arg, dict):
         new_arg = dict()
         for col in arg:
@@ -400,6 +433,8 @@ def _tree_node_agg(dfs, gb_cols, split_out, dropna, sort, sep):
             agg_dict[col] = ["sum"]
         elif agg in ("min", "max"):
             agg_dict[col] = [agg]
+        elif agg == "collect":
+            agg_dict[col] = ["collect"]
         else:
             raise ValueError(f"Unexpected aggregation: {agg}")
@@ -478,6 +513,9 @@ def _finalize_gb_agg(
             gb.drop(columns=[sum_name], inplace=True)
         if "count" not in agg_list:
             gb.drop(columns=[count_name], inplace=True)
+        if "collect" in agg_list:
+            collect_name = _make_name(col, "collect", sep=sep)
+            gb[collect_name] = gb[collect_name].list.concat()

     # Ensure sorted keys if `sort=True`
     if sort:
diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py
index e3a3045dcc7..356567fdef0 100644
--- a/python/dask_cudf/dask_cudf/tests/test_groupby.py
+++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py
@@ -125,6 +125,33 @@ def test_groupby_std(func):
     dd.assert_eq(a, b)


+@pytest.mark.parametrize(
+    "func",
+    [
+        lambda df: df.groupby("x").agg({"y": "collect"}),
+        pytest.param(
+            lambda df: df.groupby("x").y.agg("collect"), marks=pytest.mark.skip
+        ),
+    ],
+)
+def test_groupby_collect(func):
+    pdf = pd.DataFrame(
+        {
+            "x": np.random.randint(0, 5, size=10000),
+            "y": np.random.normal(size=10000),
+        }
+    )
+
+    gdf = cudf.DataFrame.from_pandas(pdf)
+
+    ddf = dask_cudf.from_cudf(gdf, npartitions=5)
+
+    a = func(gdf).to_pandas()
+    b = func(ddf).compute().to_pandas()
+
+    dd.assert_eq(a, b)
+
+
 # reason gotattr in cudf
 @pytest.mark.parametrize(
     "func",

From e855eb9ea0a59e9490094b60fc092e2f0958a451 Mon Sep 17 00:00:00 2001
From: "Ram (Ramakrishna Prabhu)" <42624703+rgsl888prabhu@users.noreply.github.com>
Date: Tue, 6 Jul 2021 12:31:52 -0500
Subject: [PATCH 27/54] Adding support for list and struct type in ORC Reader
 (#8599)

This PR adds support for list and struct types in the ORC reader. Columns are
processed one nesting level at a time: for a list column, the number of child
rows per stripe and the total number of child rows must be extracted before
its children can be processed, whereas all child columns of a struct have the
same number of rows as the parent, so struct children are processed together
with the parent at the same level. This is why child columns of list and
struct types are handled differently in this PR.
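A rough usage sketch; the file name and schema are hypothetical, and the
availability of the `struct`/`list` accessors in this exact release is an
assumption:

```python
import cudf

# Suppose nested.orc stores one struct column and one list column per row,
# e.g. {"s": {"a": 1, "b": 2}, "l": [1, 2]}.
df = cudf.read_orc("nested.orc")

# Struct children are decoded at the same nesting level as their parent...
print(df["s"].struct.field("a"))

# ...while list children are decoded one level down, behind an offsets column.
print(df["l"].list.leaves)
```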
closes #8582 Authors: - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu) - Jeremy Dyer (https://github.com/jdye64) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Devavret Makkar (https://github.com/devavret) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/8599 --- cpp/src/io/orc/orc.cpp | 21 +- cpp/src/io/orc/orc.h | 26 + cpp/src/io/orc/orc_gpu.h | 24 +- cpp/src/io/orc/reader_impl.cu | 849 ++++++++++++++------- cpp/src/io/orc/reader_impl.hpp | 119 ++- cpp/src/io/orc/stripe_data.cu | 156 ++-- cpp/src/io/orc/stripe_init.cu | 38 +- cpp/src/io/utilities/hostdevice_vector.hpp | 9 + python/cudf/cudf/_lib/io/utils.pxd | 12 +- python/cudf/cudf/_lib/io/utils.pyx | 45 +- python/cudf/cudf/_lib/orc.pyx | 17 +- python/cudf/cudf/_lib/parquet.pyx | 38 +- python/cudf/cudf/core/column/column.py | 6 + python/cudf/cudf/tests/test_orc.py | 158 +++- 14 files changed, 1110 insertions(+), 408 deletions(-) diff --git a/cpp/src/io/orc/orc.cpp b/cpp/src/io/orc/orc.cpp index ea6d6b6ac85..7358f0e6404 100644 --- a/cpp/src/io/orc/orc.cpp +++ b/cpp/src/io/orc/orc.cpp @@ -37,9 +37,7 @@ void ProtobufReader::skip_struct_field(int t) case PB_TYPE_FIXED64: skip_bytes(8); break; case PB_TYPE_FIXEDLEN: skip_bytes(get()); break; case PB_TYPE_FIXED32: skip_bytes(4); break; - default: - // printf("invalid type (%d)\n", t); - break; + default: break; } } @@ -471,20 +469,13 @@ void metadata::init_column_names() const auto const &types = ff.types; for (int32_t col_id = 0; col_id < get_num_columns(); ++col_id) { std::string col_name; - uint32_t parent_idx = col_id; - uint32_t idx = col_id; - do { - idx = parent_idx; - parent_idx = (idx < types.size()) ? static_cast(schema_idxs[idx].parent) : ~0; - if (parent_idx >= types.size()) break; - - auto const field_idx = - (parent_idx < types.size()) ? static_cast(schema_idxs[idx].field) : ~0; + if (schema_idxs[col_id].parent >= 0 and schema_idxs[col_id].field >= 0) { + auto const parent_idx = static_cast(schema_idxs[col_id].parent); + auto const field_idx = static_cast(schema_idxs[col_id].field); if (field_idx < types[parent_idx].fieldNames.size()) { - col_name = - types[parent_idx].fieldNames[field_idx] + (col_name.empty() ? "" : ("." + col_name)); + col_name = types[parent_idx].fieldNames[field_idx]; } - } while (parent_idx != idx); + } // If we have no name (root column), generate a name column_names.push_back(col_name.empty() ? "col" + std::to_string(col_id) : col_name); } diff --git a/cpp/src/io/orc/orc.h b/cpp/src/io/orc/orc.h index e6fec8afb0f..224820550e1 100644 --- a/cpp/src/io/orc/orc.h +++ b/cpp/src/io/orc/orc.h @@ -537,6 +537,32 @@ class OrcDecompressor { std::vector m_buf; }; +/** + * @brief Stores orc id for each column and its adjacent number of children + * in case of struct or number of children in case of list column. + * If list column has struct column, then all child columns of that struct are treated as child + * column of list. 
+ * + * @code{.pseudo} + * Consider following data where a struct has two members and a list column + * {"struct": [{"a": 1, "b": 2}, {"a":3, "b":5}], "list":[[1, 2], [2, 3]]} + * + * `orc_column_meta` for struct column would be + * id = 0 + * num_children = 2 + * + * `orc_column_meta` for list column would be + * id = 3 + * num_children = 1 + * @endcode + * + */ +struct orc_column_meta { + // orc_column_meta(uint32_t _id, uint32_t _num_children) : id(_id), num_children(_num_children){}; + uint32_t id; // orc id for the column + uint32_t num_children; // number of children at the same level of nesting in case of struct +}; + /** * @brief A helper class for ORC file metadata. Provides some additional * convenience methods for initializing and accessing metadata. diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index 66734df86c0..b86a350fb64 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -95,7 +95,10 @@ struct ColumnDesc { uint32_t *valid_map_base; // base pointer of valid bit map for this column void *column_data_base; // base pointer of column data uint32_t start_row; // starting row of the stripe - uint32_t num_rows; // starting row of the stripe + uint32_t num_rows; // number of rows in stripe + uint32_t column_num_rows; // number of rows in whole column + uint32_t num_child_rows; // store number of child rows if it's list column + uint32_t num_rowgroups; // number of rowgroups in the chunk uint32_t dictionary_start; // start position in global dictionary uint32_t dict_len; // length of local dictionary uint32_t null_count; // number of null values in this stripe's column @@ -115,6 +118,9 @@ struct RowGroup { uint32_t chunk_id; // Column chunk this entry belongs to uint32_t strm_offset[2]; // Index offset for CI_DATA and CI_DATA2 streams uint16_t run_pos[2]; // Run position for CI_DATA and CI_DATA2 + uint32_t num_rows; // number of rows in rowgroup + uint32_t start_row; // starting row of the rowgroup + uint32_t num_child_rows; // number of rows of children in rowgroup in case of list type }; /** @@ -224,6 +230,9 @@ void PostDecompressionReassemble(CompressedStreamInfo *strm_info, * @param[in] num_columns Number of columns * @param[in] num_stripes Number of stripes * @param[in] num_rowgroups Number of row groups + * @param[in] rowidx_stride Row index stride + * @param[in] use_base_stride Whether to use base stride obtained from meta or use the computed + * value * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ void ParseRowGroupIndex(RowGroup *row_groups, @@ -233,6 +242,7 @@ void ParseRowGroupIndex(RowGroup *row_groups, uint32_t num_stripes, uint32_t num_rowgroups, uint32_t rowidx_stride, + bool use_base_stride, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** @@ -242,7 +252,6 @@ void ParseRowGroupIndex(RowGroup *row_groups, * @param[in] global_dictionary Global dictionary device array * @param[in] num_columns Number of columns * @param[in] num_stripes Number of stripes - * @param[in] max_rows Maximum number of rows to load * @param[in] first_row Crop all rows below first_row * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ @@ -250,7 +259,6 @@ void DecodeNullsAndStringDictionaries(ColumnDesc *chunks, DictionaryEntry *global_dictionary, uint32_t num_columns, uint32_t num_stripes, - size_t max_rows = ~0, size_t first_row = 0, rmm::cuda_stream_view stream = rmm::cuda_stream_default); @@ -261,25 +269,25 @@ void DecodeNullsAndStringDictionaries(ColumnDesc *chunks, * @param[in] 
global_dictionary Global dictionary device array * @param[in] num_columns Number of columns * @param[in] num_stripes Number of stripes - * @param[in] max_rows Maximum number of rows to load * @param[in] first_row Crop all rows below first_row * @param[in] tz_table Timezone translation table * @param[in] tz_len Length of timezone translation table - * @param[in] row_groups Optional row index data + * @param[in] row_groups Optional row index data [rowgroup][column] * @param[in] num_rowgroups Number of row groups in row index data * @param[in] rowidx_stride Row index stride + * @param[in] level Current nesting level being processed * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void DecodeOrcColumnData(ColumnDesc const *chunks, +void DecodeOrcColumnData(ColumnDesc *chunks, DictionaryEntry *global_dictionary, + device_2dspan row_groups, uint32_t num_columns, uint32_t num_stripes, - size_t max_rows = ~0, size_t first_row = 0, timezone_table_view tz_table = {}, - const RowGroup *row_groups = 0, uint32_t num_rowgroups = 0, uint32_t rowidx_stride = 0, + size_t level = 0, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 499cb3f0432..3221c754349 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -26,6 +26,7 @@ #include #include "orc.h" +#include #include #include #include @@ -34,6 +35,7 @@ #include #include #include +#include #include #include @@ -76,6 +78,8 @@ constexpr type_id to_type_id(const orc::SchemaType &schema, // There isn't a (DAYS -> np.dtype) mapping return (use_np_dtypes) ? type_id::TIMESTAMP_MILLISECONDS : type_id::TIMESTAMP_DAYS; case orc::DECIMAL: return (decimals_as_float64) ? type_id::FLOAT64 : type_id::DECIMAL64; + case orc::LIST: return type_id::LIST; + case orc::STRUCT: return type_id::STRUCT; default: break; } @@ -123,6 +127,26 @@ constexpr std::pair get_index_type_and_pos( } // namespace namespace { +/** + * @brief struct to store buffer data and size of list buffer + */ +struct list_buffer_data { + size_type *data; + size_type size; +}; + +// Generates offsets for list buffer from number of elements in a row. +void generate_offsets_for_list(rmm::device_uvector const &buff_data, + rmm::cuda_stream_view stream) +{ + auto transformer = [] __device__(list_buffer_data list_data) { + thrust::exclusive_scan( + thrust::seq, list_data.data, list_data.data + list_data.size, list_data.data); + }; + thrust::for_each(rmm::exec_policy(stream), buff_data.begin(), buff_data.end(), transformer); + stream.synchronize(); +} + /** * @brief Struct that maps ORC streams to columns */ @@ -151,16 +175,15 @@ size_t gather_stream_info(const size_t stripe_index, const orc::StripeInformation *stripeinfo, const orc::StripeFooter *stripefooter, const std::vector &orc2gdf, - const std::vector &gdf2orc, + const std::vector &gdf2orc, const std::vector types, bool use_index, size_t *num_dictionary_entries, - hostdevice_vector &chunks, + cudf::detail::hostdevice_2dvector &chunks, std::vector &stream_info) { - const auto num_columns = gdf2orc.size(); - uint64_t src_offset = 0; - uint64_t dst_offset = 0; + uint64_t src_offset = 0; + uint64_t dst_offset = 0; for (const auto &stream : stripefooter->streams) { if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { dst_offset += stream.length; @@ -181,7 +204,7 @@ size_t gather_stream_info(const size_t stripe_index, auto child_idx = (idx < orc2gdf.size()) ? 
orc2gdf[idx] : -1; if (child_idx >= 0) { col = child_idx; - auto &chunk = chunks[stripe_index * num_columns + col]; + auto &chunk = chunks[stripe_index][col]; chunk.strm_id[gpu::CI_PRESENT] = stream_info.size(); chunk.strm_len[gpu::CI_PRESENT] = stream.length; } @@ -192,7 +215,7 @@ size_t gather_stream_info(const size_t stripe_index, if (col != -1) { if (src_offset >= stripeinfo->indexLength || use_index) { // NOTE: skip_count field is temporarily used to track index ordering - auto &chunk = chunks[stripe_index * num_columns + col]; + auto &chunk = chunks[stripe_index][col]; const auto idx = get_index_type_and_pos(stream.kind, chunk.skip_count, col == orc2gdf[column_id]); if (idx.first < gpu::CI_NUM_STREAMS) { @@ -434,59 +457,114 @@ class aggregate_orc_metadata { return selected_stripes_mapping; } + /** + * @brief Adds column as per the request and saves metadata about children. + * Struct children are in the same level as struct, only list column + * children are pushed to next level. + * + * @param selection A vector that saves list of columns as per levels of nesting. + * @param types A vector of schema types of columns. + * @param level current level of nesting. + * @param id current column id that needs to be added. + * @param has_timestamp_column True if timestamp column present and false otherwise. + * + * @return returns number of child columns at same level in case of struct and next level in case + * of list + */ + uint32_t add_column(std::vector> &selection, + std::vector const &types, + const size_t level, + const uint32_t id, + bool &has_timestamp_column, + bool &has_list_column) + { + uint32_t num_lvl_child_columns = 0; + if (level == selection.size()) { selection.emplace_back(); } + selection[level].push_back({id, 0}); + const int col_id = selection[level].size() - 1; + if (types[id].kind == orc::TIMESTAMP) { has_timestamp_column = true; } + + switch (types[id].kind) { + case orc::LIST: { + uint32_t lvl_cols = 0; + if (not types[id].subtypes.empty()) { + has_list_column = true; + // Since list column needs to be processed before its child can be processed, + // child column is being added to next level + lvl_cols = + add_column(selection, types, level + 1, id + 1, has_timestamp_column, has_list_column); + } + // The list child column may be a struct in which case lvl_cols will be > 1 + selection[level][col_id].num_children = lvl_cols; + } break; + + case orc::STRUCT: + for (const auto child_id : types[id].subtypes) { + num_lvl_child_columns += + add_column(selection, types, level, child_id, has_timestamp_column, has_list_column); + } + selection[level][col_id].num_children = num_lvl_child_columns; + break; + + default: break; + } + + return num_lvl_child_columns + 1; + } + /** * @brief Filters and reduces down to a selection of columns * * @param use_names List of column names to select * @param has_timestamp_column True if timestamp column present and false otherwise * - * @return input column information, output column information, list of output column schema - * indices + * @return Vector of list of ORC column meta-data */ - std::vector select_columns(std::vector const &use_names, - bool &has_timestamp_column) const + std::vector> select_columns( + std::vector const &use_names, bool &has_timestamp_column, bool &has_list_column) { auto const &pfm = per_file_metadata[0]; + std::vector> selection; - std::vector output_column_schema_idxs; if (not use_names.empty()) { - int index = 0; - for (auto const &use_name : use_names) { + uint32_t index = 0; + // Have to check only 
parent columns + auto const num_columns = pfm.ff.types[0].subtypes.size(); + + for (const auto &use_name : use_names) { bool name_found = false; - for (int i = 0; i < pfm.get_num_columns(); ++i, ++index) { - if (index >= pfm.get_num_columns()) { index = 0; } - if (pfm.get_column_name(index).compare(use_name) == 0) { + for (uint32_t i = 0; i < num_columns; ++i, ++index) { + if (index >= num_columns) { index = 0; } + auto col_id = pfm.ff.types[0].subtypes[index]; + if (pfm.get_column_name(col_id) == use_name) { name_found = true; - output_column_schema_idxs.emplace_back(index); - if (pfm.ff.types[index].kind == orc::TIMESTAMP) { has_timestamp_column = true; } - index++; + add_column(selection, pfm.ff.types, 0, col_id, has_timestamp_column, has_list_column); + // Should start with next index + index = i + 1; break; } } CUDF_EXPECTS(name_found, "Unknown column name : " + std::string(use_name)); } } else { - // For now, only select all leaf nodes - for (int i = 1; i < pfm.get_num_columns(); ++i) { - if (pfm.ff.types[i].subtypes.empty()) { - output_column_schema_idxs.emplace_back(i); - if (pfm.ff.types[i].kind == orc::TIMESTAMP) { has_timestamp_column = true; } - } + for (auto const &col_id : pfm.ff.types[0].subtypes) { + add_column(selection, pfm.ff.types, 0, col_id, has_timestamp_column, has_list_column); } } - return output_column_schema_idxs; + return selection; } }; rmm::device_buffer reader::impl::decompress_stripe_data( - hostdevice_vector &chunks, + cudf::detail::hostdevice_2dvector &chunks, const std::vector &stripe_data, const OrcDecompressor *decompressor, std::vector &stream_info, size_t num_stripes, - device_span row_groups, + cudf::detail::hostdevice_2dvector &row_groups, size_t row_index_stride, + bool use_base_stride, rmm::cuda_stream_view stream) { // Parse the columns' compressed info @@ -569,11 +647,11 @@ rmm::device_buffer reader::impl::decompress_stripe_data( // decompression failed. 
compinfo.device_to_host(stream, true); - const size_t num_columns = chunks.size() / num_stripes; + const size_t num_columns = chunks.size().second; for (size_t i = 0; i < num_stripes; ++i) { for (size_t j = 0; j < num_columns; ++j) { - auto &chunk = chunks[i * num_columns + j]; + auto &chunk = chunks[i][j]; for (int k = 0; k < gpu::CI_NUM_STREAMS; ++k) { if (chunk.strm_len[k] > 0 && chunk.strm_id[k] < compinfo.size()) { chunk.streams[k] = compinfo[chunk.strm_id[k]].uncompressed_data; @@ -583,38 +661,40 @@ rmm::device_buffer reader::impl::decompress_stripe_data( } } - if (not row_groups.empty()) { + if (row_groups.size().first) { chunks.host_to_device(stream); - gpu::ParseRowGroupIndex(row_groups.data(), + row_groups.host_to_device(stream); + gpu::ParseRowGroupIndex(row_groups.base_device_ptr(), compinfo.device_ptr(), - chunks.device_ptr(), + chunks.base_device_ptr(), num_columns, num_stripes, - row_groups.size() / num_columns, + row_groups.size().first, row_index_stride, + use_base_stride, stream); } return decomp_data; } -void reader::impl::decode_stream_data(hostdevice_vector &chunks, +void reader::impl::decode_stream_data(cudf::detail::hostdevice_2dvector &chunks, size_t num_dicts, size_t skip_rows, - size_t num_rows, timezone_table_view tz_table, - device_span row_groups, + cudf::detail::hostdevice_2dvector &row_groups, size_t row_index_stride, std::vector &out_buffers, + size_t level, rmm::cuda_stream_view stream) { - const auto num_columns = out_buffers.size(); - const auto num_stripes = chunks.size() / out_buffers.size(); + const auto num_stripes = chunks.size().first; + const auto num_columns = chunks.size().second; // Update chunks with pointers to column data for (size_t i = 0; i < num_stripes; ++i) { for (size_t j = 0; j < num_columns; ++j) { - auto &chunk = chunks[i * num_columns + j]; + auto &chunk = chunks[i][j]; chunk.column_data_base = out_buffers[j].data(); chunk.valid_map_base = out_buffers[j].null_mask(); } @@ -625,27 +705,189 @@ void reader::impl::decode_stream_data(hostdevice_vector &chunks chunks.host_to_device(stream); gpu::DecodeNullsAndStringDictionaries( - chunks.device_ptr(), global_dict.data(), num_columns, num_stripes, num_rows, skip_rows, stream); - gpu::DecodeOrcColumnData(chunks.device_ptr(), + chunks.base_device_ptr(), global_dict.data(), num_columns, num_stripes, skip_rows, stream); + gpu::DecodeOrcColumnData(chunks.base_device_ptr(), global_dict.data(), + row_groups, num_columns, num_stripes, - num_rows, skip_rows, tz_table, - row_groups.data(), - row_groups.size() / num_columns, + row_groups.size().first, row_index_stride, + level, stream); chunks.device_to_host(stream, true); for (size_t i = 0; i < num_stripes; ++i) { for (size_t j = 0; j < num_columns; ++j) { - out_buffers[j].null_count() += chunks[i * num_columns + j].null_count; + out_buffers[j].null_count() += chunks[i][j].null_count; } } } +// Aggregate child column metadata per stripe and per column +void reader::impl::aggregate_child_meta(cudf::detail::host_2dspan chunks, + cudf::detail::host_2dspan row_groups, + std::vector const &list_col, + const int32_t level) +{ + const auto num_of_stripes = chunks.size().first; + const auto num_of_rowgroups = row_groups.size().first; + const auto num_parent_cols = _selected_columns[level].size(); + const auto num_child_cols = _selected_columns[level + 1].size(); + const auto number_of_child_chunks = num_child_cols * num_of_stripes; + auto &num_child_rows = _col_meta.num_child_rows; + + // Reset the meta to store child column details. 
+ num_child_rows.resize(_selected_columns[level + 1].size()); + std::fill(num_child_rows.begin(), num_child_rows.end(), 0); + _col_meta.child_start_row.resize(number_of_child_chunks); + _col_meta.num_child_rows_per_stripe.resize(number_of_child_chunks); + _col_meta.rwgrp_meta.resize(num_of_rowgroups * num_child_cols); + + auto child_start_row = cudf::detail::host_2dspan( + _col_meta.child_start_row.data(), num_of_stripes, num_child_cols); + auto num_child_rows_per_stripe = cudf::detail::host_2dspan( + _col_meta.num_child_rows_per_stripe.data(), num_of_stripes, num_child_cols); + auto rwgrp_meta = cudf::detail::host_2dspan( + _col_meta.rwgrp_meta.data(), num_of_rowgroups, num_child_cols); + + int index = 0; // number of child column processed + + // For each parent column, update its child column meta for each stripe. + std::for_each(list_col.cbegin(), list_col.cend(), [&](const auto p_col) { + const auto parent_col_idx = _col_meta.orc_col_map[level][p_col.id]; + auto start_row = 0; + auto processed_row_groups = 0; + + for (size_t stripe_id = 0; stripe_id < num_of_stripes; stripe_id++) { + // Aggregate num_rows and start_row from processed parent columns per row groups + if (num_of_rowgroups) { + auto stripe_num_row_groups = chunks[stripe_id][parent_col_idx].num_rowgroups; + auto processed_child_rows = 0; + + for (size_t rowgroup_id = 0; rowgroup_id < stripe_num_row_groups; + rowgroup_id++, processed_row_groups++) { + const auto child_rows = row_groups[processed_row_groups][parent_col_idx].num_child_rows; + for (uint32_t id = 0; id < p_col.num_children; id++) { + const auto child_col_idx = index + id; + rwgrp_meta[processed_row_groups][child_col_idx].start_row = processed_child_rows; + rwgrp_meta[processed_row_groups][child_col_idx].num_rows = child_rows; + } + processed_child_rows += child_rows; + } + } + + // Aggregate start row, number of rows per chunk and total number of rows in a column + const auto child_rows = chunks[stripe_id][parent_col_idx].num_child_rows; + for (uint32_t id = 0; id < p_col.num_children; id++) { + const auto child_col_idx = index + id; + + num_child_rows[child_col_idx] += child_rows; + num_child_rows_per_stripe[stripe_id][child_col_idx] = child_rows; + // start row could be different for each column when there is nesting at each stripe level + child_start_row[stripe_id][child_col_idx] = (stripe_id == 0) ? 
0 : start_row; + } + start_row += child_rows; + } + index += p_col.num_children; + }); +} + +std::unique_ptr reader::impl::create_empty_column(const int32_t orc_col_id, + column_name_info &schema_info, + rmm::cuda_stream_view stream) +{ + schema_info.name = _metadata->get_column_name(0, orc_col_id); + // If the column type is orc::DECIMAL see if the user + // desires it to be converted to float64 or not + auto const decimal_as_float64 = should_convert_decimal_column_to_float( + _decimal_cols_as_float, _metadata->per_file_metadata[0], orc_col_id); + auto const type = to_type_id( + _metadata->get_schema(orc_col_id), _use_np_dtypes, _timestamp_type.id(), decimal_as_float64); + int32_t scale = 0; + std::vector> child_columns; + std::unique_ptr out_col = nullptr; + + switch (type) { + case type_id::LIST: + schema_info.children.emplace_back("offsets"); + schema_info.children.emplace_back(""); + out_col = make_lists_column( + 0, + make_empty_column(data_type(type_id::INT32)), + create_empty_column( + _metadata->get_col_type(orc_col_id).subtypes[0], schema_info.children.back(), stream), + 0, + rmm::device_buffer{0, stream}, + stream); + + break; + + case type_id::STRUCT: + for (const auto col : _metadata->get_col_type(orc_col_id).subtypes) { + schema_info.children.emplace_back(""); + child_columns.push_back(create_empty_column(col, schema_info.children.back(), stream)); + } + out_col = + make_structs_column(0, std::move(child_columns), 0, rmm::device_buffer{0, stream}, stream); + break; + + case type_id::DECIMAL64: + scale = -static_cast(_metadata->get_types()[orc_col_id].scale.value_or(0)); + default: out_col = make_empty_column(data_type(type, scale)); + } + + return out_col; +} + +// Adds child column buffers to parent column +column_buffer &&reader::impl::assemble_buffer(const int32_t orc_col_id, + std::vector> &col_buffers, + const size_t level) +{ + auto const col_id = _col_meta.orc_col_map[level][orc_col_id]; + auto &col_buffer = col_buffers[level][col_id]; + + col_buffer.name = _metadata->get_column_name(0, orc_col_id); + switch (col_buffer.type.id()) { + case type_id::LIST: + col_buffer.children.emplace_back( + assemble_buffer(_metadata->get_col_type(orc_col_id).subtypes[0], col_buffers, level + 1)); + break; + + case type_id::STRUCT: + for (auto const &col : _metadata->get_col_type(orc_col_id).subtypes) { + col_buffer.children.emplace_back(assemble_buffer(col, col_buffers, level)); + } + + break; + + default: break; + } + + return std::move(col_buffer); +} + +// creates columns along with schema information for each column +void reader::impl::create_columns(std::vector> &&col_buffers, + std::vector> &out_columns, + std::vector &schema_info, + rmm::cuda_stream_view stream) +{ + for (size_t i = 0; i < _selected_columns[0].size();) { + auto const &col_meta = _selected_columns[0][i]; + schema_info.emplace_back(""); + + auto col_buffer = assemble_buffer(col_meta.id, col_buffers, 0); + out_columns.emplace_back(make_column(col_buffer, &schema_info.back(), stream, _mr)); + + // Need to skip child columns of struct which are at the same level and have been processed + i += (col_buffers[0][i].type.id() == type_id::STRUCT) ? 
col_meta.num_children + 1 : 1; + } +} + reader::impl::impl(std::vector> &&sources, orc_reader_options const &options, rmm::mr::device_memory_resource *mr) @@ -655,7 +897,8 @@ reader::impl::impl(std::vector> &&sources, _metadata = std::make_unique(_sources); // Select only columns required by the options - _selected_columns = _metadata->select_columns(options.get_columns(), _has_timestamp_column); + _selected_columns = + _metadata->select_columns(options.get_columns(), _has_timestamp_column, _has_list_column); // Override output timestamp resolution if requested if (options.get_timestamp_type().id() != type_id::EMPTY) { @@ -677,248 +920,334 @@ table_with_metadata reader::impl::read(size_type skip_rows, const std::vector> &stripes, rmm::cuda_stream_view stream) { + CUDF_EXPECTS(skip_rows == 0 or (not _has_list_column), + "skip_rows is not supported by list column"); + std::vector> out_columns; + // buffer and stripe data are stored as per nesting level + std::vector> out_buffers(_selected_columns.size()); + std::vector schema_info; + std::vector> lvl_stripe_data(_selected_columns.size()); table_metadata out_metadata; - // There are no columns in table + // There are no columns in the table if (_selected_columns.size() == 0) return {std::make_unique(), std::move(out_metadata)}; // Select only stripes required (aka row groups) const auto selected_stripes = _metadata->select_stripes(stripes, skip_rows, num_rows); - // Association between each ORC column and its cudf::column - std::vector orc_col_map(_metadata->get_num_cols(), -1); - - // Get a list of column data types - std::vector column_types; - for (const auto &col : _selected_columns) { - // If the column type is orc::DECIMAL see if the user - // desires it to be converted to float64 or not - auto const decimal_as_float64 = should_convert_decimal_column_to_float( - _decimal_cols_as_float, _metadata->per_file_metadata[0], col); - - auto col_type = to_type_id( - _metadata->get_col_type(col), _use_np_dtypes, _timestamp_type.id(), decimal_as_float64); - CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); - // Remove this once we support Decimal128 data type - CUDF_EXPECTS((col_type != type_id::DECIMAL64) or (_metadata->get_col_type(col).precision <= 18), - "Decimal data has precision > 18, Decimal64 data type doesn't support it."); - if (col_type == type_id::DECIMAL64) { - // sign of the scale is changed since cuDF follows c++ libraries like CNL - // which uses negative scaling, but liborc and other libraries - // follow positive scaling. - auto const scale = -static_cast(_metadata->get_col_type(col).scale.value_or(0)); - column_types.emplace_back(col_type, scale); - } else { - column_types.emplace_back(col_type); - } + // Iterates through levels of nested columns, struct columns and its children will be + // in the same level since child column also have same number of rows, + // list column children will be 1 level down compared to parent. 
+ for (size_t level = 0; level < _selected_columns.size(); level++) { + auto &selected_columns = _selected_columns[level]; + // Association between each ORC column and its cudf::column + _col_meta.orc_col_map.emplace_back(_metadata->get_num_cols(), -1); + std::vector list_col; + + // Get a list of column data types + std::vector column_types; + for (auto &col : selected_columns) { + // If the column type is orc::DECIMAL see if the user + // desires it to be converted to float64 or not + auto const decimal_as_float64 = should_convert_decimal_column_to_float( + _decimal_cols_as_float, _metadata->per_file_metadata[0], col.id); + auto col_type = to_type_id( + _metadata->get_col_type(col.id), _use_np_dtypes, _timestamp_type.id(), decimal_as_float64); + CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); + // Remove this once we support Decimal128 data type + CUDF_EXPECTS( + (col_type != type_id::DECIMAL64) or (_metadata->get_col_type(col.id).precision <= 18), + "Decimal data has precision > 18, Decimal64 data type doesn't support it."); + if (col_type == type_id::DECIMAL64) { + // sign of the scale is changed since cuDF follows c++ libraries like CNL + // which uses negative scaling, but liborc and other libraries + // follow positive scaling. + auto const scale = -static_cast(_metadata->get_col_type(col.id).scale.value_or(0)); + column_types.emplace_back(col_type, scale); + } else { + column_types.emplace_back(col_type); + } - // Map each ORC column to its column - orc_col_map[col] = column_types.size() - 1; - } + // Map each ORC column to its column + _col_meta.orc_col_map[level][col.id] = column_types.size() - 1; + if (col_type == type_id::LIST) list_col.emplace_back(col); + } - // If no rows or stripes to read, return empty columns - if (num_rows <= 0 || selected_stripes.empty()) { - std::transform(column_types.cbegin(), - column_types.cend(), - std::back_inserter(out_columns), - [](auto const &dtype) { return make_empty_column(dtype); }); - } else { - // Get the total number of stripes across all input files. 
- size_t total_num_stripes = - std::accumulate(selected_stripes.begin(), - selected_stripes.end(), - 0, - [](size_t sum, auto &stripe_source_mapping) { - return sum + stripe_source_mapping.stripe_info.size(); - }); - - const auto num_columns = _selected_columns.size(); - const auto num_chunks = total_num_stripes * num_columns; - hostdevice_vector chunks(num_chunks, stream); - memset(chunks.host_ptr(), 0, chunks.memory_size()); - - const bool use_index = - (_use_index == true) && - // Only use if we don't have much work with complete columns & stripes - // TODO: Consider nrows, gpu, and tune the threshold - (num_rows > _metadata->get_row_index_stride() && !(_metadata->get_row_index_stride() & 7) && - _metadata->get_row_index_stride() > 0 && num_columns * total_num_stripes < 8 * 128) && - // Only use if first row is aligned to a stripe boundary - // TODO: Fix logic to handle unaligned rows - (skip_rows == 0); - - // Logically view streams as columns - std::vector stream_info; - - // Tracker for eventually deallocating compressed and uncompressed data - std::vector stripe_data; - - size_t stripe_start_row = 0; - size_t num_dict_entries = 0; - size_t num_rowgroups = 0; - size_t stripe_chunk_index = 0; - - for (auto &stripe_source_mapping : selected_stripes) { - // Iterate through the source files selected stripes - for (size_t stripe_pos_index = 0; stripe_pos_index < stripe_source_mapping.stripe_info.size(); - stripe_pos_index++) { - auto &stripe_pair = stripe_source_mapping.stripe_info[stripe_pos_index]; - const auto stripe_info = stripe_pair.first; - const auto stripe_footer = stripe_pair.second; - - auto stream_count = stream_info.size(); - const auto total_data_size = gather_stream_info(stripe_chunk_index, - stripe_info, - stripe_footer, - orc_col_map, - _selected_columns, - _metadata->get_types(), - use_index, - &num_dict_entries, - chunks, - stream_info); - - CUDF_EXPECTS(total_data_size > 0, "Expected streams data within stripe"); - - stripe_data.emplace_back(total_data_size, stream); - auto dst_base = static_cast(stripe_data.back().data()); - - // Coalesce consecutive streams into one read - while (stream_count < stream_info.size()) { - const auto d_dst = dst_base + stream_info[stream_count].dst_pos; - const auto offset = stream_info[stream_count].offset; - auto len = stream_info[stream_count].length; - stream_count++; - - while (stream_count < stream_info.size() && - stream_info[stream_count].offset == offset + len) { - len += stream_info[stream_count].length; + // If no rows or stripes to read, return empty columns + if (num_rows <= 0 || selected_stripes.empty()) { + for (size_t i = 0; i < _selected_columns[0].size();) { + auto const &col_meta = _selected_columns[0][i]; + auto const schema = _metadata->get_schema(col_meta.id); + schema_info.emplace_back(""); + out_columns.push_back( + std::move(create_empty_column(col_meta.id, schema_info.back(), stream))); + // Since struct children will be in the same level, have to skip them. + i += (schema.kind == orc::STRUCT) ? col_meta.num_children + 1 : 1; + } + break; + } else { + // Get the total number of stripes across all input files. 
+ size_t total_num_stripes = + std::accumulate(selected_stripes.begin(), + selected_stripes.end(), + 0, + [](size_t sum, auto &stripe_source_mapping) { + return sum + stripe_source_mapping.stripe_info.size(); + }); + const auto num_columns = selected_columns.size(); + cudf::detail::hostdevice_2dvector chunks( + total_num_stripes, num_columns, stream); + memset(chunks.base_host_ptr(), 0, chunks.memory_size()); + + const bool use_index = + (_use_index == true) && + // Only use if we don't have much work with complete columns & stripes + // TODO: Consider nrows, gpu, and tune the threshold + (num_rows > _metadata->get_row_index_stride() && !(_metadata->get_row_index_stride() & 7) && + _metadata->get_row_index_stride() > 0 && num_columns * total_num_stripes < 8 * 128) && + // Only use if first row is aligned to a stripe boundary + // TODO: Fix logic to handle unaligned rows + (skip_rows == 0); + + // Logically view streams as columns + std::vector stream_info; + + // Tracker for eventually deallocating compressed and uncompressed data + auto &stripe_data = lvl_stripe_data[level]; + + size_t stripe_start_row = 0; + size_t num_dict_entries = 0; + size_t num_rowgroups = 0; + int stripe_idx = 0; + + for (auto const &stripe_source_mapping : selected_stripes) { + // Iterate through the source files selected stripes + for (auto const &stripe : stripe_source_mapping.stripe_info) { + const auto stripe_info = stripe.first; + const auto stripe_footer = stripe.second; + + auto stream_count = stream_info.size(); + const auto total_data_size = gather_stream_info(stripe_idx, + stripe_info, + stripe_footer, + _col_meta.orc_col_map[level], + selected_columns, + _metadata->get_types(), + use_index, + &num_dict_entries, + chunks, + stream_info); + + CUDF_EXPECTS(total_data_size > 0, "Expected streams data within stripe"); + + stripe_data.emplace_back(total_data_size, stream); + auto dst_base = static_cast(stripe_data.back().data()); + + // Coalesce consecutive streams into one read + while (stream_count < stream_info.size()) { + const auto d_dst = dst_base + stream_info[stream_count].dst_pos; + const auto offset = stream_info[stream_count].offset; + auto len = stream_info[stream_count].length; stream_count++; + + while (stream_count < stream_info.size() && + stream_info[stream_count].offset == offset + len) { + len += stream_info[stream_count].length; + stream_count++; + } + if (_metadata->per_file_metadata[stripe_source_mapping.source_idx] + .source->is_device_read_preferred(len)) { + CUDF_EXPECTS( + _metadata->per_file_metadata[stripe_source_mapping.source_idx].source->device_read( + offset, len, d_dst, stream) == len, + "Unexpected discrepancy in bytes read."); + } else { + const auto buffer = + _metadata->per_file_metadata[stripe_source_mapping.source_idx].source->host_read( + offset, len); + CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); + CUDA_TRY(cudaMemcpyAsync( + d_dst, buffer->data(), len, cudaMemcpyHostToDevice, stream.value())); + stream.synchronize(); + } } - if (_metadata->per_file_metadata[stripe_source_mapping.source_idx] - .source->is_device_read_preferred(len)) { - CUDF_EXPECTS( - _metadata->per_file_metadata[stripe_source_mapping.source_idx].source->device_read( - offset, len, d_dst, stream) == len, - "Unexpected discrepancy in bytes read."); - } else { - const auto buffer = - _metadata->per_file_metadata[stripe_source_mapping.source_idx].source->host_read( - offset, len); - CUDF_EXPECTS(buffer->size() == len, "Unexpected discrepancy in bytes read."); - CUDA_TRY( 
- cudaMemcpyAsync(d_dst, buffer->data(), len, cudaMemcpyHostToDevice, stream.value())); - stream.synchronize(); - } - } - // Update chunks to reference streams pointers - for (size_t col_idx = 0; col_idx < num_columns; col_idx++) { - auto &chunk = chunks[stripe_chunk_index * num_columns + col_idx]; - chunk.start_row = stripe_start_row; - chunk.num_rows = stripe_info->numberOfRows; - chunk.encoding_kind = stripe_footer->columns[_selected_columns[col_idx]].kind; - chunk.type_kind = _metadata->per_file_metadata[stripe_source_mapping.source_idx] - .ff.types[_selected_columns[col_idx]] - .kind; - auto const decimal_as_float64 = should_convert_decimal_column_to_float( - _decimal_cols_as_float, _metadata->per_file_metadata[0], _selected_columns[col_idx]); - chunk.decimal_scale = _metadata->per_file_metadata[stripe_source_mapping.source_idx] - .ff.types[_selected_columns[col_idx]] - .scale.value_or(0) | - (decimal_as_float64 ? orc::gpu::orc_decimal2float64_scale : 0); - chunk.rowgroup_id = num_rowgroups; - chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) - ? sizeof(string_index_pair) - : cudf::size_of(column_types[col_idx]); - if (chunk.type_kind == orc::TIMESTAMP) { - chunk.ts_clock_rate = to_clockrate(_timestamp_type.id()); + const auto num_rows_per_stripe = stripe_info->numberOfRows; + const auto rowgroup_id = num_rowgroups; + auto stripe_num_rowgroups = 0; + if (use_index) { + stripe_num_rowgroups = (num_rows_per_stripe + _metadata->get_row_index_stride() - 1) / + _metadata->get_row_index_stride(); } - for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { - chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k]].dst_pos; + // Update chunks to reference streams pointers + for (size_t col_idx = 0; col_idx < num_columns; col_idx++) { + auto &chunk = chunks[stripe_idx][col_idx]; + // start row, number of rows in a each stripe and total number of rows + // may change in lower levels of nesting + chunk.start_row = (level == 0) + ? stripe_start_row + : _col_meta.child_start_row[stripe_idx * num_columns + col_idx]; + chunk.num_rows = + (level == 0) + ? stripe_info->numberOfRows + : _col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx]; + chunk.column_num_rows = (level == 0) ? num_rows : _col_meta.num_child_rows[col_idx]; + chunk.encoding_kind = stripe_footer->columns[selected_columns[col_idx].id].kind; + chunk.type_kind = _metadata->per_file_metadata[stripe_source_mapping.source_idx] + .ff.types[selected_columns[col_idx].id] + .kind; + auto const decimal_as_float64 = + should_convert_decimal_column_to_float(_decimal_cols_as_float, + _metadata->per_file_metadata[0], + selected_columns[col_idx].id); + chunk.decimal_scale = _metadata->per_file_metadata[stripe_source_mapping.source_idx] + .ff.types[selected_columns[col_idx].id] + .scale.value_or(0) | + (decimal_as_float64 ? orc::gpu::orc_decimal2float64_scale : 0); + + chunk.rowgroup_id = rowgroup_id; + chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) + ? sizeof(string_index_pair) + : ((column_types[col_idx].id() == type_id::LIST) or + (column_types[col_idx].id() == type_id::STRUCT)) + ? 
sizeof(int32_t) + : cudf::size_of(column_types[col_idx]); + chunk.num_rowgroups = stripe_num_rowgroups; + if (chunk.type_kind == orc::TIMESTAMP) { + chunk.ts_clock_rate = to_clockrate(_timestamp_type.id()); + } + for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { + chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k]].dst_pos; + } } - } + stripe_start_row += num_rows_per_stripe; + num_rowgroups += stripe_num_rowgroups; - stripe_start_row += stripe_info->numberOfRows; - if (use_index) { - num_rowgroups += (stripe_info->numberOfRows + _metadata->get_row_index_stride() - 1) / - _metadata->get_row_index_stride(); + stripe_idx++; } - stripe_chunk_index++; } - } - // Process dataset chunk pages into output columns - if (stripe_data.size() != 0) { - // Setup row group descriptors if using indexes - rmm::device_uvector row_groups(num_rowgroups * num_columns, stream); - if (_metadata->per_file_metadata[0].ps.compression != orc::NONE) { - auto decomp_data = - decompress_stripe_data(chunks, - stripe_data, - _metadata->per_file_metadata[0].decompressor.get(), - stream_info, - total_num_stripes, - row_groups, - _metadata->get_row_index_stride(), - stream); - stripe_data.clear(); - stripe_data.push_back(std::move(decomp_data)); - } else { - if (not row_groups.is_empty()) { - chunks.host_to_device(stream); - gpu::ParseRowGroupIndex(row_groups.data(), - nullptr, - chunks.device_ptr(), - num_columns, - total_num_stripes, - num_rowgroups, - _metadata->get_row_index_stride(), - stream); + // Process dataset chunk pages into output columns + if (stripe_data.size() != 0) { + auto row_groups = + cudf::detail::hostdevice_2dvector(num_rowgroups, num_columns, stream); + if (level > 0 and row_groups.size().first) { + cudf::host_span row_groups_span(row_groups.base_host_ptr(), + num_rowgroups * num_columns); + auto &rw_grp_meta = _col_meta.rwgrp_meta; + + // Update start row and num rows per row group + std::transform(rw_grp_meta.begin(), + rw_grp_meta.end(), + row_groups_span.begin(), + rw_grp_meta.begin(), + [&](auto meta, auto &row_grp) { + row_grp.num_rows = meta.num_rows; + row_grp.start_row = meta.start_row; + return meta; + }); + } + // Setup row group descriptors if using indexes + if (_metadata->per_file_metadata[0].ps.compression != orc::NONE) { + auto decomp_data = + decompress_stripe_data(chunks, + stripe_data, + _metadata->per_file_metadata[0].decompressor.get(), + stream_info, + total_num_stripes, + row_groups, + _metadata->get_row_index_stride(), + level == 0, + stream); + stripe_data.clear(); + stripe_data.push_back(std::move(decomp_data)); + } else { + if (row_groups.size().first) { + chunks.host_to_device(stream); + row_groups.host_to_device(stream); + gpu::ParseRowGroupIndex(row_groups.base_device_ptr(), + nullptr, + chunks.base_device_ptr(), + num_columns, + total_num_stripes, + num_rowgroups, + _metadata->get_row_index_stride(), + level == 0, + stream); + } } - } - // Setup table for converting timestamp columns from local to UTC time - auto const tz_table = _has_timestamp_column - ? build_timezone_transition_table( - selected_stripes[0].stripe_info[0].second->writerTimezone, stream) - : timezone_table{}; - - std::vector out_buffers; - for (size_t i = 0; i < column_types.size(); ++i) { - bool is_nullable = false; - for (size_t j = 0; j < total_num_stripes; ++j) { - if (chunks[j * num_columns + i].strm_len[gpu::CI_PRESENT] != 0) { - is_nullable = true; - break; + // Setup table for converting timestamp columns from local to UTC time + auto const tz_table = + _has_timestamp_column + ? 
build_timezone_transition_table( + selected_stripes[0].stripe_info[0].second->writerTimezone, stream) + : timezone_table{}; + + for (size_t i = 0; i < column_types.size(); ++i) { + bool is_nullable = false; + for (size_t j = 0; j < total_num_stripes; ++j) { + if (chunks[j][i].strm_len[gpu::CI_PRESENT] != 0) { + is_nullable = true; + break; + } } + auto is_list_type = (column_types[i].id() == type_id::LIST); + auto n_rows = (level == 0) ? num_rows : _col_meta.num_child_rows[i]; + // For list column, offset column will be always size + 1 + if (is_list_type) n_rows++; + out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, stream, _mr); + } + + decode_stream_data(chunks, + num_dict_entries, + skip_rows, + tz_table.view(), + row_groups, + _metadata->get_row_index_stride(), + out_buffers[level], + level, + stream); + + // Extract information to process list child columns + if (list_col.size()) { + row_groups.device_to_host(stream, true); + aggregate_child_meta(chunks, row_groups, list_col, level); } - out_buffers.emplace_back(column_types[i], num_rows, is_nullable, stream, _mr); - } - decode_stream_data(chunks, - num_dict_entries, - skip_rows, - num_rows, - tz_table.view(), - row_groups, - _metadata->get_row_index_stride(), - out_buffers, - stream); - - for (size_t i = 0; i < column_types.size(); ++i) { - out_columns.emplace_back(make_column(out_buffers[i], nullptr, stream, _mr)); + // ORC stores number of elements at each row, so we need to generate offsets from that + if (list_col.size()) { + std::vector buff_data; + std::for_each( + out_buffers[level].begin(), out_buffers[level].end(), [&buff_data](auto &out_buffer) { + if (out_buffer.type.id() == type_id::LIST) { + auto data = static_cast(out_buffer.data()); + buff_data.emplace_back(list_buffer_data{data, out_buffer.size}); + } + }); + + auto const dev_buff_data = cudf::detail::make_device_uvector_async(buff_data, stream); + generate_offsets_for_list(dev_buff_data, stream); + } } } } - // Return column names (must match order of returned columns) - out_metadata.column_names.resize(_selected_columns.size()); - for (size_t i = 0; i < _selected_columns.size(); i++) { - out_metadata.column_names[i] = _metadata->get_column_name(0, _selected_columns[i]); + // If out_columns is empty, then create columns from buffer. + if (out_columns.empty()) { + create_columns(std::move(out_buffers), out_columns, schema_info, stream); } + // Return column names (must match order of returned columns) + out_metadata.column_names.reserve(schema_info.size()); + std::transform(schema_info.cbegin(), + schema_info.cend(), + std::back_inserter(out_metadata.column_names), + [](auto info) { return info.name; }); + + out_metadata.schema_info = std::move(schema_info); + for (const auto &meta : _metadata->per_file_metadata) { for (const auto &kv : meta.ff.metadata) { out_metadata.user_data.insert({kv.name, kv.value}); } } diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 0307d84cd1b..1769fb6f193 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -48,6 +48,24 @@ struct stripe_source_mapping; } // namespace class aggregate_orc_metadata; +/** + * @brief Keeps track of orc mapping and child column details. + */ +struct reader_column_meta { + std::vector> + orc_col_map; // Mapping between column id in orc to processing order. 
+ std::vector num_child_rows; // number of rows in child columns + std::vector child_start_row; // start row of child columns [stripe][column] + std::vector + num_child_rows_per_stripe; // number of rows of child columns [stripe][column] + struct row_group_meta { + uint32_t num_rows; // number of rows in a column in a row group + uint32_t start_row; // start row in a column in a row group + }; + // num_rowgroups * num_columns + std::vector rwgrp_meta; // rowgroup metadata [rowgroup][column] +}; + /** * @brief Implementation for ORC reader */ @@ -60,9 +78,9 @@ class reader::impl { * @param options Settings for controlling reading behavior * @param mr Device memory resource to use for device memory allocation */ - explicit impl(std::vector> &&sources, - orc_reader_options const &options, - rmm::mr::device_memory_resource *mr); + explicit impl(std::vector>&& sources, + orc_reader_options const& options, + rmm::mr::device_memory_resource* mr); /** * @brief Read an entire set or a subset of data and returns a set of columns @@ -76,68 +94,123 @@ class reader::impl { */ table_with_metadata read(size_type skip_rows, size_type num_rows, - const std::vector> &stripes, + const std::vector>& stripes, rmm::cuda_stream_view stream); private: /** * @brief Decompresses the stripe data, at stream granularity * - * @param chunks List of column chunk descriptors + * @param chunks Vector of list of column chunk descriptors * @param stripe_data List of source stripe column data * @param decompressor Originally host decompressor * @param stream_info List of stream to column mappings * @param num_stripes Number of stripes making up column chunks - * @param row_groups List of row index descriptors + * @param row_groups Vector of list of row index descriptors * @param row_index_stride Distance between each row index + * @param use_base_stride Whether to use base stride obtained from meta or use the computed value * @param stream CUDA stream used for device memory operations and kernel launches. * * @return Device buffer to decompressed page data */ - rmm::device_buffer decompress_stripe_data(hostdevice_vector &chunks, - const std::vector &stripe_data, - const OrcDecompressor *decompressor, - std::vector &stream_info, - size_t num_stripes, - device_span row_groups, - size_t row_index_stride, - rmm::cuda_stream_view stream); + rmm::device_buffer decompress_stripe_data( + cudf::detail::hostdevice_2dvector& chunks, + const std::vector& stripe_data, + const OrcDecompressor* decompressor, + std::vector& stream_info, + size_t num_stripes, + cudf::detail::hostdevice_2dvector& row_groups, + size_t row_index_stride, + bool use_base_stride, + rmm::cuda_stream_view stream); /** * @brief Converts the stripe column data and outputs to columns * - * @param chunks List of column chunk descriptors + * @param chunks Vector of list of column chunk descriptors * @param num_dicts Number of dictionary entries required * @param skip_rows Number of rows to offset from start - * @param num_rows Number of rows to output * @param tz_table Local time to UTC conversion table - * @param row_groups List of row index descriptors + * @param row_groups Vector of list of row index descriptors * @param row_index_stride Distance between each row index * @param out_buffers Output columns' device buffers + * @param level Current nesting level being processed * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ - void decode_stream_data(hostdevice_vector &chunks, + void decode_stream_data(cudf::detail::hostdevice_2dvector& chunks, size_t num_dicts, size_t skip_rows, - size_t num_rows, timezone_table_view tz_table, - device_span row_groups, + cudf::detail::hostdevice_2dvector& row_groups, size_t row_index_stride, - std::vector &out_buffers, + std::vector& out_buffers, + size_t level, rmm::cuda_stream_view stream); + /** + * @brief Aggregate child metadata from parent column chunks. + * + * @param chunks Vector of list of parent column chunks. + * @param chunks Vector of list of parent column row groups. + * @param list_col Vector of column metadata of list type parent columns. + * @param level Current nesting level being processed. + */ + void aggregate_child_meta(cudf::detail::host_2dspan chunks, + cudf::detail::host_2dspan row_groups, + std::vector const& list_col, + const int32_t level); + + /** + * @brief Assemble the buffer with child columns. + * + * @param orc_col_id Column id in orc. + * @param col_buffers Column buffers for columns and children. + * @param level Current nesting level. + */ + column_buffer&& assemble_buffer(const int32_t orc_col_id, + std::vector>& col_buffers, + const size_t level); + + /** + * @brief Create columns and respective schema information from the buffer. + * + * @param col_buffers Column buffers for columns and children. + * @param out_columns Vector of columns formed from column buffers. + * @param schema_info Vector of schema information formed from column buffers. + * @param stream CUDA stream used for device memory operations and kernel launches. + */ + void create_columns(std::vector>&& col_buffers, + std::vector>& out_columns, + std::vector& schema_info, + rmm::cuda_stream_view stream); + + /** + * @brief Create empty columns and respective schema information from the buffer. + * + * @param col_buffers Column buffers for columns and children. + * @param schema_info Vector of schema information formed from column buffers. + * @param stream CUDA stream used for device memory operations and kernel launches. + * + * @return An empty column equivalent to orc column type. + */ + std::unique_ptr create_empty_column(const int32_t orc_col_id, + column_name_info& schema_info, + rmm::cuda_stream_view stream); + private: - rmm::mr::device_memory_resource *_mr = nullptr; + rmm::mr::device_memory_resource* _mr = nullptr; std::vector> _sources; std::unique_ptr _metadata; // _output_columns associated schema indices - std::vector _selected_columns; + std::vector> _selected_columns; bool _use_index = true; bool _use_np_dtypes = true; bool _has_timestamp_column = false; + bool _has_list_column = false; std::vector _decimal_cols_as_float; data_type _timestamp_type{type_id::EMPTY}; + reader_column_meta _col_meta; }; } // namespace orc diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 6bc0e475a27..81fb1a394bb 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -113,6 +113,7 @@ struct orcdec_state_s { orc_bytestream_s bs; orc_bytestream_s bs2; int is_string; + uint64_t num_child_rows; union { orc_strdict_state_s dict; uint32_t nulls_desc_row; // number of rows processed for nulls. @@ -700,11 +701,11 @@ static __device__ uint32_t Integer_RLEv2( l += deltapos; } } - if (numvals + n > maxvals) break; + if ((numvals != 0) and (numvals + n > maxvals)) break; pos += l; if (pos > maxpos) break; - lastpos = pos; - numvals += n; + ((numvals == 0) and (n > maxvals)) ? 
numvals = maxvals : numvals += n; + lastpos = pos; numruns++; } rle->num_vals = numvals; @@ -926,10 +927,11 @@ static __device__ uint32_t Byte_RLE(orc_bytestream_s *bs, n = 0x100 - n; pos += n; } - if (pos > maxpos || numvals + n > maxvals) { break; } + if ((numvals != 0) and (numvals + n > maxvals)) break; + if (pos > maxpos) break; numruns++; - numvals += n; - lastpos = pos; + ((numvals == 0) and (n > maxvals)) ? numvals = maxvals : numvals += n; + lastpos = pos; } rle->num_runs = numruns; rle->num_vals = numvals; @@ -1117,7 +1119,6 @@ __global__ void __launch_bounds__(block_size) DictionaryEntry *global_dictionary, uint32_t num_columns, uint32_t num_stripes, - size_t max_num_rows, size_t first_row) { __shared__ __align__(16) orcdec_state_s state_g; @@ -1129,14 +1130,16 @@ __global__ void __launch_bounds__(block_size) } temp_storage; orcdec_state_s *const s = &state_g; - bool is_nulldec = (blockIdx.y >= num_stripes); - uint32_t column = blockIdx.x; - uint32_t stripe = (is_nulldec) ? blockIdx.y - num_stripes : blockIdx.y; - uint32_t chunk_id = stripe * num_columns + column; + const bool is_nulldec = (blockIdx.y >= num_stripes); + const uint32_t column = blockIdx.x; + const uint32_t stripe = (is_nulldec) ? blockIdx.y - num_stripes : blockIdx.y; + const uint32_t chunk_id = stripe * num_columns + column; int t = threadIdx.x; if (t == 0) s->chunk = chunks[chunk_id]; __syncthreads(); + const size_t max_num_rows = s->chunk.column_num_rows; + if (is_nulldec) { uint32_t null_count = 0; // Decode NULLs @@ -1372,54 +1375,62 @@ static const __device__ __constant__ uint32_t kTimestampNanoScale[8] = { * @param[in] global_dictionary Global dictionary device array * @param[in] tz_table Timezone translation table * @param[in] row_groups Optional row index data - * @param[in] max_num_rows Maximum number of rows to load * @param[in] first_row Crop all rows below first_row - * @param[in] num_chunks Number of column chunks (num_columns * num_stripes) - * @param[in] num_rowgroups Number of row groups in row index data * @param[in] rowidx_stride Row index stride + * @param[in] level nesting level being processed */ // blockDim {block_size,1,1} template __global__ void __launch_bounds__(block_size) - gpuDecodeOrcColumnData(ColumnDesc const *chunks, + gpuDecodeOrcColumnData(ColumnDesc *chunks, DictionaryEntry *global_dictionary, timezone_table_view tz_table, - const RowGroup *row_groups, - size_t max_num_rows, + device_2dspan row_groups, size_t first_row, - uint32_t num_columns, - uint32_t num_rowgroups, - uint32_t rowidx_stride) + uint32_t rowidx_stride, + size_t level) { __shared__ __align__(16) orcdec_state_s state_g; - __shared__ typename cub::BlockReduce::TempStorage temp_storage; + using block_reduce = cub::BlockReduce; + __shared__ union { + typename cub::BlockReduce::TempStorage blk_uint32; + typename cub::BlockReduce::TempStorage blk_uint64; + } temp_storage; orcdec_state_s *const s = &state_g; uint32_t chunk_id; - int t = threadIdx.x; + int t = threadIdx.x; + auto num_rowgroups = row_groups.size().first; if (num_rowgroups > 0) { - if (t == 0) s->top.data.index = row_groups[blockIdx.y * num_columns + blockIdx.x]; + if (t == 0) { s->top.data.index = row_groups[blockIdx.y][blockIdx.x]; } __syncthreads(); chunk_id = s->top.data.index.chunk_id; } else { chunk_id = blockIdx.x; } - if (t == 0) s->chunk = chunks[chunk_id]; - - __syncthreads(); if (t == 0) { + s->chunk = chunks[chunk_id]; + s->num_child_rows = 0; + } + __syncthreads(); + // Struct doesn't have any data in itself, so skip + const bool is_valid = 
s->chunk.type_kind != STRUCT; + const size_t max_num_rows = s->chunk.column_num_rows; + if (t == 0 and is_valid) { // If we have an index, seek to the initial run and update row positions if (num_rowgroups > 0) { uint32_t ofs0 = min(s->top.data.index.strm_offset[0], s->chunk.strm_len[CI_DATA]); uint32_t ofs1 = min(s->top.data.index.strm_offset[1], s->chunk.strm_len[CI_DATA2]); - uint32_t rowgroup_rowofs; + uint32_t rowgroup_rowofs = + (level == 0) ? (blockIdx.y - min(s->chunk.rowgroup_id, blockIdx.y)) * rowidx_stride + : s->top.data.index.start_row; + ; s->chunk.streams[CI_DATA] += ofs0; s->chunk.strm_len[CI_DATA] -= ofs0; s->chunk.streams[CI_DATA2] += ofs1; s->chunk.strm_len[CI_DATA2] -= ofs1; - rowgroup_rowofs = min((blockIdx.y - min(s->chunk.rowgroup_id, blockIdx.y)) * rowidx_stride, - s->chunk.num_rows); + rowgroup_rowofs = min(rowgroup_rowofs, s->chunk.num_rows); s->chunk.start_row += rowgroup_rowofs; s->chunk.num_rows -= rowgroup_rowofs; } @@ -1433,7 +1444,8 @@ __global__ void __launch_bounds__(block_size) s->top.data.end_row = static_cast(first_row + max_num_rows); } if (num_rowgroups > 0) { - s->top.data.end_row = min(s->top.data.end_row, s->chunk.start_row + rowidx_stride); + s->top.data.end_row = + min(s->top.data.end_row, s->chunk.start_row + s->top.data.index.num_rows); } if (!is_dictionary(s->chunk.encoding_kind)) { s->chunk.dictionary_start = 0; } @@ -1443,7 +1455,9 @@ __global__ void __launch_bounds__(block_size) bytestream_init(&s->bs2, s->chunk.streams[CI_DATA2], s->chunk.strm_len[CI_DATA2]); } __syncthreads(); - while (s->top.data.cur_row < s->top.data.end_row) { + + while (is_valid && (s->top.data.cur_row < s->top.data.end_row)) { + uint32_t list_child_elements = 0; bytestream_fill(&s->bs, t); bytestream_fill(&s->bs2, t); __syncthreads(); @@ -1533,8 +1547,9 @@ __global__ void __launch_bounds__(block_size) __syncthreads(); // Account for skipped values if (num_rowgroups > 0 && !s->is_string) { - uint32_t run_pos = (s->chunk.type_kind == DECIMAL) ? s->top.data.index.run_pos[CI_DATA2] - : s->top.data.index.run_pos[CI_DATA]; + uint32_t run_pos = (s->chunk.type_kind == DECIMAL || s->chunk.type_kind == LIST) + ? s->top.data.index.run_pos[CI_DATA2] + : s->top.data.index.run_pos[CI_DATA]; numvals = min(numvals + run_pos, (s->chunk.type_kind == BOOLEAN) ? blockDim.x * 2 : blockDim.x); } @@ -1546,6 +1561,25 @@ __global__ void __launch_bounds__(block_size) } else { numvals = Integer_RLEv2(&s->bs, &s->u.rlev2, s->vals.i32, numvals, t); } + __syncthreads(); + } else if (s->chunk.type_kind == LIST) { + if (is_rlev1(s->chunk.encoding_kind)) { + numvals = Integer_RLEv1(&s->bs2, &s->u.rlev1, s->vals.u64, numvals, t); + } else { + numvals = Integer_RLEv2(&s->bs2, &s->u.rlev2, s->vals.u64, numvals, t); + } + // If we're using an index, we may have to drop values from the initial run + uint32_t skip = 0; + if (num_rowgroups > 0 and false) { + uint32_t run_pos = s->top.data.index.run_pos[CI_DATA2]; + if (run_pos) { + skip = min(numvals, run_pos); + __syncthreads(); + if (t == 0) { s->top.data.index.run_pos[CI_DATA2] = 0; } + numvals -= skip; + } + } + __syncthreads(); } else if (s->chunk.type_kind == BYTE) { numvals = Byte_RLE(&s->bs, &s->u.rle8, s->vals.u8, numvals, t); @@ -1629,12 +1663,16 @@ __global__ void __launch_bounds__(block_size) } else { vals_skipped = 0; if (num_rowgroups > 0) { - uint32_t run_pos = s->top.data.index.run_pos[CI_DATA]; + uint32_t run_pos = (s->chunk.type_kind == LIST) ? 
s->top.data.index.run_pos[CI_DATA2] + : s->top.data.index.run_pos[CI_DATA]; if (run_pos) { vals_skipped = min(numvals, run_pos); numvals -= vals_skipped; __syncthreads(); - if (t == 0) { s->top.data.index.run_pos[CI_DATA] = 0; } + if (t == 0) { + (s->chunk.type_kind == LIST) ? s->top.data.index.run_pos[CI_DATA2] = 0 + : s->top.data.index.run_pos[CI_DATA] = 0; + } } } } @@ -1647,12 +1685,13 @@ __global__ void __launch_bounds__(block_size) __syncthreads(); // Use the valid bits to compute non-null row positions until we get a full batch of values to // decode - DecodeRowPositions(s, first_row, t, temp_storage); + DecodeRowPositions(s, first_row, t, temp_storage.blk_uint32); if (!s->top.data.nrows && !s->u.rowdec.nz_count && !vals_skipped) { // This is a bug (could happen with bitstream errors with a bad run that would produce more // values than the number of remaining rows) return; } + // Store decoded values to output if (t < min(min(s->top.data.max_vals, s->u.rowdec.nz_count), s->top.data.nrows) && s->u.rowdec.row[t] != 0 && @@ -1668,6 +1707,15 @@ __global__ void __launch_bounds__(block_size) case DECIMAL: static_cast(data_out)[row] = s->vals.u64[t + vals_skipped]; break; + case LIST: { + // Since the offsets column in cudf is `size_type`, + // If the limit exceeds then value will be 0, which is Fail. + cudf_assert( + (s->vals.u64[t + vals_skipped] > std::numeric_limits::max()) and + "Number of elements is more than what size_type can handle"); + list_child_elements = s->vals.u64[t + vals_skipped]; + static_cast(data_out)[row] = list_child_elements; + } break; case SHORT: static_cast(data_out)[row] = static_cast(s->vals.u32[t + vals_skipped]); @@ -1734,6 +1782,10 @@ __global__ void __launch_bounds__(block_size) } } } + // Aggregate num of elements for the chunk + if (s->chunk.type_kind == LIST) { + list_child_elements = block_reduce(temp_storage.blk_uint64).Sum(list_child_elements); + } __syncthreads(); // Buffer secondary stream values if (s->chunk.type_kind == TIMESTAMP) { @@ -1748,12 +1800,19 @@ __global__ void __launch_bounds__(block_size) __syncthreads(); if (t == 0) { s->top.data.cur_row += s->top.data.nrows; + if (s->chunk.type_kind == LIST) { s->num_child_rows += list_child_elements; } if (s->is_string && !is_dictionary(s->chunk.encoding_kind) && s->top.data.max_vals > 0) { s->chunk.dictionary_start += s->vals.u32[s->top.data.max_vals - 1]; } } __syncthreads(); } + if (t == 0 and s->chunk.type_kind == LIST) { + if (num_rowgroups > 0) { + row_groups[blockIdx.y][blockIdx.x].num_child_rows = s->num_child_rows; + } + atomicAdd(&chunks[chunk_id].num_child_rows, s->num_child_rows); + } } /** @@ -1763,7 +1822,6 @@ __global__ void __launch_bounds__(block_size) * @param[in] global_dictionary Global dictionary device array * @param[in] num_columns Number of columns * @param[in] num_stripes Number of stripes - * @param[in] max_rows Maximum number of rows to load * @param[in] first_row Crop all rows below first_row * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ @@ -1771,14 +1829,13 @@ void __host__ DecodeNullsAndStringDictionaries(ColumnDesc *chunks, DictionaryEntry *global_dictionary, uint32_t num_columns, uint32_t num_stripes, - size_t max_num_rows, size_t first_row, rmm::cuda_stream_view stream) { dim3 dim_block(block_size, 1); dim3 dim_grid(num_columns, num_stripes * 2); // 1024 threads per chunk gpuDecodeNullsAndStringDictionaries<<>>( - chunks, global_dictionary, num_columns, num_stripes, max_num_rows, first_row); + chunks, global_dictionary, num_columns, 
num_stripes, first_row); } /** @@ -1788,39 +1845,32 @@ void __host__ DecodeNullsAndStringDictionaries(ColumnDesc *chunks, * @param[in] global_dictionary Global dictionary device array * @param[in] num_columns Number of columns * @param[in] num_stripes Number of stripes - * @param[in] max_rows Maximum number of rows to load * @param[in] first_row Crop all rows below first_row * @param[in] tz_table Timezone translation table - * @param[in] row_groups Optional row index data + * @param[in] row_groups Optional row index data [row_group][column] * @param[in] num_rowgroups Number of row groups in row index data * @param[in] rowidx_stride Row index stride + * @param[in] level nesting level being processed * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void __host__ DecodeOrcColumnData(ColumnDesc const *chunks, +void __host__ DecodeOrcColumnData(ColumnDesc *chunks, DictionaryEntry *global_dictionary, + device_2dspan row_groups, uint32_t num_columns, uint32_t num_stripes, - size_t max_num_rows, size_t first_row, timezone_table_view tz_table, - const RowGroup *row_groups, uint32_t num_rowgroups, uint32_t rowidx_stride, + size_t level, rmm::cuda_stream_view stream) { uint32_t num_chunks = num_columns * num_stripes; dim3 dim_block(block_size, 1); // 1024 threads per chunk dim3 dim_grid((num_rowgroups > 0) ? num_columns : num_chunks, (num_rowgroups > 0) ? num_rowgroups : 1); - gpuDecodeOrcColumnData<<>>(chunks, - global_dictionary, - tz_table, - row_groups, - max_num_rows, - first_row, - num_columns, - num_rowgroups, - rowidx_stride); + gpuDecodeOrcColumnData<<>>( + chunks, global_dictionary, tz_table, row_groups, first_row, rowidx_stride, level); } } // namespace gpu diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index 42cb15a56b7..b0f2cb4b739 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -392,6 +392,9 @@ static __device__ void gpuMapRowIndexToUncompressed(rowindex_state_s *s, * @param[in] num_columns Number of columns * @param[in] num_stripes Number of stripes * @param[in] num_rowgroups Number of row groups + * @param[in] rowidx_stride Row index stride + * @param[in] use_base_stride Whether to use base stride obtained from meta or use the computed + * value */ // blockDim {128,1,1} extern "C" __global__ void __launch_bounds__(128, 8) @@ -401,7 +404,8 @@ extern "C" __global__ void __launch_bounds__(128, 8) uint32_t num_columns, uint32_t num_stripes, uint32_t num_rowgroups, - uint32_t rowidx_stride) + uint32_t rowidx_stride, + bool use_base_stride) { __shared__ __align__(16) rowindex_state_s state_g; rowindex_state_s *const s = &state_g; @@ -415,11 +419,10 @@ extern "C" __global__ void __launch_bounds__(128, 8) if (s->chunk.strm_len[1] > 0) s->strm_info[1] = strm_info[s->chunk.strm_id[1]]; } - uint32_t rowgroups_in_chunk = - (rowidx_stride > 0) ? (s->chunk.num_rows + rowidx_stride - 1) / rowidx_stride : 1; - s->rowgroup_start = s->chunk.rowgroup_id; - s->rowgroup_end = s->rowgroup_start + rowgroups_in_chunk; - s->is_compressed = (strm_info != NULL); + uint32_t rowgroups_in_chunk = s->chunk.num_rowgroups; + s->rowgroup_start = s->chunk.rowgroup_id; + s->rowgroup_end = s->rowgroup_start + rowgroups_in_chunk; + s->is_compressed = (strm_info != NULL); } __syncthreads(); while (s->rowgroup_start < s->rowgroup_end) { @@ -443,10 +446,19 @@ extern "C" __global__ void __launch_bounds__(128, 8) t4 = t & 3; t32 = t >> 2; for (int i = t32; i < num_rowgroups; i += 32) { + auto const num_rows = + (use_base_stride) ? 
rowidx_stride + : row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x].num_rows; + auto const start_row = + (use_base_stride) + ? rowidx_stride + : row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x].start_row; for (int j = t4; j < rowgroup_size4; j += 4) { ((uint32_t *)&row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x])[j] = ((volatile uint32_t *)&s->rowgroups[i])[j]; } + row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x].num_rows = num_rows; + row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x].start_row = start_row; } __syncthreads(); if (t == 0) { s->rowgroup_start += num_rowgroups; } @@ -485,6 +497,9 @@ void __host__ PostDecompressionReassemble(CompressedStreamInfo *strm_info, * @param[in] num_columns Number of columns * @param[in] num_stripes Number of stripes * @param[in] num_rowgroups Number of row groups + * @param[in] rowidx_stride Row index stride + * @param[in] use_base_stride Whether to use base stride obtained from meta or use the computed + * value * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ void __host__ ParseRowGroupIndex(RowGroup *row_groups, @@ -494,12 +509,19 @@ void __host__ ParseRowGroupIndex(RowGroup *row_groups, uint32_t num_stripes, uint32_t num_rowgroups, uint32_t rowidx_stride, + bool use_base_stride, rmm::cuda_stream_view stream) { dim3 dim_block(128, 1); dim3 dim_grid(num_columns, num_stripes); // 1 column chunk per block - gpuParseRowGroupIndex<<>>( - row_groups, strm_info, chunks, num_columns, num_stripes, num_rowgroups, rowidx_stride); + gpuParseRowGroupIndex<<>>(row_groups, + strm_info, + chunks, + num_columns, + num_stripes, + num_rowgroups, + rowidx_stride, + use_base_stride); } } // namespace gpu diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index ee4b23bf831..8758042241f 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -175,6 +175,15 @@ class hostdevice_2dvector { auto size() const noexcept { return _size; } + T *base_host_ptr(size_t offset = 0) { return _data.host_ptr(offset); } + T *base_device_ptr(size_t offset = 0) { return _data.device_ptr(offset); } + + T const *base_host_ptr(size_t offset = 0) const { return _data.host_ptr(offset); } + + T const *base_device_ptr(size_t offset = 0) const { return _data.device_ptr(offset); } + + size_t memory_size() const noexcept { return _data.memory_size(); } + void host_to_device(rmm::cuda_stream_view stream, bool synchronize = false) { _data.host_to_device(stream, synchronize); diff --git a/python/cudf/cudf/_lib/io/utils.pxd b/python/cudf/cudf/_lib/io/utils.pxd index 0a793b2d018..589d48db812 100644 --- a/python/cudf/cudf/_lib/io/utils.pxd +++ b/python/cudf/cudf/_lib/io/utils.pxd @@ -1,8 +1,18 @@ # Copyright (c) 2020, NVIDIA CORPORATION. 
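A quick aside on the `hostdevice_2dvector` helpers added earlier in this patch (`base_host_ptr`, `base_device_ptr`, `memory_size`): they expose one flat allocation addressed as `[row][column]`, which is why a single transfer can move a whole table of chunk descriptors at once. Below is a host-only sketch of that layout, with the device mirror and the `host_to_device`/`device_to_host` copies omitted; `host_2dvector` here is illustrative, not a cudf class.

```cpp
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

// Host-only sketch of the hostdevice_2dvector layout: one flat buffer
// addressed as [row][column], so a single memcpy-style transfer moves
// the whole table between host and device in the real class.
template <typename T>
class host_2dvector {
 public:
  host_2dvector(std::size_t rows, std::size_t cols) : _rows{rows}, _cols{cols}, _data(rows * cols)
  {
  }

  // operator[] returns a pointer to the start of a row.
  T* operator[](std::size_t row) { return _data.data() + row * _cols; }

  // Counterparts of the accessors added in the hunk above.
  T* base_host_ptr(std::size_t offset = 0) { return _data.data() + offset; }
  std::size_t memory_size() const noexcept { return _data.size() * sizeof(T); }
  std::pair<std::size_t, std::size_t> size() const noexcept { return {_rows, _cols}; }

 private:
  std::size_t _rows;
  std::size_t _cols;
  std::vector<T> _data;
};

int main()
{
  host_2dvector<int> chunks(2 /*stripes*/, 3 /*columns*/);
  chunks[1][2] = 42;                        // chunk of stripe 1, column 2
  assert(chunks.base_host_ptr()[5] == 42);  // row-major: 1 * 3 + 2 == 5
  assert(chunks.memory_size() == 6 * sizeof(int));
}
```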
from libcpp.memory cimport unique_ptr +from libcpp.vector cimport vector -from cudf._lib.cpp.io.types cimport source_info, sink_info, data_sink +from cudf._lib.cpp.io.types cimport ( + source_info, + sink_info, + data_sink, + column_name_info +) +from cudf._lib.table cimport Table cdef source_info make_source_info(list src) except* cdef sink_info make_sink_info(src, unique_ptr[data_sink] & data) except* +cdef update_struct_field_names( + Table table, + vector[column_name_info]& schema_info) diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx index 6598a7af626..44951c59525 100644 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ b/python/cudf/cudf/_lib/io/utils.pyx @@ -8,10 +8,18 @@ from libcpp.utility cimport move from libcpp.vector cimport vector from libcpp.pair cimport pair from libcpp.string cimport string +from cudf._lib.column cimport Column from cudf._lib.cpp.io.types cimport source_info, io_type, host_buffer -from cudf._lib.cpp.io.types cimport sink_info, data_sink, datasource +from cudf._lib.cpp.io.types cimport ( + sink_info, + data_sink, + datasource, + column_name_info, +) from cudf._lib.io.datasource cimport Datasource +from cudf.utils.dtypes import is_struct_dtype + import codecs import errno import io @@ -108,3 +116,38 @@ cdef cppclass iobase_data_sink(data_sink): size_t bytes_written() with gil: return buf.tell() + + +cdef update_struct_field_names( + Table table, + vector[column_name_info]& schema_info +): + for i, (name, col) in enumerate(table._data.items()): + table._data[name] = _update_column_struct_field_names( + col, schema_info[i] + ) + + +cdef Column _update_column_struct_field_names( + Column col, + column_name_info& info +): + cdef vector[string] field_names + + if is_struct_dtype(col): + field_names.reserve(len(col.base_children)) + for i in range(info.children.size()): + field_names.push_back(info.children[i].name) + col = col._rename_fields( + field_names + ) + + if col.children: + children = list(col.children) + for i, child in enumerate(children): + children[i] = _update_column_struct_field_names( + child, + info.children[i] + ) + col.set_base_children(tuple(children)) + return col diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index 6b00f0e0173..944300cc167 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -9,6 +9,10 @@ from libcpp.vector cimport vector from libcpp.utility cimport move from cudf._lib.cpp.column.column cimport column +from cudf.utils.dtypes import is_struct_dtype + +from cudf._lib.column cimport Column + from cudf._lib.cpp.io.orc_metadata cimport ( raw_orc_statistics, read_raw_orc_statistics as libcudf_read_raw_orc_statistics @@ -22,6 +26,7 @@ from cudf._lib.cpp.io.orc cimport ( orc_chunked_writer ) from cudf._lib.cpp.io.types cimport ( + column_name_info, compression_type, data_sink, sink_info, @@ -36,7 +41,11 @@ from cudf._lib.cpp.types cimport ( data_type, type_id, size_type ) -from cudf._lib.io.utils cimport make_source_info, make_sink_info +from cudf._lib.io.utils cimport ( + make_source_info, + make_sink_info, + update_struct_field_names, +) from cudf._lib.table cimport Table from cudf._lib.types import np_to_cudf_types from cudf._lib.types cimport underlying_type_t_type_id @@ -106,7 +115,11 @@ cpdef read_orc(object filepaths_or_buffers, names = [name.decode() for name in c_result.metadata.column_names] - return Table.from_unique_ptr(move(c_result.tbl), names) + tbl = Table.from_unique_ptr(move(c_result.tbl), names) + + 
update_struct_field_names(tbl, c_result.metadata.schema_info) + + return tbl cdef compression_type _get_comp_type(object compression): diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 4ea2adec23a..3208b175c6e 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -61,7 +61,8 @@ from cudf._lib.cpp.io.parquet cimport ( from cudf._lib.column cimport Column from cudf._lib.io.utils cimport ( make_source_info, - make_sink_info + make_sink_info, + update_struct_field_names, ) cimport cudf._lib.cpp.types as cudf_types @@ -187,7 +188,7 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None, ) ) - _update_struct_field_names(df, c_out_table.metadata.schema_info) + update_struct_field_names(df, c_out_table.metadata.schema_info) if df.empty and meta is not None: cols_dtype_map = {} @@ -519,39 +520,6 @@ cdef cudf_io_types.compression_type _get_comp_type(object compression): raise ValueError("Unsupported `compression` type") -cdef _update_struct_field_names( - Table table, - vector[cudf_io_types.column_name_info]& schema_info -): - for i, (name, col) in enumerate(table._data.items()): - table._data[name] = _update_column_struct_field_names( - col, schema_info[i] - ) - -cdef Column _update_column_struct_field_names( - Column col, - cudf_io_types.column_name_info& info -): - cdef vector[string] field_names - - if is_struct_dtype(col): - field_names.reserve(len(col.base_children)) - for i in range(info.children.size()): - field_names.push_back(info.children[i].name) - col = col._rename_fields( - field_names - ) - - if col.children: - children = list(col.children) - for i, child in enumerate(children): - children[i] = _update_column_struct_field_names( - child, - info.children[i] - ) - col.set_base_children(tuple(children)) - return col - cdef _set_col_metadata(Column col, column_in_metadata& col_meta): if is_struct_dtype(col): for i, (child_col, name) in enumerate( diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index b03465bf8d0..9e0fd9da824 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -893,6 +893,12 @@ def astype(self, dtype: Dtype, **kwargs) -> ColumnBase: "Casting list columns not currently supported" ) return self + elif is_struct_dtype(dtype): + if not self.dtype == dtype: + raise NotImplementedError( + "Casting struct columns not currently supported" + ) + return self elif is_interval_dtype(self.dtype): return self.as_interval_column(dtype, **kwargs) elif is_decimal_dtype(dtype): diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 213b7bf39d7..894c84eeb3e 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -3,13 +3,14 @@ import datetime import decimal import os +import random from io import BytesIO import numpy as np import pandas as pd import pyarrow as pa import pyarrow.orc -import pyorc +import pyorc as po import pytest import cudf @@ -304,7 +305,7 @@ def test_orc_read_skiprows(tmpdir): {"a": [1, 0, 1, 0, None, 1, 1, 1, 0, None, 0, 0, 1, 1, 1, 1]}, dtype=pd.BooleanDtype(), ) - writer = pyorc.Writer(buff, pyorc.Struct(a=pyorc.Boolean())) + writer = po.Writer(buff, po.Struct(a=po.Boolean())) tuples = list( map( lambda x: (None,) if x[0] is pd.NA else x, @@ -842,6 +843,159 @@ def test_orc_string_stream_offset_issue(): assert_eq(df, cudf.read_orc(buffer)) +def generate_list_struct_buff(size=28000): + rd = random.Random(0) + 
np.random.seed(seed=0) + + buff = BytesIO() + + schema = { + "lvl3_list": po.Array(po.Array(po.Array(po.BigInt()))), + "lvl1_list": po.Array(po.BigInt()), + "lvl1_struct": po.Struct(**{"a": po.BigInt(), "b": po.BigInt()}), + "lvl2_struct": po.Struct( + **{ + "a": po.BigInt(), + "lvl1_struct": po.Struct( + **{"c": po.BigInt(), "d": po.BigInt()} + ), + } + ), + "list_nests_struct": po.Array( + po.Array(po.Struct(**{"a": po.BigInt(), "b": po.BigInt()})) + ), + "struct_nests_list": po.Struct( + **{ + "struct": po.Struct(**{"a": po.BigInt(), "b": po.BigInt()}), + "list": po.Array(po.BigInt()), + } + ), + } + + schema = po.Struct(**schema) + + lvl3_list = [ + [ + [ + [ + rd.choice([None, np.random.randint(1, 3)]) + for z in range(np.random.randint(1, 3)) + ] + for z in range(np.random.randint(0, 3)) + ] + for y in range(np.random.randint(0, 3)) + ] + for x in range(size) + ] + lvl1_list = [ + [ + rd.choice([None, np.random.randint(0, 3)]) + for y in range(np.random.randint(1, 4)) + ] + for x in range(size) + ] + lvl1_struct = [ + (np.random.randint(0, 3), np.random.randint(0, 3)) for x in range(size) + ] + lvl2_struct = [ + ( + rd.choice([None, np.random.randint(0, 3)]), + ( + rd.choice([None, np.random.randint(0, 3)]), + np.random.randint(0, 3), + ), + ) + for x in range(size) + ] + list_nests_struct = [ + [ + [rd.choice(lvl1_struct), rd.choice(lvl1_struct)] + for y in range(np.random.randint(1, 4)) + ] + for x in range(size) + ] + struct_nests_list = [(lvl1_struct[x], lvl1_list[x]) for x in range(size)] + + df = pd.DataFrame( + { + "lvl3_list": lvl3_list, + "lvl1_list": lvl1_list, + "lvl1_struct": lvl1_struct, + "lvl2_struct": lvl2_struct, + "list_nests_struct": list_nests_struct, + "struct_nests_list": struct_nests_list, + } + ) + + writer = po.Writer(buff, schema, stripe_size=1024) + tuples = list( + map( + lambda x: (None,) if x[0] is pd.NA else x, + list(df.itertuples(index=False, name=None)), + ) + ) + writer.writerows(tuples) + writer.close() + + return buff + + +list_struct_buff = generate_list_struct_buff() + + +@pytest.mark.parametrize( + "columns", + [ + None, + ["lvl3_list", "list_nests_struct", "lvl2_struct", "struct_nests_list"], + ["lvl2_struct", "lvl1_struct"], + ], +) +@pytest.mark.parametrize("num_rows", [0, 15, 1005, 10561, 28000]) +@pytest.mark.parametrize("use_index", [True, False]) +@pytest.mark.parametrize("skip_rows", [0, 101, 1007]) +def test_lists_struct_nests( + columns, num_rows, use_index, skip_rows, +): + + has_lists = ( + any("list" in col_name for col_name in columns) if columns else True + ) + + if has_lists and skip_rows > 0: + with pytest.raises( + RuntimeError, match="skip_rows is not supported by list column" + ): + cudf.read_orc( + list_struct_buff, + columns=columns, + num_rows=num_rows, + use_index=use_index, + skiprows=skip_rows, + ) + else: + gdf = cudf.read_orc( + list_struct_buff, + columns=columns, + num_rows=num_rows, + use_index=use_index, + skiprows=skip_rows, + ) + + pyarrow_tbl = pyarrow.orc.ORCFile(list_struct_buff).read() + + pyarrow_tbl = ( + pyarrow_tbl[skip_rows : skip_rows + num_rows] + if columns is None + else pyarrow_tbl.select(columns)[skip_rows : skip_rows + num_rows] + ) + + if num_rows > 0: + assert_eq(True, pyarrow_tbl.equals(gdf.to_arrow())) + else: + assert_eq(pyarrow_tbl.to_pandas(), gdf) + + @pytest.mark.parametrize( "data", [["_col0"], ["FakeName", "_col0", "TerriblyFakeColumnName"]] ) From c1bdbfa362febbe522a7c5c9e10a6c658a23276e Mon Sep 17 00:00:00 2001 From: ochan1 Date: Tue, 6 Jul 2021 13:03:05 -0700 Subject: [PATCH 28/54] 
Fix double counting of selected columns in CSV reader (#8508)

Fix a bug where the program could segfault when the same column is
selected twice, either by giving both its index and its name, or by
listing the same column name twice.

Authors:
  - https://github.com/ochan1
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)
  - Paul Taylor (https://github.com/trxcllnt)

URL: https://github.com/rapidsai/cudf/pull/8508
---
 cpp/src/io/csv/reader_impl.cu | 12 +++++++++---
 cpp/tests/io/csv_test.cpp     | 36 +++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu
index 71391c8c444..c5ffd75341e 100644
--- a/cpp/src/io/csv/reader_impl.cu
+++ b/cpp/src/io/csv/reader_impl.cu
@@ -40,6 +40,7 @@
 #include
 #include
 #include
+#include <unordered_set>
 
 using std::string;
 using std::vector;
@@ -336,13 +337,18 @@ table_with_metadata reader::impl::read(rmm::cuda_stream_view stream)
     for (const auto index : opts_.get_use_cols_indexes()) {
       column_flags_[index] = column_parse::enabled;
     }
-    num_active_cols_ = opts_.get_use_cols_indexes().size();
+    num_active_cols_ = std::unordered_set(opts_.get_use_cols_indexes().begin(),
+                                          opts_.get_use_cols_indexes().end())
+                         .size();
 
     for (const auto &name : opts_.get_use_cols_names()) {
       const auto it = std::find(col_names_.begin(), col_names_.end(), name);
       if (it != col_names_.end()) {
-        column_flags_[it - col_names_.begin()] = column_parse::enabled;
-        num_active_cols_++;
+        auto curr_it = it - col_names_.begin();
+        if (column_flags_[curr_it] == column_parse::disabled) {
+          column_flags_[curr_it] = column_parse::enabled;
+          num_active_cols_++;
+        }
       }
     }
   }
diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp
index 8996dd95e06..9c3a9a1b015 100644
--- a/cpp/tests/io/csv_test.cpp
+++ b/cpp/tests/io/csv_test.cpp
@@ -527,6 +527,42 @@ TEST_F(CsvReaderTest, MultiColumn)
   expect_column_data_equal(float64_values, view.column(14));
 }
 
+TEST_F(CsvReaderTest, RepeatColumn)
+{
+  constexpr auto num_rows = 10;
+  auto int16_values   = random_values<int16_t>(num_rows);
+  auto int64_values   = random_values<int64_t>(num_rows);
+  auto uint64_values  = random_values<uint64_t>(num_rows);
+  auto float32_values = random_values<float>(num_rows);
+
+  auto filepath = temp_env->get_temp_dir() + "RepeatColumn.csv";
+  {
+    std::ostringstream line;
+    for (int i = 0; i < num_rows; ++i) {
+      line << int16_values[i] << "," << int64_values[i] << "," << uint64_values[i] << ","
+           << float32_values[i] << "\n";
+    }
+    std::ofstream outfile(filepath, std::ofstream::out);
+    outfile << line.str();
+  }
+
+  // Repeat columns in both the indexes and the names; one column ("C") is not selected.
+  cudf_io::csv_reader_options in_opts =
+    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+      .dtypes(std::vector<std::string>{"int16", "int64", "uint64", "float"})
+      .names({"A", "B", "C", "D"})
+      .use_cols_indexes({1, 0, 0})
+      .use_cols_names({"D", "B", "B"})
+      .header(-1);
+  auto result = cudf_io::read_csv(in_opts);
+
+  const auto view = result.tbl->view();
+  EXPECT_EQ(3, view.num_columns());
+  expect_column_data_equal(int16_values, view.column(0));
+  expect_column_data_equal(int64_values, view.column(1));
+  expect_column_data_equal(float32_values, view.column(2));
+}
+
 TEST_F(CsvReaderTest, Booleans)
 {
   auto filepath = temp_env->get_temp_dir() + "Booleans.csv";
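A standalone, host-only sketch of the selection logic this patch fixes may help: every requested column is counted exactly once, whether it is requested by index, by name, or by both. `count_active_columns`, the `column_parse` enum, and the plain `std::vector` inputs are illustrative stand-ins for the reader's internal state, not cudf APIs.

```cpp
#include <algorithm>
#include <cassert>
#include <string>
#include <unordered_set>
#include <vector>

enum class column_parse { disabled, enabled };

int count_active_columns(std::vector<std::string> const& col_names,
                         std::vector<int> const& use_indexes,
                         std::vector<std::string> const& use_names)
{
  std::vector<column_parse> flags(col_names.size(), column_parse::disabled);

  // Mirror of the std::unordered_set fix: each requested index counts once.
  std::unordered_set<int> const unique_indexes(use_indexes.begin(), use_indexes.end());
  for (int idx : unique_indexes) {
    flags[idx] = column_parse::enabled;
  }
  int num_active = static_cast<int>(unique_indexes.size());

  // A name only adds to the count if the column is still disabled.
  for (auto const& name : use_names) {
    auto const it = std::find(col_names.begin(), col_names.end(), name);
    if (it == col_names.end()) continue;
    auto const pos = it - col_names.begin();
    if (flags[pos] == column_parse::disabled) {
      flags[pos] = column_parse::enabled;
      ++num_active;
    }
  }
  return num_active;
}

int main()
{
  // Same selection as the RepeatColumn test: 3 distinct columns, not 6.
  assert(count_active_columns({"A", "B", "C", "D"}, {1, 0, 0}, {"D", "B", "B"}) == 3);
}
```

With the old logic, the `{1, 0, 0}` indexes alone would have produced a count of 3, and the repeated names would have pushed it to 6 while only three column flags were actually enabled; that mismatch between the active-column count and the flags is what led to the reported segfault.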
From d77ba8292e6bf1b963f6891a60799fe516a49a94 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Wed, 7 Jul 2021 01:36:09 +0530
Subject: [PATCH 29/54] Fix repeated mangled names in read_csv with duplicate column names (#8645)

Fixes the mangled-name bug in `read_csv` with duplicate column names,
which caused a mismatch with pandas behavior.

#### csv file:
```csv
A,A,A.1,A,A.2,A,A.4,A,A
1,2,3,4.0,a,a,a.4,a,a
2,4,6,8.0,b,b,b.4,b,a
3,6,2,6.0,c,c,c.4,c,c
```

Duplicate header names should be mangled as follows (input header on
top, resulting column names below):

|A| A| A.1| A| A.2| A| A.4| A| A|
|-|-|-|-|-|-|-|-|-|
|A| A.1| A.1.1| A.2| A.2.1| A.3| A.4| A.4.1| A.5|

#### Pandas:
```python
In [1]: import pandas as pd

In [2]: pd.read_csv("test.csv")
Out[2]:
   A  A.1  A.1.1  A.2 A.2.1 A.3  A.4 A.4.1 A.5
0  1    2      3  4.0     a   a  a.4     a   a
1  2    4      6  8.0     b   b  b.4     b   a
2  3    6      2  6.0     c   c  c.4     c   c
```

#### cudf: (21.08 nightly docker)
```python
In [1]: import cudf

In [2]: cudf.__version__
Out[2]: '21.08.00a+238.gfba09e66d8'

In [3]: cudf.read_csv("test.csv")
Out[3]:
   A  A.1 A.2 A.3 A.4 A.5
0  1    3   a   a   a   a
1  2    6   b   b   b   a
2  3    2   c   c   c   c
```

This PR fixes this issue:
```python
In [2]: cudf.read_csv("test.csv")
Out[2]:
   A  A.1  A.1.1  A.2 A.2.1 A.3  A.4 A.4.1 A.5
0  1    2      3  4.0     a   a  a.4     a   a
1  2    4      6  8.0     b   b  b.4     b   a
2  3    6      2  6.0     c   c  c.4     c   c
```

Related info (Spark): Spark duplicate column naming.
https://issues.apache.org/jira/browse/SPARK-16896
https://github.com/apache/spark/pull/14745
The cudf Spark add-on doesn't use libcudf column names, so this PR does
not affect it.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Elias Stehle (https://github.com/elstehle)
  - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: https://github.com/rapidsai/cudf/pull/8645
---
 cpp/src/io/csv/reader_impl.cu      |  4 +++-
 python/cudf/cudf/tests/test_csv.py | 11 +++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu
index c5ffd75341e..cae930b8197 100644
--- a/cpp/src/io/csv/reader_impl.cu
+++ b/cpp/src/io/csv/reader_impl.cu
@@ -316,7 +316,9 @@ table_with_metadata reader::impl::read(rmm::cuda_stream_view stream)
       if (opts_.is_enabled_mangle_dupe_cols()) {
         // Rename duplicates of column X as X.1, X.2, ...; First appearance
         // stays as X
-        col_name += "." + std::to_string(col_names_histogram[col_name] - 1);
+        do {
+          col_name += "." + std::to_string(col_names_histogram[col_name] - 1);
+        } while (col_names_histogram[col_name]++);
       } else {
         // All duplicate columns will be ignored; First appearance is parsed
         const auto idx = &col_name - col_names_.data();
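Before the Python-side test below, a standalone sketch of the renaming loop added above (`mangle_dupe_cols` is a hypothetical helper, not the cudf API): the `do`/`while` keeps extending the suffix until the mangled name is itself unseen, so a generated name such as `A.1` can no longer collide with a column explicitly named `A.1`.

```cpp
#include <cassert>
#include <map>
#include <string>
#include <vector>

// Mirrors the patched logic: the first appearance of a name is kept;
// each later duplicate is renamed, and the loop retries while the
// candidate name is already present in the histogram.
std::vector<std::string> mangle_dupe_cols(std::vector<std::string> names)
{
  std::map<std::string, int> histogram;
  for (auto& name : names) {
    if (++histogram[name] == 1) continue;  // first appearance stays as-is
    do {
      // After the first pass, `name` has already been extended, so the
      // lookup below tracks the evolving candidate, as in the patch.
      name += "." + std::to_string(histogram[name] - 1);
    } while (histogram[name]++);  // retry while the candidate was taken
  }
  return names;
}

int main()
{
  // Matches the table in the commit message: A,A,A.1,A -> A, A.1, A.1.1, A.2
  auto const out = mangle_dupe_cols({"A", "A", "A.1", "A"});
  assert((out == std::vector<std::string>{"A", "A.1", "A.1.1", "A.2"}));
}
```

The post-increment in the loop condition does double duty: it marks the final candidate as used in the histogram, and its previous value (zero only for unseen names) decides whether another round of suffixing is needed.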
diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index c19fde8b5d6..5511a65d0a4 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -1261,6 +1261,17 @@ def test_csv_reader_column_names(names):
     assert list(df) == list(names)
 
 
+def test_csv_reader_repeated_column_name():
+    buffer = """A,A,A.1,A,A.2,A,A.4,A,A
+                1,2,3.1,4,a.2,a,a.4,a,a
+                2,4,6.1,8,b.2,b,b.4,b,b"""
+
+    # pandas and cudf should produce the same mangled column names
+    pdf = pd.read_csv(StringIO(buffer))
+    gdf = cudf.read_csv(StringIO(buffer))
+    assert_eq(pdf.columns, gdf.columns)
+
+
 def test_csv_reader_bools_false_positives(tmpdir):
     # values that are equal to ["True", "TRUE", "False", "FALSE"]
     # when using ints to detect bool values

From 7721819eeed68115fd4d7033cba016830b0afcd8 Mon Sep 17 00:00:00 2001
From: Conor Hoekstra <36027403+codereport@users.noreply.github.com>
Date: Tue, 6 Jul 2021 22:10:15 -0400
Subject: [PATCH 30/54] Updating Clang Version to 11.0.0 (#6695)

This resolves: https://github.com/rapidsai/cudf/issues/5187

PR description copied from: https://github.com/rapidsai/cuml/pull/3121

Depends on: https://github.com/rapidsai/integration/pull/304

This PR upgrades the required clang version to 11.0.0, which lets us
run clang-tidy on .cu files when building against CUDA 11. See
rapidsai/raft#88 for more details.

CI will not pass as the underlying conda-env still uses 8.0.1. Once we
have the rapids-build-env meta package updated, this should pass.

-----

### Fixes from Clang 8.0.1 to Clang 11.0.0 (that are observed in delta)

* Missing spaces
* Incorrect alignment when a ternary expression splits across multiple lines
* Comment alignment on macros
* Fixed formatting where function signatures have line breaks
* Aligning macros
* Always left-align pointer/reference
* Don't allow single-line for loops

-----

To do list:
* [x] Update python file
* [x] Update conda environment files
* [x] Run formatter to apply all changes from upgrading
* [x] Add changes from https://github.com/rapidsai/cudf/issues/5187
* [x] Review list of new changes from 8.0.1 to 11; choose which to incorporate
* [x] Get working with RAPIDS compose

Authors:
  - Conor Hoekstra (https://github.com/codereport)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Nghia Truong (https://github.com/ttnghia)
  - Mark Harris (https://github.com/harrism)
  - Dillon Cullinan (https://github.com/dillon-cullinan)

URL: https://github.com/rapidsai/cudf/pull/6695
---
 conda/environments/cudf_dev_cuda11.0.yml           |  4 +-
 conda/environments/cudf_dev_cuda11.2.yml           |  4 +-
 cpp/.clang-format                                  | 19 +-
 cpp/benchmarks/ast/transform_benchmark.cpp         |  4 +-
 cpp/benchmarks/binaryop/binaryop_benchmark.cpp     |  4 +-
 cpp/benchmarks/common/generate_benchmark_input.cpp |  2 +-
 cpp/benchmarks/hashing/partition_benchmark.cpp     |  4 +-
 cpp/benchmarks/io/cuio_benchmark_common.cpp        |  3 +-
 cpp/benchmarks/iterator/iterator_benchmark.cu      | 20 +-
 cpp/benchmarks/reduction/anyall_benchmark.cpp      |  2 +-
 cpp/benchmarks/reduction/dictionary_benchmark.cpp  |  2 +-
 cpp/benchmarks/reduction/minmax_benchmark.cpp      |  2 +-
 cpp/benchmarks/reduction/reduce_benchmark.cpp      |  2 +-
 cpp/benchmarks/search/search_benchmark.cpp         |  3 +-
 .../apply_boolean_mask_benchmark.cpp               | 12 +-
 .../drop_duplicates_benchmark.cpp                  |  2 +-
 cpp/benchmarks/string/extract_benchmark.cpp        |  4 +-
cpp/benchmarks/text/replace_benchmark.cpp | 3 +- cpp/benchmarks/text/subword_benchmark.cpp | 6 +- .../type_dispatcher_benchmark.cu | 4 +- cpp/include/cudf/detail/copy_if_else.cuh | 4 +- cpp/include/cudf/detail/merge.cuh | 8 +- cpp/include/cudf/detail/null_mask.cuh | 14 +- cpp/include/cudf/detail/null_mask.hpp | 32 +- .../cudf/detail/utilities/device_atomics.cuh | 8 +- .../utilities/transform_unary_functions.cuh | 10 +- cpp/include/cudf/io/avro.hpp | 2 +- cpp/include/cudf/io/csv.hpp | 16 +- cpp/include/cudf/io/datasource.hpp | 6 +- cpp/include/cudf/io/detail/avro.hpp | 14 +- cpp/include/cudf/io/detail/csv.hpp | 20 +- cpp/include/cudf/io/detail/json.hpp | 14 +- cpp/include/cudf/io/json.hpp | 2 +- cpp/include/cudf/io/orc.hpp | 6 +- cpp/include/cudf/io/parquet.hpp | 6 +- cpp/include/cudf/reduction.hpp | 16 +- cpp/include/cudf/strings/string_view.cuh | 22 +- cpp/include/cudf/table/row_operators.cuh | 4 +- cpp/include/cudf/types.hpp | 4 +- cpp/include/cudf/utilities/error.hpp | 2 +- cpp/include/cudf_test/base_fixture.hpp | 20 +- cpp/include/cudf_test/cudf_gtest.hpp | 10 +- cpp/include/cudf_test/cxxopts.hpp | 8 +- cpp/include/cudf_test/file_utilities.hpp | 10 +- cpp/scripts/run-clang-format.py | 2 +- cpp/src/bitmask/null_mask.cu | 98 +++--- cpp/src/column/column.cu | 44 +-- cpp/src/copying/get_element.cu | 46 +-- cpp/src/dictionary/set_keys.cu | 3 +- cpp/src/groupby/hash/groupby.cu | 4 +- cpp/src/groupby/sort/group_collect.cu | 10 +- cpp/src/groupby/sort/group_nth_element.cu | 6 +- cpp/src/hash/managed.cuh | 6 +- cpp/src/hash/unordered_multiset.cuh | 20 +- cpp/src/io/avro/avro.cpp | 10 +- cpp/src/io/avro/avro.h | 18 +- cpp/src/io/avro/avro_gpu.cu | 52 ++-- cpp/src/io/avro/avro_gpu.h | 8 +- cpp/src/io/avro/reader_impl.cu | 72 ++--- cpp/src/io/avro/reader_impl.hpp | 16 +- cpp/src/io/comp/brotli_dict.cpp | 2 +- cpp/src/io/comp/brotli_dict.h | 2 +- cpp/src/io/comp/cpu_unbz2.cpp | 61 ++-- cpp/src/io/comp/debrotli.cu | 278 +++++++++--------- cpp/src/io/comp/gpuinflate.cu | 203 +++++++------ cpp/src/io/comp/gpuinflate.h | 24 +- cpp/src/io/comp/snap.cu | 50 ++-- cpp/src/io/comp/unbz2.h | 32 +- cpp/src/io/comp/uncomp.cpp | 80 ++--- cpp/src/io/comp/unsnap.cu | 80 ++--- cpp/src/io/csv/csv_gpu.cu | 177 +++++------ cpp/src/io/csv/csv_gpu.h | 16 +- cpp/src/io/csv/datetime.cuh | 4 +- cpp/src/io/csv/reader_impl.cu | 78 ++--- cpp/src/io/csv/reader_impl.hpp | 10 +- cpp/src/io/json/json_gpu.cu | 200 ++++++------- cpp/src/io/json/json_gpu.h | 16 +- cpp/src/io/json/reader_impl.cu | 70 ++--- cpp/src/io/json/reader_impl.hpp | 20 +- cpp/src/io/orc/dict_enc.cu | 54 ++-- cpp/src/io/orc/orc.cpp | 70 ++--- cpp/src/io/orc/orc.h | 177 +++++------ cpp/src/io/orc/orc_field_reader.hpp | 20 +- cpp/src/io/orc/orc_field_writer.hpp | 35 ++- cpp/src/io/orc/orc_gpu.h | 96 +++--- cpp/src/io/orc/reader_impl.cu | 188 ++++++------ cpp/src/io/orc/stats_enc.cu | 56 ++-- cpp/src/io/orc/stripe_data.cu | 153 +++++----- cpp/src/io/orc/stripe_enc.cu | 90 +++--- cpp/src/io/orc/stripe_init.cu | 91 +++--- cpp/src/io/orc/timezone.cpp | 34 ++- cpp/src/io/orc/timezone.cuh | 10 +- cpp/src/io/orc/writer_impl.cu | 252 ++++++++-------- cpp/src/io/orc/writer_impl.hpp | 2 +- .../io/parquet/compact_protocol_writer.cpp | 45 +-- .../io/parquet/compact_protocol_writer.hpp | 36 +-- cpp/src/io/parquet/page_data.cu | 258 ++++++++-------- cpp/src/io/parquet/page_dict.cu | 48 ++- cpp/src/io/parquet/page_enc.cu | 143 ++++----- cpp/src/io/parquet/page_hdr.cu | 74 ++--- cpp/src/io/parquet/parquet.cpp | 61 ++-- cpp/src/io/parquet/parquet.hpp | 148 +++++----- 
cpp/src/io/parquet/parquet_gpu.hpp | 88 +++--- cpp/src/io/parquet/reader_impl.cu | 226 +++++++------- cpp/src/io/parquet/reader_impl.hpp | 44 +-- cpp/src/io/parquet/writer_impl.cu | 140 ++++----- cpp/src/io/statistics/column_statistics.cuh | 57 ++-- .../io/statistics/orc_column_statistics.cu | 10 +- .../statistics/parquet_column_statistics.cu | 10 +- cpp/src/io/statistics/statistics.cuh | 14 +- cpp/src/io/utilities/block_utils.cuh | 18 +- cpp/src/io/utilities/column_utils.cuh | 4 +- cpp/src/io/utilities/datasource.cpp | 34 +-- cpp/src/io/utilities/file_io_utilities.cpp | 44 +-- cpp/src/io/utilities/file_io_utilities.hpp | 40 +-- cpp/src/io/utilities/hostdevice_vector.hpp | 28 +- cpp/src/io/utilities/parsing_utils.cuh | 4 +- cpp/src/io/utilities/trie.cu | 10 +- cpp/src/io/utilities/type_conversion.cuh | 2 +- cpp/src/jit/parser.cpp | 16 +- cpp/src/join/hash_join.cu | 58 ++-- cpp/src/lists/explode.cu | 6 +- cpp/src/reductions/minmax.cu | 30 +- cpp/src/reductions/reductions.cpp | 30 +- cpp/src/reshape/tile.cu | 8 +- cpp/src/rolling/rolling_detail.hpp | 16 +- cpp/src/rolling/rolling_jit_detail.hpp | 8 +- cpp/src/sort/rank.cu | 16 +- cpp/src/strings/combine/join.cu | 6 +- cpp/src/strings/combine/join_list_elements.cu | 8 +- cpp/src/strings/contains.cu | 4 +- cpp/src/strings/convert/convert_datetime.cu | 3 +- cpp/src/strings/convert/convert_durations.cu | 3 +- cpp/src/strings/convert/convert_floats.cu | 3 +- cpp/src/strings/convert/convert_hex.cu | 4 +- cpp/src/strings/convert/convert_ipv4.cu | 3 +- cpp/src/strings/convert/convert_urls.cu | 6 +- cpp/src/strings/convert/utilities.cuh | 3 +- cpp/src/strings/padding.cu | 15 +- cpp/src/strings/regex/regcomp.cpp | 24 +- cpp/src/strings/regex/regex.inl | 3 +- cpp/src/strings/replace/multi_re.cu | 4 +- cpp/src/strings/replace/replace.cu | 14 +- cpp/src/strings/split/split.cu | 12 +- cpp/src/table/table.cpp | 16 +- cpp/src/table/table_view.cpp | 4 +- cpp/tests/bitmask/bitmask_tests.cpp | 28 +- cpp/tests/bitmask/set_nullmask_tests.cu | 3 +- cpp/tests/column/factories_test.cpp | 16 +- cpp/tests/copying/get_value_tests.cpp | 72 ++--- cpp/tests/copying/pack_tests.cpp | 2 +- cpp/tests/copying/split_tests.cpp | 12 +- cpp/tests/filling/repeat_tests.cpp | 4 +- cpp/tests/groupby/collect_set_tests.cpp | 10 +- cpp/tests/io/csv_test.cpp | 19 +- cpp/tests/io/parquet_test.cpp | 4 +- cpp/tests/reductions/reduction_tests.cpp | 114 +++---- cpp/tests/replace/replace_nulls_tests.cpp | 18 +- cpp/tests/search/search_test.cpp | 172 +++++------ cpp/tests/sort/rank_test.cpp | 4 +- cpp/tests/strings/chars_types_tests.cpp | 3 +- cpp/tests/strings/hash_string.cu | 3 +- cpp/tests/text/subword_tests.cpp | 6 +- cpp/tests/transpose/transpose_test.cpp | 8 +- cpp/tests/utilities_tests/span_tests.cu | 4 +- cpp/tests/utilities_tests/type_list_tests.cpp | 15 +- 166 files changed, 3072 insertions(+), 2864 deletions(-) diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index c2a7f3d9b94..d50dde7ff0e 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -7,8 +7,8 @@ channels: - rapidsai-nightly - conda-forge dependencies: - - clang=8.0.1 - - clang-tools=8.0.1 + - clang=11.0.0 + - clang-tools=11.0.0 - cupy>7.1.0,<10.0.0a0 - rmm=21.08.* - cmake>=3.20.1 diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index ad2b8cd5403..31118fdbdbc 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -7,8 +7,8 @@ 
channels: - rapidsai-nightly - conda-forge dependencies: - - clang=8.0.1 - - clang-tools=8.0.1 + - clang=11.0.0 + - clang-tools=11.0.0 - cupy>7.1.0,<10.0.0a0 - rmm=21.08.* - cmake>=3.20.1 diff --git a/cpp/.clang-format b/cpp/.clang-format index 11404b0226e..0c05436e922 100644 --- a/cpp/.clang-format +++ b/cpp/.clang-format @@ -6,16 +6,22 @@ Language: Cpp AccessModifierOffset: -1 AlignAfterOpenBracket: Align AlignConsecutiveAssignments: true +AlignConsecutiveBitFields: true AlignConsecutiveDeclarations: false +AlignConsecutiveMacros: true AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: true AllowShortCaseLabelsOnASingleLine: true +AllowShortEnumsOnASingleLine: true AllowShortFunctionsOnASingleLine: All AllowShortIfStatementsOnASingleLine: true -AllowShortLoopsOnASingleLine: true +AllowShortLambdasOnASingleLine: true +AllowShortLoopsOnASingleLine: false # This is deprecated AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None @@ -40,14 +46,14 @@ BraceWrapping: SplitEmptyFunction: false SplitEmptyRecord: false SplitEmptyNamespace: false +BreakAfterJavaFieldAnnotations: false BreakBeforeBinaryOperators: None BreakBeforeBraces: WebKit BreakBeforeInheritanceComma: false -BreakInheritanceList: BeforeColon BreakBeforeTernaryOperators: true BreakConstructorInitializersBeforeComma: false BreakConstructorInitializers: BeforeColon -BreakAfterJavaFieldAnnotations: false +BreakInheritanceList: BeforeColon BreakStringLiterals: true ColumnLimit: 100 CommentPragmas: '^ IWYU pragma:' @@ -57,7 +63,7 @@ ConstructorInitializerAllOnOneLineOrOnePerLine: true ConstructorInitializerIndentWidth: 2 ContinuationIndentWidth: 2 Cpp11BracedListStyle: true -DerivePointerAlignment: true +DerivePointerAlignment: false DisableFormat: false ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true @@ -139,14 +145,17 @@ SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 2 SpacesInAngles: false +SpacesInConditionalStatement: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false -Standard: Cpp11 +Standard: c++17 StatementMacros: - Q_UNUSED - QT_REQUIRE_VERSION diff --git a/cpp/benchmarks/ast/transform_benchmark.cpp b/cpp/benchmarks/ast/transform_benchmark.cpp index d39faec3ac4..e1d52d7f0e6 100644 --- a/cpp/benchmarks/ast/transform_benchmark.cpp +++ b/cpp/benchmarks/ast/transform_benchmark.cpp @@ -135,7 +135,9 @@ static void CustomRanges(benchmark::internal::Benchmark* b) auto row_counts = std::vector{100'000, 1'000'000, 10'000'000, 100'000'000}; auto operation_counts = std::vector{1, 5, 10}; for (auto const& row_count : row_counts) { - for (auto const& operation_count : operation_counts) { b->Args({row_count, operation_count}); } + for (auto const& operation_count : operation_counts) { + b->Args({row_count, operation_count}); + } } } diff --git a/cpp/benchmarks/binaryop/binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/binaryop_benchmark.cpp index 753dcc83b54..314d657679b 100644 --- a/cpp/benchmarks/binaryop/binaryop_benchmark.cpp +++ b/cpp/benchmarks/binaryop/binaryop_benchmark.cpp @@ -113,7 +113,9 @@ static void 
CustomRanges(benchmark::internal::Benchmark* b) auto row_counts = std::vector{100'000, 1'000'000, 10'000'000, 100'000'000}; auto operation_counts = std::vector{1, 2, 5, 10}; for (auto const& row_count : row_counts) { - for (auto const& operation_count : operation_counts) { b->Args({row_count, operation_count}); } + for (auto const& operation_count : operation_counts) { + b->Args({row_count, operation_count}); + } } } diff --git a/cpp/benchmarks/common/generate_benchmark_input.cpp b/cpp/benchmarks/common/generate_benchmark_input.cpp index 591e42ceddf..4280fd0c2ba 100644 --- a/cpp/benchmarks/common/generate_benchmark_input.cpp +++ b/cpp/benchmarks/common/generate_benchmark_input.cpp @@ -53,7 +53,7 @@ T get_distribution_mean(distribution_params const& dist) auto const range_size = dist.lower_bound < dist.upper_bound ? dist.upper_bound - dist.lower_bound : dist.lower_bound - dist.upper_bound; - auto const p = geometric_dist_p(range_size); + auto const p = geometric_dist_p(range_size); if (dist.lower_bound < dist.upper_bound) return dist.lower_bound + (1. / p); else diff --git a/cpp/benchmarks/hashing/partition_benchmark.cpp b/cpp/benchmarks/hashing/partition_benchmark.cpp index d10b63dc4e1..185f19f28e5 100644 --- a/cpp/benchmarks/hashing/partition_benchmark.cpp +++ b/cpp/benchmarks/hashing/partition_benchmark.cpp @@ -65,7 +65,9 @@ static void CustomRanges(benchmark::internal::Benchmark* b) { for (int columns = 1; columns <= 256; columns *= 16) { for (int partitions = 64; partitions <= 1024; partitions *= 2) { - for (int rows = 1 << 17; rows <= 1 << 21; rows *= 2) { b->Args({rows, columns, partitions}); } + for (int rows = 1 << 17; rows <= 1 << 21; rows *= 2) { + b->Args({rows, columns, partitions}); + } } } } diff --git a/cpp/benchmarks/io/cuio_benchmark_common.cpp b/cpp/benchmarks/io/cuio_benchmark_common.cpp index f2aa216d413..627ac9ccc04 100644 --- a/cpp/benchmarks/io/cuio_benchmark_common.cpp +++ b/cpp/benchmarks/io/cuio_benchmark_common.cpp @@ -94,7 +94,8 @@ std::vector select_column_indexes(int num_cols, column_selection col_sel) (col_sel == column_selection::SECOND_HALF) ? num_cols / 2 : 0); break; case column_selection::ALTERNATE: - for (size_t i = 0; i < col_idxs.size(); ++i) col_idxs[i] = 2 * i; + for (size_t i = 0; i < col_idxs.size(); ++i) + col_idxs[i] = 2 * i; break; } return col_idxs; diff --git a/cpp/benchmarks/iterator/iterator_benchmark.cu b/cpp/benchmarks/iterator/iterator_benchmark.cu index 04307f5db25..b4bb99abdde 100644 --- a/cpp/benchmarks/iterator/iterator_benchmark.cu +++ b/cpp/benchmarks/iterator/iterator_benchmark.cu @@ -61,7 +61,7 @@ inline auto reduce_by_cub(OutputIterator result, InputIterator d_in, int num_ite // ----------------------------------------------------------------------------- template -void raw_stream_bench_cub(cudf::column_view &col, rmm::device_uvector &result) +void raw_stream_bench_cub(cudf::column_view& col, rmm::device_uvector& result) { // std::cout << "raw stream cub: " << "\t"; @@ -73,7 +73,7 @@ void raw_stream_bench_cub(cudf::column_view &col, rmm::device_uvector &result }; template -void iterator_bench_cub(cudf::column_view &col, rmm::device_uvector &result) +void iterator_bench_cub(cudf::column_view& col, rmm::device_uvector& result) { // std::cout << "iterator cub " << ( (has_null) ? 
": " : ": " ) << "\t"; @@ -91,7 +91,7 @@ void iterator_bench_cub(cudf::column_view &col, rmm::device_uvector &result) // ----------------------------------------------------------------------------- template -void raw_stream_bench_thrust(cudf::column_view &col, rmm::device_uvector &result) +void raw_stream_bench_thrust(cudf::column_view& col, rmm::device_uvector& result) { // std::cout << "raw stream thust: " << "\t\t"; @@ -102,7 +102,7 @@ void raw_stream_bench_thrust(cudf::column_view &col, rmm::device_uvector &res } template -void iterator_bench_thrust(cudf::column_view &col, rmm::device_uvector &result) +void iterator_bench_thrust(cudf::column_view& col, rmm::device_uvector& result) { // std::cout << "iterator thust " << ( (has_null) ? ": " : ": " ) << "\t"; @@ -124,7 +124,7 @@ class Iterator : public cudf::benchmark { }; template -void BM_iterator(benchmark::State &state) +void BM_iterator(benchmark::State& state) { const cudf::size_type column_size{(cudf::size_type)state.range(0)}; using T = TypeParam; @@ -165,8 +165,8 @@ __device__ thrust::pair operator+(thrust::pair lhs, thrust::pa } // ----------------------------------------------------------------------------- template -void pair_iterator_bench_cub(cudf::column_view &col, - rmm::device_uvector> &result) +void pair_iterator_bench_cub(cudf::column_view& col, + rmm::device_uvector>& result) { thrust::pair init{0, false}; auto d_col = cudf::column_device_view::create(col); @@ -176,8 +176,8 @@ void pair_iterator_bench_cub(cudf::column_view &col, } template -void pair_iterator_bench_thrust(cudf::column_view &col, - rmm::device_uvector> &result) +void pair_iterator_bench_thrust(cudf::column_view& col, + rmm::device_uvector>& result) { thrust::pair init{0, false}; auto d_col = cudf::column_device_view::create(col); @@ -187,7 +187,7 @@ void pair_iterator_bench_thrust(cudf::column_view &col, } template -void BM_pair_iterator(benchmark::State &state) +void BM_pair_iterator(benchmark::State& state) { const cudf::size_type column_size{(cudf::size_type)state.range(0)}; using T = TypeParam; diff --git a/cpp/benchmarks/reduction/anyall_benchmark.cpp b/cpp/benchmarks/reduction/anyall_benchmark.cpp index 97d66585f8c..3dcb433ec52 100644 --- a/cpp/benchmarks/reduction/anyall_benchmark.cpp +++ b/cpp/benchmarks/reduction/anyall_benchmark.cpp @@ -48,7 +48,7 @@ void BM_reduction_anyall(benchmark::State& state, std::unique_ptr co } #define concat(a, b, c) a##b##c -#define get_agg(op) concat(cudf::make_, op, _aggregation()) +#define get_agg(op) concat(cudf::make_, op, _aggregation()) // TYPE, OP #define RBM_BENCHMARK_DEFINE(name, type, aggregation) \ diff --git a/cpp/benchmarks/search/search_benchmark.cpp b/cpp/benchmarks/search/search_benchmark.cpp index 7fb196fb500..c3529c7e79c 100644 --- a/cpp/benchmarks/search/search_benchmark.cpp +++ b/cpp/benchmarks/search/search_benchmark.cpp @@ -131,7 +131,8 @@ BENCHMARK_DEFINE_F(Search, Table)(::benchmark::State& state) { BM_table(state); static void CustomArguments(benchmark::internal::Benchmark* b) { for (int num_cols = 1; num_cols <= 10; num_cols *= 2) - for (int col_size = 1000; col_size <= 100000000; col_size *= 10) b->Args({num_cols, col_size}); + for (int col_size = 1000; col_size <= 100000000; col_size *= 10) + b->Args({num_cols, col_size}); } BENCHMARK_REGISTER_F(Search, Table) diff --git a/cpp/benchmarks/stream_compaction/apply_boolean_mask_benchmark.cpp b/cpp/benchmarks/stream_compaction/apply_boolean_mask_benchmark.cpp index 5cd2278ca14..7246d113ade 100644 --- 
a/cpp/benchmarks/stream_compaction/apply_boolean_mask_benchmark.cpp +++ b/cpp/benchmarks/stream_compaction/apply_boolean_mask_benchmark.cpp @@ -35,13 +35,15 @@ constexpr cudf::size_type fifty_percent = 50; void percent_range(benchmark::internal::Benchmark* b) { b->Unit(benchmark::kMillisecond); - for (int percent = 0; percent <= 100; percent += 10) b->Args({hundredM, percent}); + for (int percent = 0; percent <= 100; percent += 10) + b->Args({hundredM, percent}); } void size_range(benchmark::internal::Benchmark* b) { b->Unit(benchmark::kMillisecond); - for (int size = tenK; size <= hundredM; size *= 10) b->Args({size, fifty_percent}); + for (int size = tenK; size <= hundredM; size *= 10) + b->Args({size, fifty_percent}); } template @@ -64,9 +66,9 @@ void calculate_bandwidth(benchmark::State& state, cudf::size_type num_columns) cudf::size_type const column_size_out = fraction * column_size; int64_t const mask_size = sizeof(bool) * column_size + cudf::bitmask_allocation_size_bytes(column_size); - int64_t const validity_bytes_in = (fraction >= 1.0f / 32) - ? cudf::bitmask_allocation_size_bytes(column_size) - : 4 * column_size_out; + int64_t const validity_bytes_in = (fraction >= 1.0f / 32) + ? cudf::bitmask_allocation_size_bytes(column_size) + : 4 * column_size_out; int64_t const validity_bytes_out = cudf::bitmask_allocation_size_bytes(column_size_out); int64_t const column_bytes_out = sizeof(T) * column_size_out; int64_t const column_bytes_in = column_bytes_out; // we only read unmasked inputs diff --git a/cpp/benchmarks/stream_compaction/drop_duplicates_benchmark.cpp b/cpp/benchmarks/stream_compaction/drop_duplicates_benchmark.cpp index 16bae725621..8039d7d065f 100644 --- a/cpp/benchmarks/stream_compaction/drop_duplicates_benchmark.cpp +++ b/cpp/benchmarks/stream_compaction/drop_duplicates_benchmark.cpp @@ -50,7 +50,7 @@ void BM_compaction(benchmark::State& state, cudf::duplicate_keep_option keep) } #define concat(a, b, c) a##b##c -#define get_keep(op) cudf::duplicate_keep_option::KEEP_##op +#define get_keep(op) cudf::duplicate_keep_option::KEEP_##op // TYPE, OP #define RBM_BENCHMARK_DEFINE(name, type, keep) \ diff --git a/cpp/benchmarks/string/extract_benchmark.cpp b/cpp/benchmarks/string/extract_benchmark.cpp index aa1e59a22bf..161e30c6f25 100644 --- a/cpp/benchmarks/string/extract_benchmark.cpp +++ b/cpp/benchmarks/string/extract_benchmark.cpp @@ -48,7 +48,9 @@ static void BM_extract(benchmark::State& state, int groups) }); std::string pattern; - while (static_cast(pattern.size()) < groups) { pattern += "(\\d+) "; } + while (static_cast(pattern.size()) < groups) { + pattern += "(\\d+) "; + } std::uniform_int_distribution distribution(0, samples.size() - 1); auto elements = cudf::detail::make_counting_transform_iterator( diff --git a/cpp/benchmarks/text/replace_benchmark.cpp b/cpp/benchmarks/text/replace_benchmark.cpp index 8f6704ab1af..0a0e6a1667c 100644 --- a/cpp/benchmarks/text/replace_benchmark.cpp +++ b/cpp/benchmarks/text/replace_benchmark.cpp @@ -41,7 +41,8 @@ static void BM_replace(benchmark::State& state) std::default_random_engine generator; std::uniform_int_distribution tokens_dist(0, words.size() - 1); std::string row; // build a row of random tokens - while (static_cast(row.size()) < n_length) row += words[tokens_dist(generator)]; + while (static_cast(row.size()) < n_length) + row += words[tokens_dist(generator)]; std::uniform_int_distribution position_dist(0, 16); diff --git a/cpp/benchmarks/text/subword_benchmark.cpp b/cpp/benchmarks/text/subword_benchmark.cpp index 
3670fa7c9a7..2406ddd39ae 100644 --- a/cpp/benchmarks/text/subword_benchmark.cpp +++ b/cpp/benchmarks/text/subword_benchmark.cpp @@ -37,7 +37,8 @@ static std::string create_hash_vocab_file() std::vector> coefficients(23, {65559, 0}); std::ofstream outfile(hash_file, std::ofstream::out); outfile << "1\n0\n" << coefficients.size() << "\n"; - for (auto c : coefficients) outfile << c.first << " " << c.second << "\n"; + for (auto c : coefficients) + outfile << c.first << " " << c.second << "\n"; std::vector hash_table(23, 0); outfile << hash_table.size() << "\n"; hash_table[0] = 3015668L; @@ -45,7 +46,8 @@ static std::string create_hash_vocab_file() hash_table[5] = 6358029; hash_table[16] = 451412625363L; hash_table[20] = 6206321707968235495L; - for (auto h : hash_table) outfile << h << "\n"; + for (auto h : hash_table) + outfile << h << "\n"; outfile << "100\n101\n102\n\n"; return hash_file; } diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu index b09a7911595..8e51bcca63d 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher_benchmark.cu @@ -64,7 +64,9 @@ __global__ void no_dispatching_kernel(T** A, cudf::size_type n_rows, cudf::size_ using F = Functor; cudf::size_type index = blockIdx.x * blockDim.x + threadIdx.x; while (index < n_rows) { - for (int c = 0; c < n_cols; c++) { A[c][index] = F::f(A[c][index]); } + for (int c = 0; c < n_cols; c++) { + A[c][index] = F::f(A[c][index]); + } index += blockDim.x * gridDim.x; } } diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh index 1acdcadaacf..74a94f34ad8 100644 --- a/cpp/include/cudf/detail/copy_if_else.cuh +++ b/cpp/include/cudf/detail/copy_if_else.cuh @@ -46,7 +46,7 @@ __launch_bounds__(block_size) __global__ RightIter rhs, Filter filter, mutable_column_device_view out, - size_type *__restrict__ const valid_count) + size_type* __restrict__ const valid_count) { const size_type tid = threadIdx.x + blockIdx.x * block_size; const int warp_id = tid / warp_size; @@ -166,7 +166,7 @@ std::unique_ptr copy_if_else( FilterFn filter, cudf::data_type output_type, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { using Element = typename thrust::tuple_element<0, typename thrust::iterator_traits::value_type>::type; diff --git a/cpp/include/cudf/detail/merge.cuh b/cpp/include/cudf/detail/merge.cuh index a938a3a053a..a779c3defbb 100644 --- a/cpp/include/cudf/detail/merge.cuh +++ b/cpp/include/cudf/detail/merge.cuh @@ -77,8 +77,8 @@ struct tagged_element_relational_comparator { { } - __device__ weak_ordering compare(index_type lhs_tagged_index, index_type rhs_tagged_index) const - noexcept + __device__ weak_ordering compare(index_type lhs_tagged_index, + index_type rhs_tagged_index) const noexcept { side const l_side = thrust::get<0>(lhs_tagged_index); side const r_side = thrust::get<0>(rhs_tagged_index); @@ -117,8 +117,8 @@ struct row_lexicographic_tagged_comparator { CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns."); } - __device__ bool operator()(index_type lhs_tagged_index, index_type rhs_tagged_index) const - noexcept + __device__ bool operator()(index_type lhs_tagged_index, + index_type rhs_tagged_index) const noexcept { for (size_type i = 0; i < _lhs.num_columns(); ++i) { bool 
ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING); diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 08dae998944..e507bacb919 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -38,7 +38,7 @@ namespace detail { template __global__ void offset_bitmask_binop(Binop op, device_span destination, - device_span source, + device_span source, device_span source_begin_bits, size_type source_size_bits) { @@ -73,16 +73,16 @@ __global__ void offset_bitmask_binop(Binop op, template rmm::device_buffer bitmask_binop( Binop op, - host_span masks, + host_span masks, host_span masks_begin_bits, size_type mask_size_bits, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto dest_mask = rmm::device_buffer{bitmask_allocation_size_bytes(mask_size_bits), stream, mr}; inplace_bitmask_binop(op, - device_span(static_cast(dest_mask.data()), + device_span(static_cast(dest_mask.data()), num_bitmask_words(mask_size_bits)), masks, masks_begin_bits, @@ -110,11 +110,11 @@ template void inplace_bitmask_binop( Binop op, device_span dest_mask, - host_span masks, + host_span masks, host_span masks_begin_bits, size_type mask_size_bits, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { CUDF_EXPECTS( std::all_of(masks_begin_bits.begin(), masks_begin_bits.end(), [](auto b) { return b >= 0; }), @@ -123,7 +123,7 @@ void inplace_bitmask_binop( CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), "Mask pointer cannot be null"); - rmm::device_uvector d_masks(masks.size(), stream, mr); + rmm::device_uvector d_masks(masks.size(), stream, mr); rmm::device_uvector d_begin_bits(masks_begin_bits.size(), stream, mr); CUDA_TRY(cudaMemcpyAsync( diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 77cb321a12c..29d75e466de 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -34,14 +34,14 @@ rmm::device_buffer create_null_mask( size_type size, mask_state state, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @copydoc cudf::set_null_mask(bitmask_type*, size_type, size_type, bool) * * @param stream CUDA stream used for device memory operations and kernel launches. */ -void set_null_mask(bitmask_type *bitmask, +void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid, @@ -52,7 +52,7 @@ void set_null_mask(bitmask_type *bitmask, * * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -std::vector segmented_count_set_bits(bitmask_type const *bitmask, +std::vector segmented_count_set_bits(bitmask_type const* bitmask, host_span indices, rmm::cuda_stream_view stream); @@ -61,7 +61,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, * * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
*/ -std::vector<size_type> segmented_count_unset_bits(bitmask_type const *bitmask, +std::vector<size_type> segmented_count_unset_bits(bitmask_type const* bitmask, host_span<size_type const> indices, rmm::cuda_stream_view stream); @@ -72,11 +72,11 @@ std::vector segmented_count_unset_bits(bitmask_type const *bitmask, * @param stream CUDA stream used for device memory operations and kernel launches. */ rmm::device_buffer copy_bitmask( - bitmask_type const *mask, + bitmask_type const* mask, size_type begin_bit, size_type end_bit, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @copydoc cudf::copy_bitmask(column_view const& view, rmm::mr::device_memory_resource*) @@ -84,9 +84,9 @@ rmm::device_buffer copy_bitmask( * @param stream CUDA stream used for device memory operations and kernel launches. */ rmm::device_buffer copy_bitmask( - column_view const &view, + column_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @copydoc bitmask_and(host_span const, host_span const, @@ -95,11 +95,11 @@ rmm::device_buffer copy_bitmask( * @param stream CUDA stream used for device memory operations and kernel launches */ rmm::device_buffer bitmask_and( - host_span<bitmask_type const *> masks, + host_span<bitmask_type const*> masks, host_span<size_type const> masks_begin_bits, size_type mask_size_bits, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @copydoc cudf::bitmask_and * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ rmm::device_buffer bitmask_and( - table_view const &view, + table_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @copydoc cudf::bitmask_or @@ -117,9 +117,9 @@ rmm::device_buffer bitmask_and( * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ rmm::device_buffer bitmask_or( - table_view const &view, + table_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Performs a bitwise AND of the specified bitmasks, @@ -135,11 +135,11 @@ rmm::device_buffer bitmask_or( */ void inplace_bitmask_and( device_span<bitmask_type> dest_mask, - host_span<bitmask_type const *> masks, + host_span<bitmask_type const*> masks, host_span<size_type const> masks_begin_bits, size_type mask_size_bits, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); } // namespace detail diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh index 16b7da0a083..b94257daacf 100644 --- a/cpp/include/cudf/detail/utilities/device_atomics.cuh +++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh @@ -95,12 +95,12 @@ struct genericAtomicOperationImpl { do { assumed = old; - T target_value = (is_32_align) ? T(old & 0xffff) : T(old >> 16); + T const target_value = (is_32_align) ?
T(old & 0xffff) : T(old >> 16); uint16_t updating_value = type_reinterpret(op(target_value, update_value)); - T_int new_value = (is_32_align) ? (old & 0xffff0000) | updating_value - : (old & 0xffff) | (T_int(updating_value) << 16); - old = atomicCAS(address_uint32, assumed, new_value); + T_int const new_value = (is_32_align) ? (old & 0xffff0000) | updating_value + : (old & 0xffff) | (T_int(updating_value) << 16); + old = atomicCAS(address_uint32, assumed, new_value); } while (assumed != old); return (is_32_align) ? T(old & 0xffff) : T(old >> 16); diff --git a/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh b/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh index 8c0abbad49f..05a788abd45 100644 --- a/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh +++ b/cpp/include/cudf/detail/utilities/transform_unary_functions.cuh @@ -50,7 +50,7 @@ struct null_replacing_transformer { } template - CUDA_HOST_DEVICE_CALLABLE type operator()(thrust::pair const &pair_value) + CUDA_HOST_DEVICE_CALLABLE type operator()(thrust::pair const& pair_value) { if (pair_value.second) return f(pair_value.first); @@ -83,7 +83,7 @@ struct meanvar { using this_t = cudf::meanvar; CUDA_HOST_DEVICE_CALLABLE - this_t operator+(this_t const &rhs) const + this_t operator+(this_t const& rhs) const { return this_t((this->value + rhs.value), (this->value_squared + rhs.value_squared), @@ -91,7 +91,7 @@ struct meanvar { }; CUDA_HOST_DEVICE_CALLABLE - bool operator==(this_t const &rhs) const + bool operator==(this_t const& rhs) const { return ((this->value == rhs.value) && (this->value_squared == rhs.value_squared) && (this->count == rhs.count)); @@ -114,7 +114,7 @@ struct meanvar { template struct transformer_squared { CUDA_HOST_DEVICE_CALLABLE - ElementType operator()(ElementType const &value) { return (value * value); }; + ElementType operator()(ElementType const& value) { return (value * value); }; }; /** @@ -131,7 +131,7 @@ struct transformer_meanvar { using ResultType = meanvar; CUDA_HOST_DEVICE_CALLABLE - ResultType operator()(thrust::pair const &pair) + ResultType operator()(thrust::pair const& pair) { ElementType v = pair.first; return meanvar(v, v * v, (pair.second) ? 1 : 0); diff --git a/cpp/include/cudf/io/avro.hpp b/cpp/include/cudf/io/avro.hpp index 18398ff4ceb..34410209c72 100644 --- a/cpp/include/cudf/io/avro.hpp +++ b/cpp/include/cudf/io/avro.hpp @@ -180,7 +180,7 @@ class avro_reader_options_builder { /** * @brief move avro_reader_options member once it's built. */ - operator avro_reader_options &&() { return std::move(options); } + operator avro_reader_options&&() { return std::move(options); } /** * @brief move avro_reader_options member once it's built. diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index 8efe871ad3a..1dff99735ec 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -574,9 +574,9 @@ class csv_reader_options { * * @param types Vector of dtypes in which the column needs to be read. */ - [ - [deprecated("The string-based interface will be deprecated." - "Use dtypes(std::vector) instead.")]] void + [[deprecated( + "The string-based interface will be deprecated." + "Use dtypes(std::vector) instead.")]] void set_dtypes(std::vector types) { _dtypes = std::move(types); @@ -997,9 +997,9 @@ class csv_reader_options_builder { * @param types Vector of dtypes in which the column needs to be read. * @return this for chaining. */ - [ - [deprecated("The string-based interface will be deprecated." 
- "Use dtypes(std::vector) instead.")]] csv_reader_options_builder& + [[deprecated( + "The string-based interface will be deprecated." + "Use dtypes(std::vector) instead.")]] csv_reader_options_builder& dtypes(std::vector types) { options._dtypes = std::move(types); @@ -1093,7 +1093,7 @@ class csv_reader_options_builder { /** * @brief move csv_reader_options member once it's built. */ - operator csv_reader_options &&() { return std::move(options); } + operator csv_reader_options&&() { return std::move(options); } /** * @brief move csv_reader_options member once it's built. @@ -1422,7 +1422,7 @@ class csv_writer_options_builder { /** * @brief move `csv_writer_options` member once it's built. */ - operator csv_writer_options &&() { return std::move(options); } + operator csv_writer_options&&() { return std::move(options); } /** * @brief move `csv_writer_options` member once it's built. diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp index 6c885a874ee..c1aff818121 100644 --- a/cpp/include/cudf/io/datasource.hpp +++ b/cpp/include/cudf/io/datasource.hpp @@ -322,9 +322,9 @@ class arrow_io_source : public datasource { filesystem = result.ValueOrDie(); // Parse the path from the URI - size_t start = arrow_uri.find(uri_start_delimiter) == std::string::npos - ? 0 - : arrow_uri.find(uri_start_delimiter) + uri_start_delimiter.size(); + size_t start = arrow_uri.find(uri_start_delimiter) == std::string::npos + ? 0 + : arrow_uri.find(uri_start_delimiter) + uri_start_delimiter.size(); size_t end = arrow_uri.find(uri_end_delimiter) - start; std::string_view path = arrow_uri.substr(start, end); diff --git a/cpp/include/cudf/io/detail/avro.hpp b/cpp/include/cudf/io/detail/avro.hpp index 4310d0e7c4b..98483d1c03e 100644 --- a/cpp/include/cudf/io/detail/avro.hpp +++ b/cpp/include/cudf/io/detail/avro.hpp @@ -46,10 +46,10 @@ class reader { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ - explicit reader(std::vector const &filepaths, - avro_reader_options const &options, + explicit reader(std::vector const& filepaths, + avro_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr); + rmm::mr::device_memory_resource* mr); /** * @brief Constructor from an array of datasources @@ -59,10 +59,10 @@ class reader { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ - explicit reader(std::vector> &&sources, - avro_reader_options const &options, + explicit reader(std::vector>&& sources, + avro_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr); + rmm::mr::device_memory_resource* mr); /** * @brief Destructor explicitly-declared to avoid inlined in header @@ -77,7 +77,7 @@ class reader { * * @return The set of columns along with table metadata */ - table_with_metadata read(avro_reader_options const &options, + table_with_metadata read(avro_reader_options const& options, rmm::cuda_stream_view stream = rmm::cuda_stream_default); }; } // namespace avro diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp index 8ec2818c2ca..89e589d306a 100644 --- a/cpp/include/cudf/io/detail/csv.hpp +++ b/cpp/include/cudf/io/detail/csv.hpp @@ -41,10 +41,10 @@ class reader { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device 
memory resource to use for device memory allocation */ - explicit reader(std::vector const &filepaths, - csv_reader_options const &options, + explicit reader(std::vector const& filepaths, + csv_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr); + rmm::mr::device_memory_resource* mr); /** * @brief Constructor from an array of datasources @@ -54,10 +54,10 @@ class reader { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ - explicit reader(std::vector> &&sources, - csv_reader_options const &options, + explicit reader(std::vector>&& sources, + csv_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr); + rmm::mr::device_memory_resource* mr); /** * @brief Destructor explicitly-declared to avoid inlined in header @@ -91,9 +91,9 @@ class writer { * @param mr Device memory resource to use for device memory allocation */ writer(std::unique_ptr sinkp, - csv_writer_options const &options, + csv_writer_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr); // cannot provide definition here (because + rmm::mr::device_memory_resource* mr); // cannot provide definition here (because // _impl is incomplete hence unique_ptr has // not enough sizeof() info) @@ -109,8 +109,8 @@ class writer { * @param metadata Table metadata and column names * @param stream CUDA stream used for device memory operations and kernel launches. */ - void write(table_view const &table, - const table_metadata *metadata = nullptr, + void write(table_view const& table, + const table_metadata* metadata = nullptr, rmm::cuda_stream_view stream = rmm::cuda_stream_default); }; } // namespace csv diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp index 6ed93dc5c25..e6d8f2de483 100644 --- a/cpp/include/cudf/io/detail/json.hpp +++ b/cpp/include/cudf/io/detail/json.hpp @@ -54,10 +54,10 @@ class reader { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ - explicit reader(std::vector const &filepaths, - json_reader_options const &options, + explicit reader(std::vector const& filepaths, + json_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr); + rmm::mr::device_memory_resource* mr); /** * @brief Constructor from an array of datasources @@ -67,10 +67,10 @@ class reader { * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ - explicit reader(std::vector> &&sources, - json_reader_options const &options, + explicit reader(std::vector>&& sources, + json_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr); + rmm::mr::device_memory_resource* mr); /** * @brief Destructor explicitly-declared to avoid inlined in header @@ -83,7 +83,7 @@ class reader { * @param[in] options Settings for controlling reading behavior * @return cudf::table object that contains the array of cudf::column. 
*/ - table_with_metadata read(json_reader_options const &options, + table_with_metadata read(json_reader_options const& options, rmm::cuda_stream_view stream = rmm::cuda_stream_default); }; diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 7d56c1c0fc6..2f4d0936d8b 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -277,7 +277,7 @@ class json_reader_options_builder { /** * @brief move json_reader_options member once it's built. */ - operator json_reader_options &&() { return std::move(options); } + operator json_reader_options&&() { return std::move(options); } /** * @brief move json_reader_options member once it's built. diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index bd1e4e96d7d..997f35ed922 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -331,7 +331,7 @@ class orc_reader_options_builder { /** * @brief move orc_reader_options member once it's built. */ - operator orc_reader_options &&() { return std::move(options); } + operator orc_reader_options&&() { return std::move(options); } /** * @brief move orc_reader_options member once it's built. @@ -550,7 +550,7 @@ class orc_writer_options_builder { /** * @brief move orc_writer_options member once it's built. */ - operator orc_writer_options &&() { return std::move(options); } + operator orc_writer_options&&() { return std::move(options); } /** * @brief move orc_writer_options member once it's built. @@ -724,7 +724,7 @@ class chunked_orc_writer_options_builder { /** * @brief move chunked_orc_writer_options member once it's built. */ - operator chunked_orc_writer_options &&() { return std::move(options); } + operator chunked_orc_writer_options&&() { return std::move(options); } /** * @brief move chunked_orc_writer_options member once it's built. diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 178e46a0c5c..2dd123ca2bc 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -339,7 +339,7 @@ class parquet_reader_options_builder { /** * @brief move parquet_reader_options member once it's built. */ - operator parquet_reader_options &&() { return std::move(options); } + operator parquet_reader_options&&() { return std::move(options); } /** * @brief move parquet_reader_options member once it's built. @@ -769,7 +769,7 @@ class parquet_writer_options_builder { /** * @brief move parquet_writer_options member once it's built. */ - operator parquet_writer_options &&() { return std::move(options); } + operator parquet_writer_options&&() { return std::move(options); } /** * @brief move parquet_writer_options member once it's built. @@ -987,7 +987,7 @@ class chunked_parquet_writer_options_builder { /** * @brief move chunked_parquet_writer_options member once it's built. */ - operator chunked_parquet_writer_options &&() { return std::move(options); } + operator chunked_parquet_writer_options&&() { return std::move(options); } /** * @brief move chunked_parquet_writer_options member once it's is built. diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index 9fd913517fc..d094118293b 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -64,10 +64,10 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE }; * @returns Output scalar with reduce result. 
*/ std::unique_ptr<scalar> reduce( - column_view const &col, - std::unique_ptr<aggregation> const &agg, + column_view const& col, + std::unique_ptr<aggregation> const& agg, data_type output_dtype, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Computes the scan of a column. @@ -88,11 +88,11 @@ std::unique_ptr reduce( * @returns unique pointer to new output column */ std::unique_ptr<column> scan( - const column_view &input, - std::unique_ptr<aggregation> const &agg, + const column_view& input, + std::unique_ptr<aggregation> const& agg, scan_type inclusive, null_policy null_handling = null_policy::EXCLUDE, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Determines the minimum and maximum values of a column. @@ -104,8 +104,8 @@ std::unique_ptr scan( * and the second scalar being the maximum value of the input column. */ std::pair<std::unique_ptr<scalar>, std::unique_ptr<scalar>> minmax( - column_view const &col, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + column_view const& col, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index e3f284cdbf3..a7559c7fbcb 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -52,7 +52,9 @@ __device__ inline size_type characters_in_string(const char* str, size_type byte #else size_type chars = 0; auto const end = ptr + bytes; - while (ptr < end) { chars += is_begin_utf8_char(*ptr++); } + while (ptr < end) { + chars += is_begin_utf8_char(*ptr++); + } return chars; #endif } @@ -133,7 +135,8 @@ __device__ inline string_view::const_iterator string_view::const_iterator::opera { const_iterator tmp(*this); size_type adjust = abs(offset); - while (adjust-- > 0) offset > 0 ? ++tmp : --tmp; + while (adjust-- > 0) + offset > 0 ? ++tmp : --tmp; return tmp; } @@ -141,7 +144,8 @@ __device__ inline string_view::const_iterator& string_view::const_iterator::oper string_view::const_iterator::difference_type offset) { size_type adjust = abs(offset); - while (adjust-- > 0) offset > 0 ? operator++() : operator--(); + while (adjust-- > 0) + offset > 0 ? operator++() : operator--(); return *this; } @@ -165,7 +169,8 @@ __device__ inline string_view::const_iterator& string_view::const_iterator::oper string_view::const_iterator::difference_type offset) { size_type adjust = abs(offset); - while (adjust-- > 0) offset > 0 ? operator--() : operator++(); + while (adjust-- > 0) + offset > 0 ? operator--() : operator++(); return *this; } @@ -174,7 +179,8 @@ __device__ inline string_view::const_iterator string_view::const_iterator::opera { const_iterator tmp(*this); size_type adjust = abs(offset); - while (adjust-- > 0) offset > 0 ? --tmp : ++tmp; + while (adjust-- > 0) + offset > 0 ?
--tmp : ++tmp; return tmp; } @@ -339,7 +345,8 @@ __device__ inline size_type string_view::find(const char* str, const char* ptr2 = str; for (size_type idx = 0; idx < len1; ++idx) { bool match = true; - for (size_type jdx = 0; match && (jdx < len2); ++jdx) match = (ptr1[jdx] == ptr2[jdx]); + for (size_type jdx = 0; match && (jdx < len2); ++jdx) + match = (ptr1[jdx] == ptr2[jdx]); if (match) return character_offset(idx + spos); ptr1++; } @@ -380,7 +387,8 @@ __device__ inline size_type string_view::rfind(const char* str, const char* ptr2 = str; for (int idx = 0; idx < len1; ++idx) { bool match = true; - for (size_type jdx = 0; match && (jdx < len2); ++jdx) match = (ptr1[jdx] == ptr2[jdx]); + for (size_type jdx = 0; match && (jdx < len2); ++jdx) + match = (ptr1[jdx] == ptr2[jdx]); if (match) return character_offset(epos - len2 - idx); ptr1--; // go backwards } diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index bec5299ab77..d174222b2ff 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -191,8 +191,8 @@ class element_equality_comparator { */ template ()>* = nullptr> - __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index) const - noexcept + __device__ bool operator()(size_type lhs_element_index, + size_type rhs_element_index) const noexcept { if (has_nulls) { bool const lhs_is_null{lhs.is_null(lhs_element_index)}; diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp index 8116097e38e..e1037efb5c8 100644 --- a/cpp/include/cudf/types.hpp +++ b/cpp/include/cudf/types.hpp @@ -18,10 +18,10 @@ #ifdef __CUDACC__ #define CUDA_HOST_DEVICE_CALLABLE __host__ __device__ inline -#define CUDA_DEVICE_CALLABLE __device__ inline +#define CUDA_DEVICE_CALLABLE __device__ inline #else #define CUDA_HOST_DEVICE_CALLABLE inline -#define CUDA_DEVICE_CALLABLE inline +#define CUDA_DEVICE_CALLABLE inline #endif #include diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp index 15613c8caa7..2036723a6ed 100644 --- a/cpp/include/cudf/utilities/error.hpp +++ b/cpp/include/cudf/utilities/error.hpp @@ -37,7 +37,7 @@ struct cuda_error : public std::runtime_error { } // namespace cudf #define STRINGIFY_DETAIL(x) #x -#define CUDF_STRINGIFY(x) STRINGIFY_DETAIL(x) +#define CUDF_STRINGIFY(x) STRINGIFY_DETAIL(x) /** * @addtogroup utility_error diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index 9fa67dccb52..462317ad66b 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -42,14 +42,14 @@ namespace test { * ``` */ class BaseFixture : public ::testing::Test { - rmm::mr::device_memory_resource *_mr{rmm::mr::get_current_device_resource()}; + rmm::mr::device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; public: /** * @brief Returns pointer to `device_memory_resource` that should be used for * all tests inheriting from this fixture */ - rmm::mr::device_memory_resource *mr() { return _mr; } + rmm::mr::device_memory_resource* mr() { return _mr; } }; template @@ -131,7 +131,7 @@ class UniformRandomGenerator { * @param lower Lower bound of the range * @param upper Upper bound of the desired range */ - template ()> * = nullptr> + template ()>* = nullptr> UniformRandomGenerator(T lower, T upper, uint64_t seed = detail::random_generator_incrementing_seed()) @@ -146,7 +146,7 @@ class UniformRandomGenerator { * @param lower Lower bound of the range * @param 
upper Upper bound of the desired range */ - template ()> * = nullptr> + template ()>* = nullptr> UniformRandomGenerator(typename TL::rep lower, typename TL::rep upper, uint64_t seed = detail::random_generator_incrementing_seed()) @@ -157,13 +157,13 @@ class UniformRandomGenerator { /** * @brief Returns the next random number. */ - template ()> * = nullptr> + template ()>* = nullptr> T generate() { return T{dist(rng)}; } - template ()> * = nullptr> + template ()>* = nullptr> T generate() { return T{typename T::duration{dist(rng)}}; @@ -237,7 +237,7 @@ inline auto make_binning() * @return Memory resource instance */ inline std::shared_ptr create_memory_resource( - std::string const &allocation_mode) + std::string const& allocation_mode) { if (allocation_mode == "binning") return make_binning(); if (allocation_mode == "cuda") return make_cuda(); @@ -257,7 +257,7 @@ inline std::shared_ptr create_memory_resource( * * @return Parsing results in the form of unordered map */ -inline auto parse_cudf_test_opts(int argc, char **argv) +inline auto parse_cudf_test_opts(int argc, char** argv) { try { cxxopts::Options options(argv[0], " - cuDF tests command line options"); @@ -265,7 +265,7 @@ inline auto parse_cudf_test_opts(int argc, char **argv) "rmm_mode", "RMM allocation mode", cxxopts::value()->default_value("pool")); return options.parse(argc, argv); - } catch (const cxxopts::OptionException &e) { + } catch (const cxxopts::OptionException& e) { CUDF_FAIL("Error parsing command line options"); } } @@ -281,7 +281,7 @@ inline auto parse_cudf_test_opts(int argc, char **argv) * allocation mode used for creating the default memory resource. */ #define CUDF_TEST_PROGRAM_MAIN() \ - int main(int argc, char **argv) \ + int main(int argc, char** argv) \ { \ ::testing::InitGoogleTest(&argc, argv); \ auto const cmd_opts = parse_cudf_test_opts(argc, argv); \ diff --git a/cpp/include/cudf_test/cudf_gtest.hpp b/cpp/include/cudf_test/cudf_gtest.hpp index b60c94394d1..1e2e44c79d1 100644 --- a/cpp/include/cudf_test/cudf_gtest.hpp +++ b/cpp/include/cudf_test/cudf_gtest.hpp @@ -34,10 +34,10 @@ * redefines them properly. 
*/ -#define Types Types_NOT_USED -#define Types0 Types0_NOT_USED -#define TypeList TypeList_NOT_USED -#define Templates Templates_NOT_USED +#define Types Types_NOT_USED +#define Types0 Types0_NOT_USED +#define TypeList TypeList_NOT_USED +#define Templates Templates_NOT_USED #define Templates0 Templates0_NOT_USED #include #undef Types @@ -104,7 +104,7 @@ struct TypeList> { { \ try { \ x; \ - } catch (const exception &e) { \ + } catch (const exception& e) { \ ASSERT_NE(nullptr, e.what()); \ EXPECT_THAT(e.what(), testing::StartsWith((startswith))); \ EXPECT_THAT(e.what(), testing::EndsWith((endswith))); \ diff --git a/cpp/include/cudf_test/cxxopts.hpp b/cpp/include/cudf_test/cxxopts.hpp index 49c551ab2f1..5135fd02e21 100644 --- a/cpp/include/cudf_test/cxxopts.hpp +++ b/cpp/include/cudf_test/cxxopts.hpp @@ -89,7 +89,9 @@ inline String& stringAppend(String& s, String a) { return s.append(std::move(a)) inline String& stringAppend(String& s, int n, UChar32 c) { - for (int i = 0; i != n; ++i) { s.append(c); } + for (int i = 0; i != n; ++i) { + s.append(c); + } return s; } @@ -1449,7 +1451,9 @@ inline void Options::generate_all_groups_help(String& result) const std::vector all_groups; all_groups.reserve(m_help.size()); - for (auto& group : m_help) { all_groups.push_back(group.first); } + for (auto& group : m_help) { + all_groups.push_back(group.first); + } generate_group_help(result, all_groups); } diff --git a/cpp/include/cudf_test/file_utilities.hpp b/cpp/include/cudf_test/file_utilities.hpp index 13394445922..90bf0cd99dc 100644 --- a/cpp/include/cudf_test/file_utilities.hpp +++ b/cpp/include/cudf_test/file_utilities.hpp @@ -28,17 +28,17 @@ class temp_directory { std::string _path; public: - temp_directory(const std::string &base_name) + temp_directory(const std::string& base_name) { std::string dir_template("/tmp"); - if (const char *env_p = std::getenv("WORKSPACE")) dir_template = env_p; + if (const char* env_p = std::getenv("WORKSPACE")) dir_template = env_p; dir_template += "/" + base_name + ".XXXXXX"; - auto const tmpdirptr = mkdtemp(const_cast(dir_template.data())); + auto const tmpdirptr = mkdtemp(const_cast(dir_template.data())); if (tmpdirptr == nullptr) CUDF_FAIL("Temporary directory creation failure: " + dir_template); _path = dir_template + "/"; } - static int rm_files(const char *pathname, const struct stat *sbuf, int type, struct FTW *ftwb) + static int rm_files(const char* pathname, const struct stat* sbuf, int type, struct FTW* ftwb) { return std::remove(pathname); } @@ -49,5 +49,5 @@ class temp_directory { nftw(_path.c_str(), rm_files, 10, FTW_DEPTH | FTW_MOUNT | FTW_PHYS); } - const std::string &path() const { return _path; } + const std::string& path() const { return _path; } }; diff --git a/cpp/scripts/run-clang-format.py b/cpp/scripts/run-clang-format.py index 2a7b66d4f77..c32e984278f 100755 --- a/cpp/scripts/run-clang-format.py +++ b/cpp/scripts/run-clang-format.py @@ -22,7 +22,7 @@ import sys import tempfile -EXPECTED_VERSION = "8.0.1" +EXPECTED_VERSION = "11.0.0" VERSION_REGEX = re.compile(r"clang-format version ([0-9.]+)") # NOTE: populate this list with more top-level dirs as we add more of them to # the cudf repo diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 28d1411c30d..e202a682311 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -80,7 +80,7 @@ namespace detail { rmm::device_buffer create_null_mask(size_type size, mask_state state, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + 
rmm::mr::device_memory_resource* mr) { size_type mask_size{0}; @@ -91,14 +91,14 @@ rmm::device_buffer create_null_mask(size_type size, if (state != mask_state::UNINITIALIZED) { uint8_t fill_value = (state == mask_state::ALL_VALID) ? 0xff : 0x00; CUDA_TRY(cudaMemsetAsync( - static_cast(mask.data()), fill_value, mask_size, stream.value())); + static_cast(mask.data()), fill_value, mask_size, stream.value())); } return mask; } namespace { -__global__ void set_null_mask_kernel(bitmask_type *__restrict__ destination, +__global__ void set_null_mask_kernel(bitmask_type* __restrict__ destination, size_type begin_bit, size_type end_bit, bool valid, @@ -130,7 +130,7 @@ __global__ void set_null_mask_kernel(bitmask_type *__restrict__ destination, // Set pre-allocated null mask of given bit range [begin_bit, end_bit) to valid, if valid==true, // or null, otherwise; -void set_null_mask(bitmask_type *bitmask, +void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid, @@ -145,7 +145,7 @@ void set_null_mask(bitmask_type *bitmask, num_bitmask_words(end_bit) - begin_bit / detail::size_in_bits(); cudf::detail::grid_1d config(number_of_mask_words, 256); set_null_mask_kernel<<>>( - static_cast(bitmask), begin_bit, end_bit, valid, number_of_mask_words); + static_cast(bitmask), begin_bit, end_bit, valid, number_of_mask_words); CHECK_CUDA(stream.value()); } } @@ -155,14 +155,14 @@ void set_null_mask(bitmask_type *bitmask, // Create a device_buffer for a null mask rmm::device_buffer create_null_mask(size_type size, mask_state state, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { return detail::create_null_mask(size, state, rmm::cuda_stream_default, mr); } // Set pre-allocated null mask of given bit range [begin_bit, end_bit) to valid, if valid==true, // or null, otherwise; -void set_null_mask(bitmask_type *bitmask, size_type begin_bit, size_type end_bit, bool valid) +void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid) { return detail::set_null_mask(bitmask, begin_bit, end_bit, valid); } @@ -181,10 +181,10 @@ namespace { * @param[out] global_count The number of non-zero bits in the specified range */ template -__global__ void count_set_bits_kernel(bitmask_type const *bitmask, +__global__ void count_set_bits_kernel(bitmask_type const* bitmask, size_type first_bit_index, size_type last_bit_index, - size_type *global_count) + size_type* global_count) { constexpr auto const word_size{detail::size_in_bits()}; @@ -215,7 +215,7 @@ __global__ void count_set_bits_kernel(bitmask_type const *bitmask, if (num_slack_bits > 0) { bitmask_type word = bitmask[word_index]; auto slack_mask = (first) ? 
set_least_significant_bits(num_slack_bits) - : set_most_significant_bits(num_slack_bits); + : set_most_significant_bits(num_slack_bits); thread_count -= __popc(word & slack_mask); } @@ -248,7 +248,7 @@ __global__ void count_set_bits_kernel(bitmask_type const *bitmask, * updated */ template -__global__ void subtract_set_bits_range_boundaries_kerenel(bitmask_type const *bitmask, +__global__ void subtract_set_bits_range_boundaries_kerenel(bitmask_type const* bitmask, size_type num_ranges, OffsetIterator first_bit_indices, OffsetIterator last_bit_indices, @@ -305,8 +305,8 @@ __global__ void subtract_set_bits_range_boundaries_kerenel(bitmask_type const *b * @param number_of_mask_words The number of `cudf::bitmask_type` words to copy */ // TODO: Also make binops test that uses offset in column_view -__global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination, - bitmask_type const *__restrict__ source, +__global__ void copy_offset_bitmask(bitmask_type* __restrict__ destination, + bitmask_type const* __restrict__ source, size_type source_begin_bit, size_type source_end_bit, size_type number_of_mask_words) @@ -323,7 +323,7 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination, // [first_word_index,last_word_index) struct to_word_index : public thrust::unary_function { const bool _inclusive = false; - size_type const *const _d_bit_indices = nullptr; + size_type const* const _d_bit_indices = nullptr; /** * @brief Constructor of a functor that converts bit indices to bitmask word @@ -333,12 +333,12 @@ struct to_word_index : public thrust::unary_function { * or exclusive. * @param[in] d_bit_indices Pointer to an array of bit indices */ - __host__ to_word_index(bool inclusive, size_type const *d_bit_indices) + __host__ to_word_index(bool inclusive, size_type const* d_bit_indices) : _inclusive(inclusive), _d_bit_indices(d_bit_indices) { } - __device__ size_type operator()(const size_type &i) const + __device__ size_type operator()(const size_type& i) const { auto bit_index = _d_bit_indices[i]; return word_index(bit_index) + ((_inclusive || intra_word_index(bit_index) == 0) ? 
0 : 1); @@ -350,11 +350,11 @@ struct to_word_index : public thrust::unary_function { namespace detail { // Create a bitmask from a specific range -rmm::device_buffer copy_bitmask(bitmask_type const *mask, +rmm::device_buffer copy_bitmask(bitmask_type const* mask, size_type begin_bit, size_type end_bit, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS(begin_bit >= 0, "Invalid range."); @@ -363,26 +363,22 @@ rmm::device_buffer copy_bitmask(bitmask_type const *mask, auto num_bytes = bitmask_allocation_size_bytes(end_bit - begin_bit); if ((mask == nullptr) || (num_bytes == 0)) { return dest_mask; } if (begin_bit == 0) { - dest_mask = rmm::device_buffer{static_cast(mask), num_bytes, stream, mr}; + dest_mask = rmm::device_buffer{static_cast(mask), num_bytes, stream, mr}; } else { auto number_of_mask_words = num_bitmask_words(end_bit - begin_bit); dest_mask = rmm::device_buffer{num_bytes, stream, mr}; cudf::detail::grid_1d config(number_of_mask_words, 256); copy_offset_bitmask<<>>( - static_cast(dest_mask.data()), - mask, - begin_bit, - end_bit, - number_of_mask_words); + static_cast(dest_mask.data()), mask, begin_bit, end_bit, number_of_mask_words); CHECK_CUDA(stream.value()); } return dest_mask; } // Create a bitmask from a column view -rmm::device_buffer copy_bitmask(column_view const &view, +rmm::device_buffer copy_bitmask(column_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; @@ -395,11 +391,11 @@ rmm::device_buffer copy_bitmask(column_view const &view, // Inplace Bitwise AND of the masks void inplace_bitmask_and(device_span dest_mask, - host_span masks, + host_span masks, host_span begin_bits, size_type mask_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { inplace_bitmask_binop( [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, @@ -412,11 +408,11 @@ void inplace_bitmask_and(device_span dest_mask, } // Bitwise AND of the masks -rmm::device_buffer bitmask_and(host_span masks, +rmm::device_buffer bitmask_and(host_span masks, host_span begin_bits, size_type mask_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { return bitmask_binop( [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, @@ -427,7 +423,7 @@ rmm::device_buffer bitmask_and(host_span masks, mr); } -cudf::size_type count_set_bits(bitmask_type const *bitmask, +cudf::size_type count_set_bits(bitmask_type const* bitmask, size_type start, size_type stop, rmm::cuda_stream_view stream = rmm::cuda_stream_default) @@ -455,7 +451,7 @@ cudf::size_type count_set_bits(bitmask_type const *bitmask, return non_zero_count.value(stream); } -cudf::size_type count_unset_bits(bitmask_type const *bitmask, +cudf::size_type count_unset_bits(bitmask_type const* bitmask, size_type start, size_type stop, rmm::cuda_stream_view stream = rmm::cuda_stream_default) @@ -465,7 +461,7 @@ cudf::size_type count_unset_bits(bitmask_type const *bitmask, return (num_bits - detail::count_set_bits(bitmask, start, stop, stream)); } -std::vector segmented_count_set_bits(bitmask_type const *bitmask, +std::vector segmented_count_set_bits(bitmask_type const* bitmask, host_span indices, rmm::cuda_stream_view stream) { @@ -570,7 +566,7 @@ std::vector 
segmented_count_set_bits(bitmask_type const *bitmask, return ret; } -std::vector<size_type> segmented_count_unset_bits(bitmask_type const *bitmask, +std::vector<size_type> segmented_count_unset_bits(bitmask_type const* bitmask, host_span<size_type const> indices, rmm::cuda_stream_view stream) { @@ -591,17 +587,17 @@ std::vector segmented_count_unset_bits(bitmask_type const *bitmask, } // Returns the bitwise AND of the null masks of all columns in the table view -rmm::device_buffer bitmask_and(table_view const &view, +rmm::device_buffer bitmask_and(table_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; if (view.num_rows() == 0 or view.num_columns() == 0) { return null_mask; } - std::vector<bitmask_type const *> masks; + std::vector<bitmask_type const*> masks; std::vector<size_type> offsets; - for (auto &&col : view) { + for (auto&& col : view) { if (col.nullable()) { masks.push_back(col.null_mask()); offsets.push_back(col.offset()); } } @@ -622,17 +618,17 @@ rmm::device_buffer bitmask_and(table_view const &view, } // Returns the bitwise OR of the null masks of all columns in the table view -rmm::device_buffer bitmask_or(table_view const &view, +rmm::device_buffer bitmask_or(table_view const& view, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; if (view.num_rows() == 0 or view.num_columns() == 0) { return null_mask; } - std::vector<bitmask_type const *> masks; + std::vector<bitmask_type const*> masks; std::vector<size_type> offsets; - for (auto &&col : view) { + for (auto&& col : view) { if (col.nullable()) { masks.push_back(col.null_mask()); offsets.push_back(col.offset()); } } @@ -654,21 +650,21 @@ rmm::device_buffer bitmask_or(table_view const &view, } // namespace detail // Count non-zero bits in the specified range -cudf::size_type count_set_bits(bitmask_type const *bitmask, size_type start, size_type stop) +cudf::size_type count_set_bits(bitmask_type const* bitmask, size_type start, size_type stop) { CUDF_FUNC_RANGE(); return detail::count_set_bits(bitmask, start, stop); } // Count zero bits in the specified range -cudf::size_type count_unset_bits(bitmask_type const *bitmask, size_type start, size_type stop) +cudf::size_type count_unset_bits(bitmask_type const* bitmask, size_type start, size_type stop) { CUDF_FUNC_RANGE(); return detail::count_unset_bits(bitmask, start, stop); } // Count non-zero bits in the specified ranges -std::vector<size_type> segmented_count_set_bits(bitmask_type const *bitmask, +std::vector<size_type> segmented_count_set_bits(bitmask_type const* bitmask, host_span<size_type const> indices) { CUDF_FUNC_RANGE(); @@ -676,7 +672,7 @@ std::vector segmented_count_set_bits(bitmask_type const *bitmask, } // Count zero bits in the specified ranges -std::vector<size_type> segmented_count_unset_bits(bitmask_type const *bitmask, +std::vector<size_type> segmented_count_unset_bits(bitmask_type const* bitmask, host_span<size_type const> indices) { CUDF_FUNC_RANGE(); @@ -684,26 +680,26 @@ std::vector segmented_count_unset_bits(bitmask_type const *bitmask, } // Create a bitmask from a specific range -rmm::device_buffer copy_bitmask(bitmask_type const *mask, +rmm::device_buffer copy_bitmask(bitmask_type const* mask, size_type begin_bit, size_type end_bit, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { return detail::copy_bitmask(mask, begin_bit, end_bit, rmm::cuda_stream_default, mr); } // Create a bitmask from a column view -rmm::device_buffer copy_bitmask(column_view const &view,
rmm::mr::device_memory_resource *mr) +rmm::device_buffer copy_bitmask(column_view const& view, rmm::mr::device_memory_resource* mr) { return detail::copy_bitmask(view, rmm::cuda_stream_default, mr); } -rmm::device_buffer bitmask_and(table_view const &view, rmm::mr::device_memory_resource *mr) +rmm::device_buffer bitmask_and(table_view const& view, rmm::mr::device_memory_resource* mr) { return detail::bitmask_and(view, rmm::cuda_stream_default, mr); } -rmm::device_buffer bitmask_or(table_view const &view, rmm::mr::device_memory_resource *mr) +rmm::device_buffer bitmask_or(table_view const& view, rmm::mr::device_memory_resource* mr) { return detail::bitmask_or(view, rmm::cuda_stream_default, mr); } diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index 3ee8e0a33a9..2a0496b316b 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -45,9 +45,9 @@ namespace cudf { // Copy ctor w/ optional stream/mr -column::column(column const &other, +column::column(column const& other, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : _type{other._type}, _size{other._size}, _data{other._data, stream, mr}, @@ -55,13 +55,13 @@ column::column(column const &other, _null_count{other._null_count} { _children.reserve(other.num_children()); - for (auto const &c : other._children) { + for (auto const& c : other._children) { _children.emplace_back(std::make_unique(*c, stream, mr)); } } // Move constructor -column::column(column &&other) noexcept +column::column(column&& other) noexcept : _type{other._type}, _size{other._size}, _data{std::move(other._data)}, @@ -91,12 +91,14 @@ column_view column::view() const // Create views of children std::vector child_views; child_views.reserve(_children.size()); - for (auto const &c : _children) { child_views.emplace_back(*c); } + for (auto const& c : _children) { + child_views.emplace_back(*c); + } return column_view{type(), size(), _data.data(), - static_cast(_null_mask.data()), + static_cast(_null_mask.data()), null_count(), 0, child_views}; @@ -110,7 +112,9 @@ mutable_column_view column::mutable_view() // create views of children std::vector child_views; child_views.reserve(_children.size()); - for (auto const &c : _children) { child_views.emplace_back(*c); } + for (auto const& c : _children) { + child_views.emplace_back(*c); + } // Store the old null count before resetting it. By accessing the value directly instead of // calling `null_count()`, we can avoid a potential invocation of `count_unset_bits()`. 
This does @@ -126,7 +130,7 @@ mutable_column_view column::mutable_view() return mutable_column_view{type(), size(), _data.data(), - static_cast(_null_mask.data()), + static_cast(_null_mask.data()), current_null_count, 0, child_views}; @@ -138,12 +142,12 @@ size_type column::null_count() const CUDF_FUNC_RANGE(); if (_null_count <= cudf::UNKNOWN_NULL_COUNT) { _null_count = - cudf::count_unset_bits(static_cast(_null_mask.data()), 0, size()); + cudf::count_unset_bits(static_cast(_null_mask.data()), 0, size()); } return _null_count; } -void column::set_null_mask(rmm::device_buffer &&new_null_mask, size_type new_null_count) +void column::set_null_mask(rmm::device_buffer&& new_null_mask, size_type new_null_count) { if (new_null_count > 0) { CUDF_EXPECTS(new_null_mask.size() >= cudf::bitmask_allocation_size_bytes(this->size()), @@ -154,7 +158,7 @@ void column::set_null_mask(rmm::device_buffer &&new_null_mask, size_type new_nul _null_count = new_null_count; } -void column::set_null_mask(rmm::device_buffer const &new_null_mask, +void column::set_null_mask(rmm::device_buffer const& new_null_mask, size_type new_null_count, rmm::cuda_stream_view stream) { @@ -177,10 +181,10 @@ namespace { struct create_column_from_view { cudf::column_view view; rmm::cuda_stream_view stream{}; - rmm::mr::device_memory_resource *mr; + rmm::mr::device_memory_resource* mr; template ::value> * = nullptr> + std::enable_if_t::value>* = nullptr> std::unique_ptr operator()() { cudf::strings_column_view sview(view); @@ -188,7 +192,7 @@ struct create_column_from_view { } template ::value> * = nullptr> + std::enable_if_t::value>* = nullptr> std::unique_ptr operator()() { std::vector> children; @@ -211,10 +215,10 @@ struct create_column_from_view { std::move(children)); } - template ()> * = nullptr> + template ()>* = nullptr> std::unique_ptr operator()() { - auto op = [&](auto const &child) { return std::make_unique(child, stream, mr); }; + auto op = [&](auto const& child) { return std::make_unique(child, stream, mr); }; auto begin = thrust::make_transform_iterator(view.child_begin(), op); auto children = std::vector>(begin, begin + view.num_children()); @@ -222,7 +226,7 @@ struct create_column_from_view { view.type(), view.size(), rmm::device_buffer{ - static_cast(view.head()) + (view.offset() * cudf::size_of(view.type())), + static_cast(view.head()) + (view.offset() * cudf::size_of(view.type())), view.size() * cudf::size_of(view.type()), stream, mr}, @@ -232,7 +236,7 @@ struct create_column_from_view { } template ::value> * = nullptr> + std::enable_if_t::value>* = nullptr> std::unique_ptr operator()() { auto lists_view = lists_column_view(view); @@ -240,7 +244,7 @@ struct create_column_from_view { } template ::value> * = nullptr> + std::enable_if_t::value>* = nullptr> std::unique_ptr operator()() { if (view.is_empty()) { return cudf::empty_like(view); } @@ -271,7 +275,7 @@ struct create_column_from_view { } // anonymous namespace // Copy from a view -column::column(column_view view, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) +column::column(column_view view, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) : // Move is needed here because the dereference operator of unique_ptr returns // an lvalue reference, which would otherwise dispatch to the copy constructor column{std::move(*type_dispatcher(view.type(), create_column_from_view{view, stream, mr}))} diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index 67d96bbc7ce..9456ae06b21 100644 --- 
a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -34,17 +34,17 @@ namespace detail { namespace { struct get_element_functor { - template () && !is_fixed_point()> *p = nullptr> + template () && !is_fixed_point()>* p = nullptr> std::unique_ptr operator()( - column_view const &input, + column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto s = make_fixed_width_scalar(data_type(type_to_id()), stream, mr); using ScalarType = cudf::scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); auto device_s = get_scalar_device_view(*typed_s); auto device_col = column_device_view::create(input, stream); @@ -58,12 +58,12 @@ struct get_element_functor { return s; } - template ::value> *p = nullptr> + template ::value>* p = nullptr> std::unique_ptr operator()( - column_view const &input, + column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto device_col = column_device_view::create(input, stream); @@ -83,12 +83,12 @@ struct get_element_functor { return std::make_unique(temp_data, temp_valid.value(stream), stream, mr); } - template ::value> *p = nullptr> + template ::value>* p = nullptr> std::unique_ptr operator()( - column_view const &input, + column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { auto dict_view = dictionary_column_view(input); auto indices_iter = detail::indexalator_factory::make_input_iterator(dict_view.indices()); @@ -119,12 +119,12 @@ struct get_element_functor { mr); } - template ::value> *p = nullptr> + template ::value>* p = nullptr> std::unique_ptr operator()( - column_view const &input, + column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { bool valid = is_element_valid_sync(input, index, stream); auto const child_col_idx = lists_column_view::child_column_index; @@ -144,12 +144,12 @@ struct get_element_functor { } } - template ()> *p = nullptr> + template ()>* p = nullptr> std::unique_ptr operator()( - column_view const &input, + column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { using Type = typename T::rep; @@ -175,12 +175,12 @@ struct get_element_functor { mr); } - template ::value> *p = nullptr> + template ::value>* p = nullptr> std::unique_ptr operator()( - column_view const &input, + column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { bool valid = is_element_valid_sync(input, index, stream); auto row_contents = @@ -192,10 +192,10 @@ struct get_element_functor { } // namespace -std::unique_ptr get_element(column_view const &input, 
+std::unique_ptr get_element(column_view const& input, size_type index, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(index >= 0 and index < input.size(), "Index out of bounds"); return type_dispatcher(input.type(), get_element_functor{}, input, index, stream, mr); @@ -203,9 +203,9 @@ std::unique_ptr get_element(column_view const &input, } // namespace detail -std::unique_ptr get_element(column_view const &input, +std::unique_ptr get_element(column_view const& input, size_type index, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { return detail::get_element(input, index, rmm::cuda_stream_default, mr); } diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 51ca6f5d962..2baf336bb9e 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -205,7 +205,8 @@ std::pair>, std::vector> match_d auto dict_cols = dictionary::detail::match_dictionaries(dict_views, stream, mr); // replace the updated_columns vector entries for the set of columns at col_idx auto dict_col_idx = 0; - for (auto& v : updated_columns) v[col_idx] = dict_cols[dict_col_idx++]->view(); + for (auto& v : updated_columns) + v[col_idx] = dict_cols[dict_col_idx++]->view(); // move the updated dictionary columns into the main output vector std::move(dict_cols.begin(), dict_cols.end(), std::back_inserter(dictionary_columns)); } diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index ff62a260d5c..87f83c6edd6 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -413,7 +413,9 @@ void sparse_to_dense_results(table_view const& keys, row_bitmask_ptr, stream, mr); - for (auto&& agg : agg_v) { agg->finalize(finalizer); } + for (auto&& agg : agg_v) { + agg->finalize(finalizer); + } } } diff --git a/cpp/src/groupby/sort/group_collect.cu b/cpp/src/groupby/sort/group_collect.cu index 1e6a681af94..a30d4639af8 100644 --- a/cpp/src/groupby/sort/group_collect.cu +++ b/cpp/src/groupby/sort/group_collect.cu @@ -41,11 +41,11 @@ namespace detail { * @return Pair of null-eliminated grouped values and corresponding offsets */ std::pair, std::unique_ptr> purge_null_entries( - column_view const &values, - column_view const &offsets, + column_view const& values, + column_view const& offsets, size_type num_groups, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { auto values_device_view = column_device_view::create(values, stream); @@ -81,12 +81,12 @@ std::pair, std::unique_ptr> purge_null_entries( std::move(null_purged_values), std::move(null_purged_offsets)); } -std::unique_ptr group_collect(column_view const &values, +std::unique_ptr group_collect(column_view const& values, cudf::device_span group_offsets, size_type num_groups, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { auto [child_column, offsets_column] = [null_handling, num_groups, &values, &group_offsets, stream, mr] { diff --git a/cpp/src/groupby/sort/group_nth_element.cu b/cpp/src/groupby/sort/group_nth_element.cu index c3d874f3b33..e7dc57f6c93 100644 --- a/cpp/src/groupby/sort/group_nth_element.cu +++ b/cpp/src/groupby/sort/group_nth_element.cu @@ -33,15 +33,15 @@ namespace cudf { namespace groupby { namespace detail { -std::unique_ptr group_nth_element(column_view const &values, - column_view const &group_sizes, 
+std::unique_ptr group_nth_element(column_view const& values, + column_view const& group_sizes, cudf::device_span group_labels, cudf::device_span group_offsets, size_type num_groups, size_type n, null_policy null_handling, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), "Size of values column should be same as that of group labels"); diff --git a/cpp/src/hash/managed.cuh b/cpp/src/hash/managed.cuh index c8d3178b1d8..c6cc60a6917 100644 --- a/cpp/src/hash/managed.cuh +++ b/cpp/src/hash/managed.cuh @@ -20,15 +20,15 @@ #include struct managed { - static void *operator new(size_t n) + static void* operator new(size_t n) { - void *ptr = 0; + void* ptr = 0; cudaError_t result = cudaMallocManaged(&ptr, n); if (cudaSuccess != result || 0 == ptr) throw std::bad_alloc(); return ptr; } - static void operator delete(void *ptr) noexcept + static void operator delete(void* ptr) noexcept { auto const free_result = cudaFree(ptr); assert(free_result == cudaSuccess); diff --git a/cpp/src/hash/unordered_multiset.cuh b/cpp/src/hash/unordered_multiset.cuh index 645d9bc5185..d28bf6f6fe5 100644 --- a/cpp/src/hash/unordered_multiset.cuh +++ b/cpp/src/hash/unordered_multiset.cuh @@ -38,8 +38,8 @@ template create(column_view const &col, rmm::cuda_stream_view stream) + static unordered_multiset create(column_view const& col, rmm::cuda_stream_view stream) { auto d_column = column_device_view::create(col, stream); auto d_col = *d_column; @@ -86,9 +86,9 @@ class unordered_multiset { auto hash_data = rmm::device_uvector(d_col.size(), stream); Hasher hasher; - size_type *d_hash_bins_start = hash_bins_start.data(); - size_type *d_hash_bins_end = hash_bins_end.data(); - Element *d_hash_data = hash_data.data(); + size_type* d_hash_bins_start = hash_bins_start.data(); + size_type* d_hash_bins_end = hash_bins_end.data(); + Element* d_hash_data = hash_data.data(); thrust::for_each(rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -134,8 +134,8 @@ class unordered_multiset { private: unordered_multiset(size_type size, - rmm::device_uvector &&hash_bins, - rmm::device_uvector &&hash_data) + rmm::device_uvector&& hash_bins, + rmm::device_uvector&& hash_data) : size{size}, hash_bins{std::move(hash_bins)}, hash_data{std::move(hash_data)} { } diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index 8f0599cdd5b..7227d7e4e0b 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -49,7 +49,7 @@ std::string container::get_encoded() return (len & 1) || (m_cur >= m_end) ? 
0 : std::min(len >> 1, static_cast(m_end - m_cur)); }(); - auto const s = reinterpret_cast(m_cur); + auto const s = reinterpret_cast(m_cur); m_cur += len; return std::string(s, len); } @@ -63,7 +63,7 @@ std::string container::get_encoded() * * @returns true if successful, false if error */ -bool container::parse(file_metadata *md, size_t max_num_rows, size_t first_row) +bool container::parse(file_metadata* md, size_t max_num_rows, size_t first_row) { constexpr uint32_t avro_magic = (('O' << 0) | ('b' << 8) | ('j' << 16) | (0x01 << 24)); uint32_t sig4, max_block_size; @@ -195,7 +195,7 @@ enum { * * @returns true if successful, false if error */ -bool schema_parser::parse(std::vector &schema, const std::string &json_str) +bool schema_parser::parse(std::vector& schema, const std::string& json_str) { // Empty schema if (json_str == "[]") return true; @@ -361,8 +361,8 @@ bool schema_parser::parse(std::vector &schema, const std::string & std::string schema_parser::get_str() { std::string s; - const char *start = m_cur; - const char *cur = start; + const char* start = m_cur; + const char* cur = start; while (cur < m_end && *cur++ != '"') ; int32_t len = static_cast(cur - start - 1); diff --git a/cpp/src/io/avro/avro.h b/cpp/src/io/avro/avro.h index 13f5e4ecb3c..fe8f5634815 100644 --- a/cpp/src/io/avro/avro.h +++ b/cpp/src/io/avro/avro.h @@ -82,16 +82,16 @@ class schema_parser { public: schema_parser() {} - bool parse(std::vector &schema, const std::string &str); + bool parse(std::vector& schema, const std::string& str); protected: bool more_data() const { return (m_cur < m_end); } std::string get_str(); protected: - const char *m_base; - const char *m_cur; - const char *m_end; + const char* m_base; + const char* m_cur; + const char* m_end; }; /** @@ -99,7 +99,7 @@ class schema_parser { */ class container { public: - container(uint8_t const *base, size_t len) noexcept : m_base{base}, m_cur{base}, m_end{base + len} + container(uint8_t const* base, size_t len) noexcept : m_base{base}, m_cur{base}, m_end{base + len} { } @@ -119,12 +119,12 @@ class container { T get_encoded(); public: - bool parse(file_metadata *md, size_t max_num_rows = 0x7fffffff, size_t first_row = 0); + bool parse(file_metadata* md, size_t max_num_rows = 0x7fffffff, size_t first_row = 0); protected: - const uint8_t *m_base; - const uint8_t *m_cur; - const uint8_t *m_end; + const uint8_t* m_base; + const uint8_t* m_cur; + const uint8_t* m_end; }; } // namespace avro diff --git a/cpp/src/io/avro/avro_gpu.cu b/cpp/src/io/avro/avro_gpu.cu index ebd7f51a08a..6fabcf00b8f 100644 --- a/cpp/src/io/avro/avro_gpu.cu +++ b/cpp/src/io/avro/avro_gpu.cu @@ -32,7 +32,7 @@ constexpr int max_shared_schema_len = 1000; * Avro varint encoding - see * https://avro.apache.org/docs/1.2.0/spec.html#binary_encoding */ -static inline int64_t __device__ avro_decode_zigzag_varint(const uint8_t *&cur, const uint8_t *end) +static inline int64_t __device__ avro_decode_zigzag_varint(const uint8_t*& cur, const uint8_t* end) { uint64_t u = 0; if (cur < end) { @@ -65,13 +65,13 @@ static inline int64_t __device__ avro_decode_zigzag_varint(const uint8_t *&cur, * * @return data pointer at the end of the row (start of next row) */ -static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, - schemadesc_s *schema_g, +static const uint8_t* __device__ avro_decode_row(const schemadesc_s* schema, + schemadesc_s* schema_g, uint32_t schema_len, size_t row, size_t max_rows, - const uint8_t *cur, - const uint8_t *end, + const uint8_t* cur, + const uint8_t* end, 
device_span global_dictionary) { uint32_t array_start = 0, array_repeat_count = 0; @@ -96,11 +96,11 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, skip = skip_after; } - void *dataptr = schema[i].dataptr; + void* dataptr = schema[i].dataptr; switch (kind) { case type_null: if (dataptr != nullptr && row < max_rows) { - atomicAnd(static_cast(dataptr) + (row >> 5), ~(1 << (row & 0x1f))); + atomicAnd(static_cast(dataptr) + (row >> 5), ~(1 << (row & 0x1f))); atomicAdd(&schema_g[i].count, 1); } break; @@ -113,13 +113,13 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, int64_t v = avro_decode_zigzag_varint(cur, end); if (kind == type_int) { if (dataptr != nullptr && row < max_rows) { - static_cast(dataptr)[row] = static_cast(v); + static_cast(dataptr)[row] = static_cast(v); } } else if (kind == type_long) { - if (dataptr != nullptr && row < max_rows) { static_cast(dataptr)[row] = v; } + if (dataptr != nullptr && row < max_rows) { static_cast(dataptr)[row] = v; } } else { // string or enum size_t count = 0; - const char *ptr = 0; + const char* ptr = 0; if (kind == type_enum) { // dictionary size_t idx = schema[i].count + v; if (idx < global_dictionary.size()) { @@ -127,13 +127,13 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, count = global_dictionary[idx].second; } } else if (v >= 0 && cur + v <= end) { // string - ptr = reinterpret_cast(cur); + ptr = reinterpret_cast(cur); count = (size_t)v; cur += count; } if (dataptr != nullptr && row < max_rows) { - static_cast(dataptr)[row].first = ptr; - static_cast(dataptr)[row].second = count; + static_cast(dataptr)[row].first = ptr; + static_cast(dataptr)[row].second = count; } } } break; @@ -147,7 +147,7 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, } else { v = 0; } - static_cast(dataptr)[row] = v; + static_cast(dataptr)[row] = v; } else { cur += 4; } @@ -162,7 +162,7 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, } else { v = 0; } - static_cast(dataptr)[row] = v; + static_cast(dataptr)[row] = v; } else { cur += 8; } @@ -170,8 +170,8 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, case type_boolean: if (dataptr != nullptr && row < max_rows) { - uint8_t v = (cur < end) ? *cur : 0; - static_cast(dataptr)[row] = (v) ? 1 : 0; + uint8_t v = (cur < end) ? *cur : 0; + static_cast(dataptr)[row] = (v) ? 
1 : 0; } cur++; break; @@ -228,10 +228,10 @@ static const uint8_t *__device__ avro_decode_row(const schemadesc_s *schema, */ // blockDim {32,num_warps,1} extern "C" __global__ void __launch_bounds__(num_warps * 32, 2) - gpuDecodeAvroColumnData(block_desc_s *blocks, - schemadesc_s *schema_g, + gpuDecodeAvroColumnData(block_desc_s* blocks, + schemadesc_s* schema_g, device_span global_dictionary, - const uint8_t *avro_data, + const uint8_t* avro_data, uint32_t num_blocks, uint32_t schema_len, uint32_t min_row_size, @@ -241,8 +241,8 @@ extern "C" __global__ void __launch_bounds__(num_warps * 32, 2) __shared__ __align__(8) schemadesc_s g_shared_schema[max_shared_schema_len]; __shared__ __align__(8) block_desc_s blk_g[num_warps]; - schemadesc_s *schema; - block_desc_s *const blk = &blk_g[threadIdx.y]; + schemadesc_s* schema; + block_desc_s* const blk = &blk_g[threadIdx.y]; uint32_t block_id = blockIdx.x * num_warps + threadIdx.y; size_t cur_row; uint32_t rows_remaining; @@ -267,7 +267,7 @@ extern "C" __global__ void __launch_bounds__(num_warps * 32, 2) end = cur + blk->size; while (rows_remaining > 0 && cur < end) { uint32_t nrows; - const uint8_t *start = cur; + const uint8_t* start = cur; if (cur_row > first_row + max_rows) break; if (cur + min_row_size * rows_remaining == end) { @@ -311,10 +311,10 @@ extern "C" __global__ void __launch_bounds__(num_warps * 32, 2) * @param[in] min_row_size Minimum size in bytes of a row * @param[in] stream CUDA stream to use, default 0 */ -void DecodeAvroColumnData(block_desc_s *blocks, - schemadesc_s *schema, +void DecodeAvroColumnData(block_desc_s* blocks, + schemadesc_s* schema, device_span global_dictionary, - const uint8_t *avro_data, + const uint8_t* avro_data, uint32_t num_blocks, uint32_t schema_len, size_t max_rows, diff --git a/cpp/src/io/avro/avro_gpu.h b/cpp/src/io/avro/avro_gpu.h index a82d3604d02..a895d1bea02 100644 --- a/cpp/src/io/avro/avro_gpu.h +++ b/cpp/src/io/avro/avro_gpu.h @@ -33,7 +33,7 @@ struct schemadesc_s { uint32_t kind; // avro type kind uint32_t count; // for records/unions: number of following child columns, for nulls: global // null_count, for enums: dictionary ofs - void *dataptr; // Ptr to column data, or null if column not selected + void* dataptr; // Ptr to column data, or null if column not selected }; /** @@ -50,10 +50,10 @@ struct schemadesc_s { * @param[in] min_row_size Minimum size in bytes of a row * @param[in] stream CUDA stream to use, default 0 */ -void DecodeAvroColumnData(block_desc_s *blocks, - schemadesc_s *schema, +void DecodeAvroColumnData(block_desc_s* blocks, + schemadesc_s* schema, cudf::device_span global_dictionary, - const uint8_t *avro_data, + const uint8_t* avro_data, uint32_t num_blocks, uint32_t schema_len, size_t max_rows = ~0, diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index 21253ce8cdf..f6ffdd99d35 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -46,7 +46,7 @@ namespace { /** * @brief Function that translates Avro data kind to cuDF type enum */ -type_id to_type_id(const avro::schema_entry *col) +type_id to_type_id(const avro::schema_entry* col) { switch (col->kind) { case avro::type_boolean: return type_id::BOOL8; @@ -69,7 +69,7 @@ type_id to_type_id(const avro::schema_entry *col) */ class metadata : public file_metadata { public: - explicit metadata(datasource *const src) : source(src) {} + explicit metadata(datasource* const src) : source(src) {} /** * @brief Initializes the parser and filters down to a subset of rows @@ -77,7 
+77,7 @@ class metadata : public file_metadata { * @param[in,out] row_start Starting row of the selection * @param[in,out] row_count Total number of rows selected */ - void init_and_select_rows(int &row_start, int &row_count) + void init_and_select_rows(int& row_start, int& row_count) { const auto buffer = source->host_read(0, source->size()); avro::container pod(buffer->data(), buffer->size()); @@ -100,7 +100,7 @@ class metadata : public file_metadata { const auto num_avro_columns = static_cast(columns.size()); if (!use_names.empty()) { int index = 0; - for (const auto &use_name : use_names) { + for (const auto& use_name : use_names) { for (int i = 0; i < num_avro_columns; ++i, ++index) { if (index >= num_avro_columns) { index = 0; } if (columns[index].name == use_name && @@ -135,10 +135,10 @@ class metadata : public file_metadata { } private: - datasource *const source; + datasource* const source; }; -rmm::device_buffer reader::impl::decompress_data(const rmm::device_buffer &comp_block_data, +rmm::device_buffer reader::impl::decompress_data(const rmm::device_buffer& comp_block_data, rmm::cuda_stream_view stream) { size_t uncompressed_data_size = 0; @@ -149,12 +149,14 @@ rmm::device_buffer reader::impl::decompress_data(const rmm::device_buffer &comp_ // Guess an initial maximum uncompressed block size uint32_t initial_blk_len = (_metadata->max_block_size * 2 + 0xfff) & ~0xfff; uncompressed_data_size = initial_blk_len * _metadata->block_list.size(); - for (size_t i = 0; i < inflate_in.size(); ++i) { inflate_in[i].dstSize = initial_blk_len; } + for (size_t i = 0; i < inflate_in.size(); ++i) { + inflate_in[i].dstSize = initial_blk_len; + } } else if (_metadata->codec == "snappy") { // Extract the uncompressed length from the snappy stream for (size_t i = 0; i < _metadata->block_list.size(); i++) { const auto buffer = _source->host_read(_metadata->block_list[i].offset, 4); - const uint8_t *blk = buffer->data(); + const uint8_t* blk = buffer->data(); uint32_t blk_len = blk[0]; if (blk_len > 0x7f) { blk_len = (blk_len & 0x7f) | (blk[1] << 7); @@ -176,9 +178,9 @@ rmm::device_buffer reader::impl::decompress_data(const rmm::device_buffer &comp_ for (size_t i = 0, dst_pos = 0; i < _metadata->block_list.size(); i++) { const auto src_pos = _metadata->block_list[i].offset - base_offset; - inflate_in[i].srcDevice = static_cast(comp_block_data.data()) + src_pos; + inflate_in[i].srcDevice = static_cast(comp_block_data.data()) + src_pos; inflate_in[i].srcSize = _metadata->block_list[i].size; - inflate_in[i].dstDevice = static_cast(decomp_block_data.data()) + dst_pos; + inflate_in[i].dstDevice = static_cast(decomp_block_data.data()) + dst_pos; // Update blocks offsets & sizes to refer to uncompressed data _metadata->block_list[i].offset = dst_pos; @@ -215,7 +217,7 @@ rmm::device_buffer reader::impl::decompress_data(const rmm::device_buffer &comp_ if (actual_uncompressed_size > uncompressed_data_size) { decomp_block_data.resize(actual_uncompressed_size, stream); for (size_t i = 0, dst_pos = 0; i < _metadata->block_list.size(); i++) { - auto dst_base = static_cast(decomp_block_data.data()); + auto dst_base = static_cast(decomp_block_data.data()); inflate_in[i].dstDevice = dst_base + dst_pos; _metadata->block_list[i].offset = dst_pos; @@ -233,12 +235,12 @@ rmm::device_buffer reader::impl::decompress_data(const rmm::device_buffer &comp_ return decomp_block_data; } -void reader::impl::decode_data(const rmm::device_buffer &block_data, - const std::vector> &dict, +void reader::impl::decode_data(const 
rmm::device_buffer& block_data, + const std::vector>& dict, device_span global_dictionary, size_t num_rows, std::vector> selection, - std::vector &out_buffers, + std::vector& out_buffers, rmm::cuda_stream_view stream) { // Build gpu schema @@ -277,7 +279,7 @@ void reader::impl::decode_data(const rmm::device_buffer &block_data, _metadata->schema[i + 2].kind == type_null)), "Union with non-null type not currently supported"); } - std::vector valid_alias(out_buffers.size(), nullptr); + std::vector valid_alias(out_buffers.size(), nullptr); for (size_t i = 0; i < out_buffers.size(); i++) { const auto col_idx = selection[i].first; int schema_data_idx = _metadata->columns[col_idx].schema_data_idx; @@ -302,10 +304,10 @@ void reader::impl::decode_data(const rmm::device_buffer &block_data, _metadata->block_list.data(), _metadata->block_list.size() * sizeof(block_desc_s), stream); schema_desc.host_to_device(stream); - gpu::DecodeAvroColumnData(static_cast(block_list.data()), + gpu::DecodeAvroColumnData(static_cast(block_list.data()), schema_desc.device_ptr(), global_dictionary, - static_cast(block_data.data()), + static_cast(block_data.data()), static_cast(_metadata->block_list.size()), static_cast(schema_desc.size()), _metadata->num_rows, @@ -333,15 +335,15 @@ void reader::impl::decode_data(const rmm::device_buffer &block_data, } reader::impl::impl(std::unique_ptr source, - avro_reader_options const &options, - rmm::mr::device_memory_resource *mr) + avro_reader_options const& options, + rmm::mr::device_memory_resource* mr) : _mr(mr), _source(std::move(source)), _columns(options.get_columns()) { // Open the source Avro dataset metadata _metadata = std::make_unique(_source.get()); } -table_with_metadata reader::impl::read(avro_reader_options const &options, +table_with_metadata reader::impl::read(avro_reader_options const& options, rmm::cuda_stream_view stream) { auto skip_rows = options.get_skip_rows(); @@ -358,8 +360,8 @@ table_with_metadata reader::impl::read(avro_reader_options const &options, if (selected_columns.size() != 0) { // Get a list of column data types std::vector column_types; - for (const auto &col : selected_columns) { - auto &col_schema = _metadata->schema[_metadata->columns[col.first].schema_data_idx]; + for (const auto& col : selected_columns) { + auto& col_schema = _metadata->schema[_metadata->columns[col.first].schema_data_idx]; auto col_type = to_type_id(&col_schema); CUDF_EXPECTS(col_type != type_id::EMPTY, "Unknown type"); @@ -372,7 +374,7 @@ table_with_metadata reader::impl::read(avro_reader_options const &options, block_data = rmm::device_buffer{_metadata->total_data_size, stream}; auto read_bytes = _source->device_read(_metadata->block_list[0].offset, _metadata->total_data_size, - static_cast(block_data.data()), + static_cast(block_data.data()), stream); block_data.resize(read_bytes, stream); } else { @@ -396,11 +398,13 @@ table_with_metadata reader::impl::read(avro_reader_options const &options, std::vector> dict(column_types.size()); for (size_t i = 0; i < column_types.size(); ++i) { auto col_idx = selected_columns[i].first; - auto &col_schema = _metadata->schema[_metadata->columns[col_idx].schema_data_idx]; + auto& col_schema = _metadata->schema[_metadata->columns[col_idx].schema_data_idx]; dict[i].first = static_cast(total_dictionary_entries); dict[i].second = static_cast(col_schema.symbols.size()); total_dictionary_entries += dict[i].second; - for (const auto &sym : col_schema.symbols) { dictionary_data_size += sym.length(); } + for (const auto& sym : 
col_schema.symbols) { + dictionary_data_size += sym.length(); + } } rmm::device_uvector d_global_dict(total_dictionary_entries, stream); @@ -411,10 +415,10 @@ table_with_metadata reader::impl::read(avro_reader_options const &options, size_t dict_pos = 0; for (size_t i = 0; i < column_types.size(); ++i) { auto const col_idx = selected_columns[i].first; - auto const &col_schema = _metadata->schema[_metadata->columns[col_idx].schema_data_idx]; + auto const& col_schema = _metadata->schema[_metadata->columns[col_idx].schema_data_idx]; auto const col_dict_entries = &(h_global_dict[dict[i].first]); for (size_t j = 0; j < dict[i].second; j++) { - auto const &symbols = col_schema.symbols[j]; + auto const& symbols = col_schema.symbols[j]; auto const data_dst = h_global_dict_data.data() + dict_pos; auto const len = symbols.length(); @@ -471,20 +475,20 @@ table_with_metadata reader::impl::read(avro_reader_options const &options, } // Forward to implementation -reader::reader(std::vector const &filepaths, - avro_reader_options const &options, +reader::reader(std::vector const& filepaths, + avro_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(filepaths.size() == 1, "Only a single source is currently supported."); _impl = std::make_unique(datasource::create(filepaths[0]), options, mr); } // Forward to implementation -reader::reader(std::vector> &&sources, - avro_reader_options const &options, +reader::reader(std::vector>&& sources, + avro_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(sources.size() == 1, "Only a single source is currently supported."); _impl = std::make_unique(std::move(sources[0]), options, mr); @@ -494,7 +498,7 @@ reader::reader(std::vector> &&sources, reader::~reader() = default; // Forward to implementation -table_with_metadata reader::read(avro_reader_options const &options, rmm::cuda_stream_view stream) +table_with_metadata reader::read(avro_reader_options const& options, rmm::cuda_stream_view stream) { return _impl->read(options, stream); } diff --git a/cpp/src/io/avro/reader_impl.hpp b/cpp/src/io/avro/reader_impl.hpp index 8e09da03563..9af32ed88a0 100644 --- a/cpp/src/io/avro/reader_impl.hpp +++ b/cpp/src/io/avro/reader_impl.hpp @@ -61,8 +61,8 @@ class reader::impl { * @param mr Device memory resource to use for device memory allocation */ explicit impl(std::unique_ptr source, - avro_reader_options const &options, - rmm::mr::device_memory_resource *mr); + avro_reader_options const& options, + rmm::mr::device_memory_resource* mr); /** * @brief Read an entire set or a subset of data and returns a set of columns @@ -72,7 +72,7 @@ class reader::impl { * * @return The set of columns along with metadata */ - table_with_metadata read(avro_reader_options const &options, rmm::cuda_stream_view stream); + table_with_metadata read(avro_reader_options const& options, rmm::cuda_stream_view stream); private: /** @@ -83,7 +83,7 @@ class reader::impl { * * @return Device buffer to decompressed block data */ - rmm::device_buffer decompress_data(const rmm::device_buffer &comp_block_data, + rmm::device_buffer decompress_data(const rmm::device_buffer& comp_block_data, rmm::cuda_stream_view stream); /** @@ -95,16 +95,16 @@ class reader::impl { * @param out_buffers Output columns' device buffers * @param stream CUDA stream used for device memory operations and kernel launches. 
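// avro_decode_zigzag_varint above follows the Avro binary encoding cited in
// its comment: a base-128 varint (7 payload bits per byte, high bit set on
// every byte but the last) carrying the zigzag mapping of a signed value.
// A minimal host-side sketch of the same decode, with illustrative names
// rather than cudf's device API:
#include <cstdint>

static int64_t decode_zigzag_varint(const uint8_t*& cur, const uint8_t* end)
{
  uint64_t u     = 0;
  uint32_t shift = 0;
  while (cur < end && shift < 64) {
    uint8_t byte = *cur++;
    u |= static_cast<uint64_t>(byte & 0x7f) << shift;  // accumulate 7 payload bits
    if ((byte & 0x80) == 0) break;  // high bit clear: final byte
    shift += 7;
  }
  // zigzag maps 0,1,2,3,4,... back to 0,-1,1,-2,2,...
  return static_cast<int64_t>(u >> 1) ^ -static_cast<int64_t>(u & 1);
}
// e.g. the single byte 0x03 decodes to -2 and 0x04 decodes to 2.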
*/ - void decode_data(const rmm::device_buffer &block_data, - const std::vector> &dict, + void decode_data(const rmm::device_buffer& block_data, + const std::vector>& dict, cudf::device_span global_dictionary, size_t num_rows, std::vector> columns, - std::vector &out_buffers, + std::vector& out_buffers, rmm::cuda_stream_view stream); private: - rmm::mr::device_memory_resource *_mr = nullptr; + rmm::mr::device_memory_resource* _mr = nullptr; std::unique_ptr _source; std::unique_ptr _metadata; diff --git a/cpp/src/io/comp/brotli_dict.cpp b/cpp/src/io/comp/brotli_dict.cpp index b493ebd6bfb..3e6939bb816 100644 --- a/cpp/src/io/comp/brotli_dict.cpp +++ b/cpp/src/io/comp/brotli_dict.cpp @@ -6528,7 +6528,7 @@ static const brotli_dictionary_s g_dictionary = { 136, 224, 164, 184, 224, 164, 149, 224, 165, 141, 224, 164, 176, 224, 164, 191, 224, 164, 175, 224, 164, 164, 224, 164, 190}}; -const brotli_dictionary_s *get_brotli_dictionary(void) { return &g_dictionary; } +const brotli_dictionary_s* get_brotli_dictionary(void) { return &g_dictionary; } } // namespace io } // namespace cudf diff --git a/cpp/src/io/comp/brotli_dict.h b/cpp/src/io/comp/brotli_dict.h index c4114b7fbcf..4c1fec1492c 100644 --- a/cpp/src/io/comp/brotli_dict.h +++ b/cpp/src/io/comp/brotli_dict.h @@ -79,7 +79,7 @@ struct brotli_dictionary_s { constexpr int brotli_min_dictionary_word_length = 4; constexpr int brotli_max_dictionary_word_length = 24; -const brotli_dictionary_s *get_brotli_dictionary(void); +const brotli_dictionary_s* get_brotli_dictionary(void); } // namespace io } // namespace cudf diff --git a/cpp/src/io/comp/cpu_unbz2.cpp b/cpp/src/io/comp/cpu_unbz2.cpp index 28d7394e485..f4cb6edd41f 100644 --- a/cpp/src/io/comp/cpu_unbz2.cpp +++ b/cpp/src/io/comp/cpu_unbz2.cpp @@ -101,13 +101,13 @@ namespace io { // Constants for the back end. #define BZ_MAX_ALPHA_SIZE 258 -#define BZ_MAX_CODE_LEN 23 +#define BZ_MAX_CODE_LEN 23 #define BZ_RUNA 0 #define BZ_RUNB 1 #define BZ_N_GROUPS 6 -#define BZ_G_SIZE 50 +#define BZ_G_SIZE 50 #define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) @@ -121,16 +121,16 @@ typedef struct { // Decoder state typedef struct { // Input - const uint8_t *cur; - const uint8_t *end; - const uint8_t *base; + const uint8_t* cur; + const uint8_t* end; + const uint8_t* base; uint64_t bitbuf; uint32_t bitpos; // Output - uint8_t *out; - uint8_t *outend; - uint8_t *outbase; + uint8_t* out; + uint8_t* outend; + uint8_t* outbase; // misc administratium uint32_t blockSize100k; @@ -156,25 +156,25 @@ typedef struct { } unbz_state_s; // return next 32 bits -static inline uint32_t next32bits(const unbz_state_s *s) +static inline uint32_t next32bits(const unbz_state_s* s) { return (uint32_t)((s->bitbuf << s->bitpos) >> 32); } // return next n bits -static inline uint32_t showbits(const unbz_state_s *s, uint32_t n) +static inline uint32_t showbits(const unbz_state_s* s, uint32_t n) { return (uint32_t)((s->bitbuf << s->bitpos) >> (64 - n)); } // update bit position, refill bit buffer if necessary -static void skipbits(unbz_state_s *s, uint32_t n) +static void skipbits(unbz_state_s* s, uint32_t n) { uint32_t bitpos = s->bitpos + n; if (bitpos >= 32) { - const uint8_t *cur = s->cur + 4; + const uint8_t* cur = s->cur + 4; uint32_t next32 = - (cur + 4 < s->end) ? __builtin_bswap32(*reinterpret_cast(cur + 4)) : 0; + (cur + 4 < s->end) ? 
__builtin_bswap32(*reinterpret_cast(cur + 4)) : 0; s->cur = cur; s->bitbuf = (s->bitbuf << 32) | next32; bitpos &= 0x1f; @@ -182,7 +182,7 @@ static void skipbits(unbz_state_s *s, uint32_t n) s->bitpos = bitpos; } -static inline uint32_t getbits(unbz_state_s *s, uint32_t n) +static inline uint32_t getbits(unbz_state_s* s, uint32_t n) { uint32_t bits = showbits(s, n); skipbits(s, n); @@ -190,7 +190,7 @@ static inline uint32_t getbits(unbz_state_s *s, uint32_t n) } /*---------------------------------------------------*/ -int32_t bz2_decompress_block(unbz_state_s *s) +int32_t bz2_decompress_block(unbz_state_s* s) { int nInUse; @@ -204,7 +204,7 @@ int32_t bz2_decompress_block(unbz_state_s *s) int32_t groupNo; int32_t groupPos; uint32_t nblock, nblockMAX; - const huff_s *gSel = nullptr; + const huff_s* gSel = nullptr; uint32_t inUse16; uint32_t sig0, sig1; @@ -263,11 +263,11 @@ int32_t bz2_decompress_block(unbz_state_s *s) // Now the coding tables for (t = 0; t < nGroups; t++) { int32_t pp, vec; - uint8_t *length = &s->len[0]; + uint8_t* length = &s->len[0]; int32_t curr = getbits(s, 5); int32_t minLen = BZ_MAX_CODE_LEN - 1; int32_t maxLen = 0; - huff_s *sel = &s->ht[t]; + huff_s* sel = &s->ht[t]; for (i = 0; i < alphaSize; i++) { for (;;) { uint32_t v = showbits(s, 2); @@ -297,9 +297,11 @@ int32_t bz2_decompress_block(unbz_state_s *s) sel->base[i] = 0; sel->limit[i] = 0; } - for (i = 0; i < alphaSize; i++) sel->base[length[i] + 1]++; + for (i = 0; i < alphaSize; i++) + sel->base[length[i] + 1]++; - for (i = 1; i < BZ_MAX_CODE_LEN; i++) sel->base[i] += sel->base[i - 1]; + for (i = 1; i < BZ_MAX_CODE_LEN; i++) + sel->base[i] += sel->base[i - 1]; vec = 0; for (i = minLen; i <= maxLen; i++) { @@ -318,7 +320,8 @@ int32_t bz2_decompress_block(unbz_state_s *s) EOB = nInUse + 1; nblockMAX = 100000 * s->blockSize100k; - for (i = 0; i <= 255; i++) s->unzftab[i] = 0; + for (i = 0; i <= 255; i++) + s->unzftab[i] = 0; // MTF init { @@ -456,7 +459,7 @@ int32_t bz2_decompress_block(unbz_state_s *s) // Verify the end-of-block signature: should be followed by another block or an end-of-stream // signature { - const uint8_t *save_cur = s->cur; + const uint8_t* save_cur = s->cur; uint64_t save_bitbuf = s->bitbuf; uint32_t save_bitpos = s->bitpos; sig0 = getbits(s, 24); @@ -476,14 +479,14 @@ int32_t bz2_decompress_block(unbz_state_s *s) } } -static void bzUnRLE(unbz_state_s *s) +static void bzUnRLE(unbz_state_s* s) { - uint8_t *out = s->out; - uint8_t *outend = s->outend; + uint8_t* out = s->out; + uint8_t* outend = s->outend; int32_t rle_cnt = s->save_nblock; int cprev = -1; - std::vector &tt = s->tt; + std::vector& tt = s->tt; uint32_t pos = tt[s->origPtr] >> 8; int mask = ~0; @@ -520,7 +523,7 @@ static void bzUnRLE(unbz_state_s *s) } int32_t cpu_bz2_uncompress( - const uint8_t *source, size_t sourceLen, uint8_t *dest, size_t *destLen, uint64_t *block_start) + const uint8_t* source, size_t sourceLen, uint8_t* dest, size_t* destLen, uint64_t* block_start) { unbz_state_s s{}; uint32_t v; @@ -534,7 +537,7 @@ int32_t cpu_bz2_uncompress( s.base = source; s.end = source + sourceLen - 4; // We will not read the final combined CRC (last 4 bytes of the file) - s.bitbuf = __builtin_bswap64(*reinterpret_cast(source)); + s.bitbuf = __builtin_bswap64(*reinterpret_cast(source)); s.bitpos = 0; s.out = dest; @@ -560,7 +563,7 @@ int32_t cpu_bz2_uncompress( s.cur = source + (size_t)(bit_offs >> 3); s.bitpos = (uint32_t)(bit_offs & 7); if (s.cur + 8 > s.end) return BZ_PARAM_ERROR; - s.bitbuf = 
__builtin_bswap64(*reinterpret_cast(s.cur)); + s.bitbuf = __builtin_bswap64(*reinterpret_cast(s.cur)); } } diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu index 541163eb086..3f38dce3fa3 100644 --- a/cpp/src/io/comp/debrotli.cu +++ b/cpp/src/io/comp/debrotli.cu @@ -97,12 +97,12 @@ __inline__ __device__ int brotli_context_lut(int mode) { return (mode << 9); } inline __device__ uint8_t brotli_transform_type(int idx) { return kTransformsData[(idx * 3) + 1]; } -inline __device__ const uint8_t *brotli_transform_prefix(int idx) +inline __device__ const uint8_t* brotli_transform_prefix(int idx) { return &kPrefixSuffix[kPrefixSuffixMap[kTransformsData[(idx * 3)]]]; } -inline __device__ const uint8_t *brotli_transform_suffix(int idx) +inline __device__ const uint8_t* brotli_transform_suffix(int idx) { return &kPrefixSuffix[kPrefixSuffixMap[kTransformsData[(idx * 3) + 2]]]; } @@ -138,12 +138,12 @@ struct debrotli_huff_tree_group_s { uint16_t max_symbol; uint16_t num_htrees; uint16_t pad; - uint16_t *htrees[1]; + uint16_t* htrees[1]; }; // Must be able to at least hold worst-case context maps, tree groups and context modes constexpr int local_heap_size = - (256 * 64 + 256 * 4 + 3 * (sizeof(debrotli_huff_tree_group_s) + 255 * sizeof(uint16_t *)) + 256 + + (256 * 64 + 256 * 4 + 3 * (sizeof(debrotli_huff_tree_group_s) + 255 * sizeof(uint16_t*)) + 256 + 3 * brotli_huffman_max_size_258 * sizeof(uint16_t) + 3 * brotli_huffman_max_size_26 * sizeof(uint16_t)); @@ -152,15 +152,15 @@ constexpr int local_heap_size = */ struct debrotli_state_s { // Bitstream - const uint8_t *cur; - const uint8_t *end; - const uint8_t *base; + const uint8_t* cur; + const uint8_t* end; + const uint8_t* base; uint2 bitbuf; uint32_t bitpos; int32_t error; // Output - uint8_t *outbase; - uint8_t *out; + uint8_t* outbase; + uint8_t* out; size_t bytes_left; // Decoded symbols uint8_t window_bits; @@ -178,19 +178,19 @@ struct debrotli_state_s { uint32_t meta_block_len; uint16_t heap_used; uint16_t heap_limit; - uint8_t *context_map; - uint8_t *dist_context_map; - uint8_t *context_modes; - uint8_t *fb_base; + uint8_t* context_map; + uint8_t* dist_context_map; + uint8_t* context_modes; + uint8_t* fb_base; uint32_t fb_size; uint8_t block_type_rb[6]; uint8_t pad[2]; int dist_rb_idx; int dist_rb[4]; - debrotli_huff_tree_group_s *literal_hgroup; - debrotli_huff_tree_group_s *insert_copy_hgroup; - debrotli_huff_tree_group_s *distance_hgroup; - uint16_t *block_type_vlc[3]; + debrotli_huff_tree_group_s* literal_hgroup; + debrotli_huff_tree_group_s* insert_copy_hgroup; + debrotli_huff_tree_group_s* distance_hgroup; + uint16_t* block_type_vlc[3]; huff_scratch_s hs; uint32_t mtf[65]; uint64_t heap[local_heap_size / 8]; @@ -199,54 +199,54 @@ struct debrotli_state_s { inline __device__ uint32_t Log2Floor(uint32_t value) { return 32 - __clz(value); } /// @brief initializes the bit reader -__device__ void initbits(debrotli_state_s *s, const uint8_t *base, size_t len, size_t pos = 0) +__device__ void initbits(debrotli_state_s* s, const uint8_t* base, size_t len, size_t pos = 0) { - const uint8_t *p = base + pos; + const uint8_t* p = base + pos; uint32_t prefix_bytes = (uint32_t)(((size_t)p) & 3); p -= prefix_bytes; s->base = base; s->end = base + len; s->cur = p; - s->bitbuf.x = (p < s->end) ? *reinterpret_cast(p) : 0; + s->bitbuf.x = (p < s->end) ? *reinterpret_cast(p) : 0; p += 4; - s->bitbuf.y = (p < s->end) ? *reinterpret_cast(p) : 0; + s->bitbuf.y = (p < s->end) ? 
*reinterpret_cast(p) : 0; s->bitpos = prefix_bytes * 8; } // return next 32 bits -inline __device__ uint32_t next32bits(const debrotli_state_s *s) +inline __device__ uint32_t next32bits(const debrotli_state_s* s) { return __funnelshift_rc(s->bitbuf.x, s->bitbuf.y, s->bitpos); } /// return next n bits -inline __device__ uint32_t showbits(const debrotli_state_s *s, uint32_t n) +inline __device__ uint32_t showbits(const debrotli_state_s* s, uint32_t n) { uint32_t next32 = __funnelshift_rc(s->bitbuf.x, s->bitbuf.y, s->bitpos); return (next32 & ((1 << n) - 1)); } -inline __device__ void skipbits(debrotli_state_s *s, uint32_t n) +inline __device__ void skipbits(debrotli_state_s* s, uint32_t n) { uint32_t bitpos = s->bitpos + n; if (bitpos >= 32) { - const uint8_t *cur = s->cur + 8; + const uint8_t* cur = s->cur + 8; s->bitbuf.x = s->bitbuf.y; - s->bitbuf.y = (cur < s->end) ? *reinterpret_cast(cur) : 0; + s->bitbuf.y = (cur < s->end) ? *reinterpret_cast(cur) : 0; s->cur = cur - 4; bitpos &= 0x1f; } s->bitpos = bitpos; } -inline __device__ uint32_t getbits(debrotli_state_s *s, uint32_t n) +inline __device__ uint32_t getbits(debrotli_state_s* s, uint32_t n) { uint32_t bits = showbits(s, n); skipbits(s, n); return bits; } -inline __device__ uint32_t getbits_bytealign(debrotli_state_s *s) +inline __device__ uint32_t getbits_bytealign(debrotli_state_s* s) { uint32_t n = (uint32_t)((-(int32_t)s->bitpos) & 7); uint32_t bits = showbits(s, n); @@ -271,7 +271,7 @@ inline __device__ uint32_t getbits_bytealign(debrotli_state_s *s) * 65..128 xxxxxx1101 * 129..256 xxxxxxx1111 */ -static __device__ uint32_t getbits_u8vlc(debrotli_state_s *s) +static __device__ uint32_t getbits_u8vlc(debrotli_state_s* s) { uint32_t next32 = next32bits(s); uint32_t v, len; @@ -288,7 +288,7 @@ static __device__ uint32_t getbits_u8vlc(debrotli_state_s *s) } /// Decode a Huffman code with 8-bit initial lookup -static __device__ uint32_t getvlc(debrotli_state_s *s, const uint16_t *lut) +static __device__ uint32_t getvlc(debrotli_state_s* s, const uint16_t* lut) { uint32_t next32 = next32bits(s); uint32_t vlc, len; @@ -308,12 +308,12 @@ static __device__ uint32_t getvlc(debrotli_state_s *s, const uint16_t *lut) } /// Alloc bytes from the local (shared mem) heap -static __device__ uint8_t *local_alloc(debrotli_state_s *s, uint32_t bytes) +static __device__ uint8_t* local_alloc(debrotli_state_s* s, uint32_t bytes) { int heap_used = s->heap_used; int len = (bytes + 7) >> 3; if (heap_used + len <= s->heap_limit) { - uint8_t *ptr = reinterpret_cast(&s->heap[heap_used]); + uint8_t* ptr = reinterpret_cast(&s->heap[heap_used]); s->heap_used = (uint16_t)(heap_used + len); return ptr; } else { @@ -323,7 +323,7 @@ static __device__ uint8_t *local_alloc(debrotli_state_s *s, uint32_t bytes) /// Shrink the size of the local heap, returns ptr to end (used for stack-like intermediate /// allocations at the end of the heap) -static __device__ uint8_t *local_heap_shrink(debrotli_state_s *s, uint32_t bytes) +static __device__ uint8_t* local_heap_shrink(debrotli_state_s* s, uint32_t bytes) { int heap_used = s->heap_used; int heap_limit = s->heap_limit; @@ -331,13 +331,13 @@ static __device__ uint8_t *local_heap_shrink(debrotli_state_s *s, uint32_t bytes if (heap_limit - len >= heap_used) { heap_limit -= len; s->heap_limit = (uint16_t)heap_limit; - return reinterpret_cast(&s->heap[heap_limit]); + return reinterpret_cast(&s->heap[heap_limit]); } else { return nullptr; } } -static __device__ void local_heap_grow(debrotli_state_s *s, uint32_t bytes) +static 
__device__ void local_heap_grow(debrotli_state_s* s, uint32_t bytes) { int len = (bytes + 7) >> 3; int heap_limit = s->heap_limit + len; @@ -345,16 +345,16 @@ static __device__ void local_heap_grow(debrotli_state_s *s, uint32_t bytes) } /// Alloc memory from the fixed-size heap shared between all blocks (thread0-only) -static __device__ uint8_t *ext_heap_alloc(uint32_t bytes, - uint8_t *ext_heap_base, +static __device__ uint8_t* ext_heap_alloc(uint32_t bytes, + uint8_t* ext_heap_base, uint32_t ext_heap_size) { uint32_t len = (bytes + 0xf) & ~0xf; - volatile uint32_t *heap_ptr = reinterpret_cast(ext_heap_base); + volatile uint32_t* heap_ptr = reinterpret_cast(ext_heap_base); uint32_t first_free_block = ~0; for (;;) { uint32_t blk_next, blk_prev; - first_free_block = atomicExch((unsigned int *)heap_ptr, first_free_block); + first_free_block = atomicExch((unsigned int*)heap_ptr, first_free_block); if (first_free_block == ~0 || first_free_block >= ext_heap_size) { // Some other block is holding the heap or there are no free blocks: try again later continue; @@ -373,7 +373,7 @@ static __device__ uint8_t *ext_heap_alloc(uint32_t bytes, uint32_t next, blksz; if (((blk_next & 3) != 0) || (blk_next >= ext_heap_size)) { // Corrupted heap - atomicExch((unsigned int *)heap_ptr, first_free_block); + atomicExch((unsigned int*)heap_ptr, first_free_block); return nullptr; } next = heap_ptr[(blk_next >> 2) + 0]; @@ -398,14 +398,14 @@ static __device__ uint8_t *ext_heap_alloc(uint32_t bytes, } __threadfence(); // Restore the list head - atomicExch((unsigned int *)heap_ptr, first_free_block); + atomicExch((unsigned int*)heap_ptr, first_free_block); return ext_heap_base + blk_next; } else { blk_prev = blk_next; blk_next = next; } } while (blk_next != 0 && blk_next < ext_heap_size); - first_free_block = atomicExch((unsigned int *)heap_ptr, first_free_block); + first_free_block = atomicExch((unsigned int*)heap_ptr, first_free_block); // Reaching here means the heap is full // Just in case we're trying to allocate more than the entire heap if (len > ext_heap_size - 4 * sizeof(uint32_t)) { break; } @@ -414,17 +414,17 @@ static __device__ uint8_t *ext_heap_alloc(uint32_t bytes, } /// Free a memory block (thread0-only) -static __device__ void ext_heap_free(void *ptr, +static __device__ void ext_heap_free(void* ptr, uint32_t bytes, - uint8_t *ext_heap_base, + uint8_t* ext_heap_base, uint32_t ext_heap_size) { uint32_t len = (bytes + 0xf) & ~0xf; - volatile uint32_t *heap_ptr = (volatile uint32_t *)ext_heap_base; + volatile uint32_t* heap_ptr = (volatile uint32_t*)ext_heap_base; uint32_t first_free_block = ~0; - uint32_t cur_blk = static_cast(static_cast(ptr) - ext_heap_base); + uint32_t cur_blk = static_cast(static_cast(ptr) - ext_heap_base); for (;;) { - first_free_block = atomicExch((unsigned int *)heap_ptr, first_free_block); + first_free_block = atomicExch((unsigned int*)heap_ptr, first_free_block); if (first_free_block != ~0) { break; } // Some other block is holding the heap } @@ -485,12 +485,12 @@ static __device__ void ext_heap_free(void *ptr, } } __threadfence(); - atomicExch((unsigned int *)heap_ptr, first_free_block); + atomicExch((unsigned int*)heap_ptr, first_free_block); } -static __device__ uint32_t BuildSimpleHuffmanTable(uint16_t *lut, +static __device__ uint32_t BuildSimpleHuffmanTable(uint16_t* lut, int root_bits, - uint16_t *val, + uint16_t* val, uint32_t num_symbols) { uint32_t table_size = 1; @@ -562,7 +562,7 @@ static __device__ uint32_t BuildSimpleHuffmanTable(uint16_t *lut, return goal_size; 
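// ext_heap_alloc and ext_heap_free above serialize access to the shared free
// list by atomically exchanging a sentinel (~0) into the first word of the
// heap: a thread that swaps out a real head value owns the list, while a
// thread that reads the sentinel back knows another block holds the heap and
// retries. A host-side analogue of that idiom using std::atomic, with
// illustrative names (the device code uses atomicExch):
#include <atomic>
#include <cstdint>

constexpr uint32_t heap_busy = ~0u;  // sentinel parked in the head while locked

uint32_t acquire_free_list(std::atomic<uint32_t>& head)
{
  for (;;) {
    uint32_t h = head.exchange(heap_busy);  // swap the sentinel in
    if (h != heap_busy) return h;           // we now own the list head
    // otherwise another thread holds the heap: spin and retry
  }
}

void release_free_list(std::atomic<uint32_t>& head, uint32_t new_head)
{
  head.exchange(new_head);  // publish the (possibly updated) head, releasing the lock
}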
} -static __device__ void BuildCodeLengthsHuffmanTable(huff_scratch_s *hs) +static __device__ void BuildCodeLengthsHuffmanTable(huff_scratch_s* hs) { uint32_t code; // current table entry int symbol; // symbol index in original or sorted table @@ -592,7 +592,9 @@ static __device__ void BuildCodeLengthsHuffmanTable(huff_scratch_s *hs) // Special case: all symbols but one have 0 code length. if (hs->offset[0] == 0) { code = huffcode(0, hs->sorted[0]); - for (key = 0; key < table_size; ++key) { hs->lenvlctab[key] = code; } + for (key = 0; key < table_size; ++key) { + hs->lenvlctab[key] = code; + } return; } @@ -606,7 +608,7 @@ static __device__ void BuildCodeLengthsHuffmanTable(huff_scratch_s *hs) for (int bits_count = hs->code_length_histo[bits]; bits_count != 0; --bits_count) { int end = table_size; code = huffcode(bits, hs->sorted[symbol++]); - uint16_t *p = &hs->lenvlctab[brev8(key)]; + uint16_t* p = &hs->lenvlctab[brev8(key)]; do { end -= step; p[end] = code; @@ -621,7 +623,7 @@ static __device__ void BuildCodeLengthsHuffmanTable(huff_scratch_s *hs) // Returns the table width of the next 2nd level table. |count| is the histogram // of bit lengths for the remaining symbols, |len| is the code length of the // next processed symbol. -static __device__ int NextTableBitSize(const uint16_t *const count, int len, int root_bits) +static __device__ int NextTableBitSize(const uint16_t* const count, int len, int root_bits) { int left = 1 << (len - root_bits); while (len < 15) { @@ -634,13 +636,13 @@ static __device__ int NextTableBitSize(const uint16_t *const count, int len, int } // Build a huffman lookup table (currently thread0-only) -static __device__ uint32_t BuildHuffmanTable(uint16_t *root_lut, +static __device__ uint32_t BuildHuffmanTable(uint16_t* root_lut, int root_bits, - const uint16_t *const symbol_lists, - uint16_t *count) + const uint16_t* const symbol_lists, + uint16_t* count) { uint32_t code; // current table entry - uint16_t *lut; // next available space in table + uint16_t* lut; // next available space in table int len; // current code length int symbol; // symbol index in original or sorted table int key; // prefix code @@ -654,7 +656,8 @@ static __device__ uint32_t BuildHuffmanTable(uint16_t *root_lut, int max_length = -1; int bits; - while (symbol_lists[max_length] == 0xFFFF) max_length--; + while (symbol_lists[max_length] == 0xFFFF) + max_length--; max_length += 16; lut = root_lut; @@ -677,7 +680,7 @@ static __device__ uint32_t BuildHuffmanTable(uint16_t *root_lut, for (int bits_count = count[bits]; bits_count != 0; --bits_count) { symbol = symbol_lists[symbol]; code = huffcode(bits, symbol); - uint16_t *p = &lut[brev8(key)]; + uint16_t* p = &lut[brev8(key)]; int end = table_size; do { end -= step; @@ -715,7 +718,7 @@ static __device__ uint32_t BuildHuffmanTable(uint16_t *root_lut, } symbol = symbol_lists[symbol]; code = huffcode(len - root_bits, symbol); - uint16_t *p = &lut[brev8(sub_key)]; + uint16_t* p = &lut[brev8(sub_key)]; int end = table_size; do { end -= step; @@ -883,10 +886,10 @@ invalid. 
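// BuildCodeLengthsHuffmanTable above, like the bzip2 table construction
// earlier in this patch, rebuilds a canonical prefix code from per-symbol
// code lengths: histogram the lengths, turn the histogram into running base
// offsets, then hand out consecutive codes within each length. A minimal
// sketch of that standard recipe (not cudf-specific):
#include <cstdint>
#include <vector>

std::vector<uint32_t> assign_canonical_codes(std::vector<uint8_t> const& lengths,
                                             int max_len)
{
  std::vector<uint32_t> count(max_len + 1, 0);
  for (auto len : lengths)
    if (len) count[len]++;
  // first code of each length = (first code of the previous length + its count) << 1
  std::vector<uint32_t> next(max_len + 1, 0);
  uint32_t code = 0;
  for (int len = 1; len <= max_len; ++len) {
    code      = (code + count[len - 1]) << 1;
    next[len] = code;
  }
  std::vector<uint32_t> codes(lengths.size(), 0);
  for (size_t i = 0; i < lengths.size(); ++i)
    if (lengths[i]) codes[i] = next[lengths[i]]++;  // consecutive within a length
  return codes;
}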
*/ // Decode Huffman tree (thread0-only) -static __device__ uint32_t DecodeHuffmanTree(debrotli_state_s *s, +static __device__ uint32_t DecodeHuffmanTree(debrotli_state_s* s, uint32_t alphabet_size, uint32_t max_symbol, - uint16_t *vlctab) + uint16_t* vlctab) { uint32_t prefix_code_type; @@ -916,8 +919,8 @@ static __device__ uint32_t DecodeHuffmanTree(debrotli_state_s *s, vlctab, huffman_lookup_table_width, s->hs.symbols_lists_array, nsym); } else { // Complex prefix code - huff_scratch_s *const hs = &s->hs; - uint16_t *symbol_lists = + huff_scratch_s* const hs = &s->hs; + uint16_t* symbol_lists = &s->hs.symbols_lists_array[16]; // Make small negative indexes addressable uint32_t space = 32, num_codes = 0, i, prev_code_len, symbol, repeat, repeat_code_len; @@ -1069,7 +1072,7 @@ formula : window size = (1 << WBITS) - 16 */ -static __device__ void DecodeStreamHeader(debrotli_state_s *s) +static __device__ void DecodeStreamHeader(debrotli_state_s* s) { uint32_t next32 = next32bits(s); uint32_t wbits = 0, len = 0; @@ -1155,7 +1158,7 @@ not set(if the ignored bits are not all zeros, the stream should be rejected as invalid) */ -static __device__ void DecodeMetaBlockHeader(debrotli_state_s *s) +static __device__ void DecodeMetaBlockHeader(debrotli_state_s* s) { uint32_t next32 = next32bits(s); uint32_t len = 1, is_empty = 0; @@ -1195,7 +1198,9 @@ static __device__ void DecodeMetaBlockHeader(debrotli_state_s *s) } skipbits(s, len); if (getbits_bytealign(s) != 0) { s->error = 1; } - for (len = mskiplen; len >= 32; len -= 32) { skipbits(s, 32); } + for (len = mskiplen; len >= 32; len -= 32) { + skipbits(s, 32); + } } } skipbits(s, len); @@ -1238,17 +1243,17 @@ Block count code + extra bits for first distance block count, appears only if NBLTYPESD >= 2 */ -static __device__ void DecodeHuffmanTables(debrotli_state_s *s) +static __device__ void DecodeHuffmanTables(debrotli_state_s* s) { for (int b = 0; b < 3; b++) { uint32_t nbltypes = 1 + getbits_u8vlc(s); s->num_block_types[b] = nbltypes; if (nbltypes >= 2) { uint32_t alphabet_size = nbltypes + 2, index, nbits, maxtblsz; - uint16_t *vlctab; + uint16_t* vlctab; maxtblsz = kMaxHuffmanTableSize[(alphabet_size + 31) >> 5]; maxtblsz = (maxtblsz > brotli_huffman_max_size_258) ? brotli_huffman_max_size_258 : maxtblsz; - vlctab = reinterpret_cast( + vlctab = reinterpret_cast( local_alloc(s, (brotli_huffman_max_size_26 + maxtblsz) * sizeof(uint16_t))); s->block_type_vlc[b] = vlctab; DecodeHuffmanTree(s, alphabet_size, alphabet_size, vlctab + brotli_huffman_max_size_26); @@ -1286,13 +1291,13 @@ static __device__ void DecodeHuffmanTables(debrotli_state_s *s) * Most of input values are 0 and 1. To reduce number of branches, we replace * inner for loop with do-while. */ -static __device__ void InverseMoveToFrontTransform(debrotli_state_s *s, uint8_t *v, uint32_t v_len) +static __device__ void InverseMoveToFrontTransform(debrotli_state_s* s, uint8_t* v, uint32_t v_len) { // Reinitialize elements that could have been changed. uint32_t i = 1; uint32_t upper_bound = s->mtf_upper_bound; - uint32_t *mtf = &s->mtf[1]; // Make mtf[-1] addressable. - uint8_t *mtf_u8 = reinterpret_cast(mtf); + uint32_t* mtf = &s->mtf[1]; // Make mtf[-1] addressable. + uint8_t* mtf_u8 = reinterpret_cast(mtf); uint32_t pattern = 0x03020100; // Little-endian // Initialize list using 4 consequent values pattern. 
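// InverseMoveToFrontTransform above decodes brotli context-map values through
// a move-to-front list; the device code packs the list four bytes per word
// and, per its comment, replaces the inner for loop with a do-while to cut
// branches on the common 0/1 inputs. The plain form of the inverse transform
// it optimizes:
#include <cstdint>
#include <numeric>
#include <vector>

void inverse_move_to_front(uint8_t* v, size_t v_len)
{
  std::vector<uint8_t> mtf(256);
  std::iota(mtf.begin(), mtf.end(), 0);  // identity list 0..255
  for (size_t i = 0; i < v_len; ++i) {
    uint8_t idx   = v[i];
    uint8_t value = mtf[idx];
    for (; idx > 0; --idx)  // slide earlier entries back ...
      mtf[idx] = mtf[idx - 1];
    mtf[0] = value;  // ... and move the referenced value to the front
    v[i]   = value;
  }
}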
@@ -1320,10 +1325,10 @@ static __device__ void InverseMoveToFrontTransform(debrotli_state_s *s, uint8_t s->mtf_upper_bound = upper_bound >> 2; } -static __device__ uint32_t DecodeContextMap(debrotli_state_s *s, - uint8_t *context_map, +static __device__ uint32_t DecodeContextMap(debrotli_state_s* s, + uint8_t* context_map, uint32_t context_map_size, - uint16_t *context_map_vlc) + uint16_t* context_map_vlc) { uint32_t num_htrees = getbits_u8vlc(s) + 1; uint32_t bits, context_index, max_run_length_prefix, alphabet_size; @@ -1367,7 +1372,7 @@ static __device__ uint32_t DecodeContextMap(debrotli_state_s *s, return num_htrees; } -static __device__ void DetectTrivialLiteralBlockTypes(debrotli_state_s *s) +static __device__ void DetectTrivialLiteralBlockTypes(debrotli_state_s* s) { uint32_t i; for (i = 0; i < s->num_block_types[0]; i++) { @@ -1375,7 +1380,9 @@ static __device__ void DetectTrivialLiteralBlockTypes(debrotli_state_s *s) uint32_t error = 0; uint32_t sample = s->context_map[offset]; uint32_t j; - for (j = 0; j < (1u << 6); ++j) { error |= s->context_map[offset + j] ^ sample; } + for (j = 0; j < (1u << 6); ++j) { + error |= s->context_map[offset + j] ^ sample; + } if (error == 0) { s->context_modes[i] |= 4u; } } } @@ -1405,13 +1412,13 @@ appears only if NTREESD >= 2; otherwise, the context map has only zero values */ -static __device__ debrotli_huff_tree_group_s *HuffmanTreeGroupInit(debrotli_state_s *s, +static __device__ debrotli_huff_tree_group_s* HuffmanTreeGroupInit(debrotli_state_s* s, uint32_t alphabet_size, uint32_t max_symbol, uint32_t ntrees) { - debrotli_huff_tree_group_s *group = reinterpret_cast(local_alloc( - s, sizeof(debrotli_huff_tree_group_s) + ntrees * sizeof(uint16_t *) - sizeof(uint16_t *))); + debrotli_huff_tree_group_s* group = reinterpret_cast(local_alloc( + s, sizeof(debrotli_huff_tree_group_s) + ntrees * sizeof(uint16_t*) - sizeof(uint16_t*))); group->alphabet_size = (uint16_t)alphabet_size; group->max_symbol = (uint16_t)max_symbol; group->num_htrees = (uint16_t)ntrees; @@ -1419,26 +1426,26 @@ static __device__ debrotli_huff_tree_group_s *HuffmanTreeGroupInit(debrotli_stat return group; } -static __device__ void HuffmanTreeGroupAlloc(debrotli_state_s *s, debrotli_huff_tree_group_s *group) +static __device__ void HuffmanTreeGroupAlloc(debrotli_state_s* s, debrotli_huff_tree_group_s* group) { if (!group->htrees[0]) { uint32_t alphabet_size = group->alphabet_size; uint32_t ntrees = group->num_htrees; uint32_t max_table_size = kMaxHuffmanTableSize[(alphabet_size + 31) >> 5]; uint32_t code_size = sizeof(uint16_t) * ntrees * max_table_size; - group->htrees[0] = reinterpret_cast(local_alloc(s, code_size)); + group->htrees[0] = reinterpret_cast(local_alloc(s, code_size)); if (!group->htrees[0]) { - if (s->fb_base) { group->htrees[0] = reinterpret_cast(s->fb_base + s->fb_size); } + if (s->fb_base) { group->htrees[0] = reinterpret_cast(s->fb_base + s->fb_size); } s->fb_size += (code_size + 3) & ~3; } } } // Decodes a series of Huffman table using ReadHuffmanCode function. 
-static __device__ void HuffmanTreeGroupDecode(debrotli_state_s *s, - debrotli_huff_tree_group_s *group) +static __device__ void HuffmanTreeGroupDecode(debrotli_state_s* s, + debrotli_huff_tree_group_s* group) { - uint16_t *next = group->htrees[0]; + uint16_t* next = group->htrees[0]; for (int htree_index = 0; htree_index < group->num_htrees; htree_index++) { uint32_t table_size = DecodeHuffmanTree(s, group->alphabet_size, group->max_symbol, next); @@ -1448,13 +1455,13 @@ static __device__ void HuffmanTreeGroupDecode(debrotli_state_s *s, } } -static __device__ void DecodeHuffmanTreeGroups(debrotli_state_s *s, - uint8_t *fb_heap_base, +static __device__ void DecodeHuffmanTreeGroups(debrotli_state_s* s, + uint8_t* fb_heap_base, uint32_t fb_heap_size) { uint32_t bits, npostfix, ndirect, nbltypesl; uint32_t context_map_size; - uint16_t *context_map_vlc; + uint16_t* context_map_vlc; uint32_t num_direct_codes, num_distance_codes, num_literal_htrees, num_dist_htrees; // Decode context maps @@ -1466,8 +1473,10 @@ static __device__ void DecodeHuffmanTreeGroups(debrotli_state_s *s, s->distance_postfix_mask = (1 << npostfix) - 1; nbltypesl = s->num_block_types[0]; s->context_modes = local_alloc(s, nbltypesl); - for (uint32_t i = 0; i < nbltypesl; i++) { s->context_modes[i] = getbits(s, 2); } - context_map_vlc = reinterpret_cast( + for (uint32_t i = 0; i < nbltypesl; i++) { + s->context_modes[i] = getbits(s, 2); + } + context_map_vlc = reinterpret_cast( local_heap_shrink(s, brotli_huffman_max_size_272 * sizeof(uint16_t))); context_map_size = nbltypesl << 6; s->context_map = local_alloc(s, context_map_size); @@ -1514,7 +1523,7 @@ static __device__ void DecodeHuffmanTreeGroups(debrotli_state_s *s, HuffmanTreeGroupDecode(s, s->distance_hgroup); } -static __device__ int PrepareLiteralDecoding(debrotli_state_s *s, const uint8_t *&context_map_slice) +static __device__ int PrepareLiteralDecoding(debrotli_state_s* s, const uint8_t*& context_map_slice) { int context_mode; uint32_t block_type = s->block_type_rb[1]; @@ -1525,13 +1534,13 @@ static __device__ int PrepareLiteralDecoding(debrotli_state_s *s, const uint8_t } /// Decodes a command or literal and updates block type ring-buffer. Reads 3..54 bits. -static __device__ uint32_t DecodeBlockTypeAndLength(debrotli_state_s *s, int tree_type) +static __device__ uint32_t DecodeBlockTypeAndLength(debrotli_state_s* s, int tree_type) { uint32_t max_block_type = s->num_block_types[tree_type]; if (max_block_type > 1) { - const uint16_t *len_tree = s->block_type_vlc[tree_type]; - const uint16_t *type_tree = len_tree + brotli_huffman_max_size_26; - uint8_t *ringbuffer = &s->block_type_rb[tree_type * 2]; + const uint16_t* len_tree = s->block_type_vlc[tree_type]; + const uint16_t* type_tree = len_tree + brotli_huffman_max_size_26; + uint8_t* ringbuffer = &s->block_type_rb[tree_type * 2]; // Read 0..15 + 3..39 bits. 
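// getvlc above resolves most symbols with one 8-bit table probe: each entry
// packs (code length, symbol), and a code of length L is replicated across
// every 8-bit pattern whose low L bits match it, so a single indexed load
// yields both the symbol and how many bits to consume (longer codes fall
// through to second-level tables in the real decoder). A sketch, assuming
// codes of at most 8 bits already bit-reversed for an LSB-first bit reader:
#include <cstdint>

struct lut_entry {
  uint8_t len;      // code length in bits
  uint16_t symbol;  // decoded symbol
};

void fill_lut(lut_entry* lut,  // 256 entries
              uint32_t code_lsb_first,
              uint8_t len,
              uint16_t symbol)
{
  for (uint32_t i = code_lsb_first; i < 256; i += 1u << len)
    lut[i] = lut_entry{len, symbol};  // every pattern starting with this code
}

uint16_t decode_one(lut_entry const* lut, uint64_t& bitbuf)
{
  lut_entry e = lut[bitbuf & 0xff];  // peek 8 bits (caller keeps >= 8 buffered)
  bitbuf >>= e.len;                  // consume only the matched code's bits
  return e.symbol;
}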
uint32_t block_type = getvlc(s, type_tree); uint32_t block_len = getvlc(s, len_tree); @@ -1553,7 +1562,7 @@ static __device__ uint32_t DecodeBlockTypeAndLength(debrotli_state_s *s, int tre } } -inline __device__ int ToUpperCase(uint8_t *p) +inline __device__ int ToUpperCase(uint8_t* p) { if (p[0] < 0xC0) { if (p[0] >= 'a' && p[0] <= 'z') { p[0] ^= 32; } @@ -1569,18 +1578,20 @@ inline __device__ int ToUpperCase(uint8_t *p) return 3; } -static __device__ int TransformDictionaryWord(uint8_t *dst, - const uint8_t *word, +static __device__ int TransformDictionaryWord(uint8_t* dst, + const uint8_t* word, int len, int transform_idx) { int idx = 0; - const uint8_t *prefix = brotli_transform_prefix(transform_idx); + const uint8_t* prefix = brotli_transform_prefix(transform_idx); uint8_t type = brotli_transform_type(transform_idx); - const uint8_t *suffix = brotli_transform_suffix(transform_idx); + const uint8_t* suffix = brotli_transform_suffix(transform_idx); { int prefix_len = *prefix++; - while (prefix_len--) { dst[idx++] = *prefix++; } + while (prefix_len--) { + dst[idx++] = *prefix++; + } } { const int t = type; @@ -1592,11 +1603,13 @@ static __device__ int TransformDictionaryWord(uint8_t *dst, word += skip; len -= skip; } - while (i < len) { dst[idx++] = word[i++]; } + while (i < len) { + dst[idx++] = word[i++]; + } if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) { ToUpperCase(&dst[idx - len]); } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) { - uint8_t *uppercase = &dst[idx - len]; + uint8_t* uppercase = &dst[idx - len]; while (len > 0) { int step = ToUpperCase(uppercase); uppercase += step; @@ -1606,24 +1619,26 @@ static __device__ int TransformDictionaryWord(uint8_t *dst, } { int suffix_len = *suffix++; - while (suffix_len--) { dst[idx++] = *suffix++; } + while (suffix_len--) { + dst[idx++] = *suffix++; + } return idx; } } /// ProcessCommands, actual decoding: 1 warp, most work done by thread0 -static __device__ void ProcessCommands(debrotli_state_s *s, const brotli_dictionary_s *words, int t) +static __device__ void ProcessCommands(debrotli_state_s* s, const brotli_dictionary_s* words, int t) { int32_t meta_block_len = s->meta_block_len; - uint8_t *out = s->out; + uint8_t* out = s->out; int32_t pos = 0; int p1 = s->p1; int p2 = s->p2; - const uint16_t *htree_command; + const uint16_t* htree_command; const uint8_t *context_map_slice, *dist_context_map_slice; int dist_rb_idx; uint32_t blen_L, blen_I, blen_D; - uint8_t *const dict_scratch = reinterpret_cast( + uint8_t* const dict_scratch = reinterpret_cast( &s->hs); // 24+13 bytes (max length of a dictionary word including prefix & suffix) int context_mode; @@ -1678,7 +1693,7 @@ static __device__ void ProcessCommands(debrotli_state_s *s, const brotli_diction insert_length -= len; blen_L -= len; if (brotli_need_context_lut(context_mode)) { - const debrotli_huff_tree_group_s *literal_hgroup = s->literal_hgroup; + const debrotli_huff_tree_group_s* literal_hgroup = s->literal_hgroup; do { int context = brotli_context(p1, p2, context_mode); p2 = p1; @@ -1686,7 +1701,7 @@ static __device__ void ProcessCommands(debrotli_state_s *s, const brotli_diction out[pos++] = p1; } while (--len); } else { - const uint16_t *literal_htree = s->literal_hgroup->htrees[context_map_slice[0]]; + const uint16_t* literal_htree = s->literal_hgroup->htrees[context_map_slice[0]]; do { p2 = p1; p1 = getvlc(s, literal_htree); @@ -1704,7 +1719,7 @@ static __device__ void ProcessCommands(debrotli_state_s *s, const brotli_diction distance_code = s->dist_rb[dist_rb_idx & 3]; 
distance_context = 1; } else { - const uint16_t *distance_tree; + const uint16_t* distance_tree; int distval; // Read distance code in the command, unless it was implicitly zero. if (blen_D == 0) { @@ -1847,7 +1862,7 @@ static __device__ void ProcessCommands(debrotli_state_s *s, const brotli_diction if (distance_code > 0) { // Copy for (uint32_t i = t; i < copy_length; i += 32) { - const uint8_t *src = + const uint8_t* src = out + pos + ((i >= (uint32_t)distance_code) ? (i % (uint32_t)distance_code) : i) - distance_code; b = *src; @@ -1855,7 +1870,7 @@ static __device__ void ProcessCommands(debrotli_state_s *s, const brotli_diction } } else { // Dictionary - const uint8_t *src = (distance_code < 0) ? &words->data[-distance_code] : dict_scratch; + const uint8_t* src = (distance_code < 0) ? &words->data[-distance_code] : dict_scratch; if (t < copy_length) { b = src[t]; out[pos + t] = b; @@ -1891,9 +1906,9 @@ static __device__ void ProcessCommands(debrotli_state_s *s, const brotli_diction * @param count Number of blocks to decompress */ extern "C" __global__ void __launch_bounds__(block_size, 2) - gpu_debrotli_kernel(gpu_inflate_input_s *inputs, - gpu_inflate_status_s *outputs, - uint8_t *scratch, + gpu_debrotli_kernel(gpu_inflate_input_s* inputs, + gpu_inflate_status_s* outputs, + uint8_t* scratch, uint32_t scratch_size, uint32_t count) { @@ -1901,16 +1916,16 @@ extern "C" __global__ void __launch_bounds__(block_size, 2) int t = threadIdx.x; int z = blockIdx.x; - debrotli_state_s *const s = &state_g; + debrotli_state_s* const s = &state_g; if (z >= count) { return; } // Thread0: initializes shared state and decode stream header if (!t) { - uint8_t const *src = static_cast(inputs[z].srcDevice); + uint8_t const* src = static_cast(inputs[z].srcDevice); size_t src_size = inputs[z].srcSize; if (src && src_size >= 8) { s->error = 0; - s->out = s->outbase = static_cast(inputs[z].dstDevice); + s->out = s->outbase = static_cast(inputs[z].dstDevice); s->bytes_left = inputs[z].dstSize; s->mtf_upper_bound = 63; s->dist_rb[0] = 16; @@ -1940,8 +1955,8 @@ extern "C" __global__ void __launch_bounds__(block_size, 2) if (!s->error && s->meta_block_len != 0) { if (s->is_uncompressed) { // Uncompressed block - const uint8_t *src = s->cur + ((s->bitpos + 7) >> 3); - uint8_t *dst = s->out; + const uint8_t* src = s->cur + ((s->bitpos + 7) >> 3); + uint8_t* dst = s->out; if (!t) { if (getbits_bytealign(s) != 0) { s->error = -1; @@ -1954,7 +1969,9 @@ extern "C" __global__ void __launch_bounds__(block_size, 2) __syncthreads(); if (!s->error) { // Simple block-wide memcpy - for (int32_t i = t; i < s->meta_block_len; i += block_size) { dst[i] = src[i]; } + for (int32_t i = t; i < s->meta_block_len; i += block_size) { + dst[i] = src[i]; + } } } else { // Compressed block @@ -1971,8 +1988,7 @@ extern "C" __global__ void __launch_bounds__(block_size, 2) if (!s->error) { // Warp0: Decode compressed block, warps 1..7 are all idle (!) 
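// Illustrative sketch (not from the patch): host-side equivalent of the
// wrapped-copy addressing used in the warp copy loop of ProcessCommands above.
// When copy_length exceeds the match distance, the match overlaps its own
// output; reducing each output index modulo the distance makes every lane read
// a byte that was already valid before the copy began, so all 32 lanes can
// copy in parallel without serializing.
#include <cstddef>
#include <cstdint>
static void wrapped_copy(uint8_t* out, size_t pos, uint32_t distance, uint32_t copy_length)
{
  for (uint32_t i = 0; i < copy_length; ++i) {  // lanes t, t+32, ... on the device
    uint32_t src_i = (i >= distance) ? (i % distance) : i;
    out[pos + i]   = out[pos + src_i - distance];
  }
}
// e.g. distance = 1, copy_length = 4 replicates out[pos - 1] four times.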
if (t < 32) - ProcessCommands( - s, reinterpret_cast(scratch + scratch_size), t); + ProcessCommands(s, reinterpret_cast(scratch + scratch_size), t); __syncthreads(); } // Free any allocated memory @@ -2053,16 +2069,16 @@ size_t __host__ get_gpu_debrotli_scratch_size(int max_num_inputs) #include #endif -cudaError_t __host__ gpu_debrotli(gpu_inflate_input_s *inputs, - gpu_inflate_status_s *outputs, - void *scratch, +cudaError_t __host__ gpu_debrotli(gpu_inflate_input_s* inputs, + gpu_inflate_status_s* outputs, + void* scratch, size_t scratch_size, int count, rmm::cuda_stream_view stream) { uint32_t count32 = (count > 0) ? count : 0; uint32_t fb_heap_size; - uint8_t *scratch_u8 = static_cast(scratch); + uint8_t* scratch_u8 = static_cast(scratch); dim3 dim_block(block_size, 1); dim3 dim_grid(count32, 1); // TODO: Check max grid dimensions vs max expected count diff --git a/cpp/src/io/comp/gpuinflate.cu b/cpp/src/io/comp/gpuinflate.cu index eda1d37f78c..338af72e4c9 100644 --- a/cpp/src/io/comp/gpuinflate.cu +++ b/cpp/src/io/comp/gpuinflate.cu @@ -102,15 +102,15 @@ constexpr int prefetch_size = (1 << log2_prefetch_size); /// @brief Prefetcher state struct prefetch_queue_s { - const uint8_t *cur_p; ///< Prefetch location + const uint8_t* cur_p; ///< Prefetch location int run; ///< prefetcher will exit when run=0 uint8_t pref_data[prefetch_size]; }; template -inline __device__ volatile uint32_t *prefetch_addr32(volatile prefetch_queue_s &q, T *ptr) +inline __device__ volatile uint32_t* prefetch_addr32(volatile prefetch_queue_s& q, T* ptr) { - return reinterpret_cast(&q.pref_data[(prefetch_size - 4) & (size_t)(ptr)]); + return reinterpret_cast(&q.pref_data[(prefetch_size - 4) & (size_t)(ptr)]); } #endif // ENABLE_PREFETCH @@ -120,12 +120,12 @@ inline __device__ volatile uint32_t *prefetch_addr32(volatile prefetch_queue_s & */ struct inflate_state_s { // output state - uint8_t *out; ///< output buffer - uint8_t *outbase; ///< start of output buffer - uint8_t *outend; ///< end of output buffer + uint8_t* out; ///< output buffer + uint8_t* outbase; ///< start of output buffer + uint8_t* outend; ///< end of output buffer // Input state - uint8_t *cur; ///< input buffer - uint8_t *end; ///< end of input buffer + uint8_t* cur; ///< input buffer + uint8_t* end; ///< end of input buffer uint2 bitbuf; ///< bit buffer (64-bit) uint32_t bitpos; ///< position in bit buffer @@ -165,24 +165,24 @@ inline __device__ unsigned int bfe(unsigned int source, return bits; }; -inline __device__ uint32_t showbits(inflate_state_s *s, uint32_t n) +inline __device__ uint32_t showbits(inflate_state_s* s, uint32_t n) { uint32_t next32 = __funnelshift_rc(s->bitbuf.x, s->bitbuf.y, s->bitpos); return (next32 & ((1 << n) - 1)); } -inline __device__ uint32_t nextbits32(inflate_state_s *s) +inline __device__ uint32_t nextbits32(inflate_state_s* s) { return __funnelshift_rc(s->bitbuf.x, s->bitbuf.y, s->bitpos); } -inline __device__ void skipbits(inflate_state_s *s, uint32_t n) +inline __device__ void skipbits(inflate_state_s* s, uint32_t n) { uint32_t bitpos = s->bitpos + n; if (bitpos >= 32) { - uint8_t *cur = s->cur + 8; + uint8_t* cur = s->cur + 8; s->bitbuf.x = s->bitbuf.y; - s->bitbuf.y = (cur < s->end) ? *reinterpret_cast(cur) : 0; + s->bitbuf.y = (cur < s->end) ? 
*reinterpret_cast<uint32_t*>(cur) : 0;
     s->cur = cur - 4;
     bitpos &= 0x1f;
   }
@@ -191,7 +191,7 @@ inline __device__ void skipbits(inflate_state_s *s, uint32_t n)
 
 // TODO: If we require 4-byte alignment of input bitstream & length (padded), reading bits would
 // become quite a bit faster
-__device__ uint32_t getbits(inflate_state_s *s, uint32_t n)
+__device__ uint32_t getbits(inflate_state_s* s, uint32_t n)
 {
   uint32_t v = showbits(s, n);
   skipbits(s, n);
@@ -222,7 +222,7 @@ __device__ uint32_t getbits(inflate_state_s *s, uint32_t n)
  * - Incomplete codes are handled by this decoder, since they are permitted
  * in the deflate format.  See the format notes for fixed() and dynamic().
  */
-__device__ int decode(inflate_state_s *s, const int16_t *counts, const int16_t *symbols)
+__device__ int decode(inflate_state_s* s, const int16_t* counts, const int16_t* symbols)
 {
   unsigned int len;   // current number of bits in code
   unsigned int code;  // len bits being decoded
@@ -279,15 +279,16 @@ __device__ int decode(inflate_state_s *s, const int16_t *counts, const int16_t *
  * the code bits definition.
  */
 __device__ int construct(
-  inflate_state_s *s, int16_t *counts, int16_t *symbols, const int16_t *length, int n)
+  inflate_state_s* s, int16_t* counts, int16_t* symbols, const int16_t* length, int n)
 {
   int symbol;  // current symbol when stepping through length[]
   int len;     // current length when stepping through counts[]
   int left;    // number of possible codes left of current length
-  int16_t *offs = s->u.scratch.offs;
+  int16_t* offs = s->u.scratch.offs;
 
   // count number of codes of each length
-  for (len = 0; len <= max_bits; len++) counts[len] = 0;
+  for (len = 0; len <= max_bits; len++)
+    counts[len] = 0;
   for (symbol = 0; symbol < n; symbol++)
     (counts[length[symbol]])++;  // assumes lengths are within bounds
   if (counts[0] == n)            // no codes!
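// Illustrative sketch (not from the patch): the over-subscription check that
// construct() performs next, following the zlib "puff" reference decoder this
// code is modeled on. One code of length zero is possible to start with; each
// extra bit doubles the code space, and the codes assigned at each length are
// deducted. Going negative means an invalid (over-subscribed) code set; a
// positive remainder means an incomplete code, which DEFLATE permits.
#include <cstdint>
static int check_code_space(const int16_t* counts, int max_bits)
{
  int left = 1;  // one possible code of length 0
  for (int len = 1; len <= max_bits; ++len) {
    left <<= 1;           // each bit doubles the available code space
    left -= counts[len];  // deduct codes actually assigned at this length
    if (left < 0) { return left; }  // over-subscribed
  }
  return left;  // 0 = complete code, > 0 = incomplete code
}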
@@ -303,7 +304,8 @@ __device__ int construct( // generate offsets into symbol table for each length for sorting offs[1] = 0; - for (len = 1; len < max_bits; len++) offs[len + 1] = offs[len] + counts[len]; + for (len = 1; len < max_bits; len++) + offs[len + 1] = offs[len] + counts[len]; // put symbols in table sorted by length, by symbol order within each length for (symbol = 0; symbol < n; symbol++) @@ -318,12 +320,12 @@ static const __device__ __constant__ uint8_t g_code_order[19 + 1] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, 0xff}; /// Dynamic block (custom huffman tables) -__device__ int init_dynamic(inflate_state_s *s) +__device__ int init_dynamic(inflate_state_s* s) { int nlen, ndist, ncode; /* number of lengths in descriptor */ int index; /* index of lengths[] */ int err; /* construct() return value */ - int16_t *lengths = s->u.scratch.lengths; + int16_t* lengths = s->u.scratch.lengths; // get number of lengths in each table, check lengths nlen = getbits(s, 5) + 257; @@ -333,8 +335,10 @@ __device__ int init_dynamic(inflate_state_s *s) return -3; // bad counts } // read code length code lengths (really), missing lengths are zero - for (index = 0; index < ncode; index++) lengths[g_code_order[index]] = getbits(s, 3); - for (; index < 19; index++) lengths[g_code_order[index]] = 0; + for (index = 0; index < ncode; index++) + lengths[g_code_order[index]] = getbits(s, 3); + for (; index < 19; index++) + lengths[g_code_order[index]] = 0; // build huffman table for code lengths codes (use lencode temporarily) err = construct(s, s->lencnt, s->lensym, lengths, 19); @@ -404,20 +408,25 @@ __device__ int init_dynamic(inflate_state_s *s) * length, this can be implemented as an incomplete code. Then the invalid * codes are detected while decoding. 
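 *
 * For reference, the fixed-code assignment implemented below is dictated by
 * RFC 1951 §3.2.6: literal/length symbols 0..143 get 8-bit codes, 144..255
 * get 9 bits, 256..279 get 7 bits, and 280..287 get 8 bits (288 symbols in
 * all, including two unused codes), while every one of the 30 distance
 * symbols gets a 5-bit code.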
*/ -__device__ int init_fixed(inflate_state_s *s) +__device__ int init_fixed(inflate_state_s* s) { - int16_t *lengths = s->u.scratch.lengths; + int16_t* lengths = s->u.scratch.lengths; int symbol; // literal/length table - for (symbol = 0; symbol < 144; symbol++) lengths[symbol] = 8; - for (; symbol < 256; symbol++) lengths[symbol] = 9; - for (; symbol < 280; symbol++) lengths[symbol] = 7; - for (; symbol < fix_l_codes; symbol++) lengths[symbol] = 8; + for (symbol = 0; symbol < 144; symbol++) + lengths[symbol] = 8; + for (; symbol < 256; symbol++) + lengths[symbol] = 9; + for (; symbol < 280; symbol++) + lengths[symbol] = 7; + for (; symbol < fix_l_codes; symbol++) + lengths[symbol] = 8; construct(s, s->lencnt, s->lensym, lengths, fix_l_codes); // distance table - for (symbol = 0; symbol < max_d_codes; symbol++) lengths[symbol] = 5; + for (symbol = 0; symbol < max_d_codes; symbol++) + lengths[symbol] = 5; // build huffman table for distance codes construct(s, s->distcnt, s->distsym, lengths, max_d_codes); @@ -497,21 +506,21 @@ static const __device__ __constant__ uint16_t g_dext[30] = { // Extra bits for 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; /// @brief Thread 0 only: decode bitstreams and output symbols into the symbol queue -__device__ void decode_symbols(inflate_state_s *s) +__device__ void decode_symbols(inflate_state_s* s) { uint32_t bitpos = s->bitpos; uint2 bitbuf = s->bitbuf; - uint8_t *cur = s->cur; - uint8_t *end = s->end; + uint8_t* cur = s->cur; + uint8_t* end = s->end; int32_t batch = 0; int32_t sym, batch_len; do { - volatile uint32_t *b = &s->x.u.symqueue[batch * batch_size]; + volatile uint32_t* b = &s->x.u.symqueue[batch * batch_size]; // Wait for the next batch entry to be empty #if ENABLE_PREFETCH // Wait for prefetcher to fetch a worst-case of 48 bits per symbol - while ((*(volatile int32_t *)&s->pref.cur_p - (int32_t)(size_t)cur < batch_size * 6) || + while ((*(volatile int32_t*)&s->pref.cur_p - (int32_t)(size_t)cur < batch_size * 6) || (s->x.batch_len[batch] != 0)) {} #else while (s->x.batch_len[batch] != 0) {} @@ -544,7 +553,7 @@ __device__ void decode_symbols(inflate_state_s *s) } else { // Slow length path uint32_t next32r = __brev(next32); - const int16_t *symbols = &s->lensym[s->index_slow_len]; + const int16_t* symbols = &s->lensym[s->index_slow_len]; unsigned int first = s->first_slow_len; int lext; #pragma unroll 1 @@ -583,7 +592,7 @@ __device__ void decode_symbols(inflate_state_s *s) cur += 4; #else cur += 8; - bitbuf.y = (cur < end) ? *(const uint32_t *)cur : 0; + bitbuf.y = (cur < end) ? 
*(const uint32_t*)cur : 0; cur -= 4; #endif bitpos &= 0x1f; @@ -599,7 +608,7 @@ __device__ void decode_symbols(inflate_state_s *s) len += dext; } else { uint32_t next32r = __brev(next32); - const int16_t *symbols = &s->distsym[s->index_slow_dist]; + const int16_t* symbols = &s->distsym[s->index_slow_dist]; unsigned int first = s->first_slow_dist; #pragma unroll 1 for (len = log2_dist_lut + 1; len <= max_bits; len++) { @@ -636,7 +645,7 @@ __device__ void decode_symbols(inflate_state_s *s) #else cur += 8; if (cur < end) { - bitbuf.y = *(const uint32_t *)cur; + bitbuf.y = *(const uint32_t*)cur; cur -= 4; } else { bitbuf.y = 0; @@ -654,7 +663,7 @@ __device__ void decode_symbols(inflate_state_s *s) } while (batch_len < batch_size - 1); s->x.batch_len[batch] = batch_len; #if ENABLE_PREFETCH - ((volatile inflate_state_s *)s)->cur = cur; + ((volatile inflate_state_s*)s)->cur = cur; #endif if (batch_len != 0) batch = (batch + 1) & (batch_count - 1); } while (sym != 256); @@ -672,13 +681,13 @@ __device__ void decode_symbols(inflate_state_s *s) * @brief Build lookup tables for faster decode * LUT format is symbols*16+length */ -__device__ void init_length_lut(inflate_state_s *s, int t) +__device__ void init_length_lut(inflate_state_s* s, int t) { - int32_t *lut = s->u.lut.lenlut; + int32_t* lut = s->u.lut.lenlut; for (uint32_t bits = t; bits < (1 << log2_len_lut); bits += blockDim.x) { - const int16_t *cnt = s->lencnt; - const int16_t *symbols = s->lensym; + const int16_t* cnt = s->lencnt; + const int16_t* symbols = s->lensym; int sym = -10 << 5; unsigned int first = 0; unsigned int rbits = __brev(bits) >> (32 - log2_len_lut); @@ -704,7 +713,7 @@ __device__ void init_length_lut(inflate_state_s *s, int t) if (!t) { unsigned int first = 0; unsigned int index = 0; - const int16_t *cnt = s->lencnt; + const int16_t* cnt = s->lencnt; for (unsigned int len = 1; len <= log2_len_lut; len++) { unsigned int count = cnt[len]; index += count; @@ -720,13 +729,13 @@ __device__ void init_length_lut(inflate_state_s *s, int t) * @brief Build lookup tables for faster decode of distance symbol * LUT format is symbols*16+length */ -__device__ void init_distance_lut(inflate_state_s *s, int t) +__device__ void init_distance_lut(inflate_state_s* s, int t) { - int32_t *lut = s->u.lut.distlut; + int32_t* lut = s->u.lut.distlut; for (uint32_t bits = t; bits < (1 << log2_dist_lut); bits += blockDim.x) { - const int16_t *cnt = s->distcnt; - const int16_t *symbols = s->distsym; + const int16_t* cnt = s->distcnt; + const int16_t* symbols = s->distsym; int sym = 0; unsigned int first = 0; unsigned int rbits = __brev(bits) >> (32 - log2_dist_lut); @@ -749,7 +758,7 @@ __device__ void init_distance_lut(inflate_state_s *s, int t) if (!t) { unsigned int first = 0; unsigned int index = 0; - const int16_t *cnt = s->distcnt; + const int16_t* cnt = s->distcnt; for (unsigned int len = 1; len <= log2_dist_lut; len++) { unsigned int count = cnt[len]; index += count; @@ -762,15 +771,15 @@ __device__ void init_distance_lut(inflate_state_s *s, int t) } /// @brief WARP1: process symbols and output uncompressed stream -__device__ void process_symbols(inflate_state_s *s, int t) +__device__ void process_symbols(inflate_state_s* s, int t) { - uint8_t *out = s->out; - const uint8_t *outend = s->outend; - const uint8_t *outbase = s->outbase; + uint8_t* out = s->out; + const uint8_t* outend = s->outend; + const uint8_t* outbase = s->outbase; int batch = 0; do { - volatile uint32_t *b = &s->x.u.symqueue[batch * batch_size]; + volatile uint32_t* b = 
&s->x.u.symqueue[batch * batch_size]; int batch_len, pos; int32_t symt; uint32_t lit_mask; @@ -798,7 +807,7 @@ __device__ void process_symbols(inflate_state_s *s, int t) len = max((symbol & 0xffff) - 256, 0); // max should be unnecessary, but just in case dist = symbol >> 16; for (int i = t; i < len; i += 32) { - const uint8_t *src = out + ((i >= dist) ? (i % dist) : i) - dist; + const uint8_t* src = out + ((i >= dist) ? (i % dist) : i) - dist; uint8_t b = (src < outbase) ? 0 : *src; if (out + i < outend) { out[i] = b; } } @@ -838,7 +847,7 @@ __device__ void process_symbols(inflate_state_s *s, int t) * - A stored block can have zero length. This is sometimes used to byte-align * subsets of the compressed data for random access or partial recovery. */ -__device__ int init_stored(inflate_state_s *s) +__device__ int init_stored(inflate_state_s* s) { uint32_t len, nlen; // length of stored block @@ -863,13 +872,13 @@ __device__ int init_stored(inflate_state_s *s) } /// Copy bytes from stored block to destination -__device__ void copy_stored(inflate_state_s *s, int t) +__device__ void copy_stored(inflate_state_s* s, int t) { int len = s->stored_blk_len; - uint8_t *cur = s->cur + (s->bitpos >> 3); - uint8_t *out = s->out; - uint8_t *outend = s->outend; - uint8_t *cur4; + uint8_t* cur = s->cur + (s->bitpos >> 3); + uint8_t* out = s->out; + uint8_t* outend = s->outend; + uint8_t* cur4; int slow_bytes = min(len, (int)((16 - (size_t)out) & 0xf)); int fast_bytes, bitpos; @@ -893,18 +902,18 @@ __device__ void copy_stored(inflate_state_s *s, int t) // Fast copy 16 bytes at a time for (int i = t * 16; i < fast_bytes; i += blockDim.x * 16) { uint4 u; - u.x = *reinterpret_cast(cur4 + i + 0 * 4); - u.y = *reinterpret_cast(cur4 + i + 1 * 4); - u.z = *reinterpret_cast(cur4 + i + 2 * 4); - u.w = *reinterpret_cast(cur4 + i + 3 * 4); + u.x = *reinterpret_cast(cur4 + i + 0 * 4); + u.y = *reinterpret_cast(cur4 + i + 1 * 4); + u.z = *reinterpret_cast(cur4 + i + 2 * 4); + u.w = *reinterpret_cast(cur4 + i + 3 * 4); if (bitpos != 0) { - uint32_t v = (bitpos != 0) ? *reinterpret_cast(cur4 + i + 4 * 4) : 0; + uint32_t v = (bitpos != 0) ? *reinterpret_cast(cur4 + i + 4 * 4) : 0; u.x = __funnelshift_rc(u.x, u.y, bitpos); u.y = __funnelshift_rc(u.y, u.z, bitpos); u.z = __funnelshift_rc(u.z, u.w, bitpos); u.w = __funnelshift_rc(u.w, v, bitpos); } - *reinterpret_cast(out + i) = u; + *reinterpret_cast(out + i) = u; } } cur += fast_bytes; @@ -920,20 +929,20 @@ __device__ void copy_stored(inflate_state_s *s, int t) __syncthreads(); if (t == 0) { // Reset bitstream to end of block - uint8_t *p = cur + len; + uint8_t* p = cur + len; uint32_t prefix_bytes = (uint32_t)(((size_t)p) & 3); p -= prefix_bytes; s->cur = p; - s->bitbuf.x = (p < s->end) ? *reinterpret_cast(p) : 0; + s->bitbuf.x = (p < s->end) ? *reinterpret_cast(p) : 0; p += 4; - s->bitbuf.y = (p < s->end) ? *reinterpret_cast(p) : 0; + s->bitbuf.y = (p < s->end) ? 
*reinterpret_cast(p) : 0; s->bitpos = prefix_bytes * 8; s->out = out; } } #if ENABLE_PREFETCH -__device__ void init_prefetcher(inflate_state_s *s, int t) +__device__ void init_prefetcher(inflate_state_s* s, int t) { if (t == 0) { s->pref.cur_p = s->cur; @@ -941,17 +950,17 @@ __device__ void init_prefetcher(inflate_state_s *s, int t) } } -__device__ void prefetch_warp(volatile inflate_state_s *s, int t) +__device__ void prefetch_warp(volatile inflate_state_s* s, int t) { - const uint8_t *cur_p = s->pref.cur_p; - const uint8_t *end = s->end; + const uint8_t* cur_p = s->pref.cur_p; + const uint8_t* end = s->end; while (shuffle((t == 0) ? s->pref.run : 0)) { int32_t cur_lo = (int32_t)(size_t)cur_p; int do_pref = - shuffle((t == 0) ? (cur_lo - *(volatile int32_t *)&s->cur < prefetch_size - 32 * 4 - 4) : 0); + shuffle((t == 0) ? (cur_lo - *(volatile int32_t*)&s->cur < prefetch_size - 32 * 4 - 4) : 0); if (do_pref) { - const uint8_t *p = cur_p + 4 * t; - *prefetch_addr32(s->pref, p) = (p < end) ? *reinterpret_cast(p) : 0; + const uint8_t* p = cur_p + 4 * t; + *prefetch_addr32(s->pref, p) = (p < end) ? *reinterpret_cast(p) : 0; cur_p += 4 * 32; __threadfence_block(); __syncwarp(); @@ -968,7 +977,7 @@ __device__ void prefetch_warp(volatile inflate_state_s *s, int t) * @brief Parse GZIP header * See https://tools.ietf.org/html/rfc1952 */ -__device__ int parse_gzip_header(const uint8_t *src, size_t src_size) +__device__ int parse_gzip_header(const uint8_t* src, size_t src_size) { int hdr_len = -1; @@ -1020,16 +1029,16 @@ __device__ int parse_gzip_header(const uint8_t *src, size_t src_size) */ template __global__ void __launch_bounds__(block_size) - inflate_kernel(gpu_inflate_input_s *inputs, gpu_inflate_status_s *outputs, int parse_hdr) + inflate_kernel(gpu_inflate_input_s* inputs, gpu_inflate_status_s* outputs, int parse_hdr) { __shared__ __align__(16) inflate_state_s state_g; int t = threadIdx.x; int z = blockIdx.x; - inflate_state_s *state = &state_g; + inflate_state_s* state = &state_g; if (!t) { - uint8_t *p = const_cast(static_cast(inputs[z].srcDevice)); + uint8_t* p = const_cast(static_cast(inputs[z].srcDevice)); size_t src_size = inputs[z].srcSize; uint32_t prefix_bytes; // Parse header if needed @@ -1045,16 +1054,16 @@ __global__ void __launch_bounds__(block_size) } } // Initialize shared state - state->out = const_cast(static_cast(inputs[z].dstDevice)); + state->out = const_cast(static_cast(inputs[z].dstDevice)); state->outbase = state->out; state->outend = state->out + inputs[z].dstSize; state->end = p + src_size; prefix_bytes = (uint32_t)(((size_t)p) & 3); p -= prefix_bytes; state->cur = p; - state->bitbuf.x = (p < state->end) ? *reinterpret_cast(p) : 0; + state->bitbuf.x = (p < state->end) ? *reinterpret_cast(p) : 0; p += 4; - state->bitbuf.y = (p < state->end) ? *reinterpret_cast(p) : 0; + state->bitbuf.y = (p < state->end) ? 
*reinterpret_cast(p) : 0; state->bitpos = prefix_bytes * 8; } __syncthreads(); @@ -1139,21 +1148,21 @@ __global__ void __launch_bounds__(block_size) * * @param inputs Source and destination information per block */ -__global__ void __launch_bounds__(1024) copy_uncompressed_kernel(gpu_inflate_input_s *inputs) +__global__ void __launch_bounds__(1024) copy_uncompressed_kernel(gpu_inflate_input_s* inputs) { - __shared__ const uint8_t *volatile src_g; - __shared__ uint8_t *volatile dst_g; + __shared__ const uint8_t* volatile src_g; + __shared__ uint8_t* volatile dst_g; __shared__ uint32_t volatile copy_len_g; uint32_t t = threadIdx.x; uint32_t z = blockIdx.x; - const uint8_t *src; - uint8_t *dst; + const uint8_t* src; + uint8_t* dst; uint32_t len, src_align_bytes, src_align_bits, dst_align_bytes; if (!t) { - src = static_cast(inputs[z].srcDevice); - dst = static_cast(inputs[z].dstDevice); + src = static_cast(inputs[z].srcDevice); + dst = static_cast(inputs[z].dstDevice); len = min((uint32_t)inputs[z].srcSize, (uint32_t)inputs[z].dstSize); src_g = src; dst_g = dst; @@ -1175,12 +1184,12 @@ __global__ void __launch_bounds__(1024) copy_uncompressed_kernel(gpu_inflate_inp src_align_bytes = (uint32_t)(3 & reinterpret_cast(src)); src_align_bits = src_align_bytes << 3; while (len >= 32) { - const uint32_t *src32 = reinterpret_cast(src - src_align_bytes); + const uint32_t* src32 = reinterpret_cast(src - src_align_bytes); uint32_t copy_cnt = min(len >> 2, 1024); if (t < copy_cnt) { uint32_t v = src32[t]; if (src_align_bits != 0) { v = __funnelshift_r(v, src32[t + 1], src_align_bits); } - reinterpret_cast(dst)[t] = v; + reinterpret_cast(dst)[t] = v; } src += copy_cnt * 4; dst += copy_cnt * 4; @@ -1189,8 +1198,8 @@ __global__ void __launch_bounds__(1024) copy_uncompressed_kernel(gpu_inflate_inp if (t < len) { dst[t] = src[t]; } } -cudaError_t __host__ gpuinflate(gpu_inflate_input_s *inputs, - gpu_inflate_status_s *outputs, +cudaError_t __host__ gpuinflate(gpu_inflate_input_s* inputs, + gpu_inflate_status_s* outputs, int count, int parse_hdr, rmm::cuda_stream_view stream) @@ -1203,7 +1212,7 @@ cudaError_t __host__ gpuinflate(gpu_inflate_input_s *inputs, return cudaSuccess; } -cudaError_t __host__ gpu_copy_uncompressed_blocks(gpu_inflate_input_s *inputs, +cudaError_t __host__ gpu_copy_uncompressed_blocks(gpu_inflate_input_s* inputs, int count, rmm::cuda_stream_view stream) { diff --git a/cpp/src/io/comp/gpuinflate.h b/cpp/src/io/comp/gpuinflate.h index 7ca6dd13e9a..a37d282997e 100644 --- a/cpp/src/io/comp/gpuinflate.h +++ b/cpp/src/io/comp/gpuinflate.h @@ -26,9 +26,9 @@ namespace io { * @brief Input parameters for the decompression interface */ struct gpu_inflate_input_s { - const void *srcDevice; + const void* srcDevice; uint64_t srcSize; - void *dstDevice; + void* dstDevice; uint64_t dstSize; }; @@ -53,8 +53,8 @@ struct gpu_inflate_status_s { * @param[in] parse_hdr Whether or not to parse GZIP header, default false * @param[in] stream CUDA stream to use, default 0 */ -cudaError_t gpuinflate(gpu_inflate_input_s *inputs, - gpu_inflate_status_s *outputs, +cudaError_t gpuinflate(gpu_inflate_input_s* inputs, + gpu_inflate_status_s* outputs, int count = 1, int parse_hdr = 0, rmm::cuda_stream_view stream = rmm::cuda_stream_default); @@ -66,7 +66,7 @@ cudaError_t gpuinflate(gpu_inflate_input_s *inputs, * @param[in] count Number of input structures, default 1 * @param[in] stream CUDA stream to use, default 0 */ -cudaError_t gpu_copy_uncompressed_blocks(gpu_inflate_input_s *inputs, +cudaError_t 
gpu_copy_uncompressed_blocks(gpu_inflate_input_s* inputs, int count = 1, rmm::cuda_stream_view stream = rmm::cuda_stream_default); @@ -81,8 +81,8 @@ cudaError_t gpu_copy_uncompressed_blocks(gpu_inflate_input_s *inputs, * @param[in] count Number of input/output structures, default 1 * @param[in] stream CUDA stream to use, default 0 */ -cudaError_t gpu_unsnap(gpu_inflate_input_s *inputs, - gpu_inflate_status_s *outputs, +cudaError_t gpu_unsnap(gpu_inflate_input_s* inputs, + gpu_inflate_status_s* outputs, int count = 1, rmm::cuda_stream_view stream = rmm::cuda_stream_default); @@ -108,9 +108,9 @@ size_t get_gpu_debrotli_scratch_size(int max_num_inputs = 0); * @param[in] count Number of input/output structures, default 1 * @param[in] stream CUDA stream to use, default 0 */ -cudaError_t gpu_debrotli(gpu_inflate_input_s *inputs, - gpu_inflate_status_s *outputs, - void *scratch, +cudaError_t gpu_debrotli(gpu_inflate_input_s* inputs, + gpu_inflate_status_s* outputs, + void* scratch, size_t scratch_size, int count = 1, rmm::cuda_stream_view stream = rmm::cuda_stream_default); @@ -126,8 +126,8 @@ cudaError_t gpu_debrotli(gpu_inflate_input_s *inputs, * @param[in] count Number of input/output structures, default 1 * @param[in] stream CUDA stream to use, default 0 */ -cudaError_t gpu_snap(gpu_inflate_input_s *inputs, - gpu_inflate_status_s *outputs, +cudaError_t gpu_snap(gpu_inflate_input_s* inputs, + gpu_inflate_status_s* outputs, int count = 1, rmm::cuda_stream_view stream = rmm::cuda_stream_default); diff --git a/cpp/src/io/comp/snap.cu b/cpp/src/io/comp/snap.cu index 999d02e3a50..a3d7bd048e8 100644 --- a/cpp/src/io/comp/snap.cu +++ b/cpp/src/io/comp/snap.cu @@ -31,11 +31,11 @@ constexpr int hash_bits = 12; * @brief snappy compressor state */ struct snap_state_s { - const uint8_t *src; ///< Ptr to uncompressed data + const uint8_t* src; ///< Ptr to uncompressed data uint32_t src_len; ///< Uncompressed data length - uint8_t *dst_base; ///< Base ptr to output compressed data - uint8_t *dst; ///< Current ptr to uncompressed data - uint8_t *end; ///< End of uncompressed data buffer + uint8_t* dst_base; ///< Base ptr to output compressed data + uint8_t* dst; ///< Current ptr to uncompressed data + uint8_t* end; ///< End of uncompressed data buffer volatile uint32_t literal_length; ///< Number of literal bytes volatile uint32_t copy_length; ///< Number of copy bytes volatile uint32_t copy_distance; ///< Distance for copy bytes @@ -53,10 +53,10 @@ static inline __device__ uint32_t snap_hash(uint32_t v) /** * @brief Fetches four consecutive bytes */ -static inline __device__ uint32_t fetch4(const uint8_t *src) +static inline __device__ uint32_t fetch4(const uint8_t* src) { uint32_t src_align = 3 & reinterpret_cast(src); - const uint32_t *src32 = reinterpret_cast(src - src_align); + const uint32_t* src32 = reinterpret_cast(src - src_align); uint32_t v = src32[0]; return (src_align) ? 
__funnelshift_r(v, src32[1], src_align * 8) : v; } @@ -72,8 +72,8 @@ static inline __device__ uint32_t fetch4(const uint8_t *src) * * @return Updated pointer to compressed byte stream */ -static __device__ uint8_t *StoreLiterals( - uint8_t *dst, uint8_t *end, const uint8_t *src, uint32_t len_minus1, uint32_t t) +static __device__ uint8_t* StoreLiterals( + uint8_t* dst, uint8_t* end, const uint8_t* src, uint32_t len_minus1, uint32_t t) { if (len_minus1 < 60) { if (!t && dst < end) dst[0] = (len_minus1 << 2); @@ -125,8 +125,8 @@ static __device__ uint8_t *StoreLiterals( * * @return Updated pointer to compressed byte stream */ -static __device__ uint8_t *StoreCopy(uint8_t *dst, - uint8_t *end, +static __device__ uint8_t* StoreCopy(uint8_t* dst, + uint8_t* end, uint32_t copy_len, uint32_t distance) { @@ -178,8 +178,8 @@ static inline __device__ uint32_t HashMatchAny(uint32_t v, uint32_t t) * * @return Number of bytes before first match (literal length) */ -static __device__ uint32_t FindFourByteMatch(snap_state_s *s, - const uint8_t *src, +static __device__ uint32_t FindFourByteMatch(snap_state_s* s, + const uint8_t* src, uint32_t pos0, uint32_t t) { @@ -233,8 +233,8 @@ static __device__ uint32_t FindFourByteMatch(snap_state_s *s, } /// @brief Returns the number of matching bytes for two byte sequences up to 63 bytes -static __device__ uint32_t Match60(const uint8_t *src1, - const uint8_t *src2, +static __device__ uint32_t Match60(const uint8_t* src1, + const uint8_t* src2, uint32_t len, uint32_t t) { @@ -258,21 +258,21 @@ static __device__ uint32_t Match60(const uint8_t *src1, * @param[in] count Number of blocks to compress */ extern "C" __global__ void __launch_bounds__(128) - snap_kernel(gpu_inflate_input_s *inputs, gpu_inflate_status_s *outputs, int count) + snap_kernel(gpu_inflate_input_s* inputs, gpu_inflate_status_s* outputs, int count) { __shared__ __align__(16) snap_state_s state_g; - snap_state_s *const s = &state_g; + snap_state_s* const s = &state_g; uint32_t t = threadIdx.x; uint32_t pos; - const uint8_t *src; + const uint8_t* src; if (!t) { - const uint8_t *src = static_cast(inputs[blockIdx.x].srcDevice); + const uint8_t* src = static_cast(inputs[blockIdx.x].srcDevice); uint32_t src_len = static_cast(inputs[blockIdx.x].srcSize); - uint8_t *dst = static_cast(inputs[blockIdx.x].dstDevice); + uint8_t* dst = static_cast(inputs[blockIdx.x].dstDevice); uint32_t dst_len = static_cast(inputs[blockIdx.x].dstSize); - uint8_t *end = dst + dst_len; + uint8_t* end = dst + dst_len; s->src = src; s->src_len = src_len; s->dst_base = dst; @@ -289,7 +289,7 @@ extern "C" __global__ void __launch_bounds__(128) s->copy_distance = 0; } for (uint32_t i = t; i < sizeof(s->hash_map) / sizeof(uint32_t); i += 128) { - *reinterpret_cast(&s->hash_map[i * 2]) = 0; + *reinterpret_cast(&s->hash_map[i * 2]) = 0; } __syncthreads(); src = s->src; @@ -301,8 +301,8 @@ extern "C" __global__ void __launch_bounds__(128) __syncthreads(); if (t < 32) { // WARP0: Encode literals and copies - uint8_t *dst = s->dst; - uint8_t *end = s->end; + uint8_t* dst = s->dst; + uint8_t* end = s->end; if (literal_len > 0) { dst = StoreLiterals(dst, end, src + pos, literal_len - 1, t); pos += literal_len; @@ -341,8 +341,8 @@ extern "C" __global__ void __launch_bounds__(128) } } -cudaError_t __host__ gpu_snap(gpu_inflate_input_s *inputs, - gpu_inflate_status_s *outputs, +cudaError_t __host__ gpu_snap(gpu_inflate_input_s* inputs, + gpu_inflate_status_s* outputs, int count, rmm::cuda_stream_view stream) { diff --git 
a/cpp/src/io/comp/unbz2.h b/cpp/src/io/comp/unbz2.h index 8f3a6eace5a..5731db63757 100644 --- a/cpp/src/io/comp/unbz2.h +++ b/cpp/src/io/comp/unbz2.h @@ -82,25 +82,25 @@ namespace io { // If BZ_OUTBUFF_FULL is returned and block_start is non-NULL, dstlen will be updated to point to // the end of the last valid block, and block_start will contain the offset in bits of the beginning // of the block, so it can be passed in to resume decoding later on. -#define BZ_OK 0 -#define BZ_RUN_OK 1 -#define BZ_FLUSH_OK 2 -#define BZ_FINISH_OK 3 -#define BZ_STREAM_END 4 -#define BZ_SEQUENCE_ERROR (-1) -#define BZ_PARAM_ERROR (-2) -#define BZ_MEM_ERROR (-3) -#define BZ_DATA_ERROR (-4) +#define BZ_OK 0 +#define BZ_RUN_OK 1 +#define BZ_FLUSH_OK 2 +#define BZ_FINISH_OK 3 +#define BZ_STREAM_END 4 +#define BZ_SEQUENCE_ERROR (-1) +#define BZ_PARAM_ERROR (-2) +#define BZ_MEM_ERROR (-3) +#define BZ_DATA_ERROR (-4) #define BZ_DATA_ERROR_MAGIC (-5) -#define BZ_IO_ERROR (-6) -#define BZ_UNEXPECTED_EOF (-7) -#define BZ_OUTBUFF_FULL (-8) +#define BZ_IO_ERROR (-6) +#define BZ_UNEXPECTED_EOF (-7) +#define BZ_OUTBUFF_FULL (-8) -int32_t cpu_bz2_uncompress(const uint8_t *input, +int32_t cpu_bz2_uncompress(const uint8_t* input, size_t inlen, - uint8_t *dst, - size_t *dstlen, - uint64_t *block_start = nullptr); + uint8_t* dst, + size_t* dstlen, + uint64_t* block_start = nullptr); } // namespace io } // namespace cudf diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index 44581bbc184..2cb99d897fe 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -106,32 +106,32 @@ struct bz2_file_header_s { #pragma pack(pop) struct gz_archive_s { - const gz_file_header_s *fhdr; + const gz_file_header_s* fhdr; uint16_t hcrc16; // header crc16 if present uint16_t xlen; - const uint8_t *fxtra; // xlen bytes (optional) - const uint8_t *fname; // zero-terminated original filename if present - const uint8_t *fcomment; // zero-terminated comment if present - const uint8_t *comp_data; // compressed data + const uint8_t* fxtra; // xlen bytes (optional) + const uint8_t* fname; // zero-terminated original filename if present + const uint8_t* fcomment; // zero-terminated comment if present + const uint8_t* comp_data; // compressed data size_t comp_len; // Compressed data length uint32_t crc32; // CRC32 of uncompressed data uint32_t isize; // Input size modulo 2^32 }; struct zip_archive_s { - const zip_eocd_s *eocd; // end of central directory - const zip64_eocdl *eocdl; // end of central dir locator (optional) - const zip_cdfh_s *cdfh; // start of central directory file headers + const zip_eocd_s* eocd; // end of central directory + const zip64_eocdl* eocdl; // end of central dir locator (optional) + const zip_cdfh_s* cdfh; // start of central directory file headers }; -bool ParseGZArchive(gz_archive_s *dst, const uint8_t *raw, size_t len) +bool ParseGZArchive(gz_archive_s* dst, const uint8_t* raw, size_t len) { - const gz_file_header_s *fhdr; + const gz_file_header_s* fhdr; if (!dst) return false; memset(dst, 0, sizeof(gz_archive_s)); if (len < sizeof(gz_file_header_s) + 8) return false; - fhdr = reinterpret_cast(raw); + fhdr = reinterpret_cast(raw); if (fhdr->id1 != 0x1f || fhdr->id2 != 0x8b) return false; dst->fhdr = fhdr; raw += sizeof(gz_file_header_s); @@ -188,7 +188,7 @@ bool ParseGZArchive(gz_archive_s *dst, const uint8_t *raw, size_t len) return (fhdr->comp_mthd == 8 && len > 0); } -bool OpenZipArchive(zip_archive_s *dst, const uint8_t *raw, size_t len) +bool OpenZipArchive(zip_archive_s* dst, const 
uint8_t* raw, size_t len) { memset(dst, 0, sizeof(zip_archive_s)); // Find the end of central directory @@ -196,17 +196,17 @@ bool OpenZipArchive(zip_archive_s *dst, const uint8_t *raw, size_t len) for (ptrdiff_t i = len - sizeof(zip_eocd_s) - 2; i + sizeof(zip_eocd_s) + 2 + 0xffff >= len && i >= 0; i--) { - const zip_eocd_s *eocd = reinterpret_cast(raw + i); + const zip_eocd_s* eocd = reinterpret_cast(raw + i); if (eocd->sig == 0x06054b50 && eocd->disk_id == eocd->start_disk // multi-file archives not supported && eocd->num_entries == eocd->total_entries && eocd->cdir_size >= sizeof(zip_cdfh_s) * eocd->num_entries && eocd->cdir_offset < len && - i + *reinterpret_cast(eocd + 1) <= static_cast(len)) { - const zip_cdfh_s *cdfh = reinterpret_cast(raw + eocd->cdir_offset); + i + *reinterpret_cast(eocd + 1) <= static_cast(len)) { + const zip_cdfh_s* cdfh = reinterpret_cast(raw + eocd->cdir_offset); dst->eocd = eocd; if (i >= static_cast(sizeof(zip64_eocdl))) { - const zip64_eocdl *eocdl = - reinterpret_cast(raw + i - sizeof(zip64_eocdl)); + const zip64_eocdl* eocdl = + reinterpret_cast(raw + i - sizeof(zip64_eocdl)); if (eocdl->sig == 0x07064b50) { dst->eocdl = eocdl; } } // Start of central directory @@ -217,13 +217,13 @@ bool OpenZipArchive(zip_archive_s *dst, const uint8_t *raw, size_t len) return (dst->eocd && dst->cdfh); } -int cpu_inflate(uint8_t *uncomp_data, size_t *destLen, const uint8_t *comp_data, size_t comp_len) +int cpu_inflate(uint8_t* uncomp_data, size_t* destLen, const uint8_t* comp_data, size_t comp_len) { int zerr; z_stream strm; memset(&strm, 0, sizeof(strm)); - strm.next_in = const_cast(reinterpret_cast(comp_data)); + strm.next_in = const_cast(reinterpret_cast(comp_data)); strm.avail_in = comp_len; strm.total_in = 0; strm.next_out = uncomp_data; @@ -250,16 +250,16 @@ int cpu_inflate(uint8_t *uncomp_data, size_t *destLen, const uint8_t *comp_data, * @param comp_data[in] Raw compressed data * @param comp_len[in] Compressed data size */ -int cpu_inflate_vector(std::vector &dst, const uint8_t *comp_data, size_t comp_len) +int cpu_inflate_vector(std::vector& dst, const uint8_t* comp_data, size_t comp_len) { int zerr; z_stream strm; memset(&strm, 0, sizeof(strm)); - strm.next_in = const_cast(reinterpret_cast(comp_data)); + strm.next_in = const_cast(reinterpret_cast(comp_data)); strm.avail_in = comp_len; strm.total_in = 0; - strm.next_out = reinterpret_cast(dst.data()); + strm.next_out = reinterpret_cast(dst.data()); strm.avail_out = dst.size(); strm.total_out = 0; zerr = inflateInit2(&strm, -15); // -15 for raw data without GZIP headers @@ -271,7 +271,7 @@ int cpu_inflate_vector(std::vector &dst, const uint8_t *comp_data, size_t if (strm.avail_out == 0) { dst.resize(strm.total_out + (1 << 30)); strm.avail_out = dst.size() - strm.total_out; - strm.next_out = reinterpret_cast(dst.data()) + strm.total_out; + strm.next_out = reinterpret_cast(dst.data()) + strm.total_out; } zerr = inflate(&strm, Z_SYNC_FLUSH); } while ((zerr == Z_BUF_ERROR || zerr == Z_OK) && strm.avail_out == 0 && @@ -293,10 +293,10 @@ int cpu_inflate_vector(std::vector &dst, const uint8_t *comp_data, size_t * * @return Vector containing the uncompressed output */ -std::vector io_uncompress_single_h2d(const void *src, size_t src_size, int stream_type) +std::vector io_uncompress_single_h2d(const void* src, size_t src_size, int stream_type) { - const uint8_t *raw = static_cast(src); - const uint8_t *comp_data = nullptr; + const uint8_t* raw = static_cast(src); + const uint8_t* comp_data = nullptr; size_t comp_len = 0; 
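// Illustrative sketch (not from the patch): the GZIP member check that
// ParseGZArchive above performs, per RFC 1952. A member is a 10-byte fixed
// header (magic 0x1f 0x8b, compression method 8 = DEFLATE, flag byte, mtime,
// XFL, OS), optional fields selected by FLG, then the DEFLATE stream and an
// 8-byte CRC32/ISIZE trailer. looks_like_gzip is a hypothetical helper.
#include <cstddef>
#include <cstdint>
static bool looks_like_gzip(const uint8_t* raw, size_t len)
{
  if (len < 10 + 8) { return false; }                      // fixed header + trailer
  if (raw[0] != 0x1f || raw[1] != 0x8b) { return false; }  // ID1, ID2 magic
  if (raw[2] != 8) { return false; }                       // CM: 8 = DEFLATE
  return (raw[3] & 0xe0) == 0;                             // reserved FLG bits must be clear
}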
size_t uncomp_len = 0; @@ -320,8 +320,8 @@ std::vector io_uncompress_single_h2d(const void *src, size_t src_size, int if (OpenZipArchive(&za, raw, src_size)) { size_t cdfh_ofs = 0; for (int i = 0; i < za.eocd->num_entries; i++) { - const zip_cdfh_s *cdfh = reinterpret_cast( - reinterpret_cast(za.cdfh) + cdfh_ofs); + const zip_cdfh_s* cdfh = reinterpret_cast( + reinterpret_cast(za.cdfh) + cdfh_ofs); int cdfh_len = sizeof(zip_cdfh_s) + cdfh->fname_len + cdfh->extra_len + cdfh->comment_len; if (cdfh_ofs + cdfh_len > za.eocd->cdir_size || cdfh->sig != 0x02014b50) { // Bad cdir @@ -330,7 +330,7 @@ std::vector io_uncompress_single_h2d(const void *src, size_t src_size, int // For now, only accept with non-zero file sizes and DEFLATE if (cdfh->comp_method == 8 && cdfh->comp_size > 0 && cdfh->uncomp_size > 0) { size_t lfh_ofs = cdfh->hdr_ofs; - const zip_lfh_s *lfh = reinterpret_cast(raw + lfh_ofs); + const zip_lfh_s* lfh = reinterpret_cast(raw + lfh_ofs); if (lfh_ofs + sizeof(zip_lfh_s) <= src_size && lfh->sig == 0x04034b50 && lfh_ofs + sizeof(zip_lfh_s) + lfh->fname_len + lfh->extra_len <= src_size) { if (lfh->comp_method == 8 && lfh->comp_size > 0 && lfh->uncomp_size > 0) { @@ -354,7 +354,7 @@ std::vector io_uncompress_single_h2d(const void *src, size_t src_size, int if (stream_type != IO_UNCOMP_STREAM_TYPE_INFER) break; // Fall through for INFER case IO_UNCOMP_STREAM_TYPE_BZIP2: if (src_size > 4) { - const bz2_file_header_s *fhdr = reinterpret_cast(raw); + const bz2_file_header_s* fhdr = reinterpret_cast(raw); // Check for BZIP2 file signature "BZh1" to "BZh9" if (fhdr->sig[0] == 'B' && fhdr->sig[1] == 'Z' && fhdr->sig[2] == 'h' && fhdr->blksz >= '1' && fhdr->blksz <= '9') { @@ -392,7 +392,7 @@ std::vector io_uncompress_single_h2d(const void *src, size_t src_size, int do { size_t dst_len = uncomp_len - dst_ofs; bz_err = cpu_bz2_uncompress( - comp_data, comp_len, reinterpret_cast(dst.data()) + dst_ofs, &dst_len, &src_ofs); + comp_data, comp_len, reinterpret_cast(dst.data()) + dst_ofs, &dst_len, &src_ofs); if (bz_err == BZ_OUTBUFF_FULL) { // TBD: We could infer the compression ratio based on produced/consumed byte counts // in order to minimize realloc events and over-allocation @@ -422,7 +422,7 @@ std::vector io_uncompress_single_h2d(const void *src, size_t src_size, int * @return Vector containing the output uncompressed data */ std::vector get_uncompressed_data(host_span const data, - std::string const &compression) + std::string const& compression) { int comp_type = IO_UNCOMP_STREAM_TYPE_INFER; if (compression == "gzip") @@ -443,9 +443,9 @@ std::vector get_uncompressed_data(host_span const data, class HostDecompressor_ZLIB : public HostDecompressor { public: HostDecompressor_ZLIB(bool gz_hdr_) : gz_hdr(gz_hdr_) {} - size_t Decompress(uint8_t *dstBytes, + size_t Decompress(uint8_t* dstBytes, size_t dstLen, - const uint8_t *srcBytes, + const uint8_t* srcBytes, size_t srcLen) override { if (gz_hdr) { @@ -471,14 +471,14 @@ class HostDecompressor_ZLIB : public HostDecompressor { class HostDecompressor_SNAPPY : public HostDecompressor { public: HostDecompressor_SNAPPY() {} - size_t Decompress(uint8_t *dstBytes, + size_t Decompress(uint8_t* dstBytes, size_t dstLen, - const uint8_t *srcBytes, + const uint8_t* srcBytes, size_t srcLen) override { uint32_t uncompressed_size, bytes_left, dst_pos; - const uint8_t *cur = srcBytes; - const uint8_t *end = srcBytes + srcLen; + const uint8_t* cur = srcBytes; + const uint8_t* end = srcBytes + srcLen; if (!dstBytes || srcLen < 1) { return 0; } // Read 
uncompressed length (varint) @@ -510,12 +510,12 @@ class HostDecompressor_SNAPPY : public HostDecompressor { if (blen & 2) { // xxxxxx1x: copy with 6-bit length, 2-byte or 4-byte offset if (cur + 2 > end) break; - offset = *reinterpret_cast(cur); + offset = *reinterpret_cast(cur); cur += 2; if (blen & 1) // 4-byte offset { if (cur + 2 > end) break; - offset |= (*reinterpret_cast(cur)) << 16; + offset |= (*reinterpret_cast(cur)) << 16; cur += 2; } blen = (blen >> 2) + 1; diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index f9d491b3cc8..5fe01735dac 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -64,8 +64,8 @@ struct unsnap_queue_s { * @brief snappy decompression state */ struct unsnap_state_s { - const uint8_t *base; ///< base ptr of compressed stream - const uint8_t *end; ///< end of compressed stream + const uint8_t* base; ///< base ptr of compressed stream + const uint8_t* end; ///< end of compressed stream uint32_t uncompressed_size; ///< uncompressed stream size uint32_t bytes_left; ///< bytes to uncompressed remaining int32_t error; ///< current error status @@ -74,7 +74,7 @@ struct unsnap_state_s { gpu_inflate_input_s in; ///< input parameters for current block }; -inline __device__ volatile uint8_t &byte_access(unsnap_state_s *s, uint32_t pos) +inline __device__ volatile uint8_t& byte_access(unsnap_state_s* s, uint32_t pos) { return s->q.buf[pos & (prefetch_size - 1)]; } @@ -85,9 +85,9 @@ inline __device__ volatile uint8_t &byte_access(unsnap_state_s *s, uint32_t pos) * @param s decompression state * @param t warp lane id */ -__device__ void snappy_prefetch_bytestream(unsnap_state_s *s, int t) +__device__ void snappy_prefetch_bytestream(unsnap_state_s* s, int t) { - const uint8_t *base = s->base; + const uint8_t* base = s->base; uint32_t end = (uint32_t)(s->end - base); uint32_t align_bytes = (uint32_t)(0x20 - (0x1f & reinterpret_cast(base))); int32_t pos = min(align_bytes, end); @@ -275,7 +275,7 @@ inline __device__ uint32_t get_len5_mask(uint32_t v0, uint32_t v1) * @param s decompression state * @param t warp lane id */ -__device__ void snappy_decode_symbols(unsnap_state_s *s, uint32_t t) +__device__ void snappy_decode_symbols(unsnap_state_s* s, uint32_t t) { uint32_t cur = 0; uint32_t end = static_cast(s->end - s->base); @@ -285,13 +285,15 @@ __device__ void snappy_decode_symbols(unsnap_state_s *s, uint32_t t) for (;;) { int32_t batch_len; - volatile unsnap_batch_s *b; + volatile unsnap_batch_s* b; // Wait for prefetcher if (t == 0) { s->q.prefetch_rdpos = cur; #pragma unroll(1) // We don't want unrolling here - while (s->q.prefetch_wrpos < min(cur + 5 * batch_size, end)) { busy_wait(10); } + while (s->q.prefetch_wrpos < min(cur + 5 * batch_size, end)) { + busy_wait(10); + } b = &s->q.batch[batch * batch_size]; } // Process small symbols in parallel: for data that does not get good compression, @@ -315,17 +317,17 @@ __device__ void snappy_decode_symbols(unsnap_state_s *s, uint32_t t) is_long_sym = ((b0 & ~4) != 0) && (((b0 + 1) & 2) == 0); short_sym_mask = ballot(is_long_sym); batch_len = 0; - b = reinterpret_cast(shuffle(reinterpret_cast(b))); + b = reinterpret_cast(shuffle(reinterpret_cast(b))); if (!(short_sym_mask & 1)) { batch_len = shuffle((t == 0) ? (short_sym_mask) ? __ffs(short_sym_mask) - 1 : 32 : 0); if (batch_len != 0) { uint32_t blen = 0; int32_t ofs = 0; if (t < batch_len) { - blen = (b0 & 1) ? ((b0 >> 2) & 7) + 4 : ((b0 >> 2) + 1); - ofs = (b0 & 1) ? ((b0 & 0xe0) << 3) | byte_access(s, cur_t + 1) - : (b0 & 2) ? 
byte_access(s, cur_t + 1) | (byte_access(s, cur_t + 2) << 8) - : -(int32_t)(cur_t + 1); + blen = (b0 & 1) ? ((b0 >> 2) & 7) + 4 : ((b0 >> 2) + 1); + ofs = (b0 & 1) ? ((b0 & 0xe0) << 3) | byte_access(s, cur_t + 1) + : (b0 & 2) ? byte_access(s, cur_t + 1) | (byte_access(s, cur_t + 2) << 8) + : -(int32_t)(cur_t + 1); b[t].len = blen; b[t].offset = ofs; ofs += blen; // for correct out-of-range detection below @@ -368,11 +370,10 @@ __device__ void snappy_decode_symbols(unsnap_state_s *s, uint32_t t) uint32_t blen = 0; int32_t ofs = 0; if (t < batch_add) { - blen = (b0 & 1) ? ((b0 >> 2) & 7) + 4 : ((b0 >> 2) + 1); - ofs = (b0 & 1) - ? ((b0 & 0xe0) << 3) | byte_access(s, cur_t + 1) - : (b0 & 2) ? byte_access(s, cur_t + 1) | (byte_access(s, cur_t + 2) << 8) - : -(int32_t)(cur_t + 1); + blen = (b0 & 1) ? ((b0 >> 2) & 7) + 4 : ((b0 >> 2) + 1); + ofs = (b0 & 1) ? ((b0 & 0xe0) << 3) | byte_access(s, cur_t + 1) + : (b0 & 2) ? byte_access(s, cur_t + 1) | (byte_access(s, cur_t + 2) << 8) + : -(int32_t)(cur_t + 1); b[batch_len + t].len = blen; b[batch_len + t].offset = ofs; ofs += blen; // for correct out-of-range detection below @@ -451,7 +452,9 @@ __device__ void snappy_decode_symbols(unsnap_state_s *s, uint32_t t) // Wait for prefetcher s->q.prefetch_rdpos = cur; #pragma unroll(1) // We don't want unrolling here - while (s->q.prefetch_wrpos < min(cur + 5 * batch_size, end)) { busy_wait(10); } + while (s->q.prefetch_wrpos < min(cur + 5 * batch_size, end)) { + busy_wait(10); + } dst_pos += blen; if (bytes_left < blen) break; bytes_left -= blen; @@ -467,7 +470,9 @@ __device__ void snappy_decode_symbols(unsnap_state_s *s, uint32_t t) } batch_len = shuffle(batch_len); if (t == 0) { - while (s->q.batch_len[batch] != 0) { busy_wait(20); } + while (s->q.batch_len[batch] != 0) { + busy_wait(20); + } } if (batch_len != batch_size) { break; } } @@ -489,18 +494,20 @@ __device__ void snappy_decode_symbols(unsnap_state_s *s, uint32_t t) *would result in out-of-bounds accesses) */ template -__device__ void snappy_process_symbols(unsnap_state_s *s, int t, Storage &temp_storage) +__device__ void snappy_process_symbols(unsnap_state_s* s, int t, Storage& temp_storage) { - const uint8_t *literal_base = s->base; - uint8_t *out = static_cast(s->in.dstDevice); + const uint8_t* literal_base = s->base; + uint8_t* out = static_cast(s->in.dstDevice); int batch = 0; do { - volatile unsnap_batch_s *b = &s->q.batch[batch * batch_size]; + volatile unsnap_batch_s* b = &s->q.batch[batch * batch_size]; int32_t batch_len, blen_t, dist_t; if (t == 0) { - while ((batch_len = s->q.batch_len[batch]) == 0) { busy_wait(20); } + while ((batch_len = s->q.batch_len[batch]) == 0) { + busy_wait(20); + } } else { batch_len = 0; } @@ -529,7 +536,7 @@ __device__ void snappy_process_symbols(unsnap_state_s *s, int t, Storage &temp_s uint32_t tr = t - shuffle(bofs - blen_t, it); int32_t dist = shuffle(dist_t, it); if (it < n) { - const uint8_t *src = (dist > 0) ? (out + t - dist) : (literal_base + tr - dist); + const uint8_t* src = (dist > 0) ? (out + t - dist) : (literal_base + tr - dist); out[t] = *src; } out += shuffle(bofs, n - 1); @@ -556,7 +563,7 @@ __device__ void snappy_process_symbols(unsnap_state_s *s, int t, Storage &temp_s } blen += blen2; if (t < blen) { - const uint8_t *src = (dist > 0) ? (out - d) : (literal_base - d); + const uint8_t* src = (dist > 0) ? 
(out - d) : (literal_base - d); out[t] = src[t]; } out += blen; @@ -569,12 +576,12 @@ __device__ void snappy_process_symbols(unsnap_state_s *s, int t, Storage &temp_s uint8_t b0, b1; if (t < blen) { uint32_t pos = t; - const uint8_t *src = out + ((pos >= dist) ? (pos % dist) : pos) - dist; + const uint8_t* src = out + ((pos >= dist) ? (pos % dist) : pos) - dist; b0 = *src; } if (32 + t < blen) { uint32_t pos = 32 + t; - const uint8_t *src = out + ((pos >= dist) ? (pos % dist) : pos) - dist; + const uint8_t* src = out + ((pos >= dist) ? (pos % dist) : pos) - dist; b1 = *src; } if (t < blen) { out[t] = b0; } @@ -616,24 +623,23 @@ __device__ void snappy_process_symbols(unsnap_state_s *s, int t, Storage &temp_s */ template __global__ void __launch_bounds__(block_size) - unsnap_kernel(gpu_inflate_input_s *inputs, gpu_inflate_status_s *outputs) + unsnap_kernel(gpu_inflate_input_s* inputs, gpu_inflate_status_s* outputs) { __shared__ __align__(16) unsnap_state_s state_g; __shared__ cub::WarpReduce::TempStorage temp_storage; int t = threadIdx.x; - unsnap_state_s *s = &state_g; + unsnap_state_s* s = &state_g; int strm_id = blockIdx.x; if (t < sizeof(gpu_inflate_input_s) / sizeof(uint32_t)) { - reinterpret_cast(&s->in)[t] = - reinterpret_cast(&inputs[strm_id])[t]; + reinterpret_cast(&s->in)[t] = reinterpret_cast(&inputs[strm_id])[t]; __threadfence_block(); } if (t < batch_count) { s->q.batch_len[t] = 0; } __syncthreads(); if (!t) { - const uint8_t *cur = static_cast(s->in.srcDevice); - const uint8_t *end = cur + s->in.srcSize; + const uint8_t* cur = static_cast(s->in.srcDevice); + const uint8_t* end = cur + s->in.srcSize; s->error = 0; if (log_cyclecount) { s->tstart = clock(); } if (cur < end) { @@ -700,8 +706,8 @@ __global__ void __launch_bounds__(block_size) } } -cudaError_t __host__ gpu_unsnap(gpu_inflate_input_s *inputs, - gpu_inflate_status_s *outputs, +cudaError_t __host__ gpu_unsnap(gpu_inflate_input_s* inputs, + gpu_inflate_status_s* outputs, int count, rmm::cuda_stream_view stream) { diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index a3da5383196..68ac67b900d 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -269,7 +269,7 @@ __global__ void __launch_bounds__(csvparse_block_dim) auto const is_negative = (*trimmed_field_range.first == '-'); auto const data_begin = trimmed_field_range.first + (is_negative || (*trimmed_field_range.first == '+')); - cudf::size_type *ptr = cudf::io::gpu::infer_integral_field_counter( + cudf::size_type* ptr = cudf::io::gpu::infer_integral_field_counter( data_begin, data_begin + count_number, is_negative, d_column_data[actual_col]); atomicAdd(ptr, 1); } else if (is_floatingpoint(trimmed_field_len, @@ -292,33 +292,33 @@ __global__ void __launch_bounds__(csvparse_block_dim) } template -__inline__ __device__ T decode_value(char const *begin, - char const *end, - parse_options_view const &opts) +__inline__ __device__ T decode_value(char const* begin, + char const* end, + parse_options_view const& opts) { return cudf::io::parse_numeric(begin, end, opts); } template -__inline__ __device__ T decode_value(char const *begin, - char const *end, - parse_options_view const &opts) +__inline__ __device__ T decode_value(char const* begin, + char const* end, + parse_options_view const& opts) { return cudf::io::parse_numeric(begin, end, opts); } template <> -__inline__ __device__ cudf::timestamp_D decode_value(char const *begin, - char const *end, - parse_options_view const &opts) +__inline__ __device__ cudf::timestamp_D 
decode_value(char const* begin, + char const* end, + parse_options_view const& opts) { return timestamp_D{cudf::duration_D{to_date(begin, end, opts.dayfirst)}}; } template <> -__inline__ __device__ cudf::timestamp_s decode_value(char const *begin, - char const *end, - parse_options_view const &opts) +__inline__ __device__ cudf::timestamp_s decode_value(char const* begin, + char const* end, + parse_options_view const& opts) { auto milli = to_date_time(begin, end, opts.dayfirst); if (milli == -1) { @@ -329,9 +329,9 @@ __inline__ __device__ cudf::timestamp_s decode_value(char const *begin, } template <> -__inline__ __device__ cudf::timestamp_ms decode_value(char const *begin, - char const *end, - parse_options_view const &opts) +__inline__ __device__ cudf::timestamp_ms decode_value(char const* begin, + char const* end, + parse_options_view const& opts) { auto milli = to_date_time(begin, end, opts.dayfirst); if (milli == -1) { @@ -342,9 +342,9 @@ __inline__ __device__ cudf::timestamp_ms decode_value(char const *begin, } template <> -__inline__ __device__ cudf::timestamp_us decode_value(char const *begin, - char const *end, - parse_options_view const &opts) +__inline__ __device__ cudf::timestamp_us decode_value(char const* begin, + char const* end, + parse_options_view const& opts) { auto milli = to_date_time(begin, end, opts.dayfirst); if (milli == -1) { @@ -355,9 +355,9 @@ __inline__ __device__ cudf::timestamp_us decode_value(char const *begin, } template <> -__inline__ __device__ cudf::timestamp_ns decode_value(char const *begin, - char const *end, - parse_options_view const &opts) +__inline__ __device__ cudf::timestamp_ns decode_value(char const* begin, + char const* end, + parse_options_view const& opts) { auto milli = to_date_time(begin, end, opts.dayfirst); if (milli == -1) { @@ -371,7 +371,7 @@ __inline__ __device__ cudf::timestamp_ns decode_value(char const *begin, #define DURATION_DECODE_VALUE(Type) \ template <> \ __inline__ __device__ Type decode_value( \ - const char *begin, const char *end, parse_options_view const &opts) \ + const char* begin, const char* end, parse_options_view const& opts) \ { \ return Type{to_time_delta(begin, end)}; \ } @@ -385,18 +385,18 @@ DURATION_DECODE_VALUE(duration_ns) // The purpose of this is merely to allow compilation ONLY // TODO : make this work for csv template <> -__inline__ __device__ cudf::string_view decode_value(char const *begin, - char const *end, - parse_options_view const &opts) +__inline__ __device__ cudf::string_view decode_value(char const* begin, + char const* end, + parse_options_view const& opts) { return cudf::string_view{}; } // The purpose of this is merely to allow compilation ONLY template <> -__inline__ __device__ cudf::dictionary32 decode_value(char const *begin, - char const *end, - parse_options_view const &opts) +__inline__ __device__ cudf::dictionary32 decode_value(char const* begin, + char const* end, + parse_options_view const& opts) { return cudf::dictionary32{}; } @@ -404,9 +404,9 @@ __inline__ __device__ cudf::dictionary32 decode_value(char const *begin, // The purpose of this is merely to allow compilation ONLY // TODO : make this work for csv template <> -__inline__ __device__ cudf::list_view decode_value(char const *begin, - char const *end, - parse_options_view const &opts) +__inline__ __device__ cudf::list_view decode_value(char const* begin, + char const* end, + parse_options_view const& opts) { return cudf::list_view{}; } @@ -414,9 +414,9 @@ __inline__ __device__ cudf::list_view decode_value(char const 
*begin, // The purpose of this is merely to allow compilation ONLY // TODO : make this work for csv template <> -__inline__ __device__ cudf::struct_view decode_value(char const *begin, - char const *end, - parse_options_view const &opts) +__inline__ __device__ cudf::struct_view decode_value(char const* begin, + char const* end, + parse_options_view const& opts) { return cudf::struct_view{}; } @@ -434,16 +434,16 @@ struct decode_op { */ template and !std::is_same_v and - !cudf::is_fixed_point()> * = nullptr> - __host__ __device__ __forceinline__ bool operator()(void *out_buffer, + !cudf::is_fixed_point()>* = nullptr> + __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, - char const *begin, - char const *end, - parse_options_view const &opts, + char const* begin, + char const* end, + parse_options_view const& opts, column_parse::flags flags) { - static_cast(out_buffer)[row] = [&flags, &opts, begin, end]() -> T { + static_cast(out_buffer)[row] = [&flags, &opts, begin, end]() -> T { // Check for user-specified true/false values auto const field_len = static_cast(end - begin); if (serialized_trie_contains(opts.trie_true, {begin, field_len})) { return 1; } @@ -460,16 +460,16 @@ struct decode_op { * * @return bool Whether the parsed value is valid. */ - template ()> * = nullptr> - __host__ __device__ __forceinline__ bool operator()(void *out_buffer, + template ()>* = nullptr> + __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type output_type, - char const *begin, - char const *end, - parse_options_view const &opts, + char const* begin, + char const* end, + parse_options_view const& opts, column_parse::flags flags) { - static_cast *>(out_buffer)[row] = + static_cast*>(out_buffer)[row] = [&flags, &opts, output_type, begin, end]() -> device_storage_type_t { return strings::detail::parse_decimal>( begin, end, output_type.scale()); @@ -481,16 +481,16 @@ struct decode_op { /** * @brief Dispatch for boolean type types. */ - template > * = nullptr> - __host__ __device__ __forceinline__ bool operator()(void *out_buffer, + template >* = nullptr> + __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, - char const *begin, - char const *end, - parse_options_view const &opts, + char const* begin, + char const* end, + parse_options_view const& opts, column_parse::flags flags) { - static_cast(out_buffer)[row] = [&opts, begin, end]() { + static_cast(out_buffer)[row] = [&opts, begin, end]() { // Check for user-specified true/false values auto const field_len = static_cast(end - begin); if (serialized_trie_contains(opts.trie_true, {begin, field_len})) { return true; } @@ -505,17 +505,17 @@ struct decode_op { * @brief Dispatch for floating points, which are set to NaN if the input * is not valid. In such case, the validity mask is set to zero too. 
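 *
 * A hedged sketch of the pattern (not the cudf API): decode_op selects an
 * overload per type family via enable_if, and the boolean it returns becomes
 * the row's validity bit, e.g.:
 * @code
 * template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
 * bool store(void* buf, size_t row, T v) { static_cast<T*>(buf)[row] = v; return !std::isnan(v); }
 * template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
 * bool store(void* buf, size_t row, T v) { static_cast<T*>(buf)[row] = v; return true; }
 * @endcode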
*/ - template > * = nullptr> - __host__ __device__ __forceinline__ bool operator()(void *out_buffer, + template >* = nullptr> + __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, - char const *begin, - char const *end, - parse_options_view const &opts, + char const* begin, + char const* end, + parse_options_view const& opts, column_parse::flags flags) { - T const value = decode_value(begin, end, opts); - static_cast(out_buffer)[row] = value; + T const value = decode_value(begin, end, opts); + static_cast(out_buffer)[row] = value; return !std::isnan(value); } @@ -525,16 +525,16 @@ struct decode_op { */ template and !std::is_floating_point_v and - !cudf::is_fixed_point()> * = nullptr> - __host__ __device__ __forceinline__ bool operator()(void *out_buffer, + !cudf::is_fixed_point()>* = nullptr> + __host__ __device__ __forceinline__ bool operator()(void* out_buffer, size_t row, const data_type, - char const *begin, - char const *end, - parse_options_view const &opts, + char const* begin, + char const* end, + parse_options_view const& opts, column_parse::flags flags) { - static_cast(out_buffer)[row] = decode_value(begin, end, opts); + static_cast(out_buffer)[row] = decode_value(begin, end, opts); return true; } @@ -559,8 +559,8 @@ __global__ void __launch_bounds__(csvparse_block_dim) device_span column_flags, device_span row_offsets, device_span dtypes, - device_span columns, - device_span valids) + device_span columns, + device_span valids) { auto const raw_csv = data.data(); // thread IDs range per block, so also need the block id. @@ -605,7 +605,7 @@ __global__ void __launch_bounds__(csvparse_block_dim) --end; } } - auto str_list = static_cast *>(columns[actual_col]); + auto str_list = static_cast*>(columns[actual_col]); str_list[rec_id].first = field_start; str_list[rec_id].second = end - field_start; } else { @@ -623,7 +623,7 @@ __global__ void __launch_bounds__(csvparse_block_dim) } } } else if (dtypes[actual_col].id() == cudf::type_id::STRING) { - auto str_list = static_cast *>(columns[actual_col]); + auto str_list = static_cast*>(columns[actual_col]); str_list[rec_id].first = nullptr; str_list[rec_id].second = 0; } @@ -680,7 +680,7 @@ constexpr __device__ uint32_t make_char_context(uint32_t id0, * The char_ctx value should be created via make_char_context, and its value should * have been evaluated at compile-time. */ -inline __device__ void merge_char_context(uint4 &ctx, uint32_t char_ctx, uint32_t pos) +inline __device__ void merge_char_context(uint4& ctx, uint32_t char_ctx, uint32_t pos) { uint32_t id0 = (ctx.w >> 0) & 3; uint32_t id1 = (ctx.w >> 2) & 3; @@ -709,9 +709,10 @@ inline __device__ packed_rowctx_t pack_rowmaps(uint4 ctx_map) */ inline __device__ uint32_t select_rowmap(uint4 ctx_map, uint32_t ctxid) { - return (ctxid == ROW_CTX_NONE) - ? ctx_map.x - : (ctxid == ROW_CTX_QUOTE) ? ctx_map.y : (ctxid == ROW_CTX_COMMENT) ? ctx_map.z : 0; + return (ctxid == ROW_CTX_NONE) ? ctx_map.x + : (ctxid == ROW_CTX_QUOTE) ? ctx_map.y + : (ctxid == ROW_CTX_COMMENT) ? 
ctx_map.z + : 0; } /** @@ -731,7 +732,7 @@ inline __device__ uint32_t select_rowmap(uint4 ctx_map, uint32_t ctxid) * @param t thread id (leaf node id) */ template -inline __device__ void ctx_merge(uint64_t *ctxtree, packed_rowctx_t *ctxb, uint32_t t) +inline __device__ void ctx_merge(uint64_t* ctxtree, packed_rowctx_t* ctxb, uint32_t t) { uint64_t tmp = shuffle_xor(*ctxb, lanemask); if (!(t & tmask)) { @@ -754,7 +755,7 @@ inline __device__ void ctx_merge(uint64_t *ctxtree, packed_rowctx_t *ctxb, uint3 */ template inline __device__ void ctx_unmerge( - uint32_t base, uint64_t *ctxtree, uint32_t *ctx, uint32_t *brow4, uint32_t t) + uint32_t base, uint64_t* ctxtree, uint32_t* ctx, uint32_t* brow4, uint32_t t) { rowctx32_t ctxb_left, ctxb_right, ctxb_sum; ctxb_sum = get_row_context(ctxtree[base], *ctx); @@ -869,7 +870,7 @@ static inline __device__ rowctx32_t rowctx_inverse_merge_transform(uint64_t ctxt * @param commentchar Comment line character (skip rows starting with this character) */ __global__ void __launch_bounds__(rowofs_block_dim) - gather_row_offsets_gpu(uint64_t *row_ctx, + gather_row_offsets_gpu(uint64_t* row_ctx, device_span offsets_out, device_span const data, size_t chunk_size, @@ -892,11 +893,11 @@ __global__ void __launch_bounds__(rowofs_block_dim) __align__(8) uint64_t ctxtree[rowofs_block_dim * 2]; } temp_storage; - const char *end = start + (min(parse_pos + chunk_size, data_size) - start_offset); + const char* end = start + (min(parse_pos + chunk_size, data_size) - start_offset); uint32_t t = threadIdx.x; size_t block_pos = (parse_pos - start_offset) + blockIdx.x * static_cast(rowofs_block_bytes) + t * 32; - const char *cur = start + block_pos; + const char* cur = start + block_pos; // Initial state is neutral context (no state transitions), zero rows uint4 ctx_map = { @@ -934,7 +935,7 @@ __global__ void __launch_bounds__(rowofs_block_dim) ctx = make_char_context(ROW_CTX_NONE, ROW_CTX_QUOTE); } } else { - const char *data_end = start + data_size - start_offset; + const char* data_end = start + data_size - start_offset; if (cur <= end && cur == data_end) { // Add a newline at data end (need the extra row offset to infer length of previous row) ctx = make_char_context(ROW_CTX_EOF, ROW_CTX_EOF, ROW_CTX_EOF, 1, 1, 1); @@ -993,7 +994,7 @@ __global__ void __launch_bounds__(rowofs_block_dim) } } -size_t __host__ count_blank_rows(const cudf::io::parse_options_view &opts, +size_t __host__ count_blank_rows(const cudf::io::parse_options_view& opts, device_span data, device_span row_offsets, rmm::cuda_stream_view stream) @@ -1011,7 +1012,7 @@ size_t __host__ count_blank_rows(const cudf::io::parse_options_view &opts, }); } -device_span __host__ remove_blank_rows(cudf::io::parse_options_view const &options, +device_span __host__ remove_blank_rows(cudf::io::parse_options_view const& options, device_span data, device_span row_offsets, rmm::cuda_stream_view stream) @@ -1032,7 +1033,7 @@ device_span __host__ remove_blank_rows(cudf::io::parse_options_view co } std::vector detect_column_types( - cudf::io::parse_options_view const &options, + cudf::io::parse_options_view const& options, device_span const data, device_span const column_flags, device_span const row_starts, @@ -1052,13 +1053,13 @@ std::vector detect_column_types( return detail::make_std_vector_sync(d_stats, stream); } -void __host__ decode_row_column_data(cudf::io::parse_options_view const &options, +void __host__ decode_row_column_data(cudf::io::parse_options_view const& options, device_span data, device_span column_flags, 
device_span row_offsets, device_span dtypes, - device_span columns, - device_span valids, + device_span columns, + device_span valids, rmm::cuda_stream_view stream) { // Calculate actual block count to use based on records count @@ -1070,8 +1071,8 @@ void __host__ decode_row_column_data(cudf::io::parse_options_view const &options options, data, column_flags, row_offsets, dtypes, columns, valids); } -uint32_t __host__ gather_row_offsets(const parse_options_view &options, - uint64_t *row_ctx, +uint32_t __host__ gather_row_offsets(const parse_options_view& options, + uint64_t* row_ctx, device_span const offsets_out, device_span const data, size_t chunk_size, diff --git a/cpp/src/io/csv/csv_gpu.h b/cpp/src/io/csv/csv_gpu.h index 838abe66b94..9b83028fa92 100644 --- a/cpp/src/io/csv/csv_gpu.h +++ b/cpp/src/io/csv/csv_gpu.h @@ -149,8 +149,8 @@ inline __host__ __device__ rowctx64_t select_row_context(rowctx64_t sel_ctx, * * @return Number of row contexts */ -uint32_t gather_row_offsets(cudf::io::parse_options_view const &options, - uint64_t *row_ctx, +uint32_t gather_row_offsets(cudf::io::parse_options_view const& options, + uint64_t* row_ctx, device_span offsets_out, device_span data, size_t chunk_size, @@ -170,7 +170,7 @@ uint32_t gather_row_offsets(cudf::io::parse_options_view const &options, * @param row_offsets Row offsets in the character data buffer * @param stream CUDA stream used for device memory operations and kernel launches. */ -size_t count_blank_rows(cudf::io::parse_options_view const &options, +size_t count_blank_rows(cudf::io::parse_options_view const& options, device_span data, device_span row_offsets, rmm::cuda_stream_view stream); @@ -183,7 +183,7 @@ size_t count_blank_rows(cudf::io::parse_options_view const &options, * @param row_offsets Row offsets in the character data buffer * @param stream CUDA stream used for device memory operations and kernel launches. 
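count_blank_rows and remove_blank_rows, declared with the reformatted signatures nearby, both inspect the character at each row offset to decide whether a row carries data. A rough host-only analogue of the removal step over plain std containers (illustrative names, not cudf's API):

#include <algorithm>
#include <cstdint>
#include <string>
#include <vector>

// A row is treated as blank when the character at its offset is the line
// terminator, i.e. the row carries no field data.
static bool is_blank_row(std::string const& data, uint64_t offset, char terminator)
{
  return offset < data.size() && data[offset] == terminator;
}

// Compact the offset array in place, keeping only non-blank rows.
static void remove_blank_rows_host(std::string const& data,
                                   std::vector<uint64_t>& row_offsets,
                                   char terminator = '\n')
{
  auto const new_end =
    std::remove_if(row_offsets.begin(), row_offsets.end(), [&](uint64_t offset) {
      return is_blank_row(data, offset, terminator);
    });
  row_offsets.erase(new_end, row_offsets.end());
}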
*/ -device_span remove_blank_rows(const cudf::io::parse_options_view &options, +device_span remove_blank_rows(const cudf::io::parse_options_view& options, device_span data, device_span row_offsets, rmm::cuda_stream_view stream); @@ -200,7 +200,7 @@ device_span remove_blank_rows(const cudf::io::parse_options_view &opti * @return stats Histogram of each dtypes' occurrence for each column */ std::vector detect_column_types( - cudf::io::parse_options_view const &options, + cudf::io::parse_options_view const& options, device_span data, device_span column_flags, device_span row_offsets, @@ -219,13 +219,13 @@ std::vector detect_column_types( * @param[out] valids Device memory output of column valids bitmap data * @param[in] stream CUDA stream to use, default 0 */ -void decode_row_column_data(cudf::io::parse_options_view const &options, +void decode_row_column_data(cudf::io::parse_options_view const& options, device_span data, device_span column_flags, device_span row_offsets, device_span dtypes, - device_span columns, - device_span valids, + device_span columns, + device_span valids, rmm::cuda_stream_view stream); } // namespace gpu diff --git a/cpp/src/io/csv/datetime.cuh b/cpp/src/io/csv/datetime.cuh index 4e4ddd09a9f..7160041ff4e 100644 --- a/cpp/src/io/csv/datetime.cuh +++ b/cpp/src/io/csv/datetime.cuh @@ -232,7 +232,9 @@ __inline__ __device__ void extract_time( if (*last == 'M' || *last == 'm') { if (*(last - 1) == 'P' || *(last - 1) == 'p') { hour_adjust = 12; } last = last - 2; - while (*last == ' ') { --last; } + while (*last == ' ') { + --last; + } } end = last + 1; diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index cae930b8197..70ce0fce1cc 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -58,7 +58,7 @@ struct VisitorOverload : Ts... { using Ts::operator()...; }; template -VisitorOverload(Ts...)->VisitorOverload; +VisitorOverload(Ts...) -> VisitorOverload; } // namespace namespace cudf { @@ -103,7 +103,7 @@ constexpr size_t calculateMaxRowSize(int num_columns = 0) noexcept * * @return Tuple of data_type and flags */ -std::tuple get_dtype_info(const std::string &dtype) +std::tuple get_dtype_info(const std::string& dtype) { if (dtype == "hex" || dtype == "hex64") { return std::make_tuple(data_type{cudf::type_id::INT64}, column_parse::as_hexadecimal); @@ -133,8 +133,8 @@ string removeQuotes(string str, char quotechar) * @brief Parse the first row to set the column names in the raw_csv parameter. 
* The first row can be either the header row, or the first data row */ -std::vector setColumnNames(std::vector const &header, - parse_options_view const &opts, +std::vector setColumnNames(std::vector const& header, + parse_options_view const& opts, int header_row, std::string prefix) { @@ -197,7 +197,7 @@ std::vector setColumnNames(std::vector const &header, } template -void erase_except_last(C &container, rmm::cuda_stream_view stream) +void erase_except_last(C& container, rmm::cuda_stream_view stream) { cudf::detail::device_single_thread( [span = device_span{container}] __device__() mutable { @@ -223,7 +223,7 @@ reader::impl::select_data_and_row_offsets(rmm::cuda_stream_view stream) size_t map_range_size = 0; if (range_size != 0) { auto num_given_dtypes = - std::visit([](const auto &dtypes) { return dtypes.size(); }, opts_.get_dtypes()); + std::visit([](const auto& dtypes) { return dtypes.size(); }, opts_.get_dtypes()); const auto num_columns = std::max(opts_.get_names().size(), num_given_dtypes); map_range_size = range_size + calculateMaxRowSize(num_columns); } @@ -241,7 +241,7 @@ reader::impl::select_data_and_row_offsets(rmm::cuda_stream_view stream) auto buffer = source_->host_read(range_offset, data_size); auto h_data = host_span( // - reinterpret_cast(buffer->data()), + reinterpret_cast(buffer->data()), buffer->size()); std::vector h_uncomp_data_owner; @@ -270,7 +270,7 @@ reader::impl::select_data_and_row_offsets(rmm::cuda_stream_view stream) num_rows, load_whole_file, stream); - auto &row_offsets = data_row_offsets.second; + auto& row_offsets = data_row_offsets.second; // Exclude the rows that are to be skipped from the end if (skip_end_rows > 0 && static_cast(skip_end_rows) < row_offsets.size()) { row_offsets.shrink(row_offsets.size() - skip_end_rows); @@ -283,8 +283,8 @@ reader::impl::select_data_and_row_offsets(rmm::cuda_stream_view stream) table_with_metadata reader::impl::read(rmm::cuda_stream_view stream) { auto const data_row_offsets = select_data_and_row_offsets(stream); - auto const &data = data_row_offsets.first; - auto const &row_offsets = data_row_offsets.second; + auto const& data = data_row_offsets.first; + auto const& row_offsets = data_row_offsets.second; // Exclude the end-of-data row from number of rows with actual data num_records_ = std::max(row_offsets.size(), 1ul) - 1; @@ -309,7 +309,7 @@ table_with_metadata reader::impl::read(rmm::cuda_stream_view stream) // Looking for duplicates std::unordered_map col_names_histogram; - for (auto &col_name : col_names_) { + for (auto& col_name : col_names_) { // Operator [] inserts a default-initialized value if the given key is not // present if (++col_names_histogram[col_name] > 1) { @@ -343,7 +343,7 @@ table_with_metadata reader::impl::read(rmm::cuda_stream_view stream) opts_.get_use_cols_indexes().end()) .size(); - for (const auto &name : opts_.get_use_cols_names()) { + for (const auto& name : opts_.get_use_cols_names()) { const auto it = std::find(col_names_.begin(), col_names_.end(), name); if (it != col_names_.end()) { auto curr_it = it - col_names_.begin(); @@ -361,7 +361,7 @@ table_with_metadata reader::impl::read(rmm::cuda_stream_view stream) column_flags_[index] |= column_parse::as_datetime; } - for (const auto &name : opts_.get_infer_date_names()) { + for (const auto& name : opts_.get_infer_date_names()) { auto it = std::find(col_names_.begin(), col_names_.end(), name); if (it != col_names_.end()) { column_flags_[it - col_names_.begin()] |= column_parse::as_datetime; @@ -376,7 +376,7 @@ table_with_metadata 
reader::impl::read(rmm::cuda_stream_view stream) auto out_columns = std::vector>(); bool has_to_infer_column_types = - std::visit([](const auto &dtypes) { return dtypes.empty(); }, opts_.get_dtypes()); + std::visit([](const auto& dtypes) { return dtypes.empty(); }, opts_.get_dtypes()); std::vector column_types; if (has_to_infer_column_types) { @@ -384,8 +384,8 @@ table_with_metadata reader::impl::read(rmm::cuda_stream_view stream) } else { column_types = std::visit(VisitorOverload{ - [&](const std::vector &data_types) { return data_types; }, - [&](const std::vector &dtypes) { return parse_column_types(dtypes); }}, + [&](const std::vector& data_types) { return data_types; }, + [&](const std::vector& dtypes) { return parse_column_types(dtypes); }}, opts_.get_dtypes()); } @@ -430,7 +430,9 @@ size_t reader::impl::find_first_row_start(host_span data) // For now, look for the first terminator (assume the first terminator isn't within a quote) // TODO: Attempt to infer this from the data size_t pos = 0; - while (pos < data.size() && data[pos] != opts.terminator) { ++pos; } + while (pos < data.size() && data[pos] != opts.terminator) { + ++pos; + } return std::min(pos + 1, data.size()); } @@ -537,7 +539,9 @@ reader::impl::load_data_and_gather_row_offsets(host_span data, stream.synchronize(); size_t rows_out_of_range = 0; - for (uint32_t i = 0; i < num_blocks; i++) { rows_out_of_range += row_ctx[i]; } + for (uint32_t i = 0; i < num_blocks; i++) { + rows_out_of_range += row_ctx[i]; + } if (rows_out_of_range != 0) { // Keep one row out of range (used to infer length of previous row) auto new_row_offsets_size = @@ -649,7 +653,7 @@ std::vector reader::impl::infer_column_types(device_span } if (opts_.get_timestamp_type().id() != cudf::type_id::EMPTY) { - for (auto &type : dtypes) { + for (auto& type : dtypes) { if (cudf::is_timestamp(type)) { type = opts_.get_timestamp_type(); } } } @@ -663,13 +667,13 @@ std::vector reader::impl::infer_column_types(device_span } std::vector reader::impl::parse_column_types( - const std::vector &types_as_strings) + const std::vector& types_as_strings) { std::vector dtypes; const bool is_dict = std::all_of(types_as_strings.begin(), types_as_strings.end(), - [](const auto &s) { return s.find(':') != std::string::npos; }); + [](const auto& s) { return s.find(':') != std::string::npos; }); if (!is_dict) { if (types_as_strings.size() == 1) { @@ -678,7 +682,9 @@ std::vector reader::impl::parse_column_types( column_parse::flags col_flags_; std::tie(dtype_, col_flags_) = get_dtype_info(types_as_strings[0]); dtypes.resize(num_active_cols_, dtype_); - for (int col = 0; col < num_actual_cols_; col++) { column_flags_[col] |= col_flags_; } + for (int col = 0; col < num_actual_cols_; col++) { + column_flags_[col] |= col_flags_; + } CUDF_EXPECTS(dtypes.back().id() != cudf::type_id::EMPTY, "Unsupported data type"); } else { // If it's a list, assign dtypes to active columns in the given order @@ -700,7 +706,7 @@ std::vector reader::impl::parse_column_types( // Translate vector of `name : dtype` strings to map // NOTE: Incoming pairs can be out-of-order from column names in dataset std::unordered_map col_type_map; - for (const auto &pair : types_as_strings) { + for (const auto& pair : types_as_strings) { const auto pos = pair.find_last_of(':'); const auto name = pair.substr(0, pos); const auto dtype = pair.substr(pos + 1, pair.size()); @@ -722,7 +728,7 @@ std::vector reader::impl::parse_column_types( } if (opts_.get_timestamp_type().id() != cudf::type_id::EMPTY) { - for (auto &type : 
dtypes) { + for (auto& type : dtypes) { if (cudf::is_timestamp(type)) { type = opts_.get_timestamp_type(); } } } @@ -761,8 +767,8 @@ std::vector reader::impl::decode_data(device_span dat } } - thrust::host_vector h_data(num_active_cols_); - thrust::host_vector h_valid(num_active_cols_); + thrust::host_vector h_data(num_active_cols_); + thrust::host_vector h_valid(num_active_cols_); for (int i = 0; i < num_active_cols_; ++i) { h_data[i] = out_buffers[i].data(); @@ -785,7 +791,7 @@ std::vector reader::impl::decode_data(device_span dat * @brief Create a serialized trie for N/A value matching, based on the options. */ cudf::detail::trie create_na_trie(char quotechar, - csv_reader_options const &reader_opts, + csv_reader_options const& reader_opts, rmm::cuda_stream_view stream) { // Default values to recognize as null values @@ -823,7 +829,7 @@ cudf::detail::trie create_na_trie(char quotechar, return cudf::detail::create_serialized_trie(na_values, stream); } -parse_options make_parse_options(csv_reader_options const &reader_opts, +parse_options make_parse_options(csv_reader_options const& reader_opts, rmm::cuda_stream_view stream) { auto parse_opts = parse_options{}; @@ -881,9 +887,9 @@ parse_options make_parse_options(csv_reader_options const &reader_opts, reader::impl::impl(std::unique_ptr source, std::string filepath, - csv_reader_options const &options, + csv_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : mr_(mr), source_(std::move(source)), filepath_(filepath), opts_(options) { num_actual_cols_ = opts_.get_names().size(); @@ -898,10 +904,10 @@ reader::impl::impl(std::unique_ptr source, } // Forward to implementation -reader::reader(std::vector const &filepaths, - csv_reader_options const &options, +reader::reader(std::vector const& filepaths, + csv_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(filepaths.size() == 1, "Only a single source is currently supported."); // Delay actual instantiation of data source until read to allow for @@ -910,10 +916,10 @@ reader::reader(std::vector const &filepaths, } // Forward to implementation -reader::reader(std::vector> &&sources, - csv_reader_options const &options, +reader::reader(std::vector>&& sources, + csv_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(sources.size() == 1, "Only a single source is currently supported."); _impl = std::make_unique(std::move(sources[0]), "", options, stream, mr); diff --git a/cpp/src/io/csv/reader_impl.hpp b/cpp/src/io/csv/reader_impl.hpp index 17f27a28e30..29c6b48bc8a 100644 --- a/cpp/src/io/csv/reader_impl.hpp +++ b/cpp/src/io/csv/reader_impl.hpp @@ -79,9 +79,9 @@ class reader::impl { */ explicit impl(std::unique_ptr source, std::string filepath, - csv_reader_options const &options, + csv_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr); + rmm::mr::device_memory_resource* mr); /** * @brief Read an entire set or a subset of data and returns a set of columns. 
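parse_column_types, reformatted in the hunks above, accepts either a plain list of dtype strings or "name:dtype" pairs split on the last colon; it only takes the pair path after verifying every entry contains a colon. A condensed std-only sketch of that mapping (hypothetical helper names):

#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Split "name:dtype" on the LAST colon, as the reader does, so column names
// that themselves contain ':' still parse. Callers only take this path once
// every entry has been checked to contain a colon.
static std::pair<std::string, std::string> split_on_last_colon(std::string const& s)
{
  auto const pos = s.find_last_of(':');
  return {s.substr(0, pos), s.substr(pos + 1)};
}

static std::unordered_map<std::string, std::string> make_col_type_map(
  std::vector<std::string> const& types_as_strings)
{
  std::unordered_map<std::string, std::string> col_type_map;
  for (auto const& pair : types_as_strings) {
    auto [name, dtype]             = split_on_last_colon(pair);
    col_type_map[std::move(name)]  = std::move(dtype);
  }
  return col_type_map;
}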
@@ -104,7 +104,7 @@ class reader::impl { device_span selected; public: - selected_rows_offsets(rmm::device_uvector &&data, + selected_rows_offsets(rmm::device_uvector&& data, device_span selected_span) : all{std::move(data)}, selected{selected_span} { @@ -188,7 +188,7 @@ class reader::impl { * types * @return List of columns' data types */ - std::vector parse_column_types(std::vector const &types_as_strings); + std::vector parse_column_types(std::vector const& types_as_strings); /** * @brief Converts the row-column data and outputs to column bufferrs. @@ -204,7 +204,7 @@ class reader::impl { rmm::cuda_stream_view stream); private: - rmm::mr::device_memory_resource *mr_ = nullptr; + rmm::mr::device_memory_resource* mr_ = nullptr; std::unique_ptr source_; std::string filepath_; std::string compression_type_; diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 2bc7969d5e5..8451255bfda 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -57,8 +57,8 @@ namespace { * @param[in] begin Pointer to the first character in the row * @param[in] end pointer to the first character after the row */ -__device__ std::pair limit_range_to_brackets(char const *begin, - char const *end) +__device__ std::pair limit_range_to_brackets(char const* begin, + char const* end) { auto const data_begin = thrust::next(thrust::find_if( thrust::seq, begin, end, [] __device__(auto c) { return c == '[' || c == '{'; })); @@ -81,9 +81,9 @@ __device__ std::pair limit_range_to_brackets(char co * * @return Begin and end iterators of the key name; (`end`, `end`) if a key is not found */ -__device__ std::pair get_next_key(char const *begin, - char const *end, - char quotechar) +__device__ std::pair get_next_key(char const* begin, + char const* end, + char quotechar) { // Key starts after the first quote auto const key_begin = thrust::find(thrust::seq, begin, end, quotechar) + 1; @@ -109,9 +109,9 @@ __device__ std::pair get_next_key(char const *begin, * @return The parsed numeric value */ template -__inline__ __device__ T decode_value(const char *begin, +__inline__ __device__ T decode_value(const char* begin, uint64_t end, - parse_options_view const &opts) + parse_options_view const& opts) { return cudf::io::parse_numeric(begin, end, opts); } @@ -126,9 +126,9 @@ __inline__ __device__ T decode_value(const char *begin, * @return The parsed numeric value */ template -__inline__ __device__ T decode_value(const char *begin, - const char *end, - parse_options_view const &opts) +__inline__ __device__ T decode_value(const char* begin, + const char* end, + parse_options_view const& opts) { return cudf::io::parse_numeric(begin, end, opts); } @@ -143,9 +143,9 @@ __inline__ __device__ T decode_value(const char *begin, * @return The parsed timestamp_D */ template <> -__inline__ __device__ cudf::timestamp_D decode_value(const char *begin, - const char *end, - parse_options_view const &opts) +__inline__ __device__ cudf::timestamp_D decode_value(const char* begin, + const char* end, + parse_options_view const& opts) { return cudf::timestamp_D{cudf::duration_D{to_date(begin, end, opts.dayfirst)}}; } @@ -160,9 +160,9 @@ __inline__ __device__ cudf::timestamp_D decode_value(const char *begin, * @return The parsed timestamp_s */ template <> -__inline__ __device__ cudf::timestamp_s decode_value(const char *begin, - const char *end, - parse_options_view const &opts) +__inline__ __device__ cudf::timestamp_s decode_value(const char* begin, + const char* end, + parse_options_view const& opts) { auto milli = 
to_date_time(begin, end, opts.dayfirst); return cudf::timestamp_s{cudf::duration_s{milli / 1000}}; @@ -178,9 +178,9 @@ __inline__ __device__ cudf::timestamp_s decode_value(const char *begin, * @return The parsed timestamp_ms */ template <> -__inline__ __device__ cudf::timestamp_ms decode_value(const char *begin, - const char *end, - parse_options_view const &opts) +__inline__ __device__ cudf::timestamp_ms decode_value(const char* begin, + const char* end, + parse_options_view const& opts) { auto milli = to_date_time(begin, end, opts.dayfirst); return cudf::timestamp_ms{cudf::duration_ms{milli}}; @@ -196,9 +196,9 @@ __inline__ __device__ cudf::timestamp_ms decode_value(const char *begin, * @return The parsed timestamp_us */ template <> -__inline__ __device__ cudf::timestamp_us decode_value(const char *begin, - const char *end, - parse_options_view const &opts) +__inline__ __device__ cudf::timestamp_us decode_value(const char* begin, + const char* end, + parse_options_view const& opts) { auto milli = to_date_time(begin, end, opts.dayfirst); return cudf::timestamp_us{cudf::duration_us{milli * 1000}}; @@ -214,21 +214,21 @@ __inline__ __device__ cudf::timestamp_us decode_value(const char *begin, * @return The parsed timestamp_ns */ template <> -__inline__ __device__ cudf::timestamp_ns decode_value(const char *begin, - const char *end, - parse_options_view const &opts) +__inline__ __device__ cudf::timestamp_ns decode_value(const char* begin, + const char* end, + parse_options_view const& opts) { auto milli = to_date_time(begin, end, opts.dayfirst); return cudf::timestamp_ns{cudf::duration_ns{milli * 1000000}}; } #ifndef DURATION_DECODE_VALUE -#define DURATION_DECODE_VALUE(Type) \ - template <> \ - __inline__ __device__ Type decode_value( \ - const char *begin, const char *end, parse_options_view const &) \ - { \ - return Type{to_time_delta(begin, end)}; \ +#define DURATION_DECODE_VALUE(Type) \ + template <> \ + __inline__ __device__ Type decode_value( \ + const char* begin, const char* end, parse_options_view const&) \ + { \ + return Type{to_time_delta(begin, end)}; \ } #endif DURATION_DECODE_VALUE(duration_D) @@ -239,48 +239,48 @@ DURATION_DECODE_VALUE(duration_ns) // The purpose of these is merely to allow compilation ONLY template <> -__inline__ __device__ cudf::string_view decode_value(const char *, - const char *, - parse_options_view const &) +__inline__ __device__ cudf::string_view decode_value(const char*, + const char*, + parse_options_view const&) { return cudf::string_view{}; } template <> -__inline__ __device__ cudf::dictionary32 decode_value(const char *, - const char *, - parse_options_view const &) +__inline__ __device__ cudf::dictionary32 decode_value(const char*, + const char*, + parse_options_view const&) { return cudf::dictionary32{}; } template <> -__inline__ __device__ cudf::list_view decode_value(const char *, - const char *, - parse_options_view const &) +__inline__ __device__ cudf::list_view decode_value(const char*, + const char*, + parse_options_view const&) { return cudf::list_view{}; } template <> -__inline__ __device__ cudf::struct_view decode_value(const char *, - const char *, - parse_options_view const &) +__inline__ __device__ cudf::struct_view decode_value(const char*, + const char*, + parse_options_view const&) { return cudf::struct_view{}; } template <> -__inline__ __device__ numeric::decimal32 decode_value(const char *, - const char *, - parse_options_view const &) +__inline__ __device__ numeric::decimal32 decode_value(const char*, + const char*, + 
parse_options_view const&) { return numeric::decimal32{}; } template <> -__inline__ __device__ numeric::decimal64 decode_value(const char *, - const char *, - parse_options_view const &) +__inline__ __device__ numeric::decimal64 decode_value(const char*, + const char*, + parse_options_view const&) { return numeric::decimal64{}; } @@ -297,14 +297,14 @@ struct ConvertFunctor { * It is handled here rather than within convertStrToValue() as that function * is used by other types (ex. timestamp) that aren't 'booleable'. */ - template ::value> * = nullptr> - __host__ __device__ __forceinline__ bool operator()(char const *begin, - char const *end, - void *output_column, + template ::value>* = nullptr> + __host__ __device__ __forceinline__ bool operator()(char const* begin, + char const* end, + void* output_column, cudf::size_type row, - const parse_options_view &opts) + const parse_options_view& opts) { - T &value{static_cast(output_column)[row]}; + T& value{static_cast(output_column)[row]}; value = [&opts, end, begin]() -> T { // Check for user-specified true/false values @@ -321,15 +321,15 @@ struct ConvertFunctor { * @brief Dispatch for floating points, which are set to NaN if the input * is not valid. In such case, the validity mask is set to zero too. */ - template ::value> * = nullptr> - __host__ __device__ __forceinline__ bool operator()(char const *begin, - char const *end, - void *out_buffer, + template ::value>* = nullptr> + __host__ __device__ __forceinline__ bool operator()(char const* begin, + char const* end, + void* out_buffer, size_t row, - parse_options_view const &opts) + parse_options_view const& opts) { - T const value = decode_value(begin, end, opts); - static_cast(out_buffer)[row] = value; + T const value = decode_value(begin, end, opts); + static_cast(out_buffer)[row] = value; return !std::isnan(value); } @@ -340,14 +340,14 @@ struct ConvertFunctor { */ template ::value and - !std::is_integral::value> * = nullptr> - __host__ __device__ __forceinline__ bool operator()(char const *begin, - char const *end, - void *output_column, + !std::is_integral::value>* = nullptr> + __host__ __device__ __forceinline__ bool operator()(char const* begin, + char const* end, + void* output_column, cudf::size_type row, - const parse_options_view &opts) + const parse_options_view& opts) { - static_cast(output_column)[row] = decode_value(begin, end, opts); + static_cast(output_column)[row] = decode_value(begin, end, opts); return true; } @@ -405,8 +405,8 @@ __device__ __inline__ bool is_like_float( */ struct field_descriptor { cudf::size_type column; - char const *value_begin; - char const *value_end; + char const* value_begin; + char const* value_end; }; /** @@ -420,11 +420,11 @@ struct field_descriptor { * nullptr is passed when the input file does not consist of objects. * @return Descriptor of the parsed field */ -__device__ field_descriptor next_field_descriptor(const char *begin, - const char *end, - parse_options_view const &opts, +__device__ field_descriptor next_field_descriptor(const char* begin, + const char* end, + parse_options_view const& opts, cudf::size_type field_idx, - col_map_type *col_map) + col_map_type* col_map) { auto const desc_pre_trim = col_map == nullptr @@ -463,7 +463,7 @@ __device__ field_descriptor next_field_descriptor(const char *begin, * * @return The begin and end iterators of the row data. 
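ConvertFunctor, whose overloads are reformatted above, selects an operator() per type family via enable_if so bools, floating-point values, and the remaining types each get their own parsing rule. The same dispatch shape in isolation, host-only, with strtoll/strtod standing in for the real field parsers:

#include <cmath>
#include <cstdlib>
#include <type_traits>

// Two operator() overloads selected by enable_if, echoing ConvertFunctor's
// shape; parsing is delegated to strtoll/strtod purely for illustration.
struct convert_functor_sketch {
  template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
  bool operator()(char const* begin, T& out) const
  {
    out = static_cast<T>(std::strtoll(begin, nullptr, 10));
    return true;  // cudf additionally recognizes user-defined true/false tokens here
  }

  template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
  bool operator()(char const* begin, T& out) const
  {
    out = static_cast<T>(std::strtod(begin, nullptr));
    return !std::isnan(out);  // NaN doubles as the "invalid field" signal
  }
};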
*/ -__device__ std::pair get_row_data_range( +__device__ std::pair get_row_data_range( device_span const data, device_span const row_offsets, size_type row) { auto const row_begin = data.begin() + row_offsets[row]; @@ -491,9 +491,9 @@ __global__ void convert_data_to_columns_kernel(parse_options_view opts, device_span const data, device_span const row_offsets, device_span const column_types, - col_map_type *col_map, - device_span const output_columns, - device_span const valid_fields, + col_map_type* col_map, + device_span const output_columns, + device_span const valid_fields, device_span const num_valid_fields) { const auto rec_id = threadIdx.x + (blockDim.x * blockIdx.x); @@ -515,7 +515,7 @@ __global__ void convert_data_to_columns_kernel(parse_options_view opts, if (!serialized_trie_contains(opts.trie_na, {desc.value_begin, value_len})) { // Type dispatcher does not handle strings if (column_types[desc.column].id() == type_id::STRING) { - auto str_list = static_cast(output_columns[desc.column]); + auto str_list = static_cast(output_columns[desc.column]); str_list[rec_id].first = desc.value_begin; str_list[rec_id].second = value_len; @@ -536,7 +536,7 @@ __global__ void convert_data_to_columns_kernel(parse_options_view opts, } } } else if (column_types[desc.column].id() == type_id::STRING) { - auto str_list = static_cast(output_columns[desc.column]); + auto str_list = static_cast(output_columns[desc.column]); str_list[rec_id].first = nullptr; str_list[rec_id].second = 0; } @@ -562,7 +562,7 @@ __global__ void detect_data_types_kernel( parse_options_view const opts, device_span const data, device_span const row_offsets, - col_map_type *col_map, + col_map_type* col_map, int num_columns, device_span const column_infos) { @@ -645,8 +645,8 @@ __global__ void detect_data_types_kernel( atomicAdd(&column_infos[desc.column].bool_count, 1); } else if (digit_count == int_req_number_cnt) { bool is_negative = (*desc.value_begin == '-'); - char const *data_begin = desc.value_begin + (is_negative || (*desc.value_begin == '+')); - cudf::size_type *ptr = cudf::io::gpu::infer_integral_field_counter( + char const* data_begin = desc.value_begin + (is_negative || (*desc.value_begin == '+')); + cudf::size_type* ptr = cudf::io::gpu::infer_integral_field_counter( data_begin, data_begin + digit_count, is_negative, column_infos[desc.column]); atomicAdd(ptr, 1); } else if (is_like_float( @@ -685,18 +685,18 @@ __global__ void detect_data_types_kernel( * @brief Input data range that contains a field in key:value format. */ struct key_value_range { - char const *key_begin; - char const *key_end; - char const *value_begin; - char const *value_end; + char const* key_begin; + char const* key_end; + char const* value_begin; + char const* value_end; }; /** * @brief Parse the next field in key:value format and return ranges of its parts. 
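get_row_data_range, at the top of this hunk, narrows each JSON record to the span inside its enclosing brackets before fields are parsed. A host rendering of that trim in the spirit of limit_range_to_brackets (a sketch, not the device code):

#include <algorithm>
#include <utility>

// Narrow [begin, end) to the payload inside the outermost {...} or [...].
static std::pair<char const*, char const*> limit_to_brackets(char const* begin, char const* end)
{
  auto const is_open  = [](char c) { return c == '[' || c == '{'; };
  auto const is_close = [](char c) { return c == ']' || c == '}'; };

  char const* b = std::find_if(begin, end, is_open);
  if (b != end) { ++b; }  // step past the opening bracket

  char const* e = end;
  while (e > b && !is_close(*(e - 1))) { --e; }  // scan back to the closer
  if (e > b) { --e; }                            // exclude the closing bracket
  return {b, e};
}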
*/ -__device__ key_value_range get_next_key_value_range(char const *begin, - char const *end, - parse_options_view const &opts) +__device__ key_value_range get_next_key_value_range(char const* begin, + char const* end, + parse_options_view const& opts) { auto const key_range = get_next_key(begin, end, opts.quotechar); @@ -721,7 +721,7 @@ __device__ key_value_range get_next_key_value_range(char const *begin, __global__ void collect_keys_info_kernel(parse_options_view const options, device_span const data, device_span const row_offsets, - unsigned long long int *keys_cnt, + unsigned long long int* keys_cnt, thrust::optional keys_info) { auto const rec_id = threadIdx.x + (blockDim.x * blockIdx.x); @@ -729,7 +729,7 @@ __global__ void collect_keys_info_kernel(parse_options_view const options, auto const row_data_range = get_row_data_range(data, row_offsets, rec_id); - auto advance = [&](const char *begin) { + auto advance = [&](const char* begin) { return get_next_key_value_range(begin, row_data_range.second, options); }; for (auto field_range = advance(row_data_range.first); @@ -751,13 +751,13 @@ __global__ void collect_keys_info_kernel(parse_options_view const options, /** * @copydoc cudf::io::json::gpu::convert_json_to_columns */ -void convert_json_to_columns(parse_options_view const &opts, +void convert_json_to_columns(parse_options_view const& opts, device_span const data, device_span const row_offsets, device_span const column_types, - col_map_type *col_map, - device_span const output_columns, - device_span const valid_fields, + col_map_type* col_map, + device_span const output_columns, + device_span const valid_fields, device_span num_valid_fields, rmm::cuda_stream_view stream) { @@ -779,12 +779,12 @@ void convert_json_to_columns(parse_options_view const &opts, */ std::vector detect_data_types( - const parse_options_view &options, + const parse_options_view& options, device_span const data, device_span const row_offsets, bool do_set_null_count, int num_columns, - col_map_type *col_map, + col_map_type* col_map, rmm::cuda_stream_view stream) { int block_size; @@ -822,10 +822,10 @@ std::vector detect_data_types( /** * @copydoc cudf::io::json::gpu::gpu_collect_keys_info */ -void collect_keys_info(parse_options_view const &options, +void collect_keys_info(parse_options_view const& options, device_span const data, device_span const row_offsets, - unsigned long long int *keys_cnt, + unsigned long long int* keys_cnt, thrust::optional keys_info, rmm::cuda_stream_view stream) { diff --git a/cpp/src/io/json/json_gpu.h b/cpp/src/io/json/json_gpu.h index 4a68ce48f20..4010461da44 100644 --- a/cpp/src/io/json/json_gpu.h +++ b/cpp/src/io/json/json_gpu.h @@ -51,13 +51,13 @@ using col_map_type = concurrent_unordered_map; * @param[out] num_valid_fields The numbers of valid fields in columns * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
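collect_keys_info_kernel, above, walks each row with an advance lambda until the scanner reports no further key. The host-side shape of that loop, with a minimal quote-pair scanner standing in for get_next_key:

#include <algorithm>
#include <cstddef>

struct key_range {
  char const* key_begin;
  char const* key_end;  // points at the key's closing quote
};

// Minimal stand-in for get_next_key: a key is the text between the next pair
// of double quotes; {end, end} means no further key was found.
static key_range next_key(char const* begin, char const* end)
{
  char const* open = std::find(begin, end, '"');
  if (open == end) { return {end, end}; }
  char const* close = std::find(open + 1, end, '"');
  if (close == end) { return {end, end}; }
  return {open + 1, close};
}

// Count keys in one row, mirroring collect_keys_info_kernel's advance loop.
static std::size_t count_keys_in_row(char const* row_begin, char const* row_end)
{
  std::size_t count = 0;
  for (auto f = next_key(row_begin, row_end); f.key_begin != row_end;
       f = next_key(f.key_end + 1, row_end)) {
    ++count;
  }
  return count;
}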
*/ -void convert_json_to_columns(parse_options_view const &options, +void convert_json_to_columns(parse_options_view const& options, device_span data, device_span row_offsets, device_span column_types, - col_map_type *col_map, - device_span output_columns, - device_span valid_fields, + col_map_type* col_map, + device_span output_columns, + device_span valid_fields, device_span num_valid_fields, rmm::cuda_stream_view stream); @@ -75,12 +75,12 @@ void convert_json_to_columns(parse_options_view const &options, * @returns The count for each column data type */ std::vector detect_data_types( - parse_options_view const &options, + parse_options_view const& options, device_span data, device_span row_offsets, bool do_set_null_count, int num_columns, - col_map_type *col_map, + col_map_type* col_map, rmm::cuda_stream_view stream); /** @@ -93,10 +93,10 @@ std::vector detect_data_types( * @param[out] keys_info optional, information (offset, length, hash) for each found key * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ -void collect_keys_info(parse_options_view const &options, +void collect_keys_info(parse_options_view const& options, device_span data, device_span row_offsets, - unsigned long long int *keys_cnt, + unsigned long long int* keys_cnt, thrust::optional keys_info, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu index 4d5eee6cac7..087aad5759f 100644 --- a/cpp/src/io/json/reader_impl.cu +++ b/cpp/src/io/json/reader_impl.cu @@ -136,7 +136,7 @@ col_map_ptr_type create_col_names_hash_map(column_view column_name_hashes, * * @return std::unique_ptr
<table> cudf table with three columns (offsets, lengths, hashes) */ -std::unique_ptr<table>
create_json_keys_info_table(const parse_options_view &options, +std::unique_ptr<table>
create_json_keys_info_table(const parse_options_view& options, device_span const data, device_span const row_offsets, rmm::cuda_stream_view stream) @@ -167,7 +167,7 @@ std::unique_ptr<table>
create_json_keys_info_table(const parse_options_view &opt /** * @brief Extract the keys from the JSON file the name offsets/lengths. */ -std::vector create_key_strings(char const *h_data, +std::vector create_key_strings(char const* h_data, table_view sorted_info, rmm::cuda_stream_view stream) { @@ -213,7 +213,7 @@ std::pair, col_map_ptr_type> reader::impl::get_json_obj { auto info = create_json_keys_info_table( opts_.view(), - device_span(static_cast(data_.data()), data_.size()), + device_span(static_cast(data_.data()), data_.size()), rec_starts, stream); @@ -243,7 +243,7 @@ void reader::impl::ingest_raw_input(size_t range_offset, size_t range_size) // This allows only mapping of a subset of the file if using byte range if (sources_.empty()) { assert(!filepaths_.empty()); - for (const auto &path : filepaths_) { + for (const auto& path : filepaths_) { sources_.emplace_back(datasource::create(path, range_offset, map_range_size)); } } @@ -251,12 +251,14 @@ void reader::impl::ingest_raw_input(size_t range_offset, size_t range_size) // Iterate through the user defined sources and read the contents into the local buffer CUDF_EXPECTS(!sources_.empty(), "No sources were defined"); size_t total_source_size = 0; - for (const auto &source : sources_) { total_source_size += source->size(); } + for (const auto& source : sources_) { + total_source_size += source->size(); + } total_source_size = total_source_size - range_offset; buffer_.resize(total_source_size); size_t bytes_read = 0; - for (const auto &source : sources_) { + for (const auto& source : sources_) { if (!source->is_empty()) { auto data_size = (map_range_size != 0) ? map_range_size : source->size(); bytes_read += source->host_read(range_offset, data_size, &buffer_[bytes_read]); @@ -282,12 +284,12 @@ void reader::impl::decompress_input(rmm::cuda_stream_view stream) {{"gz", "gzip"}, {"zip", "zip"}, {"bz2", "bz2"}, {"xz", "xz"}}); if (compression_type == "none") { // Do not use the owner vector here to avoid extra copy - uncomp_data_ = reinterpret_cast(buffer_.data()); + uncomp_data_ = reinterpret_cast(buffer_.data()); uncomp_size_ = buffer_.size(); } else { uncomp_data_owner_ = get_uncompressed_data( // host_span( // - reinterpret_cast(buffer_.data()), + reinterpret_cast(buffer_.data()), buffer_.size()), compression_type); @@ -314,7 +316,7 @@ rmm::device_uvector reader::impl::find_record_starts(rmm::cuda_stream_ rmm::device_uvector rec_starts(prefilter_count, stream); - auto *find_result_ptr = rec_starts.data(); + auto* find_result_ptr = rec_starts.data(); // Manually adding an extra row to account for the first row in the file if (byte_range_offset_ == 0) { find_result_ptr++; @@ -372,7 +374,7 @@ rmm::device_uvector reader::impl::find_record_starts(rmm::cuda_stream_ * Only rows that need to be parsed are copied, based on the byte range * Also updates the array of record starts to match the device data offset. 
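upload_data_to_device, whose signature changes in the next hunk, copies only the byte range that still needs parsing and rebases record starts onto the device buffer. The offset arithmetic in isolation (illustrative; assumes absolute starts and a known window):

#include <cstdint>
#include <vector>

// Rebase absolute record starts to offsets within the uploaded window
// [start_offset, start_offset + window_size), dropping rows outside it.
static std::vector<uint64_t> rebase_record_starts(std::vector<uint64_t> const& rec_starts,
                                                  uint64_t start_offset,
                                                  uint64_t window_size)
{
  std::vector<uint64_t> rebased;
  for (uint64_t abs_start : rec_starts) {
    if (abs_start >= start_offset && abs_start < start_offset + window_size) {
      rebased.push_back(abs_start - start_offset);
    }
  }
  return rebased;
}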
*/ -void reader::impl::upload_data_to_device(rmm::device_uvector &rec_starts, +void reader::impl::upload_data_to_device(rmm::device_uvector& rec_starts, rmm::cuda_stream_view stream) { size_t start_offset = 0; @@ -472,7 +474,7 @@ void reader::impl::set_data_types(device_span rec_starts, // Assume that the dtype is in dictionary format only if all elements contain a colon const bool is_dict = - std::all_of(std::cbegin(dtype), std::cend(dtype), [](const std::string &s) { + std::all_of(std::cbegin(dtype), std::cend(dtype), [](const std::string& s) { return std::find(std::cbegin(s), std::cend(s), ':') != std::cend(s); }); @@ -487,7 +489,7 @@ void reader::impl::set_data_types(device_span rec_starts, std::cbegin(dtype), std::cend(dtype), std::inserter(col_type_map, col_type_map.end()), - [&](auto const &ts) { + [&](auto const& ts) { auto const [col_name, type_str] = split_on_colon(ts); return std::pair{std::string{col_name}, convert_string_to_dtype(std::string{type_str})}; }); @@ -496,12 +498,12 @@ void reader::impl::set_data_types(device_span rec_starts, std::transform(std::cbegin(metadata_.column_names), std::cend(metadata_.column_names), std::back_inserter(dtypes_), - [&](auto const &column_name) { return col_type_map[column_name]; }); + [&](auto const& column_name) { return col_type_map[column_name]; }); } else { std::transform(std::cbegin(dtype), std::cend(dtype), std::back_inserter(dtypes_), - [](auto const &col_dtype) { return convert_string_to_dtype(col_dtype); }); + [](auto const& col_dtype) { return convert_string_to_dtype(col_dtype); }); } } else { CUDF_EXPECTS(rec_starts.size() != 0, "No data available for data type inference.\n"); @@ -510,14 +512,14 @@ void reader::impl::set_data_types(device_span rec_starts, auto const h_column_infos = cudf::io::json::gpu::detect_data_types( opts_.view(), - device_span(static_cast(data_.data()), data_.size()), + device_span(static_cast(data_.data()), data_.size()), rec_starts, do_set_null_count, num_columns, get_column_map_device_ptr(), stream); - auto get_type_id = [&](auto const &cinfo) { + auto get_type_id = [&](auto const& cinfo) { auto int_count_total = cinfo.big_int_count + cinfo.negative_small_int_count + cinfo.positive_small_int_count; if (cinfo.null_count == static_cast(rec_starts.size())) { @@ -545,7 +547,7 @@ void reader::impl::set_data_types(device_span rec_starts, std::transform(std::cbegin(h_column_infos), std::cend(h_column_infos), std::back_inserter(dtypes_), - [&](auto const &cinfo) { return data_type{get_type_id(cinfo)}; }); + [&](auto const& cinfo) { return data_type{get_type_id(cinfo)}; }); } } @@ -562,8 +564,8 @@ table_with_metadata reader::impl::convert_data_to_table(device_span h_dtypes(num_columns); - thrust::host_vector h_data(num_columns); - thrust::host_vector h_valid(num_columns); + thrust::host_vector h_data(num_columns); + thrust::host_vector h_valid(num_columns); for (size_t i = 0; i < num_columns; ++i) { h_dtypes[i] = dtypes_[i]; @@ -572,14 +574,14 @@ table_with_metadata reader::impl::convert_data_to_table(device_span(h_dtypes, stream); - auto d_data = cudf::detail::make_device_uvector_async(h_data, stream); - auto d_valid = cudf::detail::make_device_uvector_async(h_valid, stream); + auto d_data = cudf::detail::make_device_uvector_async(h_data, stream); + auto d_valid = cudf::detail::make_device_uvector_async(h_valid, stream); auto d_valid_counts = cudf::detail::make_zeroed_device_uvector_async(num_columns, stream); cudf::io::json::gpu::convert_json_to_columns( opts_.view(), - device_span(static_cast(data_.data()), 
data_.size()), + device_span(static_cast(data_.data()), data_.size()), rec_starts, d_dtypes, get_column_map_device_ptr(), @@ -632,11 +634,11 @@ table_with_metadata reader::impl::convert_data_to_table(device_span(std::move(out_columns)), metadata_}; } -reader::impl::impl(std::vector> &&sources, - std::vector const &filepaths, - json_reader_options const &options, +reader::impl::impl(std::vector>&& sources, + std::vector const& filepaths, + json_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : options_(options), mr_(mr), sources_(std::move(sources)), filepaths_(filepaths) { CUDF_EXPECTS(options_.is_enabled_lines(), "Only JSON Lines format is currently supported.\n"); @@ -657,7 +659,7 @@ reader::impl::impl(std::vector> &&sources, * * @return Table and its metadata */ -table_with_metadata reader::impl::read(json_reader_options const &options, +table_with_metadata reader::impl::read(json_reader_options const& options, rmm::cuda_stream_view stream) { auto range_offset = options.get_byte_range_offset(); @@ -686,10 +688,10 @@ table_with_metadata reader::impl::read(json_reader_options const &options, } // Forward to implementation -reader::reader(std::vector const &filepaths, - json_reader_options const &options, +reader::reader(std::vector const& filepaths, + json_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { // Delay actual instantiation of data source until read to allow for // partial memory mapping of file using byte ranges @@ -698,10 +700,10 @@ reader::reader(std::vector const &filepaths, } // Forward to implementation -reader::reader(std::vector> &&sources, - json_reader_options const &options, +reader::reader(std::vector>&& sources, + json_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { std::vector file_paths = {}; // Empty filepaths _impl = std::make_unique(std::move(sources), file_paths, options, stream, mr); @@ -711,7 +713,7 @@ reader::reader(std::vector> &&sources, reader::~reader() = default; // Forward to implementation -table_with_metadata reader::read(json_reader_options const &options, rmm::cuda_stream_view stream) +table_with_metadata reader::read(json_reader_options const& options, rmm::cuda_stream_view stream) { return table_with_metadata{_impl->read(options, stream)}; } diff --git a/cpp/src/io/json/reader_impl.hpp b/cpp/src/io/json/reader_impl.hpp index f22653303ce..bbda7e9ba74 100644 --- a/cpp/src/io/json/reader_impl.hpp +++ b/cpp/src/io/json/reader_impl.hpp @@ -44,7 +44,7 @@ using namespace cudf::io::json; using namespace cudf::io; using col_map_type = cudf::io::json::gpu::col_map_type; -using col_map_ptr_type = std::unique_ptr>; +using col_map_ptr_type = std::unique_ptr>; /** * @brief Class used to parse Json input and convert it into gdf columns. 
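convert_data_to_table, touched in the hunks above, stages each column's raw data and validity pointers in host vectors before handing them to make_device_uvector_async. A minimal version of the staging step, with plain vectors standing in for cudf's column buffers:

#include <cstddef>
#include <vector>

// Collect each column's raw output pointer host-side so a single async copy
// can hand the whole pointer table to the conversion kernel.
static std::vector<void*> stage_column_pointers(std::vector<std::vector<char>>& out_buffers)
{
  std::vector<void*> h_data(out_buffers.size());
  for (std::size_t i = 0; i < out_buffers.size(); ++i) {
    h_data[i] = out_buffers[i].data();
  }
  return h_data;
}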
@@ -54,13 +54,13 @@ class reader::impl { private: const json_reader_options options_{}; - rmm::mr::device_memory_resource *mr_ = nullptr; + rmm::mr::device_memory_resource* mr_ = nullptr; std::vector> sources_; std::vector filepaths_; std::vector buffer_; - const char *uncomp_data_ = nullptr; + const char* uncomp_data_ = nullptr; size_t uncomp_size_ = 0; // Used when the input data is compressed, to ensure the allocated uncompressed data is freed @@ -87,7 +87,7 @@ class reader::impl { * @brief Sets the column map data member and makes a device copy to be used as a kernel * parameter. */ - void set_column_map(col_map_ptr_type &&map, rmm::cuda_stream_view stream) + void set_column_map(col_map_ptr_type&& map, rmm::cuda_stream_view stream) { key_to_col_idx_map_ = std::move(map); d_key_col_map_ = @@ -145,7 +145,7 @@ class reader::impl { * Only rows that need to be parsed are copied, based on the byte range * Also updates the array of record starts to match the device data offset. */ - void upload_data_to_device(rmm::device_uvector &rec_starts, + void upload_data_to_device(rmm::device_uvector& rec_starts, rmm::cuda_stream_view stream); /** @@ -183,11 +183,11 @@ class reader::impl { /** * @brief Constructor from a dataset source with reader options. */ - explicit impl(std::vector> &&sources, - std::vector const &filepaths, - json_reader_options const &options, + explicit impl(std::vector>&& sources, + std::vector const& filepaths, + json_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr); + rmm::mr::device_memory_resource* mr); /** * @brief Read an entire set or a subset of data from the source @@ -197,7 +197,7 @@ class reader::impl { * * @return Table and its metadata */ - table_with_metadata read(json_reader_options const &options, rmm::cuda_stream_view stream); + table_with_metadata read(json_reader_options const& options, rmm::cuda_stream_view stream); }; } // namespace json diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu index e69a61bde66..ef39e475b93 100644 --- a/cpp/src/io/orc/dict_enc.cu +++ b/cpp/src/io/orc/dict_enc.cu @@ -54,7 +54,7 @@ static inline __device__ uint32_t hash_string(const string_view val) if (val.empty()) { return 0; } else { - char const *ptr = val.data(); + char const* ptr = val.data(); uint32_t len = val.size_bytes(); return (ptr[0] + (ptr[len - 1] << 5) + (len << 10)) & ((1 << init_hash_bits) - 1); } @@ -68,13 +68,13 @@ static inline __device__ uint32_t hash_string(const string_view val) * @param[in] temp_storage shared memory storage to scan non-null positions */ template -static __device__ void LoadNonNullIndices(volatile dictinit_state_s *s, +static __device__ void LoadNonNullIndices(volatile dictinit_state_s* s, int t, - Storage &temp_storage) + Storage& temp_storage) { if (t == 0) { s->nnz = 0; } for (uint32_t i = 0; i < s->chunk.num_rows; i += block_size) { - const uint32_t *valid_map = s->chunk.leaf_column->null_mask(); + const uint32_t* valid_map = s->chunk.leaf_column->null_mask(); auto column_offset = s->chunk.leaf_column->offset(); uint32_t is_valid, nz_pos; if (t < block_size / 32) { @@ -120,12 +120,12 @@ static __device__ void LoadNonNullIndices(volatile dictinit_state_s *s, // blockDim {block_size,1,1} template __global__ void __launch_bounds__(block_size, 2) - gpuInitDictionaryIndices(DictionaryChunk *chunks, + gpuInitDictionaryIndices(DictionaryChunk* chunks, const table_device_view view, - uint32_t *dict_data, - uint32_t *dict_index, + uint32_t* dict_data, + uint32_t* dict_index, 
size_t row_index_stride, - size_type *str_col_ids, + size_type* str_col_ids, uint32_t num_columns) { __shared__ __align__(16) dictinit_state_s state_g; @@ -138,14 +138,14 @@ __global__ void __launch_bounds__(block_size, 2) typename block_scan::TempStorage scan_storage; } temp_storage; - dictinit_state_s *const s = &state_g; + dictinit_state_s* const s = &state_g; uint32_t col_id = blockIdx.x; uint32_t group_id = blockIdx.y; uint32_t nnz, start_row, dict_char_count; int t = threadIdx.x; if (t == 0) { - column_device_view *leaf_column_view = view.begin() + str_col_ids[col_id]; + column_device_view* leaf_column_view = view.begin() + str_col_ids[col_id]; s->chunk = chunks[group_id * num_columns + col_id]; s->chunk.leaf_column = leaf_column_view; s->chunk.dict_data = @@ -305,21 +305,21 @@ __global__ void __launch_bounds__(block_size, 2) */ // blockDim {1024,1,1} extern "C" __global__ void __launch_bounds__(1024) - gpuCompactChunkDictionaries(StripeDictionary *stripes, - DictionaryChunk const *chunks, + gpuCompactChunkDictionaries(StripeDictionary* stripes, + DictionaryChunk const* chunks, uint32_t num_columns) { __shared__ __align__(16) StripeDictionary stripe_g; __shared__ __align__(16) DictionaryChunk chunk_g; - __shared__ const uint32_t *volatile ck_curptr_g; + __shared__ const uint32_t* volatile ck_curptr_g; __shared__ uint32_t volatile ck_curlen_g; uint32_t col_id = blockIdx.x; uint32_t stripe_id = blockIdx.y; uint32_t chunk_len; int t = threadIdx.x; - const uint32_t *src; - uint32_t *dst; + const uint32_t* src; + uint32_t* dst; if (t == 0) stripe_g = stripes[stripe_id * num_columns + col_id]; __syncthreads(); @@ -365,7 +365,7 @@ struct build_state_s { // blockDim {1024,1,1} template __global__ void __launch_bounds__(block_size) - gpuBuildStripeDictionaries(StripeDictionary *stripes, uint32_t num_columns) + gpuBuildStripeDictionaries(StripeDictionary* stripes, uint32_t num_columns) { __shared__ __align__(16) build_state_s state_g; using block_reduce = cub::BlockReduce; @@ -375,7 +375,7 @@ __global__ void __launch_bounds__(block_size) typename block_scan::TempStorage scan_storage; } temp_storage; - build_state_s *const s = &state_g; + build_state_s* const s = &state_g; uint32_t col_id = blockIdx.x; uint32_t stripe_id = blockIdx.y; uint32_t num_strings; @@ -427,12 +427,12 @@ __global__ void __launch_bounds__(block_size) /** * @copydoc cudf::io::orc::gpu::InitDictionaryIndices */ -void InitDictionaryIndices(const table_device_view &view, - DictionaryChunk *chunks, - uint32_t *dict_data, - uint32_t *dict_index, +void InitDictionaryIndices(const table_device_view& view, + DictionaryChunk* chunks, + uint32_t* dict_data, + uint32_t* dict_index, size_t row_index_stride, - size_type *str_col_ids, + size_type* str_col_ids, uint32_t num_columns, uint32_t num_rowgroups, rmm::cuda_stream_view stream) @@ -447,9 +447,9 @@ void InitDictionaryIndices(const table_device_view &view, /** * @copydoc cudf::io::orc::gpu::BuildStripeDictionaries */ -void BuildStripeDictionaries(StripeDictionary *stripes, - StripeDictionary *stripes_host, - DictionaryChunk const *chunks, +void BuildStripeDictionaries(StripeDictionary* stripes, + StripeDictionary* stripes_host, + DictionaryChunk const* chunks, uint32_t num_stripes, uint32_t num_rowgroups, uint32_t num_columns, @@ -463,12 +463,12 @@ void BuildStripeDictionaries(StripeDictionary *stripes, if (stripes_host[i].dict_data != nullptr) { thrust::device_ptr dict_data_ptr = thrust::device_pointer_cast(stripes_host[i].dict_data); - column_device_view *string_column = 
stripes_host[i].leaf_column; + column_device_view* string_column = stripes_host[i].leaf_column; // NOTE: Requires the --expt-extended-lambda nvcc flag thrust::sort(rmm::exec_policy(stream), dict_data_ptr, dict_data_ptr + stripes_host[i].num_strings, - [string_column] __device__(const uint32_t &lhs, const uint32_t &rhs) { + [string_column] __device__(const uint32_t& lhs, const uint32_t& rhs) { return string_column->element(lhs) < string_column->element(rhs); }); diff --git a/cpp/src/io/orc/orc.cpp b/cpp/src/io/orc/orc.cpp index 7358f0e6404..dc23af594a5 100644 --- a/cpp/src/io/orc/orc.cpp +++ b/cpp/src/io/orc/orc.cpp @@ -23,7 +23,7 @@ namespace cudf { namespace io { namespace orc { -uint32_t ProtobufReader::read_field_size(const uint8_t *end) +uint32_t ProtobufReader::read_field_size(const uint8_t* end) { auto const size = get(); CUDF_EXPECTS(size <= static_cast(end - m_cur), "Protobuf parsing out of bounds"); @@ -41,7 +41,7 @@ void ProtobufReader::skip_struct_field(int t) } } -void ProtobufReader::read(PostScript &s, size_t maxlen) +void ProtobufReader::read(PostScript& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.footerLength), make_field_reader(2, s.compression), @@ -52,7 +52,7 @@ void ProtobufReader::read(PostScript &s, size_t maxlen) function_builder(s, maxlen, op); } -void ProtobufReader::read(FileFooter &s, size_t maxlen) +void ProtobufReader::read(FileFooter& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.headerLength), make_field_reader(2, s.contentLength), @@ -65,7 +65,7 @@ void ProtobufReader::read(FileFooter &s, size_t maxlen) function_builder(s, maxlen, op); } -void ProtobufReader::read(StripeInformation &s, size_t maxlen) +void ProtobufReader::read(StripeInformation& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.offset), make_field_reader(2, s.indexLength), @@ -75,7 +75,7 @@ void ProtobufReader::read(StripeInformation &s, size_t maxlen) function_builder(s, maxlen, op); } -void ProtobufReader::read(SchemaType &s, size_t maxlen) +void ProtobufReader::read(SchemaType& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.kind), make_packed_field_reader(2, s.subtypes), @@ -86,13 +86,13 @@ void ProtobufReader::read(SchemaType &s, size_t maxlen) function_builder(s, maxlen, op); } -void ProtobufReader::read(UserMetadataItem &s, size_t maxlen) +void ProtobufReader::read(UserMetadataItem& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.name), make_field_reader(2, s.value)); function_builder(s, maxlen, op); } -void ProtobufReader::read(StripeFooter &s, size_t maxlen) +void ProtobufReader::read(StripeFooter& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.streams), make_field_reader(2, s.columns), @@ -100,7 +100,7 @@ void ProtobufReader::read(StripeFooter &s, size_t maxlen) function_builder(s, maxlen, op); } -void ProtobufReader::read(Stream &s, size_t maxlen) +void ProtobufReader::read(Stream& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.kind), make_field_reader(2, s.column_id), @@ -108,59 +108,59 @@ void ProtobufReader::read(Stream &s, size_t maxlen) function_builder(s, maxlen, op); } -void ProtobufReader::read(ColumnEncoding &s, size_t maxlen) +void ProtobufReader::read(ColumnEncoding& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.kind), make_field_reader(2, s.dictionarySize)); function_builder(s, maxlen, op); } -void ProtobufReader::read(integer_statistics &s, size_t maxlen) +void 
ProtobufReader::read(integer_statistics& s, size_t maxlen) { auto op = std::make_tuple( make_field_reader(1, s.minimum), make_field_reader(2, s.maximum), make_field_reader(3, s.sum)); function_builder(s, maxlen, op); } -void ProtobufReader::read(double_statistics &s, size_t maxlen) +void ProtobufReader::read(double_statistics& s, size_t maxlen) { auto op = std::make_tuple( make_field_reader(1, s.minimum), make_field_reader(2, s.maximum), make_field_reader(3, s.sum)); function_builder(s, maxlen, op); } -void ProtobufReader::read(string_statistics &s, size_t maxlen) +void ProtobufReader::read(string_statistics& s, size_t maxlen) { auto op = std::make_tuple( make_field_reader(1, s.minimum), make_field_reader(2, s.maximum), make_field_reader(3, s.sum)); function_builder(s, maxlen, op); } -void ProtobufReader::read(bucket_statistics &s, size_t maxlen) +void ProtobufReader::read(bucket_statistics& s, size_t maxlen) { auto op = std::make_tuple(make_packed_field_reader(1, s.count)); function_builder(s, maxlen, op); } -void ProtobufReader::read(decimal_statistics &s, size_t maxlen) +void ProtobufReader::read(decimal_statistics& s, size_t maxlen) { auto op = std::make_tuple( make_field_reader(1, s.minimum), make_field_reader(2, s.maximum), make_field_reader(3, s.sum)); function_builder(s, maxlen, op); } -void ProtobufReader::read(date_statistics &s, size_t maxlen) +void ProtobufReader::read(date_statistics& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.minimum), make_field_reader(2, s.maximum)); function_builder(s, maxlen, op); } -void ProtobufReader::read(binary_statistics &s, size_t maxlen) +void ProtobufReader::read(binary_statistics& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.sum)); function_builder(s, maxlen, op); } -void ProtobufReader::read(timestamp_statistics &s, size_t maxlen) +void ProtobufReader::read(timestamp_statistics& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.minimum), make_field_reader(2, s.maximum), @@ -169,7 +169,7 @@ void ProtobufReader::read(timestamp_statistics &s, size_t maxlen) function_builder(s, maxlen, op); } -void ProtobufReader::read(column_statistics &s, size_t maxlen) +void ProtobufReader::read(column_statistics& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.number_of_values), make_field_reader(2, s.int_stats), @@ -183,13 +183,13 @@ void ProtobufReader::read(column_statistics &s, size_t maxlen) function_builder(s, maxlen, op); } -void ProtobufReader::read(StripeStatistics &s, size_t maxlen) +void ProtobufReader::read(StripeStatistics& s, size_t maxlen) { auto op = std::make_tuple(make_raw_field_reader(1, s.colStats)); function_builder(s, maxlen, op); } -void ProtobufReader::read(Metadata &s, size_t maxlen) +void ProtobufReader::read(Metadata& s, size_t maxlen) { auto op = std::make_tuple(make_field_reader(1, s.stripeStats)); function_builder(s, maxlen, op); @@ -243,7 +243,7 @@ void ProtobufWriter::put_row_index_entry(int32_t present_blk, m_buf->data()[lpos + 2] = (uint8_t)(sz); } -size_t ProtobufWriter::write(const PostScript &s) +size_t ProtobufWriter::write(const PostScript& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.footerLength); @@ -255,7 +255,7 @@ size_t ProtobufWriter::write(const PostScript &s) return w.value(); } -size_t ProtobufWriter::write(const FileFooter &s) +size_t ProtobufWriter::write(const FileFooter& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.headerLength); @@ -269,7 +269,7 @@ size_t ProtobufWriter::write(const FileFooter &s) return 
w.value(); } -size_t ProtobufWriter::write(const StripeInformation &s) +size_t ProtobufWriter::write(const StripeInformation& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.offset); @@ -280,7 +280,7 @@ size_t ProtobufWriter::write(const StripeInformation &s) return w.value(); } -size_t ProtobufWriter::write(const SchemaType &s) +size_t ProtobufWriter::write(const SchemaType& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.kind); @@ -292,7 +292,7 @@ size_t ProtobufWriter::write(const SchemaType &s) return w.value(); } -size_t ProtobufWriter::write(const UserMetadataItem &s) +size_t ProtobufWriter::write(const UserMetadataItem& s) { ProtobufFieldWriter w(this); w.field_string(1, s.name); @@ -300,7 +300,7 @@ size_t ProtobufWriter::write(const UserMetadataItem &s) return w.value(); } -size_t ProtobufWriter::write(const StripeFooter &s) +size_t ProtobufWriter::write(const StripeFooter& s) { ProtobufFieldWriter w(this); w.field_repeated_struct(1, s.streams); @@ -309,7 +309,7 @@ size_t ProtobufWriter::write(const StripeFooter &s) return w.value(); } -size_t ProtobufWriter::write(const Stream &s) +size_t ProtobufWriter::write(const Stream& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.kind); @@ -318,7 +318,7 @@ size_t ProtobufWriter::write(const Stream &s) return w.value(); } -size_t ProtobufWriter::write(const ColumnEncoding &s) +size_t ProtobufWriter::write(const ColumnEncoding& s) { ProtobufFieldWriter w(this); w.field_uint(1, s.kind); @@ -326,14 +326,14 @@ size_t ProtobufWriter::write(const ColumnEncoding &s) return w.value(); } -size_t ProtobufWriter::write(const StripeStatistics &s) +size_t ProtobufWriter::write(const StripeStatistics& s) { ProtobufFieldWriter w(this); w.field_repeated_struct_blob(1, s.colStats); return w.value(); } -size_t ProtobufWriter::write(const Metadata &s) +size_t ProtobufWriter::write(const Metadata& s) { ProtobufFieldWriter w(this); w.field_repeated_struct(1, s.stripeStats); @@ -374,7 +374,7 @@ OrcDecompressor::OrcDecompressor(CompressionKind kind, uint32_t blockSize) * * @returns pointer to uncompressed data, nullptr if error */ -const uint8_t *OrcDecompressor::Decompress(const uint8_t *srcBytes, size_t srcLen, size_t *dstLen) +const uint8_t* OrcDecompressor::Decompress(const uint8_t* srcBytes, size_t srcLen, size_t* dstLen) { // If uncompressed, just pass-through the input if (m_kind == NONE) { @@ -429,7 +429,7 @@ const uint8_t *OrcDecompressor::Decompress(const uint8_t *srcBytes, size_t srcLe return m_buf.data(); } -metadata::metadata(datasource *const src) : source(src) +metadata::metadata(datasource* const src) : source(src) { const auto len = source->size(); const auto max_ps_size = std::min(len, static_cast(256)); @@ -437,7 +437,7 @@ metadata::metadata(datasource *const src) : source(src) // Read uncompressed postscript section (max 255 bytes + 1 byte for length) auto buffer = source->host_read(len - max_ps_size, max_ps_size); const size_t ps_length = buffer->data()[max_ps_size - 1]; - const uint8_t *ps_data = &buffer->data()[max_ps_size - ps_length - 1]; + const uint8_t* ps_data = &buffer->data()[max_ps_size - ps_length - 1]; ProtobufReader(ps_data, ps_length).read(ps); CUDF_EXPECTS(ps.footerLength + ps_length < len, "Invalid footer length"); @@ -466,7 +466,7 @@ metadata::metadata(datasource *const src) : source(src) void metadata::init_column_names() const { auto const schema_idxs = get_schema_indexes(); - auto const &types = ff.types; + auto const& types = ff.types; for (int32_t col_id = 0; col_id < get_num_columns(); ++col_id) { 
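    // Each column's name is looked up on its parent schema node, via the
    // parent/field index pair that get_schema_indexes() computes below; the
    // check that follows guards columns that have no such parent entry.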
std::string col_name; if (schema_idxs[col_id].parent >= 0 and schema_idxs[col_id].field >= 0) { @@ -487,7 +487,7 @@ std::vector metadata::get_schema_indexes() const auto const schema_size = static_cast(result.size()); for (uint32_t i = 0; i < schema_size; i++) { - auto const &subtypes = ff.types[i].subtypes; + auto const& subtypes = ff.types[i].subtypes; auto const num_children = static_cast(subtypes.size()); if (result[i].parent == -1) { // Not initialized result[i].parent = i; // set root node as its own parent diff --git a/cpp/src/io/orc/orc.h b/cpp/src/io/orc/orc.h index 224820550e1..474f404be0f 100644 --- a/cpp/src/io/orc/orc.h +++ b/cpp/src/io/orc/orc.h @@ -135,32 +135,32 @@ struct Metadata { */ class ProtobufReader { public: - ProtobufReader(const uint8_t *base, size_t len) : m_base(base), m_cur(base), m_end(base + len) {} + ProtobufReader(const uint8_t* base, size_t len) : m_base(base), m_cur(base), m_end(base + len) {} template - void read(T &s) + void read(T& s) { read(s, m_end - m_cur); } - void read(PostScript &, size_t maxlen); - void read(FileFooter &, size_t maxlen); - void read(StripeInformation &, size_t maxlen); - void read(SchemaType &, size_t maxlen); - void read(UserMetadataItem &, size_t maxlen); - void read(StripeFooter &, size_t maxlen); - void read(Stream &, size_t maxlen); - void read(ColumnEncoding &, size_t maxlen); - void read(integer_statistics &, size_t maxlen); - void read(double_statistics &, size_t maxlen); - void read(string_statistics &, size_t maxlen); - void read(bucket_statistics &, size_t maxlen); - void read(decimal_statistics &, size_t maxlen); - void read(date_statistics &, size_t maxlen); - void read(binary_statistics &, size_t maxlen); - void read(timestamp_statistics &, size_t maxlen); - void read(column_statistics &, size_t maxlen); - void read(StripeStatistics &, size_t maxlen); - void read(Metadata &, size_t maxlen); + void read(PostScript&, size_t maxlen); + void read(FileFooter&, size_t maxlen); + void read(StripeInformation&, size_t maxlen); + void read(SchemaType&, size_t maxlen); + void read(UserMetadataItem&, size_t maxlen); + void read(StripeFooter&, size_t maxlen); + void read(Stream&, size_t maxlen); + void read(ColumnEncoding&, size_t maxlen); + void read(integer_statistics&, size_t maxlen); + void read(double_statistics&, size_t maxlen); + void read(string_statistics&, size_t maxlen); + void read(bucket_statistics&, size_t maxlen); + void read(decimal_statistics&, size_t maxlen); + void read(date_statistics&, size_t maxlen); + void read(binary_statistics&, size_t maxlen); + void read(timestamp_statistics&, size_t maxlen); + void read(column_statistics&, size_t maxlen); + void read(StripeStatistics&, size_t maxlen); + void read(Metadata&, size_t maxlen); private: template @@ -178,11 +178,11 @@ class ProtobufReader { void skip_struct_field(int t); template - void function_builder(T &s, size_t maxlen, std::tuple &op); + void function_builder(T& s, size_t maxlen, std::tuple& op); template ::value and - !std::is_enum::value> * = nullptr> + !std::is_enum::value>* = nullptr> int static constexpr encode_field_number_base(int field_number) noexcept { return (field_number * 8) + PB_TYPE_FIXEDLEN; @@ -190,21 +190,21 @@ class ProtobufReader { template ::value or - std::is_enum::value> * = nullptr> + std::is_enum::value>* = nullptr> int static constexpr encode_field_number_base(int field_number) noexcept { return (field_number * 8) + PB_TYPE_VARINT; } template ::value> * = nullptr> + typename std::enable_if_t::value>* = nullptr> int static 
constexpr encode_field_number_base(int field_number) noexcept { return (field_number * 8) + PB_TYPE_FIXED32; } template ::value> * = nullptr> + typename std::enable_if_t::value>* = nullptr> int static constexpr encode_field_number_base(int field_number) noexcept { return (field_number * 8) + PB_TYPE_FIXED64; @@ -212,7 +212,7 @@ class ProtobufReader { template ::value or - std::is_same::value> * = nullptr> + std::is_same::value>* = nullptr> int static constexpr encode_field_number(int field_number) noexcept { return encode_field_number_base(field_number); @@ -220,8 +220,8 @@ class ProtobufReader { // containters change the field number encoding template >::value> - * = nullptr> + typename std::enable_if_t< + std::is_same>::value>* = nullptr> int static constexpr encode_field_number(int field_number) noexcept { return encode_field_number_base(field_number); @@ -229,49 +229,49 @@ class ProtobufReader { // optional fields don't change the field number encoding template >::value> - * = nullptr> + typename std::enable_if_t< + std::is_same>::value>* = nullptr> int static constexpr encode_field_number(int field_number) noexcept { return encode_field_number_base(field_number); } - uint32_t read_field_size(const uint8_t *end); + uint32_t read_field_size(const uint8_t* end); - template ::value> * = nullptr> - void read_field(T &value, const uint8_t *end) + template ::value>* = nullptr> + void read_field(T& value, const uint8_t* end) { value = get(); } - template ::value> * = nullptr> - void read_field(T &value, const uint8_t *end) + template ::value>* = nullptr> + void read_field(T& value, const uint8_t* end) { value = static_cast(get()); } - template ::value> * = nullptr> - void read_field(T &value, const uint8_t *end) + template ::value>* = nullptr> + void read_field(T& value, const uint8_t* end) { auto const size = read_field_size(end); - value.assign(reinterpret_cast(m_cur), size); + value.assign(reinterpret_cast(m_cur), size); m_cur += size; } template >::value> * = nullptr> - void read_field(T &value, const uint8_t *end) + typename std::enable_if_t>::value>* = nullptr> + void read_field(T& value, const uint8_t* end) { auto const size = read_field_size(end); - value.emplace_back(reinterpret_cast(m_cur), size); + value.emplace_back(reinterpret_cast(m_cur), size); m_cur += size; } - template >::value and - !std::is_same::value> * = nullptr> - void read_field(T &value, const uint8_t *end) + template < + typename T, + typename std::enable_if_t>::value and + !std::is_same::value>* = nullptr> + void read_field(T& value, const uint8_t* end) { auto const size = read_field_size(end); value.emplace_back(); @@ -279,9 +279,9 @@ class ProtobufReader { } template >::value> - * = nullptr> - void read_field(T &value, const uint8_t *end) + typename std::enable_if_t< + std::is_same>::value>* = nullptr> + void read_field(T& value, const uint8_t* end) { typename T::value_type contained_value; read_field(contained_value, end); @@ -289,29 +289,30 @@ class ProtobufReader { } template - auto read_field(T &value, const uint8_t *end) -> decltype(read(value, 0)) + auto read_field(T& value, const uint8_t* end) -> decltype(read(value, 0)) { auto const size = read_field_size(end); read(value, size); } - template ::value> * = nullptr> - void read_field(T &value, const uint8_t *end) + template ::value>* = nullptr> + void read_field(T& value, const uint8_t* end) { memcpy(&value, m_cur, sizeof(T)); m_cur += sizeof(T); } template - void read_packed_field(T &value, const uint8_t *end) + void read_packed_field(T& value, const 
uint8_t* end) { auto const len = get(); auto const field_end = std::min(m_cur + len, end); - while (m_cur < field_end) value.push_back(get()); + while (m_cur < field_end) + value.push_back(get()); } template - void read_raw_field(T &value, const uint8_t *end) + void read_raw_field(T& value, const uint8_t* end) { auto const size = read_field_size(end); value.emplace_back(m_cur, m_cur + size); @@ -321,14 +322,14 @@ class ProtobufReader { template struct field_reader { int const encoded_field_number; - T &output_value; + T& output_value; - field_reader(int field_number, T &field_value) + field_reader(int field_number, T& field_value) : encoded_field_number(encode_field_number(field_number)), output_value(field_value) { } - inline void operator()(ProtobufReader *pbr, const uint8_t *end) + inline void operator()(ProtobufReader* pbr, const uint8_t* end) { pbr->read_field(output_value, end); } @@ -337,14 +338,14 @@ class ProtobufReader { template struct packed_field_reader { int const encoded_field_number; - T &output_value; + T& output_value; - packed_field_reader(int field_number, T &field_value) + packed_field_reader(int field_number, T& field_value) : encoded_field_number(encode_field_number(field_number)), output_value(field_value) { } - inline void operator()(ProtobufReader *pbr, const uint8_t *end) + inline void operator()(ProtobufReader* pbr, const uint8_t* end) { pbr->read_packed_field(output_value, end); } @@ -353,22 +354,22 @@ class ProtobufReader { template struct raw_field_reader { int const encoded_field_number; - T &output_value; + T& output_value; - raw_field_reader(int field_number, T &field_value) + raw_field_reader(int field_number, T& field_value) : encoded_field_number(encode_field_number(field_number)), output_value(field_value) { } - inline void operator()(ProtobufReader *pbr, const uint8_t *end) + inline void operator()(ProtobufReader* pbr, const uint8_t* end) { pbr->read_raw_field(output_value, end); } }; - const uint8_t *const m_base; - const uint8_t *m_cur; - const uint8_t *const m_end; + const uint8_t* const m_base; + const uint8_t* m_cur; + const uint8_t* const m_end; public: /** @@ -381,7 +382,7 @@ class ProtobufReader { * @return the field reader object of the right type */ template - static auto make_field_reader(int field_number, T &field_value) + static auto make_field_reader(int field_number, T& field_value) { return field_reader(field_number, field_value); } @@ -395,7 +396,7 @@ class ProtobufReader { * @return the packed field reader object of the right type */ template - static auto make_packed_field_reader(int field_number, T &field_value) + static auto make_packed_field_reader(int field_number, T& field_value) { return packed_field_reader(field_number, field_value); } @@ -410,7 +411,7 @@ class ProtobufReader { * @return the raw field reader object of the right type */ template - static auto make_raw_field_reader(int field_number, T &field_value) + static auto make_raw_field_reader(int field_number, T& field_value) { return raw_field_reader(field_number, field_value); } @@ -469,7 +470,7 @@ inline int64_t ProtobufReader::get() class ProtobufWriter { public: ProtobufWriter() { m_buf = nullptr; } - ProtobufWriter(std::vector *output) { m_buf = output; } + ProtobufWriter(std::vector* output) { m_buf = output; } void putb(uint8_t v) { m_buf->push_back(v); } uint32_t put_uint(uint64_t v) { @@ -496,19 +497,19 @@ class ProtobufWriter { TypeKind kind); public: - size_t write(const PostScript &); - size_t write(const FileFooter &); - size_t write(const 
StripeInformation &); - size_t write(const SchemaType &); - size_t write(const UserMetadataItem &); - size_t write(const StripeFooter &); - size_t write(const Stream &); - size_t write(const ColumnEncoding &); - size_t write(const StripeStatistics &); - size_t write(const Metadata &); + size_t write(const PostScript&); + size_t write(const FileFooter&); + size_t write(const StripeInformation&); + size_t write(const SchemaType&); + size_t write(const UserMetadataItem&); + size_t write(const StripeFooter&); + size_t write(const Stream&); + size_t write(const ColumnEncoding&); + size_t write(const StripeStatistics&); + size_t write(const Metadata&); protected: - std::vector *m_buf; + std::vector* m_buf; struct ProtobufFieldWriter; }; @@ -519,7 +520,7 @@ class ProtobufWriter { class OrcDecompressor { public: OrcDecompressor(CompressionKind kind, uint32_t blockSize); - const uint8_t *Decompress(const uint8_t *srcBytes, size_t srcLen, size_t *dstLen); + const uint8_t* Decompress(const uint8_t* srcBytes, size_t srcLen, size_t* dstLen); uint32_t GetLog2MaxCompressionRatio() const { return m_log2MaxRatio; } uint32_t GetMaxUncompressedBlockSize(uint32_t block_len) const { @@ -568,7 +569,7 @@ struct orc_column_meta { * convenience methods for initializing and accessing metadata. */ class metadata { - using OrcStripeInfo = std::pair; + using OrcStripeInfo = std::pair; public: struct stripe_source_mapping { @@ -577,12 +578,12 @@ class metadata { }; public: - explicit metadata(datasource *const src); + explicit metadata(datasource* const src); size_t get_total_rows() const { return ff.numberOfRows; } int get_num_stripes() const { return ff.stripes.size(); } int get_num_columns() const { return ff.types.size(); } - std::string const &get_column_name(int32_t column_id) const + std::string const& get_column_name(int32_t column_id) const { if (column_names.empty() && get_num_columns() != 0) { init_column_names(); } return column_names[column_id]; @@ -595,7 +596,7 @@ class metadata { Metadata md; std::vector stripefooters; std::unique_ptr decompressor; - datasource *const source; + datasource* const source; private: struct schema_indexes { diff --git a/cpp/src/io/orc/orc_field_reader.hpp b/cpp/src/io/orc/orc_field_reader.hpp index 8e9bca44340..45d2cbe3bf2 100644 --- a/cpp/src/io/orc/orc_field_reader.hpp +++ b/cpp/src/io/orc/orc_field_reader.hpp @@ -41,10 +41,10 @@ namespace orc { template struct FunctionSwitchImpl { template - static inline void run(ProtobufReader *pbr, - const uint8_t *end, - const int &encoded_field_number, - std::tuple &ops) + static inline void run(ProtobufReader* pbr, + const uint8_t* end, + const int& encoded_field_number, + std::tuple& ops) { if (encoded_field_number == std::get(ops).encoded_field_number) { std::get(ops)(pbr, end); @@ -57,10 +57,10 @@ struct FunctionSwitchImpl { template <> struct FunctionSwitchImpl<0> { template - static inline void run(ProtobufReader *pbr, - const uint8_t *end, - const int &encoded_field_number, - std::tuple &ops) + static inline void run(ProtobufReader* pbr, + const uint8_t* end, + const int& encoded_field_number, + std::tuple& ops) { if (encoded_field_number == std::get<0>(ops).encoded_field_number) { std::get<0>(ops)(pbr, end); @@ -78,10 +78,10 @@ struct FunctionSwitchImpl<0> { * pointed to by the functors. 
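 *
 * As an illustrative sketch of how this dispatch is used (the message type,
 * field numbers, and member names here are hypothetical, not from the ORC
 * spec), a new message would be wired up the same way as the read()
 * overloads in orc.cpp:
 *
 *   struct Example {
 *     uint64_t count;     // varint-encoded field 1
 *     std::string label;  // length-delimited field 2
 *   };
 *   void ProtobufReader::read(Example& s, size_t maxlen)
 *   {
 *     auto op = std::make_tuple(make_field_reader(1, s.count),
 *                               make_field_reader(2, s.label));
 *     function_builder(s, maxlen, op);
 *   }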
*/ template -inline void ProtobufReader::function_builder(T &s, size_t maxlen, std::tuple &op) +inline void ProtobufReader::function_builder(T& s, size_t maxlen, std::tuple& op) { constexpr int index = std::tuple_size>::value - 1; - auto *const end = std::min(m_cur + maxlen, m_end); + auto* const end = std::min(m_cur + maxlen, m_end); while (m_cur < end) { auto const field = get(); FunctionSwitchImpl::run(this, end, field, op); diff --git a/cpp/src/io/orc/orc_field_writer.hpp b/cpp/src/io/orc/orc_field_writer.hpp index 13c7befa3a1..7882810b50d 100644 --- a/cpp/src/io/orc/orc_field_writer.hpp +++ b/cpp/src/io/orc/orc_field_writer.hpp @@ -31,15 +31,15 @@ namespace orc { struct ProtobufWriter::ProtobufFieldWriter { int struct_size; - ProtobufWriter *p; + ProtobufWriter* p; - ProtobufFieldWriter(ProtobufWriter *pbw) : struct_size(0), p(pbw) {} + ProtobufFieldWriter(ProtobufWriter* pbw) : struct_size(0), p(pbw) {} /** * @brief Function to write a unsigned integer to the internal buffer */ template - void field_uint(int field, const T &value) + void field_uint(int field, const T& value) { struct_size += p->put_uint(field * 8 + PB_TYPE_VARINT); struct_size += p->put_uint(static_cast(value)); @@ -50,7 +50,7 @@ struct ProtobufWriter::ProtobufFieldWriter { * buffer */ template - void field_packed_uint(int field, const std::vector &value) + void field_packed_uint(int field, const std::vector& value) { struct_size += p->put_uint(field * 8 + PB_TYPE_FIXEDLEN); auto lpos = p->m_buf->size(); @@ -68,31 +68,33 @@ struct ProtobufWriter::ProtobufFieldWriter { /** * @brief Function to write a string to the internal buffer */ - void field_string(int field, const std::string &value) + void field_string(int field, const std::string& value) { size_t len = value.length(); struct_size += p->put_uint(field * 8 + PB_TYPE_FIXEDLEN); struct_size += p->put_uint(len) + len; - for (size_t i = 0; i < len; i++) p->putb(value[i]); + for (size_t i = 0; i < len; i++) + p->putb(value[i]); } /** * @brief Function to write a blob to the internal buffer */ template - void field_blob(int field, const std::vector &value) + void field_blob(int field, const std::vector& value) { size_t len = value.size(); struct_size += p->put_uint(field * 8 + PB_TYPE_FIXEDLEN); struct_size += p->put_uint(len) + len; - for (size_t i = 0; i < len; i++) p->putb(value[i]); + for (size_t i = 0; i < len; i++) + p->putb(value[i]); } /** * @brief Function to write a struct to the internal buffer */ template - void field_struct(int field, const T &value) + void field_struct(int field, const T& value) { struct_size += p->put_uint((field)*8 + PB_TYPE_FIXEDLEN); auto lpos = p->m_buf->size(); @@ -107,18 +109,20 @@ struct ProtobufWriter::ProtobufFieldWriter { /** * @brief Function to write a vector of strings to the internal buffer */ - void field_repeated_string(int field, const std::vector &value) + void field_repeated_string(int field, const std::vector& value) { - for (const auto &elem : value) field_string(field, elem); + for (const auto& elem : value) + field_string(field, elem); } /** * @brief Function to write a vector of structs to the internal buffer */ template - void field_repeated_struct(int field, const std::vector &value) + void field_repeated_struct(int field, const std::vector& value) { - for (const auto &elem : value) field_struct(field, elem); + for (const auto& elem : value) + field_struct(field, elem); } /** @@ -126,9 +130,10 @@ struct ProtobufWriter::ProtobufFieldWriter { * buffer */ template - void field_repeated_struct_blob(int field, const 
std::vector &value) + void field_repeated_struct_blob(int field, const std::vector& value) { - for (const auto &elem : value) field_blob(field, elem); + for (const auto& elem : value) + field_blob(field, elem); } /** diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index b86a350fb64..fa91dd13755 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -37,7 +37,7 @@ using cudf::detail::device_2dspan; struct CompressedStreamInfo { CompressedStreamInfo() = default; - explicit constexpr CompressedStreamInfo(const uint8_t *compressed_data_, size_t compressed_size_) + explicit constexpr CompressedStreamInfo(const uint8_t* compressed_data_, size_t compressed_size_) : compressed_data(compressed_data_), uncompressed_data(nullptr), compressed_data_size(compressed_size_), @@ -49,13 +49,13 @@ struct CompressedStreamInfo { max_uncompressed_size(0) { } - const uint8_t *compressed_data; // [in] base ptr to compressed stream data - uint8_t *uncompressed_data; // [in] base ptr to uncompressed stream data or NULL if not known yet + const uint8_t* compressed_data; // [in] base ptr to compressed stream data + uint8_t* uncompressed_data; // [in] base ptr to uncompressed stream data or NULL if not known yet size_t compressed_data_size; // [in] compressed data size for this stream - gpu_inflate_input_s *decctl; // [in] base ptr to decompression structure to be filled - gpu_inflate_status_s *decstatus; // [in] results of decompression - gpu_inflate_input_s - *copyctl; // [in] base ptr to copy structure to be filled for uncompressed blocks + gpu_inflate_input_s* decctl; // [in] base ptr to decompression structure to be filled + gpu_inflate_status_s* decstatus; // [in] results of decompression + gpu_inflate_input_s* + copyctl; // [in] base ptr to copy structure to be filled for uncompressed blocks uint32_t num_compressed_blocks; // [in,out] number of entries in decctl(in), number of compressed // blocks(out) uint32_t num_uncompressed_blocks; // [in,out] number of entries in copyctl(in), number of @@ -89,11 +89,11 @@ constexpr int orc_decimal2float64_scale = 0x80; * @brief Struct to describe per stripe's column information */ struct ColumnDesc { - const uint8_t *streams[CI_NUM_STREAMS]; // ptr to data stream index + const uint8_t* streams[CI_NUM_STREAMS]; // ptr to data stream index uint32_t strm_id[CI_NUM_STREAMS]; // stream ids uint32_t strm_len[CI_NUM_STREAMS]; // stream length - uint32_t *valid_map_base; // base pointer of valid bit map for this column - void *column_data_base; // base pointer of column data + uint32_t* valid_map_base; // base pointer of valid bit map for this column + void* column_data_base; // base pointer of column data uint32_t start_row; // starting row of the stripe uint32_t num_rows; // number of rows in stripe uint32_t column_num_rows; // number of rows in whole column @@ -134,16 +134,16 @@ struct EncChunk { uint8_t dtype_len; // data type length int32_t scale; // scale for decimals or timestamps - uint32_t *dict_index; // dictionary index from row index + uint32_t* dict_index; // dictionary index from row index device_span decimal_offsets; - column_device_view *leaf_column; + column_device_view* leaf_column; }; /** * @brief Struct to describe the streams that correspond to a single `EncChunk`. 
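 *
 * A hedged usage sketch (ss and process() are hypothetical; only the members
 * and the -1 "not present" sentinel come from the struct itself): a consumer
 * can skip absent streams by checking ids[i] before touching data_ptrs[i]:
 *
 *   encoder_chunk_streams const& ss = ...;
 *   for (int i = 0; i < CI_NUM_STREAMS; ++i) {
 *     if (ss.ids[i] >= 0) process(ss.data_ptrs[i], ss.lengths[i]);
 *   }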
*/ struct encoder_chunk_streams { - uint8_t *data_ptrs[CI_NUM_STREAMS]; // encoded output + uint8_t* data_ptrs[CI_NUM_STREAMS]; // encoded output int32_t ids[CI_NUM_STREAMS]; // stream id; -1 if stream is not present uint32_t lengths[CI_NUM_STREAMS]; // in: max length, out: actual length }; @@ -166,8 +166,8 @@ struct StripeStream { * @brief Struct to describe a dictionary chunk */ struct DictionaryChunk { - uint32_t *dict_data; // dictionary data (index of non-null rows) - uint32_t *dict_index; // row indices of corresponding string (row from dictionary index) + uint32_t* dict_data; // dictionary data (index of non-null rows) + uint32_t* dict_index; // row indices of corresponding string (row from dictionary index) uint32_t start_row; // start row of this chunk uint32_t num_rows; // num rows in this chunk uint32_t num_strings; // number of strings in this chunk @@ -176,22 +176,22 @@ struct DictionaryChunk { uint32_t num_dict_strings; // number of strings in dictionary uint32_t dict_char_count; // size of dictionary string data for this chunk - column_device_view *leaf_column; //!< Pointer to string column + column_device_view* leaf_column; //!< Pointer to string column }; /** * @brief Struct to describe a dictionary */ struct StripeDictionary { - uint32_t *dict_data; // row indices of corresponding string (row from dictionary index) - uint32_t *dict_index; // dictionary index from row index + uint32_t* dict_data; // row indices of corresponding string (row from dictionary index) + uint32_t* dict_index; // dictionary index from row index uint32_t column_id; // real column id uint32_t start_chunk; // first chunk in stripe uint32_t num_chunks; // number of chunks in the stripe uint32_t num_strings; // number of unique strings in the dictionary uint32_t dict_char_count; // total size of dictionary string data - column_device_view *leaf_column; //!< Pointer to string column + column_device_view* leaf_column; //!< Pointer to string column }; /** @@ -204,7 +204,7 @@ struct StripeDictionary { *compressed size) * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void ParseCompressedStripeData(CompressedStreamInfo *strm_info, +void ParseCompressedStripeData(CompressedStreamInfo* strm_info, int32_t num_streams, uint32_t compression_block_size, uint32_t log2maxcr = 24, @@ -217,7 +217,7 @@ void ParseCompressedStripeData(CompressedStreamInfo *strm_info, * @param[in] num_streams Number of compressed streams * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void PostDecompressionReassemble(CompressedStreamInfo *strm_info, +void PostDecompressionReassemble(CompressedStreamInfo* strm_info, int32_t num_streams, rmm::cuda_stream_view stream = rmm::cuda_stream_default); @@ -235,9 +235,9 @@ void PostDecompressionReassemble(CompressedStreamInfo *strm_info, * value * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void ParseRowGroupIndex(RowGroup *row_groups, - CompressedStreamInfo *strm_info, - ColumnDesc *chunks, +void ParseRowGroupIndex(RowGroup* row_groups, + CompressedStreamInfo* strm_info, + ColumnDesc* chunks, uint32_t num_columns, uint32_t num_stripes, uint32_t num_rowgroups, @@ -255,8 +255,8 @@ void ParseRowGroupIndex(RowGroup *row_groups, * @param[in] first_row Crop all rows below first_row * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void DecodeNullsAndStringDictionaries(ColumnDesc *chunks, - DictionaryEntry *global_dictionary, +void DecodeNullsAndStringDictionaries(ColumnDesc* chunks, 
+ DictionaryEntry* global_dictionary, uint32_t num_columns, uint32_t num_stripes, size_t first_row = 0, @@ -278,8 +278,8 @@ void DecodeNullsAndStringDictionaries(ColumnDesc *chunks, * @param[in] level Current nesting level being processed * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void DecodeOrcColumnData(ColumnDesc *chunks, - DictionaryEntry *global_dictionary, +void DecodeOrcColumnData(ColumnDesc* chunks, + DictionaryEntry* global_dictionary, device_2dspan row_groups, uint32_t num_columns, uint32_t num_stripes, @@ -311,7 +311,7 @@ void EncodeOrcColumnData(device_2dspan chunks, * @param[in,out] enc_streams chunk streams device array [column][rowgroup] * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void EncodeStripeDictionaries(StripeDictionary *stripes, +void EncodeStripeDictionaries(StripeDictionary* stripes, device_2dspan chunks, uint32_t num_string_columns, uint32_t num_stripes, @@ -325,7 +325,7 @@ void EncodeStripeDictionaries(StripeDictionary *stripes, * @param[in,out] chunks encoder chunk device array [column][rowgroup] * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void set_chunk_columns(const table_device_view &view, +void set_chunk_columns(const table_device_view& view, device_2dspan chunks, rmm::cuda_stream_view stream); @@ -353,14 +353,14 @@ void CompactOrcDataStreams(device_2dspan strm_desc, * @param[out] comp_in Per-block compression input parameters * @param[out] comp_out Per-block compression status */ -void CompressOrcDataStreams(uint8_t *compressed_data, +void CompressOrcDataStreams(uint8_t* compressed_data, uint32_t num_compressed_blocks, CompressionKind compression, uint32_t comp_blk_size, device_2dspan strm_desc, device_2dspan enc_streams, - gpu_inflate_input_s *comp_in, - gpu_inflate_status_s *comp_out, + gpu_inflate_input_s* comp_in, + gpu_inflate_status_s* comp_out, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** @@ -376,12 +376,12 @@ void CompressOrcDataStreams(uint8_t *compressed_data, * @param[in] num_rowgroups Number of row groups * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void InitDictionaryIndices(const table_device_view &view, - DictionaryChunk *chunks, - uint32_t *dict_data, - uint32_t *dict_index, +void InitDictionaryIndices(const table_device_view& view, + DictionaryChunk* chunks, + uint32_t* dict_data, + uint32_t* dict_index, size_t row_index_stride, - size_type *str_col_ids, + size_type* str_col_ids, uint32_t num_columns, uint32_t num_rowgroups, rmm::cuda_stream_view stream); @@ -397,9 +397,9 @@ void InitDictionaryIndices(const table_device_view &view, * @param[in] num_columns Number of columns * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void BuildStripeDictionaries(StripeDictionary *stripes_dev, - StripeDictionary *stripes_host, - DictionaryChunk const *chunks, +void BuildStripeDictionaries(StripeDictionary* stripes_dev, + StripeDictionary* stripes_host, + DictionaryChunk const* chunks, uint32_t num_stripes, uint32_t num_rowgroups, uint32_t num_columns, @@ -415,8 +415,8 @@ void BuildStripeDictionaries(StripeDictionary *stripes_dev, * @param[in] row_index_stride Rowgroup size in rows * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void orc_init_statistics_groups(statistics_group *groups, - const stats_column_desc *cols, +void orc_init_statistics_groups(statistics_group* groups, + const stats_column_desc* cols, uint32_t num_columns, uint32_t 
num_rowgroups, uint32_t row_index_stride, @@ -430,8 +430,8 @@ void orc_init_statistics_groups(statistics_group *groups, * @param[in] statistics_count Number of statistics buffers to encode * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void orc_init_statistics_buffersize(statistics_merge_group *groups, - const statistics_chunk *chunks, +void orc_init_statistics_buffersize(statistics_merge_group* groups, + const statistics_chunk* chunks, uint32_t statistics_count, rmm::cuda_stream_view stream = rmm::cuda_stream_default); @@ -443,9 +443,9 @@ void orc_init_statistics_buffersize(statistics_merge_group *groups, * @param[in,out] chunks Statistics data * @param[in] statistics_count Number of statistics buffers */ -void orc_encode_statistics(uint8_t *blob_bfr, - statistics_merge_group *groups, - const statistics_chunk *chunks, +void orc_encode_statistics(uint8_t* blob_bfr, + statistics_merge_group* groups, + const statistics_chunk* chunks, uint32_t statistics_count, rmm::cuda_stream_view stream = rmm::cuda_stream_default); diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 3221c754349..9d7e82f0281 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -52,7 +52,7 @@ namespace { /** * @brief Function that translates ORC data kind to cuDF type enum */ -constexpr type_id to_type_id(const orc::SchemaType &schema, +constexpr type_id to_type_id(const orc::SchemaType& schema, bool use_np_dtypes, type_id timestamp_type_id, bool decimals_as_float64) @@ -131,12 +131,12 @@ namespace { * @brief struct to store buffer data and size of list buffer */ struct list_buffer_data { - size_type *data; + size_type* data; size_type size; }; // Generates offsets for list buffer from number of elements in a row. -void generate_offsets_for_list(rmm::device_uvector const &buff_data, +void generate_offsets_for_list(rmm::device_uvector const& buff_data, rmm::cuda_stream_view stream) { auto transformer = [] __device__(list_buffer_data list_data) { @@ -172,19 +172,19 @@ struct orc_stream_info { * @brief Function that populates column descriptors stream/chunk */ size_t gather_stream_info(const size_t stripe_index, - const orc::StripeInformation *stripeinfo, - const orc::StripeFooter *stripefooter, - const std::vector &orc2gdf, - const std::vector &gdf2orc, + const orc::StripeInformation* stripeinfo, + const orc::StripeFooter* stripefooter, + const std::vector& orc2gdf, + const std::vector& gdf2orc, const std::vector types, bool use_index, - size_t *num_dictionary_entries, - cudf::detail::hostdevice_2dvector &chunks, - std::vector &stream_info) + size_t* num_dictionary_entries, + cudf::detail::hostdevice_2dvector& chunks, + std::vector& stream_info) { uint64_t src_offset = 0; uint64_t dst_offset = 0; - for (const auto &stream : stripefooter->streams) { + for (const auto& stream : stripefooter->streams) { if (!stream.column_id || *stream.column_id >= orc2gdf.size()) { dst_offset += stream.length; continue; @@ -200,11 +200,11 @@ size_t gather_stream_info(const size_t stripe_index, const auto schema_type = types[column_id]; if (schema_type.subtypes.size() != 0) { if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { - for (const auto &idx : schema_type.subtypes) { + for (const auto& idx : schema_type.subtypes) { auto child_idx = (idx < orc2gdf.size()) ? 
orc2gdf[idx] : -1; if (child_idx >= 0) { col = child_idx; - auto &chunk = chunks[stripe_index][col]; + auto& chunk = chunks[stripe_index][col]; chunk.strm_id[gpu::CI_PRESENT] = stream_info.size(); chunk.strm_len[gpu::CI_PRESENT] = stream.length; } @@ -215,7 +215,7 @@ size_t gather_stream_info(const size_t stripe_index, if (col != -1) { if (src_offset >= stripeinfo->indexLength || use_index) { // NOTE: skip_count field is temporarily used to track index ordering - auto &chunk = chunks[stripe_index][col]; + auto& chunk = chunks[stripe_index][col]; const auto idx = get_index_type_and_pos(stream.kind, chunk.skip_count, col == orc2gdf[column_id]); if (idx.first < gpu::CI_NUM_STREAMS) { @@ -243,8 +243,8 @@ size_t gather_stream_info(const size_t stripe_index, /** * @brief Determines if a column should be converted from decimal to float */ -bool should_convert_decimal_column_to_float(const std::vector &columns_to_convert, - cudf::io::orc::metadata &metadata, +bool should_convert_decimal_column_to_float(const std::vector& columns_to_convert, + cudf::io::orc::metadata& metadata, int column_index) { return (std::find(columns_to_convert.begin(), @@ -260,7 +260,7 @@ bool should_convert_decimal_column_to_float(const std::vector &colu * to aggregate that metadata from all the files. */ class aggregate_orc_metadata { - using OrcStripeInfo = std::pair; + using OrcStripeInfo = std::pair; public: mutable std::vector per_file_metadata; @@ -271,11 +271,11 @@ class aggregate_orc_metadata { /** * @brief Create a metadata object from each element in the source vector */ - auto metadatas_from_sources(std::vector> const &sources) + auto metadatas_from_sources(std::vector> const& sources) { std::vector metadatas; std::transform( - sources.cbegin(), sources.cend(), std::back_inserter(metadatas), [](auto const &source) { + sources.cbegin(), sources.cend(), std::back_inserter(metadatas), [](auto const& source) { return cudf::io::orc::metadata(source.get()); }); return metadatas; @@ -287,7 +287,7 @@ class aggregate_orc_metadata { size_type calc_num_rows() const { return std::accumulate( - per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto &sum, auto &pfm) { + per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) { return sum + pfm.get_total_rows(); }); } @@ -307,12 +307,12 @@ class aggregate_orc_metadata { size_type calc_num_stripes() const { return std::accumulate( - per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto &sum, auto &pfm) { + per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) { return sum + pfm.get_num_stripes(); }); } - aggregate_orc_metadata(std::vector> const &sources) + aggregate_orc_metadata(std::vector> const& sources) : per_file_metadata(metadatas_from_sources(sources)), num_rows(calc_num_rows()), num_columns(calc_num_cols()), @@ -320,7 +320,7 @@ class aggregate_orc_metadata { { // Verify that the input files have the same number of columns, // as well as matching types, compression, and names - for (auto const &pfm : per_file_metadata) { + for (auto const& pfm : per_file_metadata) { CUDF_EXPECTS(per_file_metadata[0].get_num_columns() == pfm.get_num_columns(), "All sources must have the same number of columns"); CUDF_EXPECTS(per_file_metadata[0].ps.compression == pfm.ps.compression, @@ -341,7 +341,7 @@ class aggregate_orc_metadata { } } - auto const &get_schema(int schema_idx) const { return per_file_metadata[0].ff.types[schema_idx]; } + auto const& get_schema(int schema_idx) const { return 
per_file_metadata[0].ff.types[schema_idx]; } auto get_col_type(int col_idx) const { return per_file_metadata[0].ff.types[col_idx]; } @@ -353,7 +353,7 @@ class aggregate_orc_metadata { auto get_num_source_files() const { return per_file_metadata.size(); } - auto const &get_types() const { return per_file_metadata[0].ff.types; } + auto const& get_types() const { return per_file_metadata[0].ff.types; } int get_row_index_stride() const { return per_file_metadata[0].ff.rowIndexStride; } @@ -367,9 +367,9 @@ class aggregate_orc_metadata { } std::vector select_stripes( - std::vector> const &user_specified_stripes, - size_type &row_start, - size_type &row_count) + std::vector> const& user_specified_stripes, + size_type& row_start, + size_type& row_count) { std::vector selected_stripes_mapping; @@ -388,7 +388,7 @@ class aggregate_orc_metadata { // Coalesce stripe info at the source file later since that makes downstream processing much // easier in impl::read - for (const size_t &stripe_idx : user_specified_stripes[src_file_idx]) { + for (const size_t& stripe_idx : user_specified_stripes[src_file_idx]) { CUDF_EXPECTS(stripe_idx < per_file_metadata[src_file_idx].ff.stripes.size(), "Invalid stripe index"); stripe_infos.push_back( @@ -431,7 +431,7 @@ class aggregate_orc_metadata { // Read each stripe's stripefooter metadata if (not selected_stripes_mapping.empty()) { - for (auto &mapping : selected_stripes_mapping) { + for (auto& mapping : selected_stripes_mapping) { // Resize to all stripe_info for the source level per_file_metadata[mapping.source_idx].stripefooters.resize(mapping.stripe_info.size()); @@ -471,12 +471,12 @@ class aggregate_orc_metadata { * @return returns number of child columns at same level in case of struct and next level in case * of list */ - uint32_t add_column(std::vector> &selection, - std::vector const &types, + uint32_t add_column(std::vector>& selection, + std::vector const& types, const size_t level, const uint32_t id, - bool &has_timestamp_column, - bool &has_list_column) + bool& has_timestamp_column, + bool& has_list_column) { uint32_t num_lvl_child_columns = 0; if (level == selection.size()) { selection.emplace_back(); } @@ -521,9 +521,9 @@ class aggregate_orc_metadata { * @return Vector of list of ORC column meta-data */ std::vector> select_columns( - std::vector const &use_names, bool &has_timestamp_column, bool &has_list_column) + std::vector const& use_names, bool& has_timestamp_column, bool& has_list_column) { - auto const &pfm = per_file_metadata[0]; + auto const& pfm = per_file_metadata[0]; std::vector> selection; if (not use_names.empty()) { @@ -531,7 +531,7 @@ class aggregate_orc_metadata { // Have to check only parent columns auto const num_columns = pfm.ff.types[0].subtypes.size(); - for (const auto &use_name : use_names) { + for (const auto& use_name : use_names) { bool name_found = false; for (uint32_t i = 0; i < num_columns; ++i, ++index) { if (index >= num_columns) { index = 0; } @@ -547,7 +547,7 @@ class aggregate_orc_metadata { CUDF_EXPECTS(name_found, "Unknown column name : " + std::string(use_name)); } } else { - for (auto const &col_id : pfm.ff.types[0].subtypes) { + for (auto const& col_id : pfm.ff.types[0].subtypes) { add_column(selection, pfm.ff.types, 0, col_id, has_timestamp_column, has_list_column); } } @@ -557,21 +557,21 @@ class aggregate_orc_metadata { }; rmm::device_buffer reader::impl::decompress_stripe_data( - cudf::detail::hostdevice_2dvector &chunks, - const std::vector &stripe_data, - const OrcDecompressor *decompressor, - std::vector 
&stream_info, + cudf::detail::hostdevice_2dvector& chunks, + const std::vector& stripe_data, + const OrcDecompressor* decompressor, + std::vector& stream_info, size_t num_stripes, - cudf::detail::hostdevice_2dvector &row_groups, + cudf::detail::hostdevice_2dvector& row_groups, size_t row_index_stride, bool use_base_stride, rmm::cuda_stream_view stream) { // Parse the columns' compressed info hostdevice_vector compinfo(0, stream_info.size(), stream); - for (const auto &info : stream_info) { + for (const auto& info : stream_info) { compinfo.insert(gpu::CompressedStreamInfo( - static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, + static_cast(stripe_data[info.stripe_idx].data()) + info.dst_pos, info.length)); } compinfo.host_to_device(stream); @@ -603,7 +603,7 @@ rmm::device_buffer reader::impl::decompress_stripe_data( uint32_t start_pos = 0; uint32_t start_pos_uncomp = (uint32_t)num_compressed_blocks; for (size_t i = 0; i < compinfo.size(); ++i) { - auto dst_base = static_cast(decomp_data.data()); + auto dst_base = static_cast(decomp_data.data()); compinfo[i].uncompressed_data = dst_base + decomp_offset; compinfo[i].decctl = inflate_in.data() + start_pos; compinfo[i].decstatus = inflate_out.data() + start_pos; @@ -651,7 +651,7 @@ rmm::device_buffer reader::impl::decompress_stripe_data( for (size_t i = 0; i < num_stripes; ++i) { for (size_t j = 0; j < num_columns; ++j) { - auto &chunk = chunks[i][j]; + auto& chunk = chunks[i][j]; for (int k = 0; k < gpu::CI_NUM_STREAMS; ++k) { if (chunk.strm_len[k] > 0 && chunk.strm_id[k] < compinfo.size()) { chunk.streams[k] = compinfo[chunk.strm_id[k]].uncompressed_data; @@ -678,13 +678,13 @@ rmm::device_buffer reader::impl::decompress_stripe_data( return decomp_data; } -void reader::impl::decode_stream_data(cudf::detail::hostdevice_2dvector &chunks, +void reader::impl::decode_stream_data(cudf::detail::hostdevice_2dvector& chunks, size_t num_dicts, size_t skip_rows, timezone_table_view tz_table, - cudf::detail::hostdevice_2dvector &row_groups, + cudf::detail::hostdevice_2dvector& row_groups, size_t row_index_stride, - std::vector &out_buffers, + std::vector& out_buffers, size_t level, rmm::cuda_stream_view stream) { @@ -694,7 +694,7 @@ void reader::impl::decode_stream_data(cudf::detail::hostdevice_2dvector chunks, cudf::detail::host_2dspan row_groups, - std::vector const &list_col, + std::vector const& list_col, const int32_t level) { const auto num_of_stripes = chunks.size().first; @@ -737,7 +737,7 @@ void reader::impl::aggregate_child_meta(cudf::detail::host_2dspan reader::impl::create_empty_column(const int32_t orc_col_id, - column_name_info &schema_info, + column_name_info& schema_info, rmm::cuda_stream_view stream) { schema_info.name = _metadata->get_column_name(0, orc_col_id); @@ -843,12 +843,12 @@ std::unique_ptr reader::impl::create_empty_column(const int32_t orc_col_ } // Adds child column buffers to parent column -column_buffer &&reader::impl::assemble_buffer(const int32_t orc_col_id, - std::vector> &col_buffers, +column_buffer&& reader::impl::assemble_buffer(const int32_t orc_col_id, + std::vector>& col_buffers, const size_t level) { auto const col_id = _col_meta.orc_col_map[level][orc_col_id]; - auto &col_buffer = col_buffers[level][col_id]; + auto& col_buffer = col_buffers[level][col_id]; col_buffer.name = _metadata->get_column_name(0, orc_col_id); switch (col_buffer.type.id()) { @@ -858,7 +858,7 @@ column_buffer &&reader::impl::assemble_buffer(const int32_t orc_col_id, break; case type_id::STRUCT: - for (auto const &col : 
_metadata->get_col_type(orc_col_id).subtypes) { + for (auto const& col : _metadata->get_col_type(orc_col_id).subtypes) { col_buffer.children.emplace_back(assemble_buffer(col, col_buffers, level)); } @@ -871,13 +871,13 @@ column_buffer &&reader::impl::assemble_buffer(const int32_t orc_col_id, } // creates columns along with schema information for each column -void reader::impl::create_columns(std::vector> &&col_buffers, - std::vector> &out_columns, - std::vector &schema_info, +void reader::impl::create_columns(std::vector>&& col_buffers, + std::vector>& out_columns, + std::vector& schema_info, rmm::cuda_stream_view stream) { for (size_t i = 0; i < _selected_columns[0].size();) { - auto const &col_meta = _selected_columns[0][i]; + auto const& col_meta = _selected_columns[0][i]; schema_info.emplace_back(""); auto col_buffer = assemble_buffer(col_meta.id, col_buffers, 0); @@ -888,9 +888,9 @@ void reader::impl::create_columns(std::vector> &&col_ } } -reader::impl::impl(std::vector> &&sources, - orc_reader_options const &options, - rmm::mr::device_memory_resource *mr) +reader::impl::impl(std::vector>&& sources, + orc_reader_options const& options, + rmm::mr::device_memory_resource* mr) : _mr(mr), _sources(std::move(sources)) { // Open and parse the source(s) dataset metadata @@ -917,7 +917,7 @@ reader::impl::impl(std::vector> &&sources, table_with_metadata reader::impl::read(size_type skip_rows, size_type num_rows, - const std::vector> &stripes, + const std::vector>& stripes, rmm::cuda_stream_view stream) { CUDF_EXPECTS(skip_rows == 0 or (not _has_list_column), @@ -940,14 +940,14 @@ table_with_metadata reader::impl::read(size_type skip_rows, // in the same level since child column also have same number of rows, // list column children will be 1 level down compared to parent. 
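  // For example, under the rule above, a column of type
  // list<struct<a:int,b:string>> places the list column at level 0, while the
  // struct and its children a and b are processed together at level 1.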
for (size_t level = 0; level < _selected_columns.size(); level++) { - auto &selected_columns = _selected_columns[level]; + auto& selected_columns = _selected_columns[level]; // Association between each ORC column and its cudf::column _col_meta.orc_col_map.emplace_back(_metadata->get_num_cols(), -1); std::vector list_col; // Get a list of column data types std::vector column_types; - for (auto &col : selected_columns) { + for (auto& col : selected_columns) { // If the column type is orc::DECIMAL see if the user // desires it to be converted to float64 or not auto const decimal_as_float64 = should_convert_decimal_column_to_float( @@ -977,7 +977,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, // If no rows or stripes to read, return empty columns if (num_rows <= 0 || selected_stripes.empty()) { for (size_t i = 0; i < _selected_columns[0].size();) { - auto const &col_meta = _selected_columns[0][i]; + auto const& col_meta = _selected_columns[0][i]; auto const schema = _metadata->get_schema(col_meta.id); schema_info.emplace_back(""); out_columns.push_back( @@ -992,7 +992,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, std::accumulate(selected_stripes.begin(), selected_stripes.end(), 0, - [](size_t sum, auto &stripe_source_mapping) { + [](size_t sum, auto& stripe_source_mapping) { return sum + stripe_source_mapping.stripe_info.size(); }); const auto num_columns = selected_columns.size(); @@ -1014,16 +1014,16 @@ table_with_metadata reader::impl::read(size_type skip_rows, std::vector stream_info; // Tracker for eventually deallocating compressed and uncompressed data - auto &stripe_data = lvl_stripe_data[level]; + auto& stripe_data = lvl_stripe_data[level]; size_t stripe_start_row = 0; size_t num_dict_entries = 0; size_t num_rowgroups = 0; int stripe_idx = 0; - for (auto const &stripe_source_mapping : selected_stripes) { + for (auto const& stripe_source_mapping : selected_stripes) { // Iterate through the source files selected stripes - for (auto const &stripe : stripe_source_mapping.stripe_info) { + for (auto const& stripe : stripe_source_mapping.stripe_info) { const auto stripe_info = stripe.first; const auto stripe_footer = stripe.second; @@ -1042,7 +1042,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, CUDF_EXPECTS(total_data_size > 0, "Expected streams data within stripe"); stripe_data.emplace_back(total_data_size, stream); - auto dst_base = static_cast(stripe_data.back().data()); + auto dst_base = static_cast(stripe_data.back().data()); // Coalesce consecutive streams into one read while (stream_count < stream_info.size()) { @@ -1082,7 +1082,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, } // Update chunks to reference streams pointers for (size_t col_idx = 0; col_idx < num_columns; col_idx++) { - auto &chunk = chunks[stripe_idx][col_idx]; + auto& chunk = chunks[stripe_idx][col_idx]; // start row, number of rows in a each stripe and total number of rows // may change in lower levels of nesting chunk.start_row = (level == 0) @@ -1106,11 +1106,11 @@ table_with_metadata reader::impl::read(size_type skip_rows, .scale.value_or(0) | (decimal_as_float64 ? orc::gpu::orc_decimal2float64_scale : 0); - chunk.rowgroup_id = rowgroup_id; - chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) - ? 
sizeof(string_index_pair) - : ((column_types[col_idx].id() == type_id::LIST) or - (column_types[col_idx].id() == type_id::STRUCT)) + chunk.rowgroup_id = rowgroup_id; + chunk.dtype_len = (column_types[col_idx].id() == type_id::STRING) + ? sizeof(string_index_pair) + : ((column_types[col_idx].id() == type_id::LIST) or + (column_types[col_idx].id() == type_id::STRUCT)) ? sizeof(int32_t) : cudf::size_of(column_types[col_idx]); chunk.num_rowgroups = stripe_num_rowgroups; @@ -1135,14 +1135,14 @@ table_with_metadata reader::impl::read(size_type skip_rows, if (level > 0 and row_groups.size().first) { cudf::host_span row_groups_span(row_groups.base_host_ptr(), num_rowgroups * num_columns); - auto &rw_grp_meta = _col_meta.rwgrp_meta; + auto& rw_grp_meta = _col_meta.rwgrp_meta; // Update start row and num rows per row group std::transform(rw_grp_meta.begin(), rw_grp_meta.end(), row_groups_span.begin(), rw_grp_meta.begin(), - [&](auto meta, auto &row_grp) { + [&](auto meta, auto& row_grp) { row_grp.num_rows = meta.num_rows; row_grp.start_row = meta.start_row; return meta; @@ -1220,9 +1220,9 @@ table_with_metadata reader::impl::read(size_type skip_rows, if (list_col.size()) { std::vector buff_data; std::for_each( - out_buffers[level].begin(), out_buffers[level].end(), [&buff_data](auto &out_buffer) { + out_buffers[level].begin(), out_buffers[level].end(), [&buff_data](auto& out_buffer) { if (out_buffer.type.id() == type_id::LIST) { - auto data = static_cast(out_buffer.data()); + auto data = static_cast(out_buffer.data()); buff_data.emplace_back(list_buffer_data{data, out_buffer.size}); } }); @@ -1248,27 +1248,29 @@ table_with_metadata reader::impl::read(size_type skip_rows, out_metadata.schema_info = std::move(schema_info); - for (const auto &meta : _metadata->per_file_metadata) { - for (const auto &kv : meta.ff.metadata) { out_metadata.user_data.insert({kv.name, kv.value}); } + for (const auto& meta : _metadata->per_file_metadata) { + for (const auto& kv : meta.ff.metadata) { + out_metadata.user_data.insert({kv.name, kv.value}); + } } return {std::make_unique
<table>
(std::move(out_columns)), std::move(out_metadata)}; } // Forward to implementation -reader::reader(std::vector const &filepaths, - orc_reader_options const &options, +reader::reader(std::vector const& filepaths, + orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { _impl = std::make_unique(datasource::create(filepaths), options, mr); } // Forward to implementation -reader::reader(std::vector> &&sources, - orc_reader_options const &options, +reader::reader(std::vector>&& sources, + orc_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { _impl = std::make_unique(std::move(sources), options, mr); } @@ -1277,7 +1279,7 @@ reader::reader(std::vector> &&sources, reader::~reader() = default; // Forward to implementation -table_with_metadata reader::read(orc_reader_options const &options, rmm::cuda_stream_view stream) +table_with_metadata reader::read(orc_reader_options const& options, rmm::cuda_stream_view stream) { return _impl->read( options.get_skip_rows(), options.get_num_rows(), options.get_stripes(), stream); diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu index 4c85150a9f0..517a1e0e689 100644 --- a/cpp/src/io/orc/stats_enc.cu +++ b/cpp/src/io/orc/stats_enc.cu @@ -39,8 +39,8 @@ constexpr unsigned int init_groups_per_block = 4; constexpr unsigned int init_threads_per_block = init_threads_per_group * init_groups_per_block; __global__ void __launch_bounds__(init_threads_per_block) - gpu_init_statistics_groups(statistics_group *groups, - const stats_column_desc *cols, + gpu_init_statistics_groups(statistics_group* groups, + const stats_column_desc* cols, uint32_t num_columns, uint32_t num_rowgroups, uint32_t row_index_stride) @@ -49,7 +49,7 @@ __global__ void __launch_bounds__(init_threads_per_block) uint32_t col_id = blockIdx.y; uint32_t chunk_id = (blockIdx.x * init_groups_per_block) + threadIdx.y; uint32_t t = threadIdx.x; - statistics_group *group = &group_g[threadIdx.y]; + statistics_group* group = &group_g[threadIdx.y]; if (chunk_id < num_rowgroups and t == 0) { uint32_t num_rows = cols[col_id].leaf_column->size(); group->col = &cols[col_id]; @@ -78,8 +78,8 @@ constexpr unsigned int pb_fldlen_common = 2 * pb_fld_hdrlen + pb_fldlen_int64; template __global__ void __launch_bounds__(block_size, 1) - gpu_init_statistics_buffersize(statistics_merge_group *groups, - const statistics_chunk *chunks, + gpu_init_statistics_buffersize(statistics_merge_group* groups, + const statistics_chunk* chunks, uint32_t statistics_count) { using block_scan = cub::BlockScan; @@ -91,7 +91,7 @@ __global__ void __launch_bounds__(block_size, 1) uint32_t stats_len = 0, stats_pos; uint32_t idx = start + t; if (idx < statistics_count) { - const stats_column_desc *col = groups[idx].col; + const stats_column_desc* col = groups[idx].col; statistics_dtype dtype = col->stats_dtype; switch (dtype) { case dtype_bool: stats_len = pb_fldlen_common + pb_fld_hdrlen + pb_fldlen_bucket1; break; @@ -131,8 +131,8 @@ __global__ void __launch_bounds__(block_size, 1) } struct stats_state_s { - uint8_t *base; ///< Output buffer start - uint8_t *end; ///< Output buffer end + uint8_t* base; ///< Output buffer start + uint8_t* end; ///< Output buffer end statistics_chunk chunk; statistics_merge_group group; stats_column_desc col; @@ -146,7 +146,7 @@ struct stats_state_s { * https://developers.google.com/protocol-buffers/docs/encoding */ // 
Protobuf varint encoding for unsigned int -__device__ inline uint8_t *pb_encode_uint(uint8_t *p, uint64_t v) +__device__ inline uint8_t* pb_encode_uint(uint8_t* p, uint64_t v) { while (v > 0x7f) { *p++ = ((uint32_t)v | 0x80); @@ -157,30 +157,30 @@ __device__ inline uint8_t *pb_encode_uint(uint8_t *p, uint64_t v) } // Protobuf field encoding for unsigned int -__device__ inline uint8_t *pb_put_uint(uint8_t *p, uint32_t id, uint64_t v) +__device__ inline uint8_t* pb_put_uint(uint8_t* p, uint32_t id, uint64_t v) { p[0] = id * 8 + PB_TYPE_VARINT; // NOTE: Assumes id < 16 return pb_encode_uint(p + 1, v); } // Protobuf field encoding for signed int -__device__ inline uint8_t *pb_put_int(uint8_t *p, uint32_t id, int64_t v) +__device__ inline uint8_t* pb_put_int(uint8_t* p, uint32_t id, int64_t v) { int64_t s = (v < 0); return pb_put_uint(p, id, (v ^ -s) * 2 + s); } // Protobuf field encoding for 'packed' unsigned int (single value) -__device__ inline uint8_t *pb_put_packed_uint(uint8_t *p, uint32_t id, uint64_t v) +__device__ inline uint8_t* pb_put_packed_uint(uint8_t* p, uint32_t id, uint64_t v) { - uint8_t *p2 = pb_encode_uint(p + 2, v); + uint8_t* p2 = pb_encode_uint(p + 2, v); p[0] = id * 8 + PB_TYPE_FIXEDLEN; p[1] = static_cast(p2 - (p + 2)); return p2; } // Protobuf field encoding for binary/string -__device__ inline uint8_t *pb_put_binary(uint8_t *p, uint32_t id, const void *bytes, uint32_t len) +__device__ inline uint8_t* pb_put_binary(uint8_t* p, uint32_t id, const void* bytes, uint32_t len) { p[0] = id * 8 + PB_TYPE_FIXEDLEN; p = pb_encode_uint(p + 1, len); @@ -189,7 +189,7 @@ __device__ inline uint8_t *pb_put_binary(uint8_t *p, uint32_t id, const void *by } // Protobuf field encoding for 64-bit raw encoding (double) -__device__ inline uint8_t *pb_put_fixed64(uint8_t *p, uint32_t id, const void *raw64) +__device__ inline uint8_t* pb_put_fixed64(uint8_t* p, uint32_t id, const void* raw64) { p[0] = id * 8 + PB_TYPE_FIXED64; memcpy(p + 1, raw64, 8); @@ -226,15 +226,15 @@ constexpr unsigned int encode_threads_per_block = encode_threads_per_chunk * encode_chunks_per_block; __global__ void __launch_bounds__(encode_threads_per_block) - gpu_encode_statistics(uint8_t *blob_bfr, - statistics_merge_group *groups, - const statistics_chunk *chunks, + gpu_encode_statistics(uint8_t* blob_bfr, + statistics_merge_group* groups, + const statistics_chunk* chunks, uint32_t statistics_count) { __shared__ __align__(8) stats_state_s state_g[encode_chunks_per_block]; uint32_t t = threadIdx.x; uint32_t idx = blockIdx.x * encode_chunks_per_block + threadIdx.y; - stats_state_s *const s = &state_g[threadIdx.y]; + stats_state_s* const s = &state_g[threadIdx.y]; // Encode and update actual bfr size if (idx < statistics_count && t == 0) { @@ -243,8 +243,8 @@ __global__ void __launch_bounds__(encode_threads_per_block) s->col = *(s->group.col); s->base = blob_bfr + s->group.start_chunk; s->end = blob_bfr + s->group.start_chunk + s->group.num_chunks; - uint8_t *cur = pb_put_uint(s->base, 1, s->chunk.non_nulls); - uint8_t *fld_start = cur; + uint8_t* cur = pb_put_uint(s->base, 1, s->chunk.non_nulls); + uint8_t* fld_start = cur; switch (s->col.stats_dtype) { case dtype_int8: case dtype_int16: @@ -373,8 +373,8 @@ __global__ void __launch_bounds__(encode_threads_per_block) * @param[in] row_index_stride Rowgroup size in rows * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void orc_init_statistics_groups(statistics_group *groups, - const stats_column_desc *cols, +void 
orc_init_statistics_groups(statistics_group* groups, + const stats_column_desc* cols, uint32_t num_columns, uint32_t num_rowgroups, uint32_t row_index_stride, @@ -394,8 +394,8 @@ void orc_init_statistics_groups(statistics_group *groups, * @param[in] statistics_count Number of statistics buffers to encode * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void orc_init_statistics_buffersize(statistics_merge_group *groups, - const statistics_chunk *chunks, +void orc_init_statistics_buffersize(statistics_merge_group* groups, + const statistics_chunk* chunks, uint32_t statistics_count, rmm::cuda_stream_view stream) { @@ -411,9 +411,9 @@ void orc_init_statistics_buffersize(statistics_merge_group *groups, * @param[in,out] chunks Statistics data * @param[in] statistics_count Number of statistics buffers */ -void orc_encode_statistics(uint8_t *blob_bfr, - statistics_merge_group *groups, - const statistics_chunk *chunks, +void orc_encode_statistics(uint8_t* blob_bfr, + statistics_merge_group* groups, + const statistics_chunk* chunks, uint32_t statistics_count, rmm::cuda_stream_view stream) { diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 81fb1a394bb..13e606018ce 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -50,7 +50,7 @@ struct int128_s { }; struct orc_bytestream_s { - const uint8_t *base; + const uint8_t* base; uint32_t pos; uint32_t len; uint32_t fill_pos; @@ -93,7 +93,7 @@ struct orc_rowdec_state_s { }; struct orc_strdict_state_s { - DictionaryEntry *local_dict; + DictionaryEntry* local_dict; uint32_t dict_pos; uint32_t dict_len; }; @@ -146,8 +146,8 @@ struct orcdec_state_s { * @param[in] base Pointer to raw byte stream data * @param[in] len Stream length in bytes */ -static __device__ void bytestream_init(volatile orc_bytestream_s *bs, - const uint8_t *base, +static __device__ void bytestream_init(volatile orc_bytestream_s* bs, + const uint8_t* base, uint32_t len) { uint32_t pos = (len > 0) ? 
static_cast(7 & reinterpret_cast(base)) : 0; @@ -164,7 +164,7 @@ static __device__ void bytestream_init(volatile orc_bytestream_s *bs, * @param[in] bs Byte stream input * @param[in] bytes_consumed Number of bytes that were consumed */ -static __device__ void bytestream_flush_bytes(volatile orc_bytestream_s *bs, +static __device__ void bytestream_flush_bytes(volatile orc_bytestream_s* bs, uint32_t bytes_consumed) { uint32_t pos = bs->pos; @@ -183,7 +183,7 @@ static __device__ void bytestream_flush_bytes(volatile orc_bytestream_s *bs, * @param[in] bs Byte stream input * @param[in] t thread id */ -static __device__ void bytestream_fill(orc_bytestream_s *bs, int t) +static __device__ void bytestream_fill(orc_bytestream_s* bs, int t) { auto const count = bs->fill_count; if (t < count) { @@ -201,7 +201,7 @@ static __device__ void bytestream_fill(orc_bytestream_s *bs, int t) * @param[in] pos Position in byte stream * @return byte */ -inline __device__ uint8_t bytestream_readbyte(volatile orc_bytestream_s *bs, int pos) +inline __device__ uint8_t bytestream_readbyte(volatile orc_bytestream_s* bs, int pos) { return bs->buf.u8[pos & (bytestream_buffer_size - 1)]; } @@ -213,7 +213,7 @@ inline __device__ uint8_t bytestream_readbyte(volatile orc_bytestream_s *bs, int * @param[in] pos Position in byte stream * @result bits */ -inline __device__ uint32_t bytestream_readu32(volatile orc_bytestream_s *bs, int pos) +inline __device__ uint32_t bytestream_readu32(volatile orc_bytestream_s* bs, int pos) { uint32_t a = bs->buf.u32[(pos & (bytestream_buffer_size - 1)) >> 2]; uint32_t b = bs->buf.u32[((pos + 4) & (bytestream_buffer_size - 1)) >> 2]; @@ -228,7 +228,7 @@ inline __device__ uint32_t bytestream_readu32(volatile orc_bytestream_s *bs, int * @param[in] numbits number of bits * @return bits */ -inline __device__ uint64_t bytestream_readu64(volatile orc_bytestream_s *bs, int pos) +inline __device__ uint64_t bytestream_readu64(volatile orc_bytestream_s* bs, int pos) { uint32_t a = bs->buf.u32[(pos & (bytestream_buffer_size - 1)) >> 2]; uint32_t b = bs->buf.u32[((pos + 4) & (bytestream_buffer_size - 1)) >> 2]; @@ -249,7 +249,7 @@ inline __device__ uint64_t bytestream_readu64(volatile orc_bytestream_s *bs, int * @param[in] numbits number of bits * @return decoded value */ -inline __device__ uint32_t bytestream_readbits(volatile orc_bytestream_s *bs, +inline __device__ uint32_t bytestream_readbits(volatile orc_bytestream_s* bs, int bitpos, uint32_t numbits) { @@ -267,7 +267,7 @@ inline __device__ uint32_t bytestream_readbits(volatile orc_bytestream_s *bs, * @param[in] numbits number of bits * @return decoded value */ -inline __device__ uint64_t bytestream_readbits64(volatile orc_bytestream_s *bs, +inline __device__ uint64_t bytestream_readbits64(volatile orc_bytestream_s* bs, int bitpos, uint32_t numbits) { @@ -292,10 +292,10 @@ inline __device__ uint64_t bytestream_readbits64(volatile orc_bytestream_s *bs, * @param[in] numbits number of bits * @param[out] result decoded value */ -inline __device__ void bytestream_readbe(volatile orc_bytestream_s *bs, +inline __device__ void bytestream_readbe(volatile orc_bytestream_s* bs, int bitpos, uint32_t numbits, - uint32_t &result) + uint32_t& result) { result = bytestream_readbits(bs, bitpos, numbits); } @@ -308,10 +308,10 @@ inline __device__ void bytestream_readbe(volatile orc_bytestream_s *bs, * @param[in] numbits number of bits * @param[out] result decoded value */ -inline __device__ void bytestream_readbe(volatile orc_bytestream_s *bs, +inline __device__ void 
bytestream_readbe(volatile orc_bytestream_s* bs, int bitpos, uint32_t numbits, - int32_t &result) + int32_t& result) { uint32_t u = bytestream_readbits(bs, bitpos, numbits); result = (int32_t)((u >> 1u) ^ -(int32_t)(u & 1)); @@ -325,10 +325,10 @@ inline __device__ void bytestream_readbe(volatile orc_bytestream_s *bs, * @param[in] numbits number of bits * @param[out] result decoded value */ -inline __device__ void bytestream_readbe(volatile orc_bytestream_s *bs, +inline __device__ void bytestream_readbe(volatile orc_bytestream_s* bs, int bitpos, uint32_t numbits, - uint64_t &result) + uint64_t& result) { result = bytestream_readbits64(bs, bitpos, numbits); } @@ -341,10 +341,10 @@ inline __device__ void bytestream_readbe(volatile orc_bytestream_s *bs, * @param[in] numbits number of bits * @param[out] result decoded value */ -inline __device__ void bytestream_readbe(volatile orc_bytestream_s *bs, +inline __device__ void bytestream_readbe(volatile orc_bytestream_s* bs, int bitpos, uint32_t numbits, - int64_t &result) + int64_t& result) { uint64_t u = bytestream_readbits64(bs, bitpos, numbits); result = (int64_t)((u >> 1u) ^ -(int64_t)(u & 1)); @@ -358,7 +358,7 @@ inline __device__ void bytestream_readbe(volatile orc_bytestream_s *bs, * @return length of varint in bytes */ template -inline __device__ uint32_t varint_length(volatile orc_bytestream_s *bs, int pos) +inline __device__ uint32_t varint_length(volatile orc_bytestream_s* bs, int pos) { if (bytestream_readbyte(bs, pos) > 0x7f) { uint32_t next32 = bytestream_readu32(bs, pos + 1); @@ -396,7 +396,7 @@ inline __device__ uint32_t varint_length(volatile orc_bytestream_s *bs, int pos) * @return new position in byte stream buffer */ template -inline __device__ int decode_base128_varint(volatile orc_bytestream_s *bs, int pos, T &result) +inline __device__ int decode_base128_varint(volatile orc_bytestream_s* bs, int pos, T& result) { uint32_t v = bytestream_readbyte(bs, pos++); if (v > 0x7f) { @@ -450,7 +450,7 @@ inline __device__ int decode_base128_varint(volatile orc_bytestream_s *bs, int p /** * @brief Decodes a signed int128 encoded as base-128 varint (used for decimals) */ -inline __device__ int128_s decode_varint128(volatile orc_bytestream_s *bs, int pos) +inline __device__ int128_s decode_varint128(volatile orc_bytestream_s* bs, int pos) { uint32_t b = bytestream_readbyte(bs, pos++); int64_t sign_mask = -(int32_t)(b & 1); @@ -478,7 +478,7 @@ inline __device__ int128_s decode_varint128(volatile orc_bytestream_s *bs, int p /** * @brief Decodes an unsigned 32-bit varint */ -inline __device__ int decode_varint(volatile orc_bytestream_s *bs, int pos, uint32_t &result) +inline __device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, uint32_t& result) { uint32_t u; pos = decode_base128_varint(bs, pos, u); @@ -489,7 +489,7 @@ inline __device__ int decode_varint(volatile orc_bytestream_s *bs, int pos, uint /** * @brief Decodes an unsigned 64-bit varint */ -inline __device__ int decode_varint(volatile orc_bytestream_s *bs, int pos, uint64_t &result) +inline __device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, uint64_t& result) { uint64_t u; pos = decode_base128_varint(bs, pos, u); @@ -500,7 +500,7 @@ inline __device__ int decode_varint(volatile orc_bytestream_s *bs, int pos, uint /** * @brief Signed version of 32-bit decode_varint */ -inline __device__ int decode_varint(volatile orc_bytestream_s *bs, int pos, int32_t &result) +inline __device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, int32_t& result) { 
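   // Zigzag decoding: the writer stores a signed value v as the unsigned varint
   // (v << 1) ^ (v >> 31), so small magnitudes of either sign stay short on the
   // wire; (u >> 1) ^ -(u & 1) inverts that mapping, e.g. u = 0 -> 0, 1 -> -1,
   // 2 -> 1, 3 -> -2.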
uint32_t u; pos = decode_base128_varint(bs, pos, u); @@ -511,7 +511,7 @@ inline __device__ int decode_varint(volatile orc_bytestream_s *bs, int pos, int3 /** * @brief Signed version of 64-bit decode_varint */ -inline __device__ int decode_varint(volatile orc_bytestream_s *bs, int pos, int64_t &result) +inline __device__ int decode_varint(volatile orc_bytestream_s* bs, int pos, int64_t& result) { uint64_t u; pos = decode_base128_varint(bs, pos, u); @@ -529,7 +529,7 @@ inline __device__ int decode_varint(volatile orc_bytestream_s *bs, int pos, int6 * @return number of values decoded */ template -inline __device__ void lengths_to_positions(volatile T *vals, uint32_t numvals, unsigned int t) +inline __device__ void lengths_to_positions(volatile T* vals, uint32_t numvals, unsigned int t) { for (uint32_t n = 1; n < numvals; n <<= 1) { __syncthreads(); @@ -550,7 +550,7 @@ inline __device__ void lengths_to_positions(volatile T *vals, uint32_t numvals, */ template static __device__ uint32_t Integer_RLEv1( - orc_bytestream_s *bs, volatile orc_rlev1_state_s *rle, volatile T *vals, uint32_t maxvals, int t) + orc_bytestream_s* bs, volatile orc_rlev1_state_s* rle, volatile T* vals, uint32_t maxvals, int t) { uint32_t numvals, numruns; if (t == 0) { @@ -603,7 +603,9 @@ static __device__ uint32_t Integer_RLEv1( int delta = run_data >> 24; uint32_t base = run_data & 0x3ff; uint32_t pos = vals[base] & 0xffff; - for (int i = 1 + tr; i < n; i += 32) { vals[base + i] = ((delta * i) << 16) | pos; } + for (int i = 1 + tr; i < n; i += 32) { + vals[base + i] = ((delta * i) << 16) | pos; + } } __syncthreads(); } @@ -654,7 +656,7 @@ static const __device__ __constant__ uint8_t ClosestFixedBitsMap[65] = { */ template static __device__ uint32_t Integer_RLEv2( - orc_bytestream_s *bs, volatile orc_rlev2_state_s *rle, volatile T *vals, uint32_t maxvals, int t) + orc_bytestream_s* bs, volatile orc_rlev2_state_s* rle, volatile T* vals, uint32_t maxvals, int t) { uint32_t numvals, numruns; int r, tr; @@ -705,7 +707,7 @@ static __device__ uint32_t Integer_RLEv2( pos += l; if (pos > maxpos) break; ((numvals == 0) and (n > maxvals)) ? numvals = maxvals : numvals += n; - lastpos = pos; + lastpos = pos; numruns++; } rle->num_vals = numvals; @@ -865,7 +867,9 @@ static __device__ uint32_t Integer_RLEv2( baseval = rle->baseval.u32[r]; else baseval = rle->baseval.u64[r]; - for (uint32_t j = tr; j < n; j += 32) { vals[base + j] += baseval; } + for (uint32_t j = tr; j < n; j += 32) { + vals[base + j] += baseval; + } } } __syncthreads(); @@ -880,7 +884,7 @@ static __device__ uint32_t Integer_RLEv2( * * @return 32-bit value */ -inline __device__ uint32_t rle8_read_bool32(volatile uint32_t *vals, uint32_t bitpos) +inline __device__ uint32_t rle8_read_bool32(volatile uint32_t* vals, uint32_t bitpos) { uint32_t a = vals[(bitpos >> 5) + 0]; uint32_t b = vals[(bitpos >> 5) + 1]; @@ -900,9 +904,9 @@ inline __device__ uint32_t rle8_read_bool32(volatile uint32_t *vals, uint32_t bi * * @return number of values decoded */ -static __device__ uint32_t Byte_RLE(orc_bytestream_s *bs, - volatile orc_byterle_state_s *rle, - volatile uint8_t *vals, +static __device__ uint32_t Byte_RLE(orc_bytestream_s* bs, + volatile orc_byterle_state_s* rle, + volatile uint8_t* vals, uint32_t maxvals, int t) { @@ -931,7 +935,7 @@ static __device__ uint32_t Byte_RLE(orc_bytestream_s *bs, if (pos > maxpos) break; numruns++; ((numvals == 0) and (n > maxvals)) ? 
numvals = maxvals : numvals += n; - lastpos = pos; + lastpos = pos; } rle->num_runs = numruns; rle->num_vals = numvals; @@ -1011,9 +1015,9 @@ static const __device__ __constant__ int64_t kPow5i[28] = {1, * * @return number of values decoded */ -static __device__ int Decode_Decimals(orc_bytestream_s *bs, - volatile orc_byterle_state_s *scratch, - volatile orcdec_state_s::values &vals, +static __device__ int Decode_Decimals(orc_bytestream_s* bs, + volatile orc_byterle_state_s* scratch, + volatile orcdec_state_s::values& vals, int val_scale, int numvals, int col_scale, @@ -1115,8 +1119,8 @@ static __device__ int Decode_Decimals(orc_bytestream_s *bs, // blockDim {block_size,1,1} template __global__ void __launch_bounds__(block_size) - gpuDecodeNullsAndStringDictionaries(ColumnDesc *chunks, - DictionaryEntry *global_dictionary, + gpuDecodeNullsAndStringDictionaries(ColumnDesc* chunks, + DictionaryEntry* global_dictionary, uint32_t num_columns, uint32_t num_stripes, size_t first_row) @@ -1129,7 +1133,7 @@ __global__ void __launch_bounds__(block_size) typename block_reduce::TempStorage bk_storage; } temp_storage; - orcdec_state_s *const s = &state_g; + orcdec_state_s* const s = &state_g; const bool is_nulldec = (blockIdx.y >= num_stripes); const uint32_t column = blockIdx.x; const uint32_t stripe = (is_nulldec) ? blockIdx.y - num_stripes : blockIdx.y; @@ -1179,7 +1183,7 @@ __global__ void __launch_bounds__(block_size) int64_t dst_pos = max(dst_row, (int64_t)0); uint32_t startbit = -static_cast(min(dst_row, (int64_t)0)); uint32_t nbits = nrows - min(startbit, nrows); - uint32_t *valid = s->chunk.valid_map_base + (dst_pos >> 5); + uint32_t* valid = s->chunk.valid_map_base + (dst_pos >> 5); uint32_t bitpos = static_cast(dst_pos) & 0x1f; if ((size_t)(dst_pos + nbits) > max_num_rows) { nbits = static_cast(max_num_rows - min((size_t)dst_pos, max_num_rows)); @@ -1254,7 +1258,7 @@ __global__ void __launch_bounds__(block_size) __syncthreads(); while (s->top.dict.dict_len > 0) { uint32_t numvals = min(s->top.dict.dict_len, blockDim.x), len; - volatile uint32_t *vals = s->vals.u32; + volatile uint32_t* vals = s->vals.u32; bytestream_fill(&s->bs, t); __syncthreads(); if (is_rlev1(s->chunk.encoding_kind)) { @@ -1296,10 +1300,10 @@ __global__ void __launch_bounds__(block_size) * @param[in] temp_storage shared memory storage to perform block reduce */ template -static __device__ void DecodeRowPositions(orcdec_state_s *s, +static __device__ void DecodeRowPositions(orcdec_state_s* s, size_t first_row, int t, - Storage &temp_storage) + Storage& temp_storage) { using block_reduce = cub::BlockReduce; @@ -1325,10 +1329,9 @@ static __device__ void DecodeRowPositions(orcdec_state_s *s, uint32_t rmax = s->top.data.end_row - min((uint32_t)first_row, s->top.data.end_row); uint32_t r = (uint32_t)(s->top.data.cur_row + s->top.data.nrows + t - first_row); uint32_t valid = (t < nrows && r < rmax) - ? (((const uint8_t *)s->chunk.valid_map_base)[r >> 3] >> (r & 7)) & 1 + ? 
(((const uint8_t*)s->chunk.valid_map_base)[r >> 3] >> (r & 7)) & 1 : 0; - volatile uint16_t *row_ofs_plus1 = - (volatile uint16_t *)&s->u.rowdec.row[s->u.rowdec.nz_count]; + volatile uint16_t* row_ofs_plus1 = (volatile uint16_t*)&s->u.rowdec.row[s->u.rowdec.nz_count]; uint32_t nz_pos, row_plus1, nz_count = s->u.rowdec.nz_count, last_row; if (t < nrows) { row_ofs_plus1[t] = valid; } lengths_to_positions(row_ofs_plus1, nrows, t); @@ -1382,8 +1385,8 @@ static const __device__ __constant__ uint32_t kTimestampNanoScale[8] = { // blockDim {block_size,1,1} template __global__ void __launch_bounds__(block_size) - gpuDecodeOrcColumnData(ColumnDesc *chunks, - DictionaryEntry *global_dictionary, + gpuDecodeOrcColumnData(ColumnDesc* chunks, + DictionaryEntry* global_dictionary, timezone_table_view tz_table, device_2dspan row_groups, size_t first_row, @@ -1397,7 +1400,7 @@ __global__ void __launch_bounds__(block_size) typename cub::BlockReduce::TempStorage blk_uint64; } temp_storage; - orcdec_state_s *const s = &state_g; + orcdec_state_s* const s = &state_g; uint32_t chunk_id; int t = threadIdx.x; auto num_rowgroups = row_groups.size().first; @@ -1481,7 +1484,7 @@ __global__ void __launch_bounds__(block_size) uint32_t vals_skipped = 0; if (s->is_string || s->chunk.type_kind == TIMESTAMP) { // For these data types, we have a secondary unsigned 32-bit data stream - orc_bytestream_s *bs = (is_dictionary(s->chunk.encoding_kind)) ? &s->bs : &s->bs2; + orc_bytestream_s* bs = (is_dictionary(s->chunk.encoding_kind)) ? &s->bs : &s->bs2; uint32_t ofs = 0; if (s->chunk.type_kind == TIMESTAMP) { // Restore buffered secondary stream values, if any @@ -1617,7 +1620,7 @@ __global__ void __launch_bounds__(block_size) } else if (s->chunk.type_kind == LONG || s->chunk.type_kind == TIMESTAMP || s->chunk.type_kind == DECIMAL) { - orc_bytestream_s *bs = (s->chunk.type_kind == DECIMAL) ? &s->bs2 : &s->bs; + orc_bytestream_s* bs = (s->chunk.type_kind == DECIMAL) ? 
&s->bs2 : &s->bs; if (is_rlev1(s->chunk.encoding_kind)) { numvals = Integer_RLEv1(bs, &s->u.rlev1, s->vals.i64, numvals, t); } else { @@ -1698,14 +1701,14 @@ __global__ void __launch_bounds__(block_size) s->top.data.cur_row + s->u.rowdec.row[t] - 1 < s->top.data.end_row) { size_t row = s->top.data.cur_row + s->u.rowdec.row[t] - 1 - first_row; if (row < max_num_rows) { - void *data_out = s->chunk.column_data_base; + void* data_out = s->chunk.column_data_base; switch (s->chunk.type_kind) { case FLOAT: - case INT: static_cast(data_out)[row] = s->vals.u32[t + vals_skipped]; break; + case INT: static_cast(data_out)[row] = s->vals.u32[t + vals_skipped]; break; case DOUBLE: case LONG: case DECIMAL: - static_cast(data_out)[row] = s->vals.u64[t + vals_skipped]; + static_cast(data_out)[row] = s->vals.u64[t + vals_skipped]; break; case LIST: { // Since the offsets column in cudf is `size_type`, @@ -1713,38 +1716,38 @@ __global__ void __launch_bounds__(block_size) cudf_assert( (s->vals.u64[t + vals_skipped] > std::numeric_limits::max()) and "Number of elements is more than what size_type can handle"); - list_child_elements = s->vals.u64[t + vals_skipped]; - static_cast(data_out)[row] = list_child_elements; + list_child_elements = s->vals.u64[t + vals_skipped]; + static_cast(data_out)[row] = list_child_elements; } break; case SHORT: - static_cast(data_out)[row] = + static_cast(data_out)[row] = static_cast(s->vals.u32[t + vals_skipped]); break; - case BYTE: static_cast(data_out)[row] = s->vals.u8[t + vals_skipped]; break; + case BYTE: static_cast(data_out)[row] = s->vals.u8[t + vals_skipped]; break; case BOOLEAN: - static_cast(data_out)[row] = + static_cast(data_out)[row] = (s->vals.u8[(t + vals_skipped) >> 3] >> ((~(t + vals_skipped)) & 7)) & 1; break; case DATE: if (s->chunk.dtype_len == 8) { // Convert from days to milliseconds by multiplying by 24*3600*1000 - static_cast(data_out)[row] = + static_cast(data_out)[row] = 86400000ll * (int64_t)s->vals.i32[t + vals_skipped]; } else { - static_cast(data_out)[row] = s->vals.u32[t + vals_skipped]; + static_cast(data_out)[row] = s->vals.u32[t + vals_skipped]; } break; case STRING: case BINARY: case VARCHAR: case CHAR: { - string_index_pair *strdesc = &static_cast(data_out)[row]; - void const *ptr = nullptr; + string_index_pair* strdesc = &static_cast(data_out)[row]; + void const* ptr = nullptr; uint32_t count = 0; if (is_dictionary(s->chunk.encoding_kind)) { auto const dict_idx = s->vals.u32[t + vals_skipped]; if (dict_idx < s->chunk.dict_len) { - auto const &g_entry = global_dictionary[s->chunk.dictionary_start + dict_idx]; + auto const& g_entry = global_dictionary[s->chunk.dictionary_start + dict_idx]; ptr = s->chunk.streams[CI_DICTIONARY] + g_entry.pos; count = g_entry.len; @@ -1758,7 +1761,7 @@ __global__ void __launch_bounds__(block_size) count = secondary_val; } } - strdesc->first = static_cast(ptr); + strdesc->first = static_cast(ptr); strdesc->second = count; break; } @@ -1771,12 +1774,12 @@ __global__ void __launch_bounds__(block_size) } if (seconds < 0 && nanos != 0) { seconds -= 1; } if (s->chunk.ts_clock_rate) - static_cast(data_out)[row] = + static_cast(data_out)[row] = seconds * s->chunk.ts_clock_rate + (nanos + (499999999 / s->chunk.ts_clock_rate)) / (1000000000 / s->chunk.ts_clock_rate); // Output to desired clock rate else - static_cast(data_out)[row] = seconds * 1000000000 + nanos; + static_cast(data_out)[row] = seconds * 1000000000 + nanos; break; } } @@ -1825,8 +1828,8 @@ __global__ void __launch_bounds__(block_size) * @param[in] 
first_row Crop all rows below first_row * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void __host__ DecodeNullsAndStringDictionaries(ColumnDesc *chunks, - DictionaryEntry *global_dictionary, +void __host__ DecodeNullsAndStringDictionaries(ColumnDesc* chunks, + DictionaryEntry* global_dictionary, uint32_t num_columns, uint32_t num_stripes, size_t first_row, @@ -1853,8 +1856,8 @@ void __host__ DecodeNullsAndStringDictionaries(ColumnDesc *chunks, * @param[in] level nesting level being processed * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void __host__ DecodeOrcColumnData(ColumnDesc *chunks, - DictionaryEntry *global_dictionary, +void __host__ DecodeOrcColumnData(ColumnDesc* chunks, + DictionaryEntry* global_dictionary, device_2dspan row_groups, uint32_t num_columns, uint32_t num_stripes, diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index b469d7215b4..cfc2d54a1b7 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -57,7 +57,7 @@ struct intrle_enc_state_s { struct strdata_enc_state_s { uint32_t char_count; uint32_t lengths_red[(512 / 32)]; - const char *str_data[512]; + const char* str_data[512]; }; struct orcenc_state_s { @@ -126,9 +126,9 @@ static inline __device__ uint32_t CountLeadingBytes64(uint64_t v) { return __clz */ template static __device__ void StoreBytes( - orcenc_state_s *s, const uint8_t *inbuf, uint32_t inpos, uint32_t count, int t) + orcenc_state_s* s, const uint8_t* inbuf, uint32_t inpos, uint32_t count, int t) { - uint8_t *dst = s->stream.data_ptrs[cid] + s->strm_pos[cid]; + uint8_t* dst = s->stream.data_ptrs[cid] + s->strm_pos[cid]; while (count > 0) { uint32_t n = min(count, 512); if (t < n) { dst[t] = inbuf[(inpos + t) & inmask]; } @@ -157,9 +157,9 @@ static __device__ void StoreBytes( */ template static __device__ uint32_t ByteRLE( - orcenc_state_s *s, const uint8_t *inbuf, uint32_t inpos, uint32_t numvals, uint32_t flush, int t) + orcenc_state_s* s, const uint8_t* inbuf, uint32_t inpos, uint32_t numvals, uint32_t flush, int t) { - uint8_t *dst = s->stream.data_ptrs[cid] + s->strm_pos[cid]; + uint8_t* dst = s->stream.data_ptrs[cid] + s->strm_pos[cid]; uint32_t out_cnt = 0; while (numvals > 0) { @@ -272,7 +272,7 @@ static const __device__ __constant__ uint8_t kByteLengthToRLEv2_W[9] = { /** * @brief Encode a varint value, return the number of bytes written */ -static inline __device__ uint32_t StoreVarint(uint8_t *dst, uint64_t v) +static inline __device__ uint32_t StoreVarint(uint8_t* dst, uint64_t v) { uint32_t bytecnt = 0; for (;;) { @@ -289,7 +289,7 @@ static inline __device__ uint32_t StoreVarint(uint8_t *dst, uint64_t v) } template -static inline __device__ void StoreBytesBigEndian(uint8_t *dst, T v, uint32_t w) +static inline __device__ void StoreBytesBigEndian(uint8_t* dst, T v, uint32_t w) { for (uint32_t i = 0, b = w * 8; i < w; ++i) { b -= 8; @@ -299,7 +299,7 @@ static inline __device__ void StoreBytesBigEndian(uint8_t *dst, T v, uint32_t w) // Combine and store bits for symbol widths less than 8 static inline __device__ void StoreBitsBigEndian( - uint8_t *dst, uint32_t v, uint32_t w, int num_vals, int t) + uint8_t* dst, uint32_t v, uint32_t w, int num_vals, int t) { if (t <= (num_vals | 0x1f)) { uint32_t mask; @@ -343,16 +343,16 @@ template -static __device__ uint32_t IntegerRLE(orcenc_state_s *s, - const T *inbuf, +static __device__ uint32_t IntegerRLE(orcenc_state_s* s, + const T* inbuf, uint32_t inpos, uint32_t numvals, uint32_t flush, 
int t, - Storage &temp_storage) + Storage& temp_storage) { using block_reduce = cub::BlockReduce; - uint8_t *dst = s->stream.data_ptrs[cid] + s->strm_pos[cid]; + uint8_t* dst = s->stream.data_ptrs[cid] + s->strm_pos[cid]; uint32_t out_cnt = 0; __shared__ volatile uint64_t block_vmin; @@ -473,7 +473,7 @@ static __device__ uint32_t IntegerRLE(orcenc_state_s *s, uint32_t bw, pw = 1, pll, pgw = 1, bv_scale = (is_signed) ? 0 : 1; vmax = (is_signed) ? ((vmin < 0) ? -vmin : vmin) * 2 : vmin; bw = (sizeof(T) > 4) ? (8 - min(CountLeadingBytes64(vmax << bv_scale), 7)) - : (4 - min(CountLeadingBytes32(vmax << bv_scale), 3)); + : (4 - min(CountLeadingBytes32(vmax << bv_scale), 3)); if (zero_pll_war) { // Insert a dummy zero patch pll = 1; @@ -560,8 +560,8 @@ static __device__ uint32_t IntegerRLE(orcenc_state_s *s, * @param[in] len(t) string length (per thread) * @param[in] t thread id */ -static __device__ void StoreStringData(uint8_t *dst, - strdata_enc_state_s *strenc, +static __device__ void StoreStringData(uint8_t* dst, + strdata_enc_state_s* strenc, uint32_t len, int t) { @@ -601,7 +601,7 @@ static __device__ void StoreStringData(uint8_t *dst, * @param[in] t thread id */ template -inline __device__ void lengths_to_positions(volatile T *vals, uint32_t numvals, unsigned int t) +inline __device__ void lengths_to_positions(volatile T* vals, uint32_t numvals, unsigned int t) { for (uint32_t n = 1; n < numvals; n <<= 1) { __syncthreads(); @@ -635,7 +635,7 @@ __global__ void __launch_bounds__(block_size) typename cub::BlockReduce::TempStorage u64; } temp_storage; - orcenc_state_s *const s = &state_g; + orcenc_state_s* const s = &state_g; uint32_t col_id = blockIdx.x; uint32_t group_id = blockIdx.y; int t = threadIdx.x; @@ -913,7 +913,7 @@ __global__ void __launch_bounds__(block_size) streams[col_id][group_id].lengths[t] = s->strm_pos[t]; if (!s->stream.data_ptrs[t]) { streams[col_id][group_id].data_ptrs[t] = - static_cast(const_cast(s->chunk.leaf_column->head())) + + static_cast(const_cast(s->chunk.leaf_column->head())) + (s->chunk.leaf_column->offset() + s->chunk.start_row) * s->chunk.dtype_len; } } @@ -929,14 +929,14 @@ __global__ void __launch_bounds__(block_size) // blockDim {512,1,1} template __global__ void __launch_bounds__(block_size) - gpuEncodeStringDictionaries(StripeDictionary *stripes, + gpuEncodeStringDictionaries(StripeDictionary* stripes, device_2dspan chunks, device_2dspan streams) { __shared__ __align__(16) orcenc_state_s state_g; __shared__ typename cub::BlockReduce::TempStorage temp_storage; - orcenc_state_s *const s = &state_g; + orcenc_state_s* const s = &state_g; uint32_t stripe_id = blockIdx.x; uint32_t cid = (blockIdx.y) ? CI_DICTIONARY : CI_DATA2; int t = threadIdx.x; @@ -953,7 +953,7 @@ __global__ void __launch_bounds__(block_size) s->nrows = s->u.dict_stripe.num_strings; s->cur_row = 0; } - column_device_view *string_column = s->u.dict_stripe.leaf_column; + column_device_view* string_column = s->u.dict_stripe.leaf_column; auto const dict_data = s->u.dict_stripe.dict_data; __syncthreads(); if (s->chunk.encoding_kind != DICTIONARY_V2) { @@ -965,7 +965,7 @@ __global__ void __launch_bounds__(block_size) uint32_t string_idx = (t < numvals) ? 
dict_data[s->cur_row + t] : 0; if (cid == CI_DICTIONARY) { // Encoding string contents - const char *ptr = 0; + const char* ptr = 0; uint32_t count = 0; if (t < numvals) { auto string_val = string_column->element(string_idx); @@ -1026,7 +1026,7 @@ __global__ void __launch_bounds__(1024) { __shared__ __align__(16) StripeStream ss; __shared__ __align__(16) encoder_chunk_streams strm0; - __shared__ uint8_t *volatile ck_curptr_g; + __shared__ uint8_t* volatile ck_curptr_g; __shared__ uint32_t volatile ck_curlen_g; auto const stripe_id = blockIdx.x; @@ -1041,7 +1041,7 @@ __global__ void __launch_bounds__(1024) auto const cid = ss.stream_type; auto dst_ptr = strm0.data_ptrs[cid] + strm0.lengths[cid]; for (auto group = ss.first_chunk_id + 1; group < ss.first_chunk_id + ss.num_chunks; ++group) { - uint8_t *src_ptr; + uint8_t* src_ptr; uint32_t len; if (t == 0) { src_ptr = streams[ss.column_id][group].data_ptrs[cid]; @@ -1080,13 +1080,13 @@ __global__ void __launch_bounds__(1024) __global__ void __launch_bounds__(256) gpuInitCompressionBlocks(device_2dspan strm_desc, device_2dspan streams, // const? - gpu_inflate_input_s *comp_in, - gpu_inflate_status_s *comp_out, - uint8_t *compressed_bfr, + gpu_inflate_input_s* comp_in, + gpu_inflate_status_s* comp_out, + uint8_t* compressed_bfr, uint32_t comp_blk_size) { __shared__ __align__(16) StripeStream ss; - __shared__ uint8_t *volatile uncomp_base_g; + __shared__ uint8_t* volatile uncomp_base_g; auto const stripe_id = blockIdx.x; auto const stream_id = blockIdx.y; @@ -1103,8 +1103,8 @@ __global__ void __launch_bounds__(256) dst = compressed_bfr + ss.bfr_offset; num_blocks = (ss.stream_size > 0) ? (ss.stream_size - 1) / comp_blk_size + 1 : 1; for (uint32_t b = t; b < num_blocks; b += 256) { - gpu_inflate_input_s *blk_in = &comp_in[ss.first_block + b]; - gpu_inflate_status_s *blk_out = &comp_out[ss.first_block + b]; + gpu_inflate_input_s* blk_in = &comp_in[ss.first_block + b]; + gpu_inflate_status_s* blk_out = &comp_out[ss.first_block + b]; uint32_t blk_size = min(comp_blk_size, ss.stream_size - min(b * comp_blk_size, ss.stream_size)); blk_in->srcDevice = src + b * comp_blk_size; blk_in->srcSize = blk_size; @@ -1130,21 +1130,21 @@ __global__ void __launch_bounds__(256) // blockDim {1024,1,1} __global__ void __launch_bounds__(1024) gpuCompactCompressedBlocks(device_2dspan strm_desc, - gpu_inflate_input_s *comp_in, - gpu_inflate_status_s *comp_out, - uint8_t *compressed_bfr, + gpu_inflate_input_s* comp_in, + gpu_inflate_status_s* comp_out, + uint8_t* compressed_bfr, uint32_t comp_blk_size) { __shared__ __align__(16) StripeStream ss; - __shared__ const uint8_t *volatile comp_src_g; + __shared__ const uint8_t* volatile comp_src_g; __shared__ uint32_t volatile comp_len_g; auto const stripe_id = blockIdx.x; auto const stream_id = blockIdx.y; uint32_t t = threadIdx.x; uint32_t num_blocks, b, blk_size; - const uint8_t *src; - uint8_t *dst; + const uint8_t* src; + uint8_t* dst; if (t == 0) ss = strm_desc[stripe_id][stream_id]; __syncthreads(); @@ -1154,21 +1154,21 @@ __global__ void __launch_bounds__(1024) b = 0; do { if (t == 0) { - gpu_inflate_input_s *blk_in = &comp_in[ss.first_block + b]; - gpu_inflate_status_s *blk_out = &comp_out[ss.first_block + b]; + gpu_inflate_input_s* blk_in = &comp_in[ss.first_block + b]; + gpu_inflate_status_s* blk_out = &comp_out[ss.first_block + b]; uint32_t src_len = min(comp_blk_size, ss.stream_size - min(b * comp_blk_size, ss.stream_size)); uint32_t dst_len = (blk_out->status == 0) ? 
blk_out->bytes_written : src_len; uint32_t blk_size24; if (dst_len >= src_len) { // Copy from uncompressed source - src = static_cast(blk_in->srcDevice); + src = static_cast(blk_in->srcDevice); blk_out->bytes_written = src_len; dst_len = src_len; blk_size24 = dst_len * 2 + 1; } else { // Compressed block - src = static_cast(blk_in->dstDevice); + src = static_cast(blk_in->dstDevice); blk_size24 = dst_len * 2 + 0; } dst[0] = static_cast(blk_size24 >> 0); @@ -1207,7 +1207,7 @@ void EncodeOrcColumnData(device_2dspan chunks, gpuEncodeOrcColumnData<512><<>>(chunks, streams); } -void EncodeStripeDictionaries(StripeDictionary *stripes, +void EncodeStripeDictionaries(StripeDictionary* stripes, device_2dspan chunks, uint32_t num_string_columns, uint32_t num_stripes, @@ -1220,7 +1220,7 @@ void EncodeStripeDictionaries(StripeDictionary *stripes, <<>>(stripes, chunks, enc_streams); } -void set_chunk_columns(const table_device_view &view, +void set_chunk_columns(const table_device_view& view, device_2dspan chunks, rmm::cuda_stream_view stream) { @@ -1239,14 +1239,14 @@ void CompactOrcDataStreams(device_2dspan strm_desc, gpuCompactOrcDataStreams<<>>(strm_desc, enc_streams); } -void CompressOrcDataStreams(uint8_t *compressed_data, +void CompressOrcDataStreams(uint8_t* compressed_data, uint32_t num_compressed_blocks, CompressionKind compression, uint32_t comp_blk_size, device_2dspan strm_desc, device_2dspan enc_streams, - gpu_inflate_input_s *comp_in, - gpu_inflate_status_s *comp_out, + gpu_inflate_input_s* comp_in, + gpu_inflate_status_s* comp_out, rmm::cuda_stream_view stream) { dim3 dim_block_init(256, 1); diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index b0f2cb4b739..317b7255718 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -32,11 +32,11 @@ struct compressed_stream_s { // blockDim {128,1,1} extern "C" __global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeData( - CompressedStreamInfo *strm_info, int32_t num_streams, uint32_t block_size, uint32_t log2maxcr) + CompressedStreamInfo* strm_info, int32_t num_streams, uint32_t block_size, uint32_t log2maxcr) { __shared__ compressed_stream_s strm_g[4]; - compressed_stream_s *const s = &strm_g[threadIdx.x / 32]; + compressed_stream_s* const s = &strm_g[threadIdx.x / 32]; int strm_id = blockIdx.x * 4 + (threadIdx.x / 32); int lane_id = threadIdx.x % 32; @@ -45,9 +45,9 @@ extern "C" __global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeDat __syncthreads(); if (strm_id < num_streams) { // Walk through the compressed blocks - const uint8_t *cur = s->info.compressed_data; - const uint8_t *end = cur + s->info.compressed_data_size; - uint8_t *uncompressed = s->info.uncompressed_data; + const uint8_t* cur = s->info.compressed_data; + const uint8_t* end = cur + s->info.compressed_data_size; + uint8_t* uncompressed = s->info.uncompressed_data; size_t max_uncompressed_size = 0; uint32_t num_compressed_blocks = 0; uint32_t num_uncompressed_blocks = 0; @@ -55,7 +55,7 @@ extern "C" __global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeDat uint32_t block_len = shuffle((lane_id == 0) ? 
cur[0] | (cur[1] << 8) | (cur[2] << 16) : 0); uint32_t is_uncompressed = block_len & 1; uint32_t uncompressed_size; - gpu_inflate_input_s *init_ctl = nullptr; + gpu_inflate_input_s* init_ctl = nullptr; block_len >>= 1; cur += 3; if (block_len > block_size || cur + block_len > end) { @@ -67,10 +67,9 @@ extern "C" __global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeDat // TBD: For some codecs like snappy, it wouldn't be too difficult to get the actual // uncompressed size and avoid waste due to block size alignment For now, rely on the max // compression ratio to limit waste for the most extreme cases (small single-block streams) - uncompressed_size = - (is_uncompressed) - ? block_len - : (block_len < (block_size >> log2maxcr)) ? block_len << log2maxcr : block_size; + uncompressed_size = (is_uncompressed) ? block_len + : (block_len < (block_size >> log2maxcr)) ? block_len << log2maxcr + : block_size; if (is_uncompressed) { if (uncompressed_size <= 32) { // For short blocks, copy the uncompressed data to output @@ -94,7 +93,7 @@ extern "C" __global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeDat num_compressed_blocks++; } if (!lane_id && init_ctl) { - s->ctl.srcDevice = const_cast(cur); + s->ctl.srcDevice = const_cast(cur); s->ctl.srcSize = block_len; s->ctl.dstDevice = uncompressed + max_uncompressed_size; s->ctl.dstSize = uncompressed_size; @@ -118,11 +117,11 @@ extern "C" __global__ void __launch_bounds__(128, 8) gpuParseCompressedStripeDat // blockDim {128,1,1} extern "C" __global__ void __launch_bounds__(128, 8) - gpuPostDecompressionReassemble(CompressedStreamInfo *strm_info, int32_t num_streams) + gpuPostDecompressionReassemble(CompressedStreamInfo* strm_info, int32_t num_streams) { __shared__ compressed_stream_s strm_g[4]; - compressed_stream_s *const s = &strm_g[threadIdx.x / 32]; + compressed_stream_s* const s = &strm_g[threadIdx.x / 32]; int strm_id = blockIdx.x * 4 + (threadIdx.x / 32); int lane_id = threadIdx.x % 32; @@ -133,12 +132,12 @@ extern "C" __global__ void __launch_bounds__(128, 8) s->info.num_compressed_blocks + s->info.num_uncompressed_blocks > 0 && s->info.max_uncompressed_size > 0) { // Walk through the compressed blocks - const uint8_t *cur = s->info.compressed_data; - const uint8_t *end = cur + s->info.compressed_data_size; - const gpu_inflate_input_s *dec_in = s->info.decctl; - const gpu_inflate_status_s *dec_out = s->info.decstatus; - uint8_t *uncompressed_actual = s->info.uncompressed_data; - uint8_t *uncompressed_estimated = uncompressed_actual; + const uint8_t* cur = s->info.compressed_data; + const uint8_t* end = cur + s->info.compressed_data_size; + const gpu_inflate_input_s* dec_in = s->info.decctl; + const gpu_inflate_status_s* dec_out = s->info.decstatus; + uint8_t* uncompressed_actual = s->info.uncompressed_data; + uint8_t* uncompressed_estimated = uncompressed_actual; uint32_t num_compressed_blocks = 0; uint32_t max_compressed_blocks = s->info.num_compressed_blocks; @@ -159,9 +158,9 @@ extern "C" __global__ void __launch_bounds__(128, 8) break; } uncompressed_size_est = - shuffle((lane_id == 0) ? *(const uint32_t *)&dec_in[num_compressed_blocks].dstSize : 0); + shuffle((lane_id == 0) ? *(const uint32_t*)&dec_in[num_compressed_blocks].dstSize : 0); uncompressed_size_actual = shuffle( - (lane_id == 0) ? *(const uint32_t *)&dec_out[num_compressed_blocks].bytes_written : 0); + (lane_id == 0) ? 
*(const uint32_t*)&dec_out[num_compressed_blocks].bytes_written : 0); } // In practice, this should never happen with a well-behaved writer, as we would expect the // uncompressed size to always be equal to the compression block size except for the last @@ -219,13 +218,13 @@ enum row_entry_state_e { * @param[in] end end of byte stream * @return bytes consumed */ -static uint32_t __device__ ProtobufParseRowIndexEntry(rowindex_state_s *s, - const uint8_t *start, - const uint8_t *end) +static uint32_t __device__ ProtobufParseRowIndexEntry(rowindex_state_s* s, + const uint8_t* start, + const uint8_t* end) { constexpr uint32_t pb_rowindexentry_id = static_cast(PB_TYPE_FIXEDLEN) + 8; - const uint8_t *cur = start; + const uint8_t* cur = start; row_entry_state_e state = NOT_FOUND; uint32_t length = 0, strm_idx_id = s->chunk.skip_count >> 8, idx_id = 1, ci_id = CI_PRESENT, pos_end = 0; @@ -268,9 +267,9 @@ static uint32_t __device__ ProtobufParseRowIndexEntry(rowindex_state_s *s, } break; case STORE_INDEX0: - ci_id = (idx_id == (strm_idx_id & 0xff)) - ? CI_DATA - : (idx_id == ((strm_idx_id >> 8) & 0xff)) ? CI_DATA2 : CI_PRESENT; + ci_id = (idx_id == (strm_idx_id & 0xff)) ? CI_DATA + : (idx_id == ((strm_idx_id >> 8) & 0xff)) ? CI_DATA2 + : CI_PRESENT; idx_id++; if (s->is_compressed) { if (ci_id < CI_PRESENT) s->row_index_entry[0][ci_id] = v; @@ -313,9 +312,9 @@ static uint32_t __device__ ProtobufParseRowIndexEntry(rowindex_state_s *s, * @param[in,out] s row group index state * @param[in] num_rowgroups Number of index entries to read */ -static __device__ void gpuReadRowGroupIndexEntries(rowindex_state_s *s, int num_rowgroups) +static __device__ void gpuReadRowGroupIndexEntries(rowindex_state_s* s, int num_rowgroups) { - const uint8_t *index_data = s->chunk.streams[CI_INDEX]; + const uint8_t* index_data = s->chunk.streams[CI_INDEX]; int index_data_len = s->chunk.strm_len[CI_INDEX]; for (int i = 0; i < num_rowgroups; i++) { s->row_index_entry[0][0] = 0; @@ -347,7 +346,7 @@ static __device__ void gpuReadRowGroupIndexEntries(rowindex_state_s *s, int num_ * @param[in] num_rowgroups Number of index entries * @param[in] t thread id */ -static __device__ void gpuMapRowIndexToUncompressed(rowindex_state_s *s, +static __device__ void gpuMapRowIndexToUncompressed(rowindex_state_s* s, int ci_id, int num_rowgroups, int t) @@ -356,10 +355,10 @@ static __device__ void gpuMapRowIndexToUncompressed(rowindex_state_s *s, if (strm_len > 0) { int32_t compressed_offset = (t < num_rowgroups) ? 
s->compressed_offset[t][ci_id] : 0; if (compressed_offset > 0) { - const uint8_t *start = s->strm_info[ci_id].compressed_data; - const uint8_t *cur = start; - const uint8_t *end = cur + s->strm_info[ci_id].compressed_data_size; - gpu_inflate_status_s *decstatus = s->strm_info[ci_id].decstatus; + const uint8_t* start = s->strm_info[ci_id].compressed_data; + const uint8_t* cur = start; + const uint8_t* end = cur + s->strm_info[ci_id].compressed_data_size; + gpu_inflate_status_s* decstatus = s->strm_info[ci_id].decstatus; uint32_t uncomp_offset = 0; for (;;) { uint32_t block_len, is_uncompressed; @@ -398,9 +397,9 @@ static __device__ void gpuMapRowIndexToUncompressed(rowindex_state_s *s, */ // blockDim {128,1,1} extern "C" __global__ void __launch_bounds__(128, 8) - gpuParseRowGroupIndex(RowGroup *row_groups, - CompressedStreamInfo *strm_info, - ColumnDesc *chunks, + gpuParseRowGroupIndex(RowGroup* row_groups, + CompressedStreamInfo* strm_info, + ColumnDesc* chunks, uint32_t num_columns, uint32_t num_stripes, uint32_t num_rowgroups, @@ -408,7 +407,7 @@ extern "C" __global__ void __launch_bounds__(128, 8) bool use_base_stride) { __shared__ __align__(16) rowindex_state_s state_g; - rowindex_state_s *const s = &state_g; + rowindex_state_s* const s = &state_g; uint32_t chunk_id = blockIdx.y * num_columns + blockIdx.x; int t = threadIdx.x; @@ -454,8 +453,8 @@ extern "C" __global__ void __launch_bounds__(128, 8) ? rowidx_stride : row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x].start_row; for (int j = t4; j < rowgroup_size4; j += 4) { - ((uint32_t *)&row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x])[j] = - ((volatile uint32_t *)&s->rowgroups[i])[j]; + ((uint32_t*)&row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x])[j] = + ((volatile uint32_t*)&s->rowgroups[i])[j]; } row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x].num_rows = num_rows; row_groups[(s->rowgroup_start + i) * num_columns + blockIdx.x].start_row = start_row; @@ -466,7 +465,7 @@ extern "C" __global__ void __launch_bounds__(128, 8) } } -void __host__ ParseCompressedStripeData(CompressedStreamInfo *strm_info, +void __host__ ParseCompressedStripeData(CompressedStreamInfo* strm_info, int32_t num_streams, uint32_t compression_block_size, uint32_t log2maxcr, @@ -478,7 +477,7 @@ void __host__ ParseCompressedStripeData(CompressedStreamInfo *strm_info, strm_info, num_streams, compression_block_size, log2maxcr); } -void __host__ PostDecompressionReassemble(CompressedStreamInfo *strm_info, +void __host__ PostDecompressionReassemble(CompressedStreamInfo* strm_info, int32_t num_streams, rmm::cuda_stream_view stream) { @@ -502,9 +501,9 @@ void __host__ PostDecompressionReassemble(CompressedStreamInfo *strm_info, * value * @param[in] stream CUDA stream to use, default `rmm::cuda_stream_default` */ -void __host__ ParseRowGroupIndex(RowGroup *row_groups, - CompressedStreamInfo *strm_info, - ColumnDesc *chunks, +void __host__ ParseRowGroupIndex(RowGroup* row_groups, + CompressedStreamInfo* strm_info, + ColumnDesc* chunks, uint32_t num_columns, uint32_t num_stripes, uint32_t num_rowgroups, diff --git a/cpp/src/io/orc/timezone.cpp b/cpp/src/io/orc/timezone.cpp index 81ffa954c1a..f5bda3401c0 100644 --- a/cpp/src/io/orc/timezone.cpp +++ b/cpp/src/io/orc/timezone.cpp @@ -76,7 +76,7 @@ struct timezone_file { { return (is_64bit ? 
sizeof(uint64_t) : sizeof(uint32_t)) + sizeof(uint32_t); } - static constexpr auto file_content_size_32(timezone_file_header const &header) noexcept + static constexpr auto file_content_size_32(timezone_file_header const& header) noexcept { return header.timecnt * sizeof(uint32_t) + // transition times header.timecnt * sizeof(uint8_t) + // transition time index @@ -100,9 +100,9 @@ struct timezone_file { header.charcnt = __builtin_bswap32(header.charcnt); } - void read_header(std::ifstream &input_file, size_t file_size) + void read_header(std::ifstream& input_file, size_t file_size) { - input_file.read(reinterpret_cast(&header), sizeof(header)); + input_file.read(reinterpret_cast(&header), sizeof(header)); CUDF_EXPECTS(!input_file.fail() && header.magic == tzif_magic, "Error reading time zones file header."); header_to_little_endian(); @@ -113,7 +113,7 @@ struct timezone_file { // skip the 32-bit content input_file.seekg(file_content_size_32(header), std::ios_base::cur); // read the 64-bit header - input_file.read(reinterpret_cast(&header), sizeof(header)); + input_file.read(reinterpret_cast(&header), sizeof(header)); header_to_little_endian(); is_header_from_64bit = true; } @@ -125,7 +125,7 @@ struct timezone_file { "Number of transition times is larger than the file size."); } - timezone_file(std::string const &timezone_name) + timezone_file(std::string const& timezone_name) { using std::ios_base; @@ -142,23 +142,25 @@ struct timezone_file { // Read transition times (convert from 32-bit to 64-bit if necessary) transition_times.resize(timecnt()); if (is_header_from_64bit) { - fin.read(reinterpret_cast(transition_times.data()), + fin.read(reinterpret_cast(transition_times.data()), transition_times.size() * sizeof(int64_t)); - for (auto &tt : transition_times) { tt = __builtin_bswap64(tt); } + for (auto& tt : transition_times) { + tt = __builtin_bswap64(tt); + } } else { std::vector tt32(timecnt()); - fin.read(reinterpret_cast(tt32.data()), tt32.size() * sizeof(int32_t)); + fin.read(reinterpret_cast(tt32.data()), tt32.size() * sizeof(int32_t)); std::transform( - tt32.cbegin(), tt32.cend(), std::back_inserter(transition_times), [](auto &tt) { + tt32.cbegin(), tt32.cend(), std::back_inserter(transition_times), [](auto& tt) { return __builtin_bswap32(tt); }); } ttime_idx.resize(timecnt()); - fin.read(reinterpret_cast(ttime_idx.data()), timecnt() * sizeof(uint8_t)); + fin.read(reinterpret_cast(ttime_idx.data()), timecnt() * sizeof(uint8_t)); // Read time types ttype.resize(typecnt()); - fin.read(reinterpret_cast(ttype.data()), typecnt() * sizeof(localtime_type_record_s)); + fin.read(reinterpret_cast(ttype.data()), typecnt() * sizeof(localtime_type_record_s)); CUDF_EXPECTS(!fin.fail(), "Failed to read time types from the time zone file."); for (uint32_t i = 0; i < typecnt(); i++) { ttype[i].utcoff = __builtin_bswap32(ttype[i].utcoff); @@ -182,7 +184,7 @@ struct timezone_file { template class posix_parser { public: - posix_parser(Container const &tz_string) : cur{tz_string.begin()}, end{tz_string.end()} {} + posix_parser(Container const& tz_string) : cur{tz_string.begin()}, end{tz_string.end()} {} /** * @brief Advances the parser past a name from the posix TZ string. 
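For orientation while reading the transition-rule hunks below: a POSIX TZ string such as "EST5EDT,M3.2.0,M11.1.0" encodes each DST transition as M<month>.<week>.<weekday>, where week 5 denotes the last occurrence of that weekday in the month. The following standalone sketch is illustrative only and is not part of this patch; transition_day_of_month and its helpers are hypothetical names. It computes the day of the month such a rule selects, assuming the proleptic Gregorian calendar:

// 0 = Sunday; Sakamoto's method for the proleptic Gregorian calendar.
inline int weekday_of(int year, int month, int day)
{
  constexpr int t[] = {0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4};
  if (month < 3) { year -= 1; }
  return (year + year / 4 - year / 100 + year / 400 + t[month - 1] + day) % 7;
}

inline bool is_leap(int year) { return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0); }

inline int days_in(int month, int year)
{
  return month == 2 ? 28 + is_leap(year) : 30 + ((month ^ (month >> 3)) & 1);
}

// Day of the month selected by a POSIX "M<month>.<week>.<weekday>" rule.
int transition_day_of_month(int year, int month, int week, int weekday)
{
  int day = 1 + (weekday - weekday_of(year, month, 1) + 7) % 7;  // first such weekday
  day += (week - 1) * 7;                          // advance to the requested week
  while (day > days_in(month, year)) { day -= 7; }  // week 5 clamps to the last occurrence
  return day;
}

// Example: the second Sunday of March 2021 (US DST start) is March 14:
// transition_day_of_month(2021, 3, 2, 0) == 14

get_transition_time in the hunk below performs the equivalent computation but returns seconds from the beginning of the year, accumulating days_in_month over the months preceding the transition.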
@@ -340,7 +342,7 @@ static int days_in_month(int month, bool is_leap_year) * * @return transition time in seconds from the beginning of the year */ -static int64_t get_transition_time(dst_transition_s const &trans, int year) +static int64_t get_transition_time(dst_transition_s const& trans, int year) { auto day = trans.day; @@ -365,7 +367,9 @@ static int64_t get_transition_time(dst_transition_s const &trans, int year) day += 7; } // Add months - for (int m = 1; m < month; m++) { day += days_in_month(m, is_leap); } + for (int m = 1; m < month; m++) { + day += days_in_month(m, is_leap); + } } else if (trans.type == 'J') { // Account for 29th of February on leap years day += (day > 31 + 29 && is_leap_year(year)); @@ -374,7 +378,7 @@ static int64_t get_transition_time(dst_transition_s const &trans, int year) return trans.time + day * day_seconds; } -timezone_table build_timezone_transition_table(std::string const &timezone_name, +timezone_table build_timezone_transition_table(std::string const& timezone_name, rmm::cuda_stream_view stream) { if (timezone_name == "UTC" || timezone_name.empty()) { diff --git a/cpp/src/io/orc/timezone.cuh b/cpp/src/io/orc/timezone.cuh index b0231ca9e7d..e5341573418 100644 --- a/cpp/src/io/orc/timezone.cuh +++ b/cpp/src/io/orc/timezone.cuh @@ -56,8 +56,8 @@ static constexpr uint32_t cycle_entry_cnt = 2 * cycle_years; * * @return GMT offset */ -CUDA_HOST_DEVICE_CALLABLE int32_t get_gmt_offset_impl(int64_t const *ttimes, - int32_t const *offsets, +CUDA_HOST_DEVICE_CALLABLE int32_t get_gmt_offset_impl(int64_t const* ttimes, + int32_t const* offsets, size_t count, int64_t ts) { @@ -112,8 +112,8 @@ struct timezone_table { rmm::device_uvector offsets; timezone_table() : ttimes{0, rmm::cuda_stream_default}, offsets{0, rmm::cuda_stream_default} {} timezone_table(int32_t gmt_offset, - rmm::device_uvector &&ttimes, - rmm::device_uvector &&offsets) + rmm::device_uvector&& ttimes, + rmm::device_uvector&& offsets) : gmt_offset{gmt_offset}, ttimes{std::move(ttimes)}, offsets{std::move(offsets)} { } @@ -130,7 +130,7 @@ struct timezone_table { * * @return The transition table for the given timezone */ -timezone_table build_timezone_transition_table(std::string const &timezone_name, +timezone_table build_timezone_transition_table(std::string const& timezone_name, rmm::cuda_stream_view stream); } // namespace io diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 4a2330d479b..0cd3f333ba3 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -138,8 +138,8 @@ class orc_column_view { */ explicit orc_column_view(size_t index, size_t str_id, - column_view const &col, - const table_metadata *metadata, + column_view const& col, + const table_metadata* metadata, rmm::cuda_stream_view stream) : _index(index), _str_id(str_id), @@ -168,7 +168,7 @@ class orc_column_view { /** * @brief Function that associates an existing dictionary chunk allocation */ - void attach_dict_chunk(gpu::DictionaryChunk *host_dict, gpu::DictionaryChunk *dev_dict) + void attach_dict_chunk(gpu::DictionaryChunk* host_dict, gpu::DictionaryChunk* dev_dict) { dict = host_dict; d_dict = dev_dict; @@ -180,14 +180,14 @@ class orc_column_view { } auto device_dict_chunk() const { return d_dict; } - auto const &decimal_offsets() const { return d_decimal_offsets; } - void attach_decimal_offsets(uint32_t *sizes_ptr) { d_decimal_offsets = sizes_ptr; } + auto const& decimal_offsets() const { return d_decimal_offsets; } + void attach_decimal_offsets(uint32_t* sizes_ptr) { 
d_decimal_offsets = sizes_ptr; } /** * @brief Function that associates an existing stripe dictionary allocation */ - void attach_stripe_dict(gpu::StripeDictionary *host_stripe_dict, - gpu::StripeDictionary *dev_stripe_dict) + void attach_stripe_dict(gpu::StripeDictionary* host_stripe_dict, + gpu::StripeDictionary* dev_stripe_dict) { stripe_dict = host_stripe_dict; d_stripe_dict = dev_stripe_dict; @@ -207,7 +207,7 @@ class orc_column_view { auto data_count() const noexcept { return _data_count; } size_t null_count() const noexcept { return _null_count; } bool nullable() const noexcept { return (_nulls != nullptr); } - uint32_t const *nulls() const noexcept { return _nulls; } + uint32_t const* nulls() const noexcept { return _nulls; } auto scale() const noexcept { return _scale; } auto precision() const noexcept { return _precision; } @@ -226,7 +226,7 @@ class orc_column_view { size_t _type_width = 0; size_type _data_count = 0; size_t _null_count = 0; - uint32_t const *_nulls = nullptr; + uint32_t const* _nulls = nullptr; // ORC-related members std::string _name{}; @@ -238,21 +238,21 @@ class orc_column_view { // String dictionary-related members size_t dict_stride = 0; - gpu::DictionaryChunk const *dict = nullptr; - gpu::StripeDictionary const *stripe_dict = nullptr; - gpu::DictionaryChunk *d_dict = nullptr; - gpu::StripeDictionary *d_stripe_dict = nullptr; + gpu::DictionaryChunk const* dict = nullptr; + gpu::StripeDictionary const* stripe_dict = nullptr; + gpu::DictionaryChunk* d_dict = nullptr; + gpu::StripeDictionary* d_stripe_dict = nullptr; // Offsets for encoded decimal elements. Used to enable direct writing of encoded decimal elements // into the output stream. - uint32_t *d_decimal_offsets = nullptr; + uint32_t* d_decimal_offsets = nullptr; }; std::vector writer::impl::gather_stripe_info( host_span columns, size_t num_rowgroups) { auto const is_any_column_string = - std::any_of(columns.begin(), columns.end(), [](auto const &col) { return col.is_string(); }); + std::any_of(columns.begin(), columns.end(), [](auto const& col) { return col.is_string(); }); // Apply rows per stripe limit to limit string dictionaries size_t const max_stripe_rows = is_any_column_string ? 
1000000 : 5000000; @@ -260,7 +260,7 @@ std::vector writer::impl::gather_stripe_info( for (size_t rowgroup = 0, stripe_start = 0, stripe_size = 0; rowgroup < num_rowgroups; ++rowgroup) { auto const rowgroup_size = - std::accumulate(columns.begin(), columns.end(), 0ul, [&](size_t total_size, auto const &col) { + std::accumulate(columns.begin(), columns.end(), 0ul, [&](size_t total_size, auto const& col) { if (col.is_string()) { const auto dt = col.host_dict_chunk(rowgroup); return total_size + row_index_stride_ + dt->string_char_count; @@ -285,19 +285,19 @@ std::vector writer::impl::gather_stripe_info( return infos; } -void writer::impl::init_dictionaries(const table_device_view &view, - orc_column_view *columns, - std::vector const &str_col_ids, +void writer::impl::init_dictionaries(const table_device_view& view, + orc_column_view* columns, + std::vector const& str_col_ids, device_span d_str_col_ids, - uint32_t *dict_data, - uint32_t *dict_index, - hostdevice_vector *dict) + uint32_t* dict_data, + uint32_t* dict_index, + hostdevice_vector* dict) { const size_t num_rowgroups = dict->size() / str_col_ids.size(); // Setup per-rowgroup dictionary indexes for each dictionary-aware column for (size_t i = 0; i < str_col_ids.size(); ++i) { - auto &str_column = columns[str_col_ids[i]]; + auto& str_column = columns[str_col_ids[i]]; str_column.set_dict_stride(str_col_ids.size()); str_column.attach_dict_chunk(dict->host_ptr(), dict->device_ptr()); } @@ -314,21 +314,21 @@ void writer::impl::init_dictionaries(const table_device_view &view, dict->device_to_host(stream, true); } -void writer::impl::build_dictionaries(orc_column_view *columns, - std::vector const &str_col_ids, +void writer::impl::build_dictionaries(orc_column_view* columns, + std::vector const& str_col_ids, host_span stripe_bounds, - hostdevice_vector const &dict, - uint32_t *dict_index, - hostdevice_vector &stripe_dict) + hostdevice_vector const& dict, + uint32_t* dict_index, + hostdevice_vector& stripe_dict) { const auto num_rowgroups = dict.size() / str_col_ids.size(); for (size_t col_idx = 0; col_idx < str_col_ids.size(); ++col_idx) { - auto &str_column = columns[str_col_ids[col_idx]]; + auto& str_column = columns[str_col_ids[col_idx]]; str_column.attach_stripe_dict(stripe_dict.host_ptr(), stripe_dict.device_ptr()); - for (auto const &stripe : stripe_bounds) { - auto &sd = stripe_dict[stripe.id * str_col_ids.size() + col_idx]; + for (auto const& stripe : stripe_bounds) { + auto& sd = stripe_dict[stripe.id * str_col_ids.size() + col_idx]; sd.dict_data = str_column.host_dict_chunk(stripe.first)->dict_data; sd.dict_index = dict_index + col_idx * str_column.data_count(); // Indexed by abs row sd.column_id = str_col_ids[col_idx]; @@ -337,7 +337,7 @@ void writer::impl::build_dictionaries(orc_column_view *columns, sd.dict_char_count = 0; sd.num_strings = std::accumulate(stripe.cbegin(), stripe.cend(), 0, [&](auto dt_str_cnt, auto rg_idx) { - const auto &dt = dict[rg_idx * str_col_ids.size() + col_idx]; + const auto& dt = dict[rg_idx * str_col_ids.size() + col_idx]; return dt_str_cnt + dt.num_dict_strings; }); sd.leaf_column = dict[col_idx].leaf_column; @@ -353,13 +353,13 @@ void writer::impl::build_dictionaries(orc_column_view *columns, stripe_bounds.back().cend(), string_column_cost{}, [&](auto cost, auto rg_idx) -> string_column_cost { - const auto &dt = dict[rg_idx * str_col_ids.size() + col_idx]; + const auto& dt = dict[rg_idx * str_col_ids.size() + col_idx]; return {cost.direct + dt.string_char_count, cost.dictionary + dt.dict_char_count 
+ dt.num_dict_strings}; }); // Disable dictionary if it does not reduce the output size if (col_cost.dictionary >= col_cost.direct) { - for (auto const &stripe : stripe_bounds) { + for (auto const& stripe : stripe_bounds) { stripe_dict[stripe.id * str_col_ids.size() + col_idx].dict_data = nullptr; } } @@ -379,19 +379,19 @@ void writer::impl::build_dictionaries(orc_column_view *columns, orc_streams writer::impl::create_streams(host_span columns, host_span stripe_bounds, - std::map const &decimal_column_sizes) + std::map const& decimal_column_sizes) { // 'column 0' row index stream std::vector streams{{ROW_INDEX, 0}}; // TODO: Separate index and data streams? // First n + 1 streams are row index streams streams.reserve(columns.size() + 1); - std::transform(columns.begin(), columns.end(), std::back_inserter(streams), [](auto const &col) { + std::transform(columns.begin(), columns.end(), std::back_inserter(streams), [](auto const& col) { return Stream{ROW_INDEX, col.id()}; }); std::vector ids(columns.size() * gpu::CI_NUM_STREAMS, -1); - for (auto &column : columns) { + for (auto& column : columns) { TypeKind kind = column.orc_kind(); StreamKind data_kind = DATA; StreamKind data2_kind = LENGTH; @@ -454,7 +454,7 @@ orc_streams writer::impl::create_streams(host_span columns, size_t dict_data_size = 0; size_t dict_strings = 0; size_t dict_lengths_div512 = 0; - for (auto const &stripe : stripe_bounds) { + for (auto const& stripe : stripe_bounds) { const auto sd = column.host_stripe_dict(stripe.id); enable_dict = (enable_dict && sd->dict_data != nullptr); if (enable_dict) { @@ -546,13 +546,13 @@ orc_streams::orc_stream_offsets orc_streams::compute_offsets( size_t non_rle_data_size = 0; size_t rle_data_size = 0; for (size_t i = 0; i < streams.size(); ++i) { - const auto &stream = streams[i]; + const auto& stream = streams[i]; auto const is_rle_data = [&]() { // First stream is an index stream, don't check types, etc. 
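// Streams fall into two size pools: raw (non-RLE) bytes for string character
// data, and RLE-encoded bytes for everything else, including the row index
// streams; the running offsets for each pool are accumulated independently below.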
if (!stream.column_index().has_value()) return true; - auto const &column = columns[stream.column_index().value()]; + auto const& column = columns[stream.column_index().value()]; // Dictionary encoded string column - dictionary characters or // directly encoded string - column characters if (column.orc_kind() == TypeKind::STRING && @@ -581,18 +581,18 @@ orc_streams::orc_stream_offsets orc_streams::compute_offsets( } struct segmented_valid_cnt_input { - bitmask_type const *mask; + bitmask_type const* mask; std::vector indices; }; -encoded_data writer::impl::encode_columns(const table_device_view &view, +encoded_data writer::impl::encode_columns(const table_device_view& view, host_span columns, - std::vector const &str_col_ids, - rmm::device_uvector &&dict_data, - rmm::device_uvector &&dict_index, - encoder_decimal_info &&dec_chunk_sizes, + std::vector const& str_col_ids, + rmm::device_uvector&& dict_data, + rmm::device_uvector&& dict_index, + encoder_decimal_info&& dec_chunk_sizes, host_span stripe_bounds, - orc_streams const &streams) + orc_streams const& streams) { auto const num_columns = columns.size(); auto const num_rowgroups = stripes_size(stripe_bounds); @@ -604,11 +604,11 @@ encoded_data writer::impl::encode_columns(const table_device_view &view, // Initialize column chunks' descriptions std::map validity_check_inputs; - for (auto const &column : columns) { - for (auto const &stripe : stripe_bounds) { + for (auto const& column : columns) { + for (auto const& stripe : stripe_bounds) { for (auto rg_idx_it = stripe.cbegin(); rg_idx_it < stripe.cend(); ++rg_idx_it) { auto const rg_idx = *rg_idx_it; - auto &ck = chunks[column.index()][rg_idx]; + auto& ck = chunks[column.index()][rg_idx]; ck.start_row = (rg_idx * row_index_stride_); ck.num_rows = std::min(row_index_stride_, column.data_count() - ck.start_row); @@ -618,7 +618,7 @@ encoded_data writer::impl::encode_columns(const table_device_view &view, ck.dict_index = (ck.encoding_kind == DICTIONARY_V2) ? 
column.host_stripe_dict(stripe.id)->dict_index : nullptr; - ck.dtype_len = 1; + ck.dtype_len = 1; } else { ck.dtype_len = column.type_width(); } @@ -632,22 +632,22 @@ encoded_data writer::impl::encode_columns(const table_device_view &view, auto validity_check_indices = [&](size_t col_idx) { std::vector indices; - for (auto const &stripe : stripe_bounds) { + for (auto const& stripe : stripe_bounds) { for (auto rg_idx_it = stripe.cbegin(); rg_idx_it < stripe.cend() - 1; ++rg_idx_it) { - auto const &chunk = chunks[col_idx][*rg_idx_it]; + auto const& chunk = chunks[col_idx][*rg_idx_it]; indices.push_back(chunk.start_row); indices.push_back(chunk.start_row + chunk.num_rows); } } return indices; }; - for (auto const &column : columns) { + for (auto const& column : columns) { if (column.orc_kind() == TypeKind::BOOLEAN && column.nullable()) { validity_check_inputs[column.index()] = {column.nulls(), validity_check_indices(column.index())}; } } - for (auto &cnt_in : validity_check_inputs) { + for (auto& cnt_in : validity_check_inputs) { auto const valid_counts = segmented_count_set_bits(cnt_in.second.mask, cnt_in.second.indices); CUDF_EXPECTS( std::none_of(valid_counts.cbegin(), @@ -659,13 +659,13 @@ encoded_data writer::impl::encode_columns(const table_device_view &view, } for (size_t col_idx = 0; col_idx < num_columns; col_idx++) { - auto const &column = columns[col_idx]; + auto const& column = columns[col_idx]; auto col_streams = chunk_streams[col_idx]; - for (auto const &stripe : stripe_bounds) { + for (auto const& stripe : stripe_bounds) { for (auto rg_idx_it = stripe.cbegin(); rg_idx_it < stripe.cend(); ++rg_idx_it) { auto const rg_idx = *rg_idx_it; - auto const &ck = chunks[col_idx][rg_idx]; - auto &strm = col_streams[rg_idx]; + auto const& ck = chunks[col_idx][rg_idx]; + auto& strm = col_streams[rg_idx]; for (int strm_type = 0; strm_type < gpu::CI_NUM_STREAMS; ++strm_type) { auto const strm_id = streams.id(col_idx * gpu::CI_NUM_STREAMS + strm_type); @@ -688,7 +688,7 @@ encoded_data writer::impl::encode_columns(const table_device_view &view, if (strm_type == gpu::CI_DATA2 && ck.encoding_kind == DICTIONARY_V2) strm.data_ptrs[strm_type] += stream_offsets.non_rle_data_size; } else { - auto const &strm_up = col_streams[stripe_dict[-dict_stride].start_chunk]; + auto const& strm_up = col_streams[stripe_dict[-dict_stride].start_chunk]; strm.data_ptrs[strm_type] = strm_up.data_ptrs[strm_type] + strm_up.lengths[strm_type]; } @@ -754,19 +754,19 @@ std::vector writer::impl::gather_stripes( size_t num_rows, size_t num_index_streams, host_span stripe_bounds, - hostdevice_2dvector *enc_streams, - hostdevice_2dvector *strm_desc) + hostdevice_2dvector* enc_streams, + hostdevice_2dvector* strm_desc) { std::vector stripes(stripe_bounds.size()); - for (auto const &stripe : stripe_bounds) { + for (auto const& stripe : stripe_bounds) { for (size_t col_idx = 0; col_idx < enc_streams->size().first; col_idx++) { - const auto &strm = (*enc_streams)[col_idx][stripe.first]; + const auto& strm = (*enc_streams)[col_idx][stripe.first]; // Assign stream data of column data stream(s) for (int k = 0; k < gpu::CI_INDEX; k++) { const auto stream_id = strm.ids[k]; if (stream_id != -1) { - auto *ss = &(*strm_desc)[stripe.id][stream_id - num_index_streams]; + auto* ss = &(*strm_desc)[stripe.id][stream_id - num_index_streams]; ss->stream_size = 0; ss->first_chunk_id = stripe.first; ss->num_chunks = stripe.size; @@ -790,7 +790,7 @@ std::vector writer::impl::gather_stripes( } std::vector> writer::impl::gather_statistic_blobs( - 
const table_device_view &table, + const table_device_view& table, host_span columns, host_span stripe_bounds) { @@ -804,8 +804,8 @@ std::vector> writer::impl::gather_statistic_blobs( rmm::device_uvector stat_chunks(num_chunks + num_stat_blobs, stream); rmm::device_uvector stat_groups(num_chunks, stream); - for (auto const &column : columns) { - stats_column_desc *desc = &stat_desc[column.index()]; + for (auto const& column : columns) { + stats_column_desc* desc = &stat_desc[column.index()]; switch (column.orc_kind()) { case TypeKind::BYTE: desc->stats_dtype = dtype_int8; break; case TypeKind::SHORT: desc->stats_dtype = dtype_int16; break; @@ -834,13 +834,13 @@ std::vector> writer::impl::gather_statistic_blobs( } else { desc->ts_scale = 0; } - for (auto const &stripe : stripe_bounds) { + for (auto const& stripe : stripe_bounds) { auto grp = &stat_merge[column.index() * stripe_bounds.size() + stripe.id]; grp->col = stat_desc.device_ptr(column.index()); grp->start_chunk = static_cast(column.index() * num_rowgroups + stripe.first); grp->num_chunks = stripe.size; } - statistics_merge_group *col_stats = + statistics_merge_group* col_stats = &stat_merge[stripe_bounds.size() * columns.size() + column.index()]; col_stats->col = stat_desc.device_ptr(column.index()); col_stats->start_chunk = static_cast(column.index() * stripe_bounds.size()); @@ -888,8 +888,8 @@ std::vector> writer::impl::gather_statistic_blobs( blobs.device_to_host(stream, true); for (size_t i = 0; i < num_stat_blobs; i++) { - const uint8_t *stat_begin = blobs.host_ptr(stat_merge[i].start_chunk); - const uint8_t *stat_end = stat_begin + stat_merge[i].num_chunks; + const uint8_t* stat_begin = blobs.host_ptr(stat_merge[i].start_chunk); + const uint8_t* stat_end = stat_begin + stat_merge[i].num_chunks; stat_blobs[i].assign(stat_begin, stat_end); } @@ -899,13 +899,13 @@ std::vector> writer::impl::gather_statistic_blobs( void writer::impl::write_index_stream(int32_t stripe_id, int32_t stream_id, host_span columns, - stripe_rowgroups const &rowgroups_range, + stripe_rowgroups const& rowgroups_range, host_2dspan enc_streams, host_2dspan strm_desc, host_span comp_out, - StripeInformation *stripe, - orc_streams *streams, - ProtobufWriter *pbw) + StripeInformation* stripe, + orc_streams* streams, + ProtobufWriter* pbw) { row_group_index_info present; row_group_index_info data; @@ -913,13 +913,13 @@ void writer::impl::write_index_stream(int32_t stripe_id, auto kind = TypeKind::STRUCT; auto const column_id = stream_id - 1; - auto find_record = [=, &strm_desc](gpu::encoder_chunk_streams const &stream, + auto find_record = [=, &strm_desc](gpu::encoder_chunk_streams const& stream, gpu::StreamIndexType type) { row_group_index_info record; if (stream.ids[type] > 0) { record.pos = 0; if (compression_kind_ != NONE) { - auto const &ss = strm_desc[stripe_id][stream.ids[type] - (columns.size() + 1)]; + auto const& ss = strm_desc[stripe_id][stream.ids[type] - (columns.size() + 1)]; record.blk_pos = ss.first_block; record.comp_pos = 0; record.comp_size = ss.stream_size; @@ -927,9 +927,9 @@ void writer::impl::write_index_stream(int32_t stripe_id, } return record; }; - auto scan_record = [=, &comp_out](gpu::encoder_chunk_streams const &stream, + auto scan_record = [=, &comp_out](gpu::encoder_chunk_streams const& stream, gpu::StreamIndexType type, - row_group_index_info &record) { + row_group_index_info& record) { if (record.pos >= 0) { record.pos += stream.lengths[type]; while ((record.pos >= 0) && (record.blk_pos >= 0) && @@ -945,7 +945,7 @@ void 
writer::impl::write_index_stream(int32_t stripe_id, // TBD: Not sure we need an empty index stream for column 0 if (stream_id != 0) { - const auto &strm = enc_streams[column_id][0]; + const auto& strm = enc_streams[column_id][0]; present = find_record(strm, gpu::CI_PRESENT); data = find_record(strm, gpu::CI_DATA); data2 = find_record(strm, gpu::CI_DATA2); @@ -965,7 +965,7 @@ void writer::impl::write_index_stream(int32_t stripe_id, present.comp_pos, present.pos, data.comp_pos, data.pos, data2.comp_pos, data2.pos, kind); if (stream_id != 0) { - const auto &strm = enc_streams[column_id][rowgroup]; + const auto& strm = enc_streams[column_id][rowgroup]; scan_record(strm, gpu::CI_PRESENT, present); scan_record(strm, gpu::CI_DATA, data); scan_record(strm, gpu::CI_DATA2, data2); @@ -983,18 +983,18 @@ void writer::impl::write_index_stream(int32_t stripe_id, stripe->indexLength += buffer_.size(); } -void writer::impl::write_data_stream(gpu::StripeStream const &strm_desc, - gpu::encoder_chunk_streams const &enc_stream, - uint8_t const *compressed_data, - uint8_t *stream_out, - StripeInformation *stripe, - orc_streams *streams) +void writer::impl::write_data_stream(gpu::StripeStream const& strm_desc, + gpu::encoder_chunk_streams const& enc_stream, + uint8_t const* compressed_data, + uint8_t* stream_out, + StripeInformation* stripe, + orc_streams* streams) { const auto length = strm_desc.stream_size; (*streams)[enc_stream.ids[strm_desc.stream_type]].length = length; if (length == 0) { return; } - const auto *stream_in = (compression_kind_ == NONE) ? enc_stream.data_ptrs[strm_desc.stream_type] + const auto* stream_in = (compression_kind_ == NONE) ? enc_stream.data_ptrs[strm_desc.stream_type] : (compressed_data + strm_desc.bfr_offset); if (out_sink_->is_device_write_preferred(length)) { @@ -1009,7 +1009,7 @@ void writer::impl::write_data_stream(gpu::StripeStream const &strm_desc, stripe->dataLength += length; } -void writer::impl::add_uncompressed_block_headers(std::vector &v) +void writer::impl::add_uncompressed_block_headers(std::vector& v) { if (compression_kind_ != NONE) { size_t uncomp_len = v.size() - 3, pos = 0, block_len; @@ -1030,10 +1030,10 @@ void writer::impl::add_uncompressed_block_headers(std::vector &v) } writer::impl::impl(std::unique_ptr sink, - orc_writer_options const &options, + orc_writer_options const& options, SingleWriteMode mode, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : compression_kind_(to_orc_compression(options.get_compression())), enable_statistics_(options.enable_statistics()), out_sink_(std::move(sink)), @@ -1046,10 +1046,10 @@ writer::impl::impl(std::unique_ptr sink, } writer::impl::impl(std::unique_ptr sink, - chunked_orc_writer_options const &options, + chunked_orc_writer_options const& options, SingleWriteMode mode, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : compression_kind_(to_orc_compression(options.get_compression())), enable_statistics_(options.enable_statistics()), out_sink_(std::move(sink)), @@ -1073,7 +1073,7 @@ void writer::impl::init_state() out_sink_->host_write(MAGIC, std::strlen(MAGIC)); } -rmm::device_uvector get_string_column_ids(const table_device_view &view, +rmm::device_uvector get_string_column_ids(const table_device_view& view, rmm::cuda_stream_view stream) { rmm::device_uvector string_column_ids(view.num_columns(), stream); @@ -1096,8 +1096,8 @@ rmm::device_uvector get_string_column_ids(const table_device_view &vi 
struct rowgroup_iterator { using difference_type = long; using value_type = int; - using pointer = int *; - using reference = int &; + using pointer = int*; + using reference = int&; using iterator_category = thrust::output_device_iterator_tag; size_type idx; size_type rowgroup_size; @@ -1111,7 +1111,7 @@ struct rowgroup_iterator { { return rowgroup_iterator{idx + i, rowgroup_size}; } - CUDA_HOST_DEVICE_CALLABLE rowgroup_iterator &operator++() + CUDA_HOST_DEVICE_CALLABLE rowgroup_iterator& operator++() { ++idx; return *this; @@ -1120,14 +1120,14 @@ struct rowgroup_iterator { { return (idx + offset) / rowgroup_size; } - CUDA_HOST_DEVICE_CALLABLE bool operator!=(rowgroup_iterator const &other) + CUDA_HOST_DEVICE_CALLABLE bool operator!=(rowgroup_iterator const& other) { return idx != other.idx; } }; // returns host vector of per-rowgroup sizes -encoder_decimal_info decimal_chunk_sizes(table_view const &table, +encoder_decimal_info decimal_chunk_sizes(table_view const& table, host_span orc_columns, size_type rowgroup_size, host_span stripes, @@ -1138,21 +1138,21 @@ encoder_decimal_info decimal_chunk_sizes(table_view const &table, auto const d_table = table_device_view::create(table, stream); // Compute per-element offsets (within each row group) on the device for (size_t col_idx = 0; col_idx < orc_columns.size(); ++col_idx) { - auto &orc_col = orc_columns[col_idx]; + auto& orc_col = orc_columns[col_idx]; if (orc_col.orc_kind() == DECIMAL) { - auto const &col = table.column(col_idx); - auto ¤t_sizes = + auto const& col = table.column(col_idx); + auto& current_sizes = elem_sizes.insert({col_idx, rmm::device_uvector(col.size(), stream)}) .first->second; thrust::tabulate(rmm::exec_policy(stream), current_sizes.begin(), current_sizes.end(), [table = *d_table, col_idx] __device__(auto idx) { - auto const &col = table.column(col_idx); + auto const& col = table.column(col_idx); if (col.is_null(idx)) return 0u; - int64_t const element = (col.type().id() == type_id::DECIMAL32) - ? col.element(idx) - : col.element(idx); + int64_t const element = (col.type().id() == type_id::DECIMAL32) + ? col.element(idx) + : col.element(idx); int64_t const sign = (element < 0) ? 
1 : 0; uint64_t zigzaged_value = ((element ^ -sign) * 2) + sign; @@ -1180,7 +1180,7 @@ encoder_decimal_info decimal_chunk_sizes(table_view const &table, auto const num_rowgroups = stripes_size(stripes); auto d_tmp_rowgroup_sizes = rmm::device_uvector(num_rowgroups, stream); std::map> rg_sizes; - for (auto const &[col_idx, esizes] : elem_sizes) { + for (auto const& [col_idx, esizes] : elem_sizes) { // Copy last elem in each row group - equal to row group size thrust::tabulate( rmm::exec_policy(stream), @@ -1196,13 +1196,13 @@ encoder_decimal_info decimal_chunk_sizes(table_view const &table, } std::map decimal_column_sizes( - std::map> const &chunk_sizes) + std::map> const& chunk_sizes) { std::map column_sizes; std::transform(chunk_sizes.cbegin(), chunk_sizes.cend(), std::inserter(column_sizes, column_sizes.end()), - [](auto const &chunk_size) -> std::pair { + [](auto const& chunk_size) -> std::pair { return { chunk_size.first, std::accumulate(chunk_size.second.cbegin(), chunk_size.second.cend(), 0lu)}; @@ -1210,7 +1210,7 @@ std::map decimal_column_sizes( return column_sizes; } -void writer::impl::write(table_view const &table) +void writer::impl::write(table_view const& table) { CUDF_EXPECTS(not closed, "Data has already been flushed to out and closed"); auto const num_columns = table.num_columns(); @@ -1231,7 +1231,7 @@ void writer::impl::write(table_view const &table) orc_columns.reserve(num_columns); // Mapping of string columns for quick look-up std::vector str_col_ids; - for (auto const &column : table) { + for (auto const& column : table) { auto const current_id = orc_columns.size(); auto const current_str_id = str_col_ids.size(); @@ -1302,7 +1302,7 @@ void writer::impl::write(table_view const &table) for (size_t stripe_id = 0; stripe_id < stripe_bounds.size(); stripe_id++) { for (size_t i = 0; i < num_data_streams; i++) { // TODO range for (at least) - gpu::StripeStream *ss = &strm_descs[stripe_id][i]; + gpu::StripeStream* ss = &strm_descs[stripe_id][i]; if (!out_sink_->is_device_write_preferred(ss->stream_size)) { all_device_write = false; } size_t stream_size = ss->stream_size; if (compression_kind_ != NONE) { @@ -1323,7 +1323,7 @@ void writer::impl::write(table_view const &table) return pinned_buffer{nullptr, cudaFreeHost}; } else { return pinned_buffer{[](size_t size) { - uint8_t *ptr = nullptr; + uint8_t* ptr = nullptr; CUDA_TRY(cudaMallocHost(&ptr, size)); return ptr; }(max_stream_size), @@ -1337,7 +1337,7 @@ void writer::impl::write(table_view const &table) hostdevice_vector comp_in(num_compressed_blocks, stream); if (compression_kind_ != NONE) { strm_descs.host_to_device(stream); - gpu::CompressOrcDataStreams(static_cast(compressed_data.data()), + gpu::CompressOrcDataStreams(static_cast(compressed_data.data()), num_compressed_blocks, compression_kind_, compression_blocksize_, @@ -1354,8 +1354,8 @@ void writer::impl::write(table_view const &table) // Write stripes for (size_t stripe_id = 0; stripe_id < stripes.size(); ++stripe_id) { - auto const &rowgroup_range = stripe_bounds[stripe_id]; - auto &stripe = stripes[stripe_id]; + auto const& rowgroup_range = stripe_bounds[stripe_id]; + auto& stripe = stripes[stripe_id]; stripe.offset = out_sink_->bytes_written(); @@ -1374,10 +1374,10 @@ void writer::impl::write(table_view const &table) } // Column data consisting one or more separate streams - for (auto const &strm_desc : strm_descs[stripe_id]) { + for (auto const& strm_desc : strm_descs[stripe_id]) { write_data_stream(strm_desc, 
enc_data.streams[strm_desc.column_id][rowgroup_range.first], - static_cast(compressed_data.data()), + static_cast(compressed_data.data()), stream_output.get(), &stripe, &streams); @@ -1450,7 +1450,7 @@ void writer::impl::write(table_view const &table) ff.types[0].kind = STRUCT; ff.types[0].subtypes.resize(num_columns); ff.types[0].fieldNames.resize(num_columns); - for (auto const &column : orc_columns) { + for (auto const& column : orc_columns) { ff.types[column.id()].kind = column.orc_kind(); if (column.orc_kind() == DECIMAL) { ff.types[column.id()].scale = static_cast(column.scale()); @@ -1465,7 +1465,7 @@ void writer::impl::write(table_view const &table) "Mismatch in table structure between multiple calls to write"); CUDF_EXPECTS(std::all_of(orc_columns.cbegin(), orc_columns.cend(), - [&](auto const &col) { + [&](auto const& col) { return ff.types[1 + col.index()].kind == col.orc_kind(); }), "Mismatch in column types between multiple calls to write"); @@ -1517,20 +1517,20 @@ void writer::impl::close() // Forward to implementation writer::writer(std::unique_ptr sink, - orc_writer_options const &options, + orc_writer_options const& options, SingleWriteMode mode, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : _impl(std::make_unique(std::move(sink), options, mode, stream, mr)) { } // Forward to implementation writer::writer(std::unique_ptr sink, - chunked_orc_writer_options const &options, + chunked_orc_writer_options const& options, SingleWriteMode mode, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : _impl(std::make_unique(std::move(sink), options, mode, stream, mr)) { } @@ -1539,7 +1539,7 @@ writer::writer(std::unique_ptr sink, writer::~writer() = default; // Forward to implementation -void writer::write(table_view const &table) { _impl->write(table); } +void writer::write(table_view const& table) { _impl->write(table); } // Forward to implementation void writer::close() { _impl->close(); } diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 155c83a88d9..ba8754ce6ca 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -110,7 +110,7 @@ class orc_streams { orc_stream_offsets compute_offsets(host_span columns, size_t num_rowgroups) const; - operator std::vector const&() const { return streams; } + operator std::vector const &() const { return streams; } private: std::vector streams; diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp b/cpp/src/io/parquet/compact_protocol_writer.cpp index a9b8eb0ac6b..dde86af68c8 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.cpp +++ b/cpp/src/io/parquet/compact_protocol_writer.cpp @@ -24,7 +24,7 @@ namespace parquet { * @Brief Parquet CompactProtocolWriter class */ -size_t CompactProtocolWriter::write(const FileMetaData &f) +size_t CompactProtocolWriter::write(const FileMetaData& f) { CompactProtocolFieldWriter c(*this); c.field_int(1, f.version); @@ -48,7 +48,7 @@ size_t CompactProtocolWriter::write(const FileMetaData &f) return c.value(); } -size_t CompactProtocolWriter::write(const SchemaElement &s) +size_t CompactProtocolWriter::write(const SchemaElement& s) { CompactProtocolFieldWriter c(*this); if (s.type != UNDEFINED_TYPE) { @@ -69,7 +69,7 @@ size_t CompactProtocolWriter::write(const SchemaElement &s) return c.value(); } -size_t CompactProtocolWriter::write(const RowGroup &r) +size_t CompactProtocolWriter::write(const RowGroup& r) { 
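// Field numbering here follows the Parquet Thrift definition of RowGroup:
// 1 = columns, 2 = total_byte_size, 3 = num_rows.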
CompactProtocolFieldWriter c(*this); c.field_struct_list(1, r.columns); @@ -78,7 +78,7 @@ size_t CompactProtocolWriter::write(const RowGroup &r) return c.value(); } -size_t CompactProtocolWriter::write(const KeyValue &k) +size_t CompactProtocolWriter::write(const KeyValue& k) { CompactProtocolFieldWriter c(*this); c.field_string(1, k.key); @@ -86,7 +86,7 @@ size_t CompactProtocolWriter::write(const KeyValue &k) return c.value(); } -size_t CompactProtocolWriter::write(const ColumnChunk &s) +size_t CompactProtocolWriter::write(const ColumnChunk& s) { CompactProtocolFieldWriter c(*this); if (s.file_path.size() != 0) { c.field_string(1, s.file_path); } @@ -103,7 +103,7 @@ size_t CompactProtocolWriter::write(const ColumnChunk &s) return c.value(); } -size_t CompactProtocolWriter::write(const ColumnChunkMetaData &s) +size_t CompactProtocolWriter::write(const ColumnChunkMetaData& s) { CompactProtocolFieldWriter c(*this); c.field_int(1, s.type); @@ -122,9 +122,10 @@ size_t CompactProtocolWriter::write(const ColumnChunkMetaData &s) void CompactProtocolFieldWriter::put_byte(uint8_t v) { writer.m_buf.push_back(v); } -void CompactProtocolFieldWriter::put_byte(const uint8_t *raw, uint32_t len) +void CompactProtocolFieldWriter::put_byte(const uint8_t* raw, uint32_t len) { - for (uint32_t i = 0; i < len; i++) writer.m_buf.push_back(raw[i]); + for (uint32_t i = 0; i < len; i++) + writer.m_buf.push_back(raw[i]); } uint32_t CompactProtocolFieldWriter::put_uint(uint64_t v) @@ -170,17 +171,19 @@ inline void CompactProtocolFieldWriter::field_int(int field, int64_t val) } template -inline void CompactProtocolFieldWriter::field_int_list(int field, const std::vector &val) +inline void CompactProtocolFieldWriter::field_int_list(int field, const std::vector& val) { put_field_header(field, current_field_value, ST_FLD_LIST); put_byte((uint8_t)((std::min(val.size(), (size_t)0xfu) << 4) | ST_FLD_I32)); if (val.size() >= 0xf) put_uint(val.size()); - for (auto &v : val) { put_int(static_cast(v)); } + for (auto& v : val) { + put_int(static_cast(v)); + } current_field_value = field; } template -inline void CompactProtocolFieldWriter::field_struct(int field, const T &val) +inline void CompactProtocolFieldWriter::field_struct(int field, const T& val) { put_field_header(field, current_field_value, ST_FLD_STRUCT); writer.write(val); @@ -188,12 +191,14 @@ inline void CompactProtocolFieldWriter::field_struct(int field, const T &val) } template -inline void CompactProtocolFieldWriter::field_struct_list(int field, const std::vector &val) +inline void CompactProtocolFieldWriter::field_struct_list(int field, const std::vector& val) { put_field_header(field, current_field_value, ST_FLD_LIST); put_byte((uint8_t)((std::min(val.size(), (size_t)0xfu) << 4) | ST_FLD_STRUCT)); if (val.size() >= 0xf) put_uint(val.size()); - for (auto &v : val) { writer.write(v); } + for (auto& v : val) { + writer.write(v); + } current_field_value = field; } @@ -204,7 +209,7 @@ inline size_t CompactProtocolFieldWriter::value() } inline void CompactProtocolFieldWriter::field_struct_blob(int field, - const std::vector &val) + const std::vector& val) { put_field_header(field, current_field_value, ST_FLD_STRUCT); put_byte(val.data(), (uint32_t)val.size()); @@ -212,32 +217,32 @@ inline void CompactProtocolFieldWriter::field_struct_blob(int field, current_field_value = field; } -inline void CompactProtocolFieldWriter::field_string(int field, const std::string &val) +inline void CompactProtocolFieldWriter::field_string(int field, const std::string& val) { 
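// Compact-protocol binary/string encoding is a varint byte length followed by
// the raw bytes, so field_string(f, "abc") emits the field header, then 0x03 'a' 'b' 'c'.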
put_field_header(field, current_field_value, ST_FLD_BINARY); put_uint(val.size()); // FIXME : replace reinterpret_cast - put_byte(reinterpret_cast(val.data()), (uint32_t)val.size()); + put_byte(reinterpret_cast(val.data()), (uint32_t)val.size()); current_field_value = field; } inline void CompactProtocolFieldWriter::field_string_list(int field, - const std::vector &val) + const std::vector& val) { put_field_header(field, current_field_value, ST_FLD_LIST); put_byte((uint8_t)((std::min(val.size(), (size_t)0xfu) << 4) | ST_FLD_BINARY)); if (val.size() >= 0xf) put_uint(val.size()); - for (auto &v : val) { + for (auto& v : val) { put_uint(v.size()); // FIXME : replace reinterpret_cast - put_byte(reinterpret_cast(v.data()), (uint32_t)v.size()); + put_byte(reinterpret_cast(v.data()), (uint32_t)v.size()); } current_field_value = field; } inline int CompactProtocolFieldWriter::current_field() { return current_field_value; } -inline void CompactProtocolFieldWriter::set_current_field(const int &field) +inline void CompactProtocolFieldWriter::set_current_field(const int& field) { current_field_value = field; } diff --git a/cpp/src/io/parquet/compact_protocol_writer.hpp b/cpp/src/io/parquet/compact_protocol_writer.hpp index 2ce9245490e..633bbdf1e19 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.hpp +++ b/cpp/src/io/parquet/compact_protocol_writer.hpp @@ -36,34 +36,34 @@ namespace parquet { */ class CompactProtocolWriter { public: - CompactProtocolWriter(std::vector *output) : m_buf(*output) {} + CompactProtocolWriter(std::vector* output) : m_buf(*output) {} - size_t write(const FileMetaData &); - size_t write(const SchemaElement &); - size_t write(const RowGroup &); - size_t write(const KeyValue &); - size_t write(const ColumnChunk &); - size_t write(const ColumnChunkMetaData &); + size_t write(const FileMetaData&); + size_t write(const SchemaElement&); + size_t write(const RowGroup&); + size_t write(const KeyValue&); + size_t write(const ColumnChunk&); + size_t write(const ColumnChunkMetaData&); protected: - std::vector &m_buf; + std::vector& m_buf; friend class CompactProtocolFieldWriter; }; class CompactProtocolFieldWriter { - CompactProtocolWriter &writer; + CompactProtocolWriter& writer; size_t struct_start_pos; int current_field_value; public: - CompactProtocolFieldWriter(CompactProtocolWriter &caller) + CompactProtocolFieldWriter(CompactProtocolWriter& caller) : writer(caller), struct_start_pos(writer.m_buf.size()), current_field_value(0) { } void put_byte(uint8_t v); - void put_byte(const uint8_t *raw, uint32_t len); + void put_byte(const uint8_t* raw, uint32_t len); uint32_t put_uint(uint64_t v); @@ -76,25 +76,25 @@ class CompactProtocolFieldWriter { inline void field_int(int field, int64_t val); template - inline void field_int_list(int field, const std::vector &val); + inline void field_int_list(int field, const std::vector& val); template - inline void field_struct(int field, const T &val); + inline void field_struct(int field, const T& val); template - inline void field_struct_list(int field, const std::vector &val); + inline void field_struct_list(int field, const std::vector& val); inline size_t value(); - inline void field_struct_blob(int field, const std::vector &val); + inline void field_struct_blob(int field, const std::vector& val); - inline void field_string(int field, const std::string &val); + inline void field_string(int field, const std::string& val); - inline void field_string_list(int field, const std::vector &val); + inline void field_string_list(int field, const 
std::vector& val); inline int current_field(); - inline void set_current_field(const int &field); + inline void set_current_field(const int& field); }; } // namespace parquet diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index dfd9c1384c5..9e1301d6355 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -45,10 +45,10 @@ namespace parquet { namespace gpu { struct page_state_s { - const uint8_t *data_start; - const uint8_t *data_end; - const uint8_t *lvl_end; - const uint8_t *dict_base; // ptr to dictionary page data + const uint8_t* data_start; + const uint8_t* data_end; + const uint8_t* lvl_end; + const uint8_t* dict_base; // ptr to dictionary page data int32_t dict_size; // size of dictionary data int32_t first_row; // First row in page to output int32_t num_rows; // Rows in page to decode (including rows to be skipped) @@ -80,7 +80,7 @@ struct page_state_s { int32_t input_leaf_count; // how many leaf values of the input we've processed uint32_t rep[non_zero_buffer_size]; // circular buffer of repetition level values uint32_t def[non_zero_buffer_size]; // circular buffer of definition level values - const uint8_t *lvl_start[NUM_LEVEL_TYPES]; // [def,rep] + const uint8_t* lvl_start[NUM_LEVEL_TYPES]; // [def,rep] int32_t lvl_count[NUM_LEVEL_TYPES]; // how many of each of the streams we've decoded int32_t row_index_lower_bound; // lower bound of row indices we should process }; @@ -100,9 +100,9 @@ struct page_state_s { * * @return The hash value */ -__device__ uint32_t device_str2hash32(const char *key, size_t len, uint32_t seed = 33) +__device__ uint32_t device_str2hash32(const char* key, size_t len, uint32_t seed = 33) { - const uint8_t *p = reinterpret_cast(key); + const uint8_t* p = reinterpret_cast(key); uint32_t h1 = seed, k1; const uint32_t c1 = 0xcc9e2d51; const uint32_t c2 = 0x1b873593; @@ -149,7 +149,7 @@ __device__ uint32_t device_str2hash32(const char *key, size_t len, uint32_t seed * * @return The 32-bit value read */ -inline __device__ uint32_t get_vlq32(const uint8_t *&cur, const uint8_t *end) +inline __device__ uint32_t get_vlq32(const uint8_t*& cur, const uint8_t* end) { uint32_t v = *cur++; if (v >= 0x80 && cur < end) { @@ -178,9 +178,9 @@ inline __device__ uint32_t get_vlq32(const uint8_t *&cur, const uint8_t *end) * * @return The length of the section */ -__device__ uint32_t InitLevelSection(page_state_s *s, - const uint8_t *cur, - const uint8_t *end, +__device__ uint32_t InitLevelSection(page_state_s* s, + const uint8_t* cur, + const uint8_t* end, level_type lvl) { int32_t len; @@ -236,10 +236,10 @@ __device__ uint32_t InitLevelSection(page_state_s *s, * @param[in] lvl The level type we are decoding - DEFINITION or REPETITION */ __device__ void gpuDecodeStream( - uint32_t *output, page_state_s *s, int32_t target_count, int t, level_type lvl) + uint32_t* output, page_state_s* s, int32_t target_count, int t, level_type lvl) { - const uint8_t *cur_def = s->lvl_start[lvl]; - const uint8_t *end = s->lvl_end; + const uint8_t* cur_def = s->lvl_start[lvl]; + const uint8_t* end = s->lvl_end; uint32_t level_run = s->initial_rle_run[lvl]; int32_t level_val = s->initial_rle_value[lvl]; int level_bits = s->col.level_bits[lvl]; @@ -253,7 +253,7 @@ __device__ void gpuDecodeStream( // Get a new run symbol from the byte stream int sym_len = 0; if (!t) { - const uint8_t *cur = cur_def; + const uint8_t* cur = cur_def; if (cur < end) { level_run = get_vlq32(cur, end); } if (!(level_run & 1)) { if (cur < end) level_val = 
cur[0]; @@ -282,7 +282,7 @@ __device__ void gpuDecodeStream( batch_len8 = (batch_len + 7) >> 3; if (t < batch_len) { int bitpos = t * level_bits; - const uint8_t *cur = cur_def + (bitpos >> 3); + const uint8_t* cur = cur_def + (bitpos >> 3); bitpos &= 7; if (cur < end) level_val = cur[0]; cur++; @@ -327,9 +327,9 @@ __device__ void gpuDecodeStream( * * @return The new output position */ -__device__ int gpuDecodeDictionaryIndices(volatile page_state_s *s, int target_pos, int t) +__device__ int gpuDecodeDictionaryIndices(volatile page_state_s* s, int target_pos, int t) { - const uint8_t *end = s->data_end; + const uint8_t* end = s->data_end; int dict_bits = s->dict_bits; int pos = s->dict_pos; @@ -337,7 +337,7 @@ __device__ int gpuDecodeDictionaryIndices(volatile page_state_s *s, int target_p int is_literal, batch_len; if (!t) { uint32_t run = s->dict_run; - const uint8_t *cur = s->data_start; + const uint8_t* cur = s->data_start; if (run <= 1) { run = (cur < end) ? get_vlq32(cur, end) : 0; if (!(run & 1)) { @@ -380,7 +380,7 @@ __device__ int gpuDecodeDictionaryIndices(volatile page_state_s *s, int target_p int dict_idx = s->dict_val; if (is_literal) { int32_t ofs = (t - ((batch_len + 7) & ~7)) * dict_bits; - const uint8_t *p = s->data_start + (ofs >> 3); + const uint8_t* p = s->data_start + (ofs >> 3); ofs &= 7; if (p < end) { uint32_t c = 8 - ofs; @@ -413,16 +413,16 @@ __device__ int gpuDecodeDictionaryIndices(volatile page_state_s *s, int target_p * * @return The new output position */ -__device__ int gpuDecodeRleBooleans(volatile page_state_s *s, int target_pos, int t) +__device__ int gpuDecodeRleBooleans(volatile page_state_s* s, int target_pos, int t) { - const uint8_t *end = s->data_end; + const uint8_t* end = s->data_end; int pos = s->dict_pos; while (pos < target_pos) { int is_literal, batch_len; if (!t) { uint32_t run = s->dict_run; - const uint8_t *cur = s->data_start; + const uint8_t* cur = s->data_start; if (run <= 1) { run = (cur < end) ? get_vlq32(cur, end) : 0; if (!(run & 1)) { @@ -455,7 +455,7 @@ __device__ int gpuDecodeRleBooleans(volatile page_state_s *s, int target_pos, in int dict_idx; if (is_literal) { int32_t ofs = t - ((batch_len + 7) & ~7); - const uint8_t *p = s->data_start + (ofs >> 3); + const uint8_t* p = s->data_start + (ofs >> 3); dict_idx = (p < end) ? 
(p[0] >> (ofs & 7u)) & 1 : 0; } else { dict_idx = s->dict_val; @@ -476,12 +476,12 @@ __device__ int gpuDecodeRleBooleans(volatile page_state_s *s, int target_pos, in * * @return The new output position */ -__device__ void gpuInitStringDescriptors(volatile page_state_s *s, int target_pos, int t) +__device__ void gpuInitStringDescriptors(volatile page_state_s* s, int target_pos, int t) { int pos = s->dict_pos; // This step is purely serial if (!t) { - const uint8_t *cur = s->data_start; + const uint8_t* cur = s->data_start; int dict_size = s->dict_size; int k = s->dict_val; @@ -511,9 +511,9 @@ __device__ void gpuInitStringDescriptors(volatile page_state_s *s, int target_po * @param[in] src_pos Source position * @param[in] dstv Pointer to row output data (string descriptor or 32-bit hash) */ -inline __device__ void gpuOutputString(volatile page_state_s *s, int src_pos, void *dstv) +inline __device__ void gpuOutputString(volatile page_state_s* s, int src_pos, void* dstv) { - const char *ptr = NULL; + const char* ptr = NULL; size_t len = 0; if (s->dict_base) { @@ -522,8 +522,8 @@ inline __device__ void gpuOutputString(volatile page_state_s *s, int src_pos, vo sizeof(string_index_pair) : 0; if (dict_pos < (uint32_t)s->dict_size) { - const string_index_pair *src = - reinterpret_cast(s->dict_base + dict_pos); + const string_index_pair* src = + reinterpret_cast(s->dict_base + dict_pos); ptr = src->first; len = src->second; } @@ -531,16 +531,16 @@ inline __device__ void gpuOutputString(volatile page_state_s *s, int src_pos, vo // Plain encoding uint32_t dict_pos = s->dict_idx[src_pos & (non_zero_buffer_size - 1)]; if (dict_pos <= (uint32_t)s->dict_size) { - ptr = reinterpret_cast(s->data_start + dict_pos); + ptr = reinterpret_cast(s->data_start + dict_pos); len = s->str_len[src_pos & (non_zero_buffer_size - 1)]; } } if (s->dtype_len == 4) { // Output hash - *static_cast(dstv) = device_str2hash32(ptr, len); + *static_cast(dstv) = device_str2hash32(ptr, len); } else { // Output string descriptor - string_index_pair *dst = static_cast(dstv); + string_index_pair* dst = static_cast(dstv); dst->first = ptr; dst->second = len; } @@ -553,7 +553,7 @@ inline __device__ void gpuOutputString(volatile page_state_s *s, int src_pos, vo * @param[in] src_pos Source position * @param[in] dst Pointer to row output data */ -inline __device__ void gpuOutputBoolean(volatile page_state_s *s, int src_pos, uint8_t *dst) +inline __device__ void gpuOutputBoolean(volatile page_state_s* s, int src_pos, uint8_t* dst) { *dst = s->dict_idx[src_pos & (non_zero_buffer_size - 1)]; } @@ -566,8 +566,8 @@ inline __device__ void gpuOutputBoolean(volatile page_state_s *s, int src_pos, u * @param[in] dict_pos byte position in dictionary * @param[in] dict_size size of dictionary */ -inline __device__ void gpuStoreOutput(uint32_t *dst, - const uint8_t *src8, +inline __device__ void gpuStoreOutput(uint32_t* dst, + const uint8_t* src8, uint32_t dict_pos, uint32_t dict_size) { @@ -576,9 +576,9 @@ inline __device__ void gpuStoreOutput(uint32_t *dst, src8 -= ofs; // align to 32-bit boundary ofs <<= 3; // bytes -> bits if (dict_pos < dict_size) { - bytebuf = *reinterpret_cast(src8 + dict_pos); + bytebuf = *reinterpret_cast(src8 + dict_pos); if (ofs) { - uint32_t bytebufnext = *reinterpret_cast(src8 + dict_pos + 4); + uint32_t bytebufnext = *reinterpret_cast(src8 + dict_pos + 4); bytebuf = __funnelshift_r(bytebuf, bytebufnext, ofs); } } else { @@ -595,8 +595,8 @@ inline __device__ void gpuStoreOutput(uint32_t *dst, * @param[in] dict_pos byte 
position in dictionary * @param[in] dict_size size of dictionary */ -inline __device__ void gpuStoreOutput(uint2 *dst, - const uint8_t *src8, +inline __device__ void gpuStoreOutput(uint2* dst, + const uint8_t* src8, uint32_t dict_pos, uint32_t dict_size) { @@ -605,10 +605,10 @@ inline __device__ void gpuStoreOutput(uint2 *dst, src8 -= ofs; // align to 32-bit boundary ofs <<= 3; // bytes -> bits if (dict_pos < dict_size) { - v.x = *reinterpret_cast(src8 + dict_pos + 0); - v.y = *reinterpret_cast(src8 + dict_pos + 4); + v.x = *reinterpret_cast(src8 + dict_pos + 0); + v.y = *reinterpret_cast(src8 + dict_pos + 4); if (ofs) { - uint32_t next = *reinterpret_cast(src8 + dict_pos + 8); + uint32_t next = *reinterpret_cast(src8 + dict_pos + 8); v.x = __funnelshift_r(v.x, v.y, ofs); v.y = __funnelshift_r(v.y, next, ofs); } @@ -625,9 +625,9 @@ inline __device__ void gpuStoreOutput(uint2 *dst, * @param[in] src_pos Source position * @param[in] dst Pointer to row output data */ -inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s *s, int src_pos, int64_t *dst) +inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s* s, int src_pos, int64_t* dst) { - const uint8_t *src8; + const uint8_t* src8; uint32_t dict_pos, dict_size = s->dict_size, ofs; int64_t ts; @@ -647,11 +647,11 @@ inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s *s, int src if (dict_pos + 4 < dict_size) { uint3 v; int64_t nanos, secs, days; - v.x = *reinterpret_cast(src8 + dict_pos + 0); - v.y = *reinterpret_cast(src8 + dict_pos + 4); - v.z = *reinterpret_cast(src8 + dict_pos + 8); + v.x = *reinterpret_cast(src8 + dict_pos + 0); + v.y = *reinterpret_cast(src8 + dict_pos + 4); + v.z = *reinterpret_cast(src8 + dict_pos + 8); if (ofs) { - uint32_t next = *reinterpret_cast(src8 + dict_pos + 12); + uint32_t next = *reinterpret_cast(src8 + dict_pos + 12); v.x = __funnelshift_r(v.x, v.y, ofs); v.y = __funnelshift_r(v.y, v.z, ofs); v.z = __funnelshift_r(v.z, next, ofs); @@ -681,9 +681,9 @@ inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s *s, int src * @param[in] src_pos Source position * @param[in] dst Pointer to row output data */ -inline __device__ void gpuOutputInt64Timestamp(volatile page_state_s *s, int src_pos, int64_t *dst) +inline __device__ void gpuOutputInt64Timestamp(volatile page_state_s* s, int src_pos, int64_t* dst) { - const uint8_t *src8; + const uint8_t* src8; uint32_t dict_pos, dict_size = s->dict_size, ofs; int64_t ts; @@ -704,10 +704,10 @@ inline __device__ void gpuOutputInt64Timestamp(volatile page_state_s *s, int src uint2 v; int64_t val; int32_t ts_scale; - v.x = *reinterpret_cast(src8 + dict_pos + 0); - v.y = *reinterpret_cast(src8 + dict_pos + 4); + v.x = *reinterpret_cast(src8 + dict_pos + 0); + v.y = *reinterpret_cast(src8 + dict_pos + 4); if (ofs) { - uint32_t next = *reinterpret_cast(src8 + dict_pos + 8); + uint32_t next = *reinterpret_cast(src8 + dict_pos + 8); v.x = __funnelshift_r(v.x, v.y, ofs); v.y = __funnelshift_r(v.y, next, ofs); } @@ -746,12 +746,12 @@ static const __device__ __constant__ double kPow10[40] = { * @param[in] dst Pointer to row output data * @param[in] dtype Stored data type */ -inline __device__ void gpuOutputDecimalAsFloat(volatile page_state_s *s, +inline __device__ void gpuOutputDecimalAsFloat(volatile page_state_s* s, int src_pos, - double *dst, + double* dst, int dtype) { - const uint8_t *dict; + const uint8_t* dict; uint32_t dict_pos, dict_size = s->dict_size, dtype_len_in; int64_t i128_hi, i128_lo; int32_t scale; @@ 
-823,12 +823,12 @@ inline __device__ void gpuOutputDecimalAsFloat(volatile page_state_s *s, * @param[in] src_pos Source position * @param[in] dst Pointer to row output data */ -inline __device__ void gpuOutputFixedLenByteArrayAsInt64(volatile page_state_s *s, +inline __device__ void gpuOutputFixedLenByteArrayAsInt64(volatile page_state_s* s, int src_pos, - int64_t *dst) + int64_t* dst) { uint32_t const dtype_len_in = s->dtype_len_in; - uint8_t const *data = s->dict_base ? s->dict_base : s->data_start; + uint8_t const* data = s->dict_base ? s->dict_base : s->data_start; uint32_t const pos = (s->dict_base ? ((s->dict_bits > 0) ? s->dict_idx[src_pos & (non_zero_buffer_size - 1)] : 0) : src_pos) * @@ -857,9 +857,9 @@ inline __device__ void gpuOutputFixedLenByteArrayAsInt64(volatile page_state_s * * @param[in] dst Pointer to row output data */ template -inline __device__ void gpuOutputFast(volatile page_state_s *s, int src_pos, T *dst) +inline __device__ void gpuOutputFast(volatile page_state_s* s, int src_pos, T* dst) { - const uint8_t *dict; + const uint8_t* dict; uint32_t dict_pos, dict_size = s->dict_size; if (s->dict_base) { @@ -883,12 +883,12 @@ inline __device__ void gpuOutputFast(volatile page_state_s *s, int src_pos, T *d * @param[in] dst8 Pointer to row output data * @param[in] len Length of element */ -static __device__ void gpuOutputGeneric(volatile page_state_s *s, +static __device__ void gpuOutputGeneric(volatile page_state_s* s, int src_pos, - uint8_t *dst8, + uint8_t* dst8, int len) { - const uint8_t *dict; + const uint8_t* dict; uint32_t dict_pos, dict_size = s->dict_size; if (s->dict_base) { @@ -908,23 +908,23 @@ static __device__ void gpuOutputGeneric(volatile page_state_s *s, } } else { // Copy 4 bytes at a time - const uint8_t *src8 = dict; + const uint8_t* src8 = dict; unsigned int ofs = 3 & reinterpret_cast(src8); src8 -= ofs; // align to 32-bit boundary ofs <<= 3; // bytes -> bits for (unsigned int i = 0; i < len; i += 4) { uint32_t bytebuf; if (dict_pos < dict_size) { - bytebuf = *reinterpret_cast(src8 + dict_pos); + bytebuf = *reinterpret_cast(src8 + dict_pos); if (ofs) { - uint32_t bytebufnext = *reinterpret_cast(src8 + dict_pos + 4); + uint32_t bytebufnext = *reinterpret_cast(src8 + dict_pos + 4); bytebuf = __funnelshift_r(bytebuf, bytebufnext, ofs); } } else { bytebuf = 0; } dict_pos += 4; - *reinterpret_cast(dst8 + i) = bytebuf; + *reinterpret_cast(dst8 + i) = bytebuf; } } } @@ -939,9 +939,9 @@ static __device__ void gpuOutputGeneric(volatile page_state_s *s, * @param[in] min_row crop all rows below min_row * @param[in] num_chunk Number of column chunks */ -static __device__ bool setupLocalPageInfo(page_state_s *const s, - PageInfo *p, - ColumnChunkDesc const *chunks, +static __device__ bool setupLocalPageInfo(page_state_s* const s, + PageInfo* p, + ColumnChunkDesc const* chunks, size_t min_row, size_t num_rows, int32_t num_chunks) @@ -984,8 +984,8 @@ static __device__ bool setupLocalPageInfo(page_state_s *const s, // - On page N, the remaining 4/6 values are encoded, but there are no new rows. 
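// For that reason the guard below checks num_input_values alone: a page with
// zero new rows may still carry trailing leaf values of a list row that
// started on an earlier page, and those values must still be decoded.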
// if (s->page.num_input_values > 0 && s->page.num_rows > 0) { if (s->page.num_input_values > 0) { - uint8_t *cur = s->page.page_data; - uint8_t *end = cur + s->page.uncompressed_page_size; + uint8_t* cur = s->page.page_data; + uint8_t* end = cur + s->page.uncompressed_page_size; uint32_t dtype_len_out = s->col.data_type >> 3; s->ts_scale = 0; @@ -1052,7 +1052,7 @@ static __device__ bool setupLocalPageInfo(page_state_s *const s, if (s->col.column_data_base != nullptr) { int max_depth = s->col.max_nesting_depth; for (int idx = 0; idx < max_depth; idx++) { - PageNestingInfo *pni = &s->page.nesting[idx]; + PageNestingInfo* pni = &s->page.nesting[idx]; size_t output_offset; // schemas without lists @@ -1064,7 +1064,7 @@ static __device__ bool setupLocalPageInfo(page_state_s *const s, output_offset = pni->page_start_value; } - pni->data_out = static_cast(s->col.column_data_base[idx]); + pni->data_out = static_cast(s->col.column_data_base[idx]); if (pni->data_out != nullptr) { // anything below max depth with a valid data pointer must be a list, so the // element size is the size of the offset type. @@ -1094,7 +1094,7 @@ static __device__ bool setupLocalPageInfo(page_state_s *const s, // RLE-packed dictionary indices, first byte indicates index length in bits if (((s->col.data_type & 7) == BYTE_ARRAY) && (s->col.str_dict_index)) { // String dictionary: use index - s->dict_base = reinterpret_cast(s->col.str_dict_index); + s->dict_base = reinterpret_cast(s->col.str_dict_index); s->dict_size = s->col.page_info[0].num_input_values * sizeof(string_index_pair); } else { s->dict_base = @@ -1195,7 +1195,7 @@ static __device__ bool setupLocalPageInfo(page_state_s *const s, * @param[in] valid_mask The validity mask to be stored * @param[in] value_count # of bits in the validity mask */ -static __device__ void store_validity(PageNestingInfo *pni, +static __device__ void store_validity(PageNestingInfo* pni, uint32_t valid_mask, int32_t value_count) { @@ -1249,10 +1249,10 @@ static __device__ void store_validity(PageNestingInfo *pni, * @param[in] target_input_value_count The desired # of input level values we want to process * @param[in] t Thread index */ -inline __device__ void get_nesting_bounds(int &start_depth, - int &end_depth, - int &d, - page_state_s *s, +inline __device__ void get_nesting_bounds(int& start_depth, + int& end_depth, + int& d, + page_state_s* s, int input_value_count, int32_t target_input_value_count, int t) @@ -1288,7 +1288,7 @@ inline __device__ void get_nesting_bounds(int &start_depth, * @param[in] t Thread index */ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_input_value_count, - page_state_s *s, + page_state_s* s, int t) { // max nesting depth of the column @@ -1339,7 +1339,7 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu // walk from 0 to max_depth uint32_t next_thread_value_count, next_warp_value_count; for (int s_idx = 0; s_idx < max_depth; s_idx++) { - PageNestingInfo *pni = &s->page.nesting[s_idx]; + PageNestingInfo* pni = &s->page.nesting[s_idx]; // if we are within the range of nesting levels we should be adding value indices for int const in_nesting_bounds = @@ -1391,7 +1391,7 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu cudf::size_type const ofs = s->page.nesting[s_idx + 1].value_count + next_thread_value_count + s->page.nesting[s_idx + 1].page_start_value; - (reinterpret_cast(pni->data_out))[idx] = ofs; + (reinterpret_cast(pni->data_out))[idx] = ofs; } } @@ -1455,7 
+1455,7 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu * @param[in] target_leaf_count Target count of non-null leaf values to generate indices for * @param[in] t Thread index */ -__device__ void gpuDecodeLevels(page_state_s *s, int32_t target_leaf_count, int t) +__device__ void gpuDecodeLevels(page_state_s* s, int32_t target_leaf_count, int t) { bool has_repetition = s->col.max_level[level_type::REPETITION] > 0; @@ -1494,7 +1494,7 @@ __device__ void gpuDecodeLevels(page_state_s *s, int32_t target_leaf_count, int * @param[in] bounds_set Whether or not s->row_index_lower_bound, s->first_row and s->num_rows * have been computed for this page (they will only be set in the second/trim pass). */ -static __device__ void gpuUpdatePageSizes(page_state_s *s, +static __device__ void gpuUpdatePageSizes(page_state_s* s, int32_t target_input_value_count, int t, bool bounds_set) @@ -1586,8 +1586,8 @@ static __device__ void gpuUpdatePageSizes(page_state_s *s, */ // blockDim {block_size,1,1} extern "C" __global__ void __launch_bounds__(block_size) - gpuComputePageSizes(PageInfo *pages, - ColumnChunkDesc const *chunks, + gpuComputePageSizes(PageInfo* pages, + ColumnChunkDesc const* chunks, size_t min_row, size_t num_rows, int32_t num_chunks, @@ -1595,10 +1595,10 @@ extern "C" __global__ void __launch_bounds__(block_size) { __shared__ __align__(16) page_state_s state_g; - page_state_s *const s = &state_g; + page_state_s* const s = &state_g; int page_idx = blockIdx.x; int t = threadIdx.x; - PageInfo *pp = &pages[page_idx]; + PageInfo* pp = &pages[page_idx]; if (!setupLocalPageInfo( s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX, num_chunks)) { @@ -1678,15 +1678,15 @@ extern "C" __global__ void __launch_bounds__(block_size) */ // blockDim {block_size,1,1} extern "C" __global__ void __launch_bounds__(block_size) - gpuDecodePageData(PageInfo *pages, - ColumnChunkDesc const *chunks, + gpuDecodePageData(PageInfo* pages, + ColumnChunkDesc const* chunks, size_t min_row, size_t num_rows, int32_t num_chunks) { __shared__ __align__(16) page_state_s state_g; - page_state_s *const s = &state_g; + page_state_s* const s = &state_g; int page_idx = blockIdx.x; int t = threadIdx.x; int out_thread0; @@ -1732,7 +1732,7 @@ extern "C" __global__ void __launch_bounds__(block_size) } else if ((s->col.data_type & 7) == BYTE_ARRAY) { gpuInitStringDescriptors(s, src_target_pos, t & 0x1f); } - if (t == 32) { *(volatile int32_t *)&s->dict_pos = src_target_pos; } + if (t == 32) { *(volatile int32_t*)&s->dict_pos = src_target_pos; } } else { // WARP1..WARP3: Decode values int dtype = s->col.data_type & 7; @@ -1767,52 +1767,52 @@ extern "C" __global__ void __launch_bounds__(block_size) int leaf_level_index = s->col.max_nesting_depth - 1; uint32_t dtype_len = s->dtype_len; - void *dst = + void* dst = s->page.nesting[leaf_level_index].data_out + static_cast(dst_pos) * dtype_len; if (dtype == BYTE_ARRAY) { gpuOutputString(s, val_src_pos, dst); } else if (dtype == BOOLEAN) { - gpuOutputBoolean(s, val_src_pos, static_cast(dst)); + gpuOutputBoolean(s, val_src_pos, static_cast(dst)); } else if (s->col.converted_type == DECIMAL) { switch (dtype) { - case INT32: gpuOutputFast(s, val_src_pos, static_cast(dst)); break; - case INT64: gpuOutputFast(s, val_src_pos, static_cast(dst)); break; + case INT32: gpuOutputFast(s, val_src_pos, static_cast(dst)); break; + case INT64: gpuOutputFast(s, val_src_pos, static_cast(dst)); break; default: // we currently do not support reading byte arrays larger 
than DECIMAL64 if (s->dtype_len_in <= 8) { - gpuOutputFixedLenByteArrayAsInt64(s, val_src_pos, static_cast(dst)); + gpuOutputFixedLenByteArrayAsInt64(s, val_src_pos, static_cast(dst)); } else { - gpuOutputDecimalAsFloat(s, val_src_pos, static_cast(dst), dtype); + gpuOutputDecimalAsFloat(s, val_src_pos, static_cast(dst), dtype); } break; } } else if (dtype == INT96) { - gpuOutputInt96Timestamp(s, val_src_pos, static_cast(dst)); + gpuOutputInt96Timestamp(s, val_src_pos, static_cast(dst)); } else if (dtype_len == 8) { if (s->ts_scale) { - gpuOutputInt64Timestamp(s, val_src_pos, static_cast(dst)); + gpuOutputInt64Timestamp(s, val_src_pos, static_cast(dst)); } else { - gpuOutputFast(s, val_src_pos, static_cast(dst)); + gpuOutputFast(s, val_src_pos, static_cast(dst)); } } else if (dtype_len == 4) { - gpuOutputFast(s, val_src_pos, static_cast(dst)); + gpuOutputFast(s, val_src_pos, static_cast(dst)); } else { - gpuOutputGeneric(s, val_src_pos, static_cast(dst), dtype_len); + gpuOutputGeneric(s, val_src_pos, static_cast(dst), dtype_len); } } - if (t == out_thread0) { *(volatile int32_t *)&s->src_pos = target_pos; } + if (t == out_thread0) { *(volatile int32_t*)&s->src_pos = target_pos; } } __syncthreads(); } } struct chunk_row_output_iter { - PageInfo *p; + PageInfo* p; using value_type = size_type; using difference_type = size_type; - using pointer = size_type *; - using reference = size_type &; + using pointer = size_type*; + using reference = size_type&; using iterator_category = thrust::output_device_iterator_tag; __host__ __device__ chunk_row_output_iter operator+(int i) @@ -1828,16 +1828,16 @@ struct chunk_row_output_iter { }; struct start_offset_output_iterator { - PageInfo *pages; - int *page_indices; + PageInfo* pages; + int* page_indices; int cur_index; int src_col_schema; int nesting_depth; int empty = 0; using value_type = size_type; using difference_type = size_type; - using pointer = size_type *; - using reference = size_type &; + using pointer = size_type*; + using reference = size_type&; using iterator_category = thrust::output_device_iterator_tag; __host__ __device__ start_offset_output_iterator operator+(int i) @@ -1854,7 +1854,7 @@ struct start_offset_output_iterator { private: __device__ reference dereference(int index) { - PageInfo const &p = pages[page_indices[index]]; + PageInfo const& p = pages[page_indices[index]]; if (p.src_col_schema != src_col_schema || p.flags & PAGEINFO_FLAGS_DICTIONARY) { return empty; } return p.nesting[nesting_depth].page_start_value; } @@ -1863,14 +1863,14 @@ struct start_offset_output_iterator { /** * @copydoc cudf::io::parquet::gpu::PreprocessColumnData */ -void PreprocessColumnData(hostdevice_vector &pages, - hostdevice_vector const &chunks, - std::vector &input_columns, - std::vector &output_columns, +void PreprocessColumnData(hostdevice_vector& pages, + hostdevice_vector const& chunks, + std::vector& input_columns, + std::vector& output_columns, size_t num_rows, size_t min_row, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { dim3 dim_block(block_size, 1); dim3 dim_grid(pages.size(), 1); // 1 threadblock per page @@ -1885,9 +1885,9 @@ void PreprocessColumnData(hostdevice_vector &pages, // computes: // PageInfo::chunk_row for all pages auto key_input = thrust::make_transform_iterator( - pages.device_ptr(), [] __device__(PageInfo const &page) { return page.chunk_idx; }); + pages.device_ptr(), [] __device__(PageInfo const& page) { return page.chunk_idx; }); auto page_input = 
thrust::make_transform_iterator( - pages.device_ptr(), [] __device__(PageInfo const &page) { return page.num_rows; }); + pages.device_ptr(), [] __device__(PageInfo const& page) { return page.num_rows; }); thrust::exclusive_scan_by_key(rmm::exec_policy(stream), key_input, key_input + pages.size(), @@ -1927,7 +1927,7 @@ void PreprocessColumnData(hostdevice_vector &pages, pages.device_ptr(), pages.device_ptr() + pages.size(), page_keys.begin(), - [] __device__(PageInfo const &page) { return page.src_col_schema; }); + [] __device__(PageInfo const& page) { return page.src_col_schema; }); thrust::sequence(rmm::exec_policy(stream), page_index.begin(), page_index.end()); thrust::stable_sort_by_key(rmm::exec_policy(stream), @@ -1939,20 +1939,20 @@ void PreprocessColumnData(hostdevice_vector &pages, // compute output column sizes by examining the pages of the -input- columns for (size_t idx = 0; idx < input_columns.size(); idx++) { - auto const &input_col = input_columns[idx]; + auto const& input_col = input_columns[idx]; auto src_col_schema = input_col.schema_idx; size_t max_depth = input_col.nesting_depth(); - auto *cols = &output_columns; + auto* cols = &output_columns; for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) { - auto &out_buf = (*cols)[input_col.nesting[l_idx]]; + auto& out_buf = (*cols)[input_col.nesting[l_idx]]; cols = &out_buf.children; // size iterator. indexes pages by sorted order auto size_input = thrust::make_transform_iterator( page_index.begin(), [src_col_schema, l_idx, pages = pages.device_ptr()] __device__(int index) { - auto const &page = pages[index]; + auto const& page = pages[index]; if (page.src_col_schema != src_col_schema || page.flags & PAGEINFO_FLAGS_DICTIONARY) { return 0; } @@ -1989,8 +1989,8 @@ void PreprocessColumnData(hostdevice_vector &pages, /** * @copydoc cudf::io::parquet::gpu::DecodePageData */ -void __host__ DecodePageData(hostdevice_vector &pages, - hostdevice_vector const &chunks, +void __host__ DecodePageData(hostdevice_vector& pages, + hostdevice_vector const& chunks, size_t num_rows, size_t min_row, rmm::cuda_stream_view stream) diff --git a/cpp/src/io/parquet/page_dict.cu b/cpp/src/io/parquet/page_dict.cu index 2d505b99981..0c55828b120 100644 --- a/cpp/src/io/parquet/page_dict.cu +++ b/cpp/src/io/parquet/page_dict.cu @@ -29,8 +29,8 @@ namespace parquet { namespace gpu { struct dict_state_s { uint32_t row_cnt; - PageFragment *cur_fragment; - uint32_t *hashmap; + PageFragment* cur_fragment; + uint32_t* hashmap; uint32_t total_dict_entries; //!< Total number of entries in dictionary uint32_t dictionary_size; //!< Total dictionary size in bytes uint32_t num_dict_entries; //!< Dictionary entries in current fragment to add @@ -52,14 +52,14 @@ inline __device__ uint32_t uint64_hash16(uint64_t v) return uint32_hash16((uint32_t)(v + (v >> 32))); } -inline __device__ uint32_t hash_string(const string_view &val) +inline __device__ uint32_t hash_string(const string_view& val) { - const char *p = val.data(); + const char* p = val.data(); uint32_t len = val.size_bytes(); uint32_t hash = len; if (len > 0) { uint32_t align_p = 3 & reinterpret_cast(p); - const uint32_t *p32 = reinterpret_cast(p - align_p); + const uint32_t* p32 = reinterpret_cast(p - align_p); uint32_t ofs = align_p * 8; uint32_t v; while (len > 4) { @@ -85,8 +85,8 @@ inline __device__ uint32_t hash_string(const string_view &val) * @param[in] frag_start_row row position of current fragment * @param[in] t thread id */ -__device__ void FetchDictionaryFragment(dict_state_s *s, - 
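// A hedged sketch of the exclusive_scan_by_key step above: per-page row counts
// keyed by chunk index become per-page starting rows within each chunk. A host
// equivalent, assuming pages are already grouped by chunk_idx:

#include <thrust/host_vector.h>
#include <thrust/scan.h>

void compute_chunk_rows(thrust::host_vector<int> const& chunk_idx,  // keys
                        thrust::host_vector<int> const& num_rows,   // values
                        thrust::host_vector<int>& chunk_row)        // results
{
  // Within each run of equal keys: chunk_row[i] = sum of num_rows before i.
  thrust::exclusive_scan_by_key(
    chunk_idx.begin(), chunk_idx.end(), num_rows.begin(), chunk_row.begin());
}

// e.g. chunk_idx {0,0,0,1,1}, num_rows {10,20,30,5,5} -> chunk_row {0,10,30,0,5}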
uint32_t *dict_data, +__device__ void FetchDictionaryFragment(dict_state_s* s, + uint32_t* dict_data, uint32_t frag_start_row, uint32_t t) { @@ -108,12 +108,12 @@ __device__ void FetchDictionaryFragment(dict_state_s *s, /// Generate dictionary indices in ascending row order template -__device__ void GenerateDictionaryIndices(dict_state_s *s, uint32_t t) +__device__ void GenerateDictionaryIndices(dict_state_s* s, uint32_t t) { using block_scan = cub::BlockScan; __shared__ typename block_scan::TempStorage temp_storage; - uint32_t *dict_index = s->col.dict_index; - uint32_t *dict_data = s->col.dict_data + s->ck.start_row; + uint32_t* dict_index = s->col.dict_index; + uint32_t* dict_data = s->col.dict_data + s->ck.start_row; uint32_t num_dict_entries = 0; for (uint32_t i = 0; i < s->row_cnt; i += 1024) { @@ -150,13 +150,13 @@ __device__ void GenerateDictionaryIndices(dict_state_s *s, uint32_t t) // blockDim(1024, 1, 1) template __global__ void __launch_bounds__(block_size, 1) - gpuBuildChunkDictionaries(device_span chunks, uint32_t *dev_scratch) + gpuBuildChunkDictionaries(device_span chunks, uint32_t* dev_scratch) { __shared__ __align__(8) dict_state_s state_g; using block_reduce = cub::BlockReduce; __shared__ typename block_reduce::TempStorage temp_storage; - dict_state_s *const s = &state_g; + dict_state_s* const s = &state_g; uint32_t t = threadIdx.x; uint32_t dtype, dtype_len, dtype_len_in; @@ -227,23 +227,19 @@ __global__ void __launch_bounds__(block_size, 1) val = s->col.leaf_column->element(row); hash = uint64_hash16(val); } else { - val = (dtype_len_in == 4) - ? s->col.leaf_column->element(row) - : (dtype_len_in == 2) ? s->col.leaf_column->element(row) - : s->col.leaf_column->element(row); + val = (dtype_len_in == 4) ? s->col.leaf_column->element(row) + : (dtype_len_in == 2) ? s->col.leaf_column->element(row) + : s->col.leaf_column->element(row); hash = uint32_hash16(val); } // Walk the list of rows with the same hash next_addr = &s->hashmap[hash]; while ((next = atomicCAS(next_addr, 0, row + 1)) != 0) { auto const current = next - 1; - uint64_t val2 = (dtype_len_in == 8) - ? s->col.leaf_column->element(current) - : (dtype_len_in == 4) - ? s->col.leaf_column->element(current) - : (dtype_len_in == 2) - ? s->col.leaf_column->element(current) - : s->col.leaf_column->element(current); + uint64_t val2 = (dtype_len_in == 8) ? s->col.leaf_column->element(current) + : (dtype_len_in == 4) ? s->col.leaf_column->element(current) + : (dtype_len_in == 2) ? 
s->col.leaf_column->element(current) + : s->col.leaf_column->element(current); if (val2 == val) { is_dupe = 1; break; @@ -274,7 +270,9 @@ __global__ void __launch_bounds__(block_size, 1) bool reorder_check = (is_valid && is_dupe && next - 1 > row); if (reorder_check) { next = s->col.dict_index[next - 1]; - while (next & (1u << 31)) { next = s->col.dict_index[next & 0x7fffffff]; } + while (next & (1u << 31)) { + next = s->col.dict_index[next & 0x7fffffff]; + } } if (__syncthreads_or(reorder_check)) { if (reorder_check) { atomicMin(&s->col.dict_index[next], row); } @@ -324,7 +322,7 @@ __global__ void __launch_bounds__(block_size, 1) * @param[in] stream CUDA stream to use, default 0 */ void BuildChunkDictionaries(device_span chunks, - uint32_t *dev_scratch, + uint32_t* dev_scratch, rmm::cuda_stream_view stream) { auto num_chunks = chunks.size(); diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index bf9114949aa..3c62dcf7eea 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -59,8 +59,8 @@ struct frag_init_state_s { }; struct page_enc_state_s { - uint8_t *cur; //!< current output ptr - uint8_t *rle_out; //!< current RLE write ptr + uint8_t* cur; //!< current output ptr + uint8_t* rle_out; //!< current RLE write ptr uint32_t rle_run; //!< current RLE run uint32_t run_val; //!< current RLE run value uint32_t rle_pos; //!< RLE encoder positions @@ -81,9 +81,9 @@ struct page_enc_state_s { /** * @brief Return a 12-bit hash from a byte sequence */ -inline __device__ uint32_t hash_string(const string_view &val) +inline __device__ uint32_t hash_string(const string_view& val) { - char const *ptr = val.data(); + char const* ptr = val.data(); uint32_t len = val.size_bytes(); if (len != 0) { return (ptr[0] + (ptr[len - 1] << 5) + (len << 10)) & ((1 << init_hash_bits) - 1); @@ -130,7 +130,7 @@ __global__ void __launch_bounds__(block_size) typename block_scan::TempStorage scan_storage; } temp_storage; - frag_init_state_s *const s = &state_g; + frag_init_state_s* const s = &state_g; uint32_t t = threadIdx.x; uint32_t start_row, dtype_len, dtype_len_in, dtype; @@ -190,9 +190,11 @@ __global__ void __launch_bounds__(block_size) s->frag.num_values = s->frag.num_rows; } } - dtype = s->col.physical_type; - dtype_len = - (dtype == INT96) ? 12 : (dtype == INT64 || dtype == DOUBLE) ? 8 : (dtype == BOOLEAN) ? 1 : 4; + dtype = s->col.physical_type; + dtype_len = (dtype == INT96) ? 12 + : (dtype == INT64 || dtype == DOUBLE) ? 8 + : (dtype == BOOLEAN) ? 1 + : 4; if (dtype == INT32) { dtype_len_in = GetDtypeLogicalLen(s->col.leaf_column); } else if (dtype == INT96) { @@ -224,11 +226,10 @@ __global__ void __launch_bounds__(block_size) } else if (dtype_len_in == 8) { hash = uint64_init_hash(s->col.leaf_column->element(val_idx)); } else { - hash = uint32_init_hash((dtype_len_in == 4) - ? s->col.leaf_column->element(val_idx) - : (dtype_len_in == 2) - ? s->col.leaf_column->element(val_idx) - : s->col.leaf_column->element(val_idx)); + hash = + uint32_init_hash((dtype_len_in == 4) ? s->col.leaf_column->element(val_idx) + : (dtype_len_in == 2) ? s->col.leaf_column->element(val_idx) + : s->col.leaf_column->element(val_idx)); } } } else { @@ -246,7 +247,7 @@ __global__ void __launch_bounds__(block_size) } __syncthreads(); if (is_valid && dtype != BOOLEAN) { - uint32_t *dict_index = s->col.dict_index; + uint32_t* dict_index = s->col.dict_index; if (dict_index) { atomicAdd(&s->map.u32[hash >> 1], (hash & 1) ? 
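// A hedged sketch of the lock-free insert idea behind the atomicCAS loop in
// gpuBuildChunkDictionaries above. This simplified version uses open
// addressing rather than cudf's row-chaining layout, so it illustrates only
// the claim/compare pattern, not the exact data structure:

#include <cstdint>

__device__ bool insert_unique(uint32_t* slots, uint32_t capacity, uint32_t key)
{
  uint32_t h = (key * 2654435761u) & (capacity - 1);  // capacity: power of two
  for (uint32_t probe = 0; probe < capacity; ++probe) {
    uint32_t prev = atomicCAS(&slots[h], 0u, key + 1);  // 0 marks "empty"
    if (prev == 0u) { return true; }        // slot claimed: key was not present
    if (prev == key + 1) { return false; }  // another thread holds this key
    h = (h + 1) & (capacity - 1);           // collision: linear probe
  }
  return false;  // table full; caller would treat the value as non-dictionary
}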
1 << 16 : 1); dict_index[start_value_idx + nz_pos] = @@ -283,7 +284,7 @@ __global__ void __launch_bounds__(block_size) __syncthreads(); // Put the indices back in hash order if (s->col.dict_index) { - uint32_t *dict_index = s->col.dict_index + start_row; + uint32_t* dict_index = s->col.dict_index + start_row; uint32_t nnz = s->frag.non_nulls; for (uint32_t i = 0; i < nnz; i += block_size) { uint32_t pos = 0, hash = 0, pos_old, pos_new, sh, colliding_row, val = 0; @@ -393,7 +394,7 @@ __global__ void __launch_bounds__(128) uint32_t frag_id = blockIdx.y * 4 + (threadIdx.x >> 5); uint32_t column_id = blockIdx.x; auto num_fragments_per_column = fragments.size().second; - statistics_group *const g = &group_g[threadIdx.x >> 5]; + statistics_group* const g = &group_g[threadIdx.x >> 5]; if (!lane_id && frag_id < num_fragments_per_column) { g->col = &col_desc[column_id]; g->start_row = fragments[column_id][frag_id].start_value_idx; @@ -408,8 +409,8 @@ __global__ void __launch_bounds__(128) gpuInitPages(device_2dspan chunks, device_span pages, device_span col_desc, - statistics_merge_group *page_grstats, - statistics_merge_group *chunk_grstats, + statistics_merge_group* page_grstats, + statistics_merge_group* chunk_grstats, int32_t num_columns) { // TODO: All writing seems to be done by thread 0. Could be replaced by thrust foreach @@ -502,9 +503,9 @@ __global__ void __launch_bounds__(128) fragment_data_size = frag_g.fragment_data_size; } // TODO (dm): this convoluted logic to limit page size needs refactoring - max_page_size = (values_in_page * 2 >= ck_g.num_values) - ? 256 * 1024 - : (values_in_page * 3 >= ck_g.num_values) ? 384 * 1024 : 512 * 1024; + max_page_size = (values_in_page * 2 >= ck_g.num_values) ? 256 * 1024 + : (values_in_page * 3 >= ck_g.num_values) ? 
384 * 1024 + : 512 * 1024; if (num_rows >= ck_g.num_rows || (values_in_page > 0 && (page_size + fragment_data_size > max_page_size || @@ -632,7 +633,7 @@ static __device__ __constant__ uint32_t kRleRunMask[16] = { /** * @brief Variable-length encode an integer */ -inline __device__ uint8_t *VlqEncode(uint8_t *p, uint32_t v) +inline __device__ uint8_t* VlqEncode(uint8_t* p, uint32_t v) { while (v > 0x7f) { *p++ = (v | 0x80); @@ -646,7 +647,7 @@ inline __device__ uint8_t *VlqEncode(uint8_t *p, uint32_t v) * @brief Pack literal values in output bitstream (1,2,4,8,12 or 16 bits per value) */ inline __device__ void PackLiterals( - uint8_t *dst, uint32_t v, uint32_t count, uint32_t w, uint32_t t) + uint8_t* dst, uint32_t v, uint32_t count, uint32_t w, uint32_t t) { if (w == 1 || w == 2 || w == 4 || w == 8 || w == 12 || w == 16) { if (t <= (count | 0x1f)) { @@ -713,7 +714,7 @@ inline __device__ void PackLiterals( // Copy scratch data to final destination auto available_bytes = (count * w + 7) / 8; - auto scratch_bytes = reinterpret_cast(&scratch[0]); + auto scratch_bytes = reinterpret_cast(&scratch[0]); if (t < available_bytes) { dst[t] = scratch_bytes[t]; } if (t + 128 < available_bytes) { dst[t + 128] = scratch_bytes[t + 128]; } __syncthreads(); @@ -730,7 +731,7 @@ inline __device__ void PackLiterals( * @param[in] t thread id (0..127) */ static __device__ void RleEncode( - page_enc_state_s *s, uint32_t numvals, uint32_t nbits, uint32_t flush, uint32_t t) + page_enc_state_s* s, uint32_t numvals, uint32_t nbits, uint32_t flush, uint32_t t) { uint32_t rle_pos = s->rle_pos; uint32_t rle_run = s->rle_run; @@ -759,7 +760,7 @@ static __device__ void RleEncode( if (rle_rpt_count < max_rpt_count || (flush && rle_pos == numvals)) { if (t == 0) { uint32_t const run_val = s->run_val; - uint8_t *dst = VlqEncode(s->rle_out, rle_run); + uint8_t* dst = VlqEncode(s->rle_out, rle_run); *dst++ = run_val; if (nbits > 8) { *dst++ = run_val >> 8; } s->rle_out = dst; @@ -823,7 +824,7 @@ static __device__ void RleEncode( rle_rpt_count = 0; // Defer repeat run } if (lit_div8 != 0) { - uint8_t *dst = s->rle_out + 1 + (rle_run >> 1) * nbits; + uint8_t* dst = s->rle_out + 1 + (rle_run >> 1) * nbits; PackLiterals(dst, (rle_pos + t < numvals) ? 
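// A hedged host-side sketch of the unsigned LEB128 ("VLQ") scheme VlqEncode
// above implements: 7 payload bits per byte, high bit set on every byte except
// the last. The matching decoder is included for the round trip:

#include <cstdint>

uint8_t* vlq_encode(uint8_t* p, uint32_t v)
{
  while (v > 0x7f) {
    *p++ = static_cast<uint8_t>(v | 0x80);  // more bytes follow
    v >>= 7;
  }
  *p++ = static_cast<uint8_t>(v);  // final byte: high bit clear
  return p;
}

uint32_t vlq_decode(uint8_t const*& p)
{
  uint32_t v = 0, shift = 0, c;
  do {
    c = *p++;
    v |= (c & 0x7f) << shift;
    shift += 7;
  } while (c & 0x80);
  return v;
}

// e.g. 300 (0x12C) encodes as {0xAC, 0x02}.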
v0 : 0, lit_div8 * 8, nbits, t); rle_run = (rle_run + lit_div8 * 2) | 1; rle_pos = min(rle_pos + lit_div8 * 8, numvals); @@ -833,7 +834,7 @@ static __device__ void RleEncode( __syncthreads(); // Complete literal run if (!t) { - uint8_t *dst = s->rle_out; + uint8_t* dst = s->rle_out; dst[0] = rle_run; // At most 0x7f dst += 1 + nbits * (rle_run >> 1); s->rle_out = dst; @@ -868,13 +869,13 @@ static __device__ void RleEncode( * @param[in] flush nonzero if last batch in block * @param[in] t thread id (0..127) */ -static __device__ void PlainBoolEncode(page_enc_state_s *s, +static __device__ void PlainBoolEncode(page_enc_state_s* s, uint32_t numvals, uint32_t flush, uint32_t t) { uint32_t rle_pos = s->rle_pos; - uint8_t *dst = s->rle_out; + uint8_t* dst = s->rle_out; while (rle_pos < numvals) { uint32_t pos = rle_pos + t; @@ -935,7 +936,7 @@ __global__ void __launch_bounds__(128, 8) using block_scan = cub::BlockScan; __shared__ typename block_scan::TempStorage temp_storage; - page_enc_state_s *const s = &state_g; + page_enc_state_s* const s = &state_g; uint32_t t = threadIdx.x; uint32_t dtype, dtype_len_in, dtype_len_out; int32_t dict_bits; @@ -1002,8 +1003,8 @@ __global__ void __launch_bounds__(128, 8) __syncthreads(); } if (t < 32) { - uint8_t *cur = s->cur; - uint8_t *rle_out = s->rle_out; + uint8_t* cur = s->cur; + uint8_t* rle_out = s->rle_out; if (t < 4) { uint32_t rle_bytes = (uint32_t)(rle_out - cur) - 4; cur[t] = rle_bytes >> (t * 8); @@ -1015,7 +1016,7 @@ __global__ void __launch_bounds__(128, 8) } else if (s->page.page_type != PageType::DICTIONARY_PAGE && s->col.num_rep_level_bits() != 0 // This means there ARE repetition levels (has list) ) { - auto encode_levels = [&](uint8_t const *lvl_val_data, uint32_t nbits) { + auto encode_levels = [&](uint8_t const* lvl_val_data, uint32_t nbits) { // For list types, the repetition and definition levels are pre-calculated. We just need to // encode and write them now. if (!t) { @@ -1040,8 +1041,8 @@ __global__ void __launch_bounds__(128, 8) __syncthreads(); } if (t < 32) { - uint8_t *cur = s->cur; - uint8_t *rle_out = s->rle_out; + uint8_t* cur = s->cur; + uint8_t* rle_out = s->rle_out; if (t < 4) { uint32_t rle_bytes = (uint32_t)(rle_out - cur) - 4; cur[t] = rle_bytes >> (t * 8); @@ -1056,9 +1057,11 @@ __global__ void __launch_bounds__(128, 8) } // Encode data values __syncthreads(); - dtype = s->col.physical_type; - dtype_len_out = - (dtype == INT96) ? 12 : (dtype == INT64 || dtype == DOUBLE) ? 8 : (dtype == BOOLEAN) ? 1 : 4; + dtype = s->col.physical_type; + dtype_len_out = (dtype == INT96) ? 12 + : (dtype == INT64 || dtype == DOUBLE) ? 8 + : (dtype == BOOLEAN) ? 1 + : 4; if (dtype == INT32) { dtype_len_in = GetDtypeLogicalLen(s->col.leaf_column); } else if (dtype == INT96) { @@ -1068,7 +1071,7 @@ __global__ void __launch_bounds__(128, 8) } dict_bits = (dtype == BOOLEAN) ? 
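// A hedged sketch of the run header layout RleEncode above emits, following
// the Parquet RLE/bit-packing hybrid format (the kernel's warp-parallel
// batching is not reproduced here). A repeated run is varint(count << 1)
// followed by the value in ceil(nbits / 8) bytes; a literal run would be
// varint((groups_of_8 << 1) | 1) followed by bit-packed values:

#include <cstdint>

uint8_t* encode_repeated_run(uint8_t* p, uint32_t count, uint32_t value, uint32_t nbits)
{
  uint32_t header = count << 1;  // LSB 0 marks a repeated (RLE) run
  while (header > 0x7f) {
    *p++ = static_cast<uint8_t>(header | 0x80);
    header >>= 7;
  }
  *p++ = static_cast<uint8_t>(header);
  for (uint32_t b = 0; b < (nbits + 7) / 8; ++b) {
    *p++ = static_cast<uint8_t>(value >> (b * 8));  // little-endian value bytes
  }
  return p;
}

// e.g. 10 copies of value 3 at nbits=2: header byte 0x14, then one byte 0x03.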
1 : (s->page.dict_bits_plus1 - 1); if (t == 0) { - uint8_t *dst = s->cur; + uint8_t* dst = s->cur; s->rle_run = 0; s->rle_pos = 0; s->rle_numvals = 0; @@ -1138,7 +1141,7 @@ __global__ void __launch_bounds__(128, 8) __syncthreads(); } else { // Non-dictionary encoding - uint8_t *dst = s->cur; + uint8_t* dst = s->cur; if (is_valid) { len = dtype_len_out; @@ -1250,7 +1253,7 @@ __global__ void __launch_bounds__(128, 8) } } if (t == 0) { - uint8_t *base = s->page.page_data + s->page.max_hdr_size; + uint8_t* base = s->page.page_data + s->page.max_hdr_size; uint32_t actual_data_size = static_cast(s->cur - base); uint32_t compressed_bfr_size = GetMaxCompressedBfrSize(actual_data_size); s->page.max_data_size = actual_data_size; @@ -1298,7 +1301,7 @@ __global__ void __launch_bounds__(128) gpuDecideCompression(device_span 0x7f) { *p++ = v | 0x80; @@ -1339,7 +1342,7 @@ inline __device__ uint8_t *cpw_put_uint32(uint8_t *p, uint32_t v) return p; } -inline __device__ uint8_t *cpw_put_uint64(uint8_t *p, uint64_t v) +inline __device__ uint8_t* cpw_put_uint64(uint8_t* p, uint64_t v) { while (v > 0x7f) { *p++ = v | 0x80; @@ -1349,19 +1352,19 @@ inline __device__ uint8_t *cpw_put_uint64(uint8_t *p, uint64_t v) return p; } -inline __device__ uint8_t *cpw_put_int32(uint8_t *p, int32_t v) +inline __device__ uint8_t* cpw_put_int32(uint8_t* p, int32_t v) { int32_t s = (v < 0); return cpw_put_uint32(p, (v ^ -s) * 2 + s); } -inline __device__ uint8_t *cpw_put_int64(uint8_t *p, int64_t v) +inline __device__ uint8_t* cpw_put_int64(uint8_t* p, int64_t v) { int64_t s = (v < 0); return cpw_put_uint64(p, (v ^ -s) * 2 + s); } -inline __device__ uint8_t *cpw_put_fldh(uint8_t *p, int f, int cur, int t) +inline __device__ uint8_t* cpw_put_fldh(uint8_t* p, int f, int cur, int t) { if (f > cur && f <= cur + 15) { *p++ = ((f - cur) << 4) | t; @@ -1373,11 +1376,11 @@ inline __device__ uint8_t *cpw_put_fldh(uint8_t *p, int f, int cur, int t) } class header_encoder { - uint8_t *current_header_ptr; + uint8_t* current_header_ptr; int current_field_index; public: - inline __device__ header_encoder(uint8_t *header_start) + inline __device__ header_encoder(uint8_t* header_start) : current_header_ptr(header_start), current_field_index(0) { } @@ -1411,7 +1414,7 @@ class header_encoder { current_field_index = field; } - inline __device__ void field_binary(int field, const void *value, uint32_t length) + inline __device__ void field_binary(int field, const void* value, uint32_t length) { current_header_ptr = cpw_put_fldh(current_header_ptr, field, current_field_index, ST_FLD_BINARY); @@ -1421,21 +1424,21 @@ class header_encoder { current_field_index = field; } - inline __device__ void end(uint8_t **header_end, bool termination_flag = true) + inline __device__ void end(uint8_t** header_end, bool termination_flag = true) { if (termination_flag == false) { *current_header_ptr++ = 0; } *header_end = current_header_ptr; } - inline __device__ uint8_t *get_ptr(void) { return current_header_ptr; } + inline __device__ uint8_t* get_ptr(void) { return current_header_ptr; } - inline __device__ void set_ptr(uint8_t *ptr) { current_header_ptr = ptr; } + inline __device__ void set_ptr(uint8_t* ptr) { current_header_ptr = ptr; } }; -__device__ uint8_t *EncodeStatistics(uint8_t *start, - const statistics_chunk *s, +__device__ uint8_t* EncodeStatistics(uint8_t* start, + const statistics_chunk* s, uint8_t dtype, - float *fp_scratch) + float* fp_scratch) { uint8_t *end, dtype_len; switch (dtype) { @@ -1488,7 +1491,7 @@ __global__ void __launch_bounds__(128) 
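// A hedged sketch of the zigzag mapping cpw_put_int32/cpw_put_int64 above rely
// on: signed values are folded so small magnitudes become small unsigned
// varints (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...):

#include <cstdint>

uint32_t zigzag_encode(int32_t v)
{
  // Arithmetic right shift assumed for the sign smear, as on typical targets.
  return (static_cast<uint32_t>(v) << 1) ^ static_cast<uint32_t>(v >> 31);
}

int32_t zigzag_decode(uint32_t u)
{
  return static_cast<int32_t>(u >> 1) ^ -static_cast<int32_t>(u & 1);
}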
gpuEncodePageHeaders(device_span pages, device_span comp_stat, device_span page_stats, - const statistics_chunk *chunk_stats) + const statistics_chunk* chunk_stats) { // When this whole kernel becomes single thread, the following variables need not be __shared__ __shared__ __align__(8) parquet_column_device_view col_g; @@ -1579,7 +1582,7 @@ __global__ void __launch_bounds__(1024) uint32_t t = threadIdx.x; uint8_t *dst, *dst_base; - const EncPage *first_page; + const EncPage* first_page; uint32_t num_pages, uncompressed_size; if (t == 0) ck_g = chunks[blockIdx.x]; @@ -1592,7 +1595,7 @@ __global__ void __launch_bounds__(1024) dst_base = dst; uncompressed_size = ck_g.bfr_size; for (uint32_t page = 0; page < num_pages; page++) { - const uint8_t *src; + const uint8_t* src; uint32_t hdr_len, data_len; if (t == 0) { page_g = first_page[page]; } @@ -1625,8 +1628,8 @@ __global__ void __launch_bounds__(1024) * */ struct def_level_fn { - column_device_view const *parent_col; - uint8_t const *d_nullability; + column_device_view const* parent_col; + uint8_t const* d_nullability; uint8_t sub_level_start; uint8_t curr_def_level; @@ -1757,12 +1760,14 @@ struct def_level_fn { */ dremel_data get_dremel_data(column_view h_col, // TODO(cp): use device_span once it is converted to a single hd_vec - rmm::device_uvector const &d_nullability, - std::vector const &nullability, + rmm::device_uvector const& d_nullability, + std::vector const& nullability, rmm::cuda_stream_view stream) { auto get_list_level = [](column_view col) { - while (col.type().id() == type_id::STRUCT) { col = col.child(0); } + while (col.type().id() == type_id::STRUCT) { + col = col.child(0); + } return col; }; @@ -1832,7 +1837,7 @@ dremel_data get_dremel_data(column_view h_col, } std::unique_ptr device_view_owners; - column_device_view *d_nesting_levels; + column_device_view* d_nesting_levels; std::tie(device_view_owners, d_nesting_levels) = contiguous_copy_column_device_views(nesting_levels, stream); @@ -2147,8 +2152,8 @@ void InitEncoderPages(device_2dspan chunks, device_span pages, device_span col_desc, int32_t num_columns, - statistics_merge_group *page_grstats, - statistics_merge_group *chunk_grstats, + statistics_merge_group* page_grstats, + statistics_merge_group* chunk_grstats, rmm::cuda_stream_view stream) { auto num_rowgroups = chunks.size().first; @@ -2199,7 +2204,7 @@ void DecideCompression(device_span chunks, rmm::cuda_stream_view void EncodePageHeaders(device_span pages, device_span comp_stat, device_span page_stats, - const statistics_chunk *chunk_stats, + const statistics_chunk* chunk_stats, rmm::cuda_stream_view stream) { // TODO: single thread task. No need for 128 threads/block. Earlier it used to employ rest of the diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index bc10fd92566..a5536775116 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -45,9 +45,9 @@ static const __device__ __constant__ uint8_t g_list2struct[16] = {0, ST_FLD_LIST}; struct byte_stream_s { - const uint8_t *cur; - const uint8_t *end; - const uint8_t *base; + const uint8_t* cur; + const uint8_t* end; + const uint8_t* base; // Parsed symbols PageType page_type; PageInfo page; @@ -61,12 +61,12 @@ struct byte_stream_s { * * @return Current byte pointed to by the byte stream */ -inline __device__ unsigned int getb(byte_stream_s *bs) +inline __device__ unsigned int getb(byte_stream_s* bs) { return (bs->cur < bs->end) ? 
*bs->cur++ : 0; } -inline __device__ void skip_bytes(byte_stream_s *bs, size_t bytecnt) +inline __device__ void skip_bytes(byte_stream_s* bs, size_t bytecnt) { bytecnt = min(bytecnt, (size_t)(bs->end - bs->cur)); bs->cur += bytecnt; @@ -83,7 +83,7 @@ inline __device__ void skip_bytes(byte_stream_s *bs, size_t bytecnt) * * @return Decoded 32 bit integer */ -__device__ uint32_t get_u32(byte_stream_s *bs) +__device__ uint32_t get_u32(byte_stream_s* bs) { uint32_t v = 0, l = 0, c; do { @@ -105,13 +105,13 @@ __device__ uint32_t get_u32(byte_stream_s *bs) * * @return Decoded 32 bit integer */ -inline __device__ int32_t get_i32(byte_stream_s *bs) +inline __device__ int32_t get_i32(byte_stream_s* bs) { uint32_t u = get_u32(bs); return (int32_t)((u >> 1u) ^ -(int32_t)(u & 1)); } -__device__ void skip_struct_field(byte_stream_s *bs, int field_type) +__device__ void skip_struct_field(byte_stream_s* bs, int field_type) { int struct_depth = 0; int rep_cnt = 0; @@ -161,11 +161,11 @@ __device__ void skip_struct_field(byte_stream_s *bs, int field_type) */ struct ParquetFieldInt32 { int field; - int32_t &val; + int32_t& val; - __device__ ParquetFieldInt32(int f, int32_t &v) : field(f), val(v) {} + __device__ ParquetFieldInt32(int f, int32_t& v) : field(f), val(v) {} - inline __device__ bool operator()(byte_stream_s *bs, int field_type) + inline __device__ bool operator()(byte_stream_s* bs, int field_type) { val = get_i32(bs); return (field_type != ST_FLD_I32); @@ -180,11 +180,11 @@ struct ParquetFieldInt32 { template struct ParquetFieldEnum { int field; - Enum &val; + Enum& val; - __device__ ParquetFieldEnum(int f, Enum &v) : field(f), val(v) {} + __device__ ParquetFieldEnum(int f, Enum& v) : field(f), val(v) {} - inline __device__ bool operator()(byte_stream_s *bs, int field_type) + inline __device__ bool operator()(byte_stream_s* bs, int field_type) { val = static_cast(get_i32(bs)); return (field_type != ST_FLD_I32); @@ -204,7 +204,7 @@ struct ParquetFieldStruct { __device__ ParquetFieldStruct(int f) : field(f) {} - inline __device__ bool operator()(byte_stream_s *bs, int field_type) + inline __device__ bool operator()(byte_stream_s* bs, int field_type) { return ((field_type != ST_FLD_STRUCT) || !op(bs)); } @@ -226,10 +226,10 @@ struct ParquetFieldStruct { template struct FunctionSwitchImpl { template - static inline __device__ bool run(byte_stream_s *bs, + static inline __device__ bool run(byte_stream_s* bs, int field_type, - const int &field, - thrust::tuple &ops) + const int& field, + thrust::tuple& ops) { if (field == thrust::get(ops).field) { return thrust::get(ops)(bs, field_type); @@ -242,10 +242,10 @@ struct FunctionSwitchImpl { template <> struct FunctionSwitchImpl<0> { template - static inline __device__ bool run(byte_stream_s *bs, + static inline __device__ bool run(byte_stream_s* bs, int field_type, - const int &field, - thrust::tuple &ops) + const int& field, + thrust::tuple& ops) { if (field == thrust::get<0>(ops).field) { return thrust::get<0>(ops)(bs, field_type); @@ -267,7 +267,7 @@ struct FunctionSwitchImpl<0> { * byte stream. Otherwise true is returned. 
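// A hedged sketch of what the FunctionSwitchImpl recursion above achieves: a
// compile-time linear search over a tuple of field handlers. With C++17 fold
// expressions the recursion collapses to a single statement (the handler shape
// is assumed here for illustration; the real ops also consume the byte stream):

#include <tuple>
#include <utility>

template <typename... Ops>
bool dispatch_field(int field, int field_type, std::tuple<Ops...>& ops)
{
  bool bad = false, matched = false;
  auto try_one = [&](auto& op) {
    if (!matched && field == op.field) {
      matched = true;
      bad     = op(field_type);  // handler returns true on a malformed field
    }
  };
  std::apply([&](auto&... op) { (try_one(op), ...); }, ops);
  return matched && !bad;  // unmatched fields are skipped by the caller
}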
*/ template -inline __device__ bool parse_header(thrust::tuple &op, byte_stream_s *bs) +inline __device__ bool parse_header(thrust::tuple& op, byte_stream_s* bs) { constexpr int index = thrust::tuple_size>::value - 1; int field = 0; @@ -284,7 +284,7 @@ inline __device__ bool parse_header(thrust::tuple &op, byte_stream_ } struct gpuParseDataPageHeader { - __device__ bool operator()(byte_stream_s *bs) + __device__ bool operator()(byte_stream_s* bs) { auto op = thrust::make_tuple(ParquetFieldInt32(1, bs->page.num_input_values), ParquetFieldEnum(2, bs->page.encoding), @@ -295,7 +295,7 @@ struct gpuParseDataPageHeader { }; struct gpuParseDictionaryPageHeader { - __device__ bool operator()(byte_stream_s *bs) + __device__ bool operator()(byte_stream_s* bs) { auto op = thrust::make_tuple(ParquetFieldInt32(1, bs->page.num_input_values), ParquetFieldEnum(2, bs->page.encoding)); @@ -304,7 +304,7 @@ struct gpuParseDictionaryPageHeader { }; struct gpuParseDataPageHeaderV2 { - __device__ bool operator()(byte_stream_s *bs) + __device__ bool operator()(byte_stream_s* bs) { auto op = thrust::make_tuple(ParquetFieldInt32(1, bs->page.num_input_values), ParquetFieldInt32(3, bs->page.num_rows), @@ -316,7 +316,7 @@ struct gpuParseDataPageHeaderV2 { }; struct gpuParsePageHeader { - __device__ bool operator()(byte_stream_s *bs) + __device__ bool operator()(byte_stream_s* bs) { auto op = thrust::make_tuple(ParquetFieldEnum(1, bs->page_type), ParquetFieldInt32(2, bs->page.uncompressed_page_size), @@ -336,14 +336,14 @@ struct gpuParsePageHeader { */ // blockDim {128,1,1} extern "C" __global__ void __launch_bounds__(128) - gpuDecodePageHeaders(ColumnChunkDesc *chunks, int32_t num_chunks) + gpuDecodePageHeaders(ColumnChunkDesc* chunks, int32_t num_chunks) { gpuParsePageHeader parse_page_header; __shared__ byte_stream_s bs_g[4]; int lane_id = threadIdx.x % 32; int chunk = (blockIdx.x * 4) + (threadIdx.x / 32); - byte_stream_s *const bs = &bs_g[threadIdx.x / 32]; + byte_stream_s* const bs = &bs_g[threadIdx.x / 32]; if (chunk < num_chunks and lane_id == 0) bs->ck = chunks[chunk]; __syncthreads(); @@ -354,7 +354,7 @@ extern "C" __global__ void __launch_bounds__(128) uint32_t dictionary_page_count = 0; int32_t max_num_pages; int32_t num_dict_pages = bs->ck.num_dict_pages; - PageInfo *page_info; + PageInfo* page_info; if (!lane_id) { bs->base = bs->cur = bs->ck.compressed_data; @@ -402,7 +402,7 @@ extern "C" __global__ void __launch_bounds__(128) break; default: index_out = -1; break; } - bs->page.page_data = const_cast(bs->cur); + bs->page.page_data = const_cast(bs->cur); bs->cur += bs->page.compressed_page_size; } else { bs->cur = bs->end; @@ -434,21 +434,21 @@ extern "C" __global__ void __launch_bounds__(128) */ // blockDim {128,1,1} extern "C" __global__ void __launch_bounds__(128) - gpuBuildStringDictionaryIndex(ColumnChunkDesc *chunks, int32_t num_chunks) + gpuBuildStringDictionaryIndex(ColumnChunkDesc* chunks, int32_t num_chunks) { __shared__ ColumnChunkDesc chunk_g[4]; int lane_id = threadIdx.x % 32; int chunk = (blockIdx.x * 4) + (threadIdx.x / 32); - ColumnChunkDesc *const ck = &chunk_g[threadIdx.x / 32]; + ColumnChunkDesc* const ck = &chunk_g[threadIdx.x / 32]; if (chunk < num_chunks and lane_id == 0) *ck = chunks[chunk]; __syncthreads(); if (chunk >= num_chunks) { return; } if (!lane_id && ck->num_dict_pages > 0 && ck->str_dict_index) { // Data type to describe a string - string_index_pair *dict_index = ck->str_dict_index; - const uint8_t *dict = ck->page_info[0].page_data; + string_index_pair* dict_index = 
ck->str_dict_index; + const uint8_t* dict = ck->page_info[0].page_data; int dict_size = ck->page_info[0].uncompressed_page_size; int num_entries = ck->page_info[0].num_input_values; int pos = 0, cur = 0; @@ -464,13 +464,13 @@ extern "C" __global__ void __launch_bounds__(128) } } // TODO: Could store 8 entries in shared mem, then do a single warp-wide store - dict_index[i].first = reinterpret_cast(dict + pos + 4); + dict_index[i].first = reinterpret_cast(dict + pos + 4); dict_index[i].second = len; } } } -void __host__ DecodePageHeaders(ColumnChunkDesc *chunks, +void __host__ DecodePageHeaders(ColumnChunkDesc* chunks, int32_t num_chunks, rmm::cuda_stream_view stream) { @@ -479,7 +479,7 @@ void __host__ DecodePageHeaders(ColumnChunkDesc *chunks, gpuDecodePageHeaders<<>>(chunks, num_chunks); } -void __host__ BuildStringDictionaryIndex(ColumnChunkDesc *chunks, +void __host__ BuildStringDictionaryIndex(ColumnChunkDesc* chunks, int32_t num_chunks, rmm::cuda_stream_view stream) { diff --git a/cpp/src/io/parquet/parquet.cpp b/cpp/src/io/parquet/parquet.cpp index 2a1bd0d5a18..febfdf8b06a 100644 --- a/cpp/src/io/parquet/parquet.cpp +++ b/cpp/src/io/parquet/parquet.cpp @@ -63,7 +63,8 @@ bool CompactProtocolReader::skip_struct_field(int t, int depth) if (n == 0xf) n = get_i32(); t = g_list2struct[c & 0xf]; if (depth > 10) return false; - for (int32_t i = 0; i < n; i++) skip_struct_field(t, depth + 1); + for (int32_t i = 0; i < n; i++) + skip_struct_field(t, depth + 1); } break; case ST_FLD_STRUCT: for (;;) { @@ -84,10 +85,10 @@ bool CompactProtocolReader::skip_struct_field(int t, int depth) template struct FunctionSwitchImpl { template - static inline bool run(CompactProtocolReader *cpr, + static inline bool run(CompactProtocolReader* cpr, int field_type, - const int &field, - std::tuple &ops) + const int& field, + std::tuple& ops) { if (field == std::get(ops).field()) { return std::get(ops)(cpr, field_type); @@ -100,10 +101,10 @@ struct FunctionSwitchImpl { template <> struct FunctionSwitchImpl<0> { template - static inline bool run(CompactProtocolReader *cpr, + static inline bool run(CompactProtocolReader* cpr, int field_type, - const int &field, - std::tuple &ops) + const int& field, + std::tuple& ops) { if (field == std::get<0>(ops).field()) { return std::get<0>(ops)(cpr, field_type); @@ -115,7 +116,7 @@ struct FunctionSwitchImpl<0> { }; template -inline bool function_builder(CompactProtocolReader *cpr, std::tuple &op) +inline bool function_builder(CompactProtocolReader* cpr, std::tuple& op) { constexpr int index = std::tuple_size>::value - 1; int field = 0; @@ -131,7 +132,7 @@ inline bool function_builder(CompactProtocolReader *cpr, std::tuple return true; } -bool CompactProtocolReader::read(FileMetaData *f) +bool CompactProtocolReader::read(FileMetaData* f) { auto op = std::make_tuple(ParquetFieldInt32(1, f->version), ParquetFieldStructList(2, f->schema), @@ -142,7 +143,7 @@ bool CompactProtocolReader::read(FileMetaData *f) return function_builder(this, op); } -bool CompactProtocolReader::read(SchemaElement *s) +bool CompactProtocolReader::read(SchemaElement* s) { auto op = std::make_tuple(ParquetFieldEnum(1, s->type), ParquetFieldInt32(2, s->type_length), @@ -156,7 +157,7 @@ bool CompactProtocolReader::read(SchemaElement *s) return function_builder(this, op); } -bool CompactProtocolReader::read(LogicalType *l) +bool CompactProtocolReader::read(LogicalType* l) { auto op = std::make_tuple(ParquetFieldUnion(1, l->isset.STRING, l->STRING), @@ -174,40 +175,40 @@ bool 
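// A hedged sketch of the header byte these read() routines consume (the
// inverse of cpw_put_fldh): in the Thrift compact protocol the low nibble is
// the ST_FLD_* type (0 terminates the struct) and the high nibble is a 1..15
// delta from the previous field id; a zero delta means the id follows as a
// zigzag varint:

#include <cstdint>

uint8_t const* read_field_header(uint8_t const* p, int& field_id, int& field_type)
{
  uint8_t c  = *p++;
  field_type = c & 0xf;
  if (field_type == 0) { return p; }  // end of struct
  if (int delta = c >> 4; delta != 0) {
    field_id += delta;  // short form: delta-encoded id
  } else {
    uint32_t u = 0, shift = 0, b;  // long form: absolute id, zigzag varint
    do {
      b = *p++;
      u |= (b & 0x7f) << shift;
      shift += 7;
    } while (b & 0x80);
    field_id = static_cast<int>(u >> 1) ^ -static_cast<int>(u & 1);
  }
  return p;
}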
CompactProtocolReader::read(LogicalType *l) return function_builder(this, op); } -bool CompactProtocolReader::read(DecimalType *d) +bool CompactProtocolReader::read(DecimalType* d) { auto op = std::make_tuple(ParquetFieldInt32(1, d->scale), ParquetFieldInt32(2, d->precision)); return function_builder(this, op); } -bool CompactProtocolReader::read(TimeType *t) +bool CompactProtocolReader::read(TimeType* t) { auto op = std::make_tuple(ParquetFieldBool(1, t->isAdjustedToUTC), ParquetFieldStruct(2, t->unit)); return function_builder(this, op); } -bool CompactProtocolReader::read(TimestampType *t) +bool CompactProtocolReader::read(TimestampType* t) { auto op = std::make_tuple(ParquetFieldBool(1, t->isAdjustedToUTC), ParquetFieldStruct(2, t->unit)); return function_builder(this, op); } -bool CompactProtocolReader::read(TimeUnit *u) +bool CompactProtocolReader::read(TimeUnit* u) { auto op = std::make_tuple(ParquetFieldUnion(1, u->isset.MILLIS, u->MILLIS), ParquetFieldUnion(2, u->isset.MICROS, u->MICROS)); return function_builder(this, op); } -bool CompactProtocolReader::read(IntType *i) +bool CompactProtocolReader::read(IntType* i) { auto op = std::make_tuple(ParquetFieldInt8(1, i->bitWidth), ParquetFieldBool(2, i->isSigned)); return function_builder(this, op); } -bool CompactProtocolReader::read(RowGroup *r) +bool CompactProtocolReader::read(RowGroup* r) { auto op = std::make_tuple(ParquetFieldStructList(1, r->columns), ParquetFieldInt64(2, r->total_byte_size), @@ -215,7 +216,7 @@ bool CompactProtocolReader::read(RowGroup *r) return function_builder(this, op); } -bool CompactProtocolReader::read(ColumnChunk *c) +bool CompactProtocolReader::read(ColumnChunk* c) { auto op = std::make_tuple(ParquetFieldString(1, c->file_path), ParquetFieldInt64(2, c->file_offset), @@ -227,7 +228,7 @@ bool CompactProtocolReader::read(ColumnChunk *c) return function_builder(this, op); } -bool CompactProtocolReader::read(ColumnChunkMetaData *c) +bool CompactProtocolReader::read(ColumnChunkMetaData* c) { auto op = std::make_tuple(ParquetFieldEnum(1, c->type), ParquetFieldEnumList(2, c->encodings), @@ -243,7 +244,7 @@ bool CompactProtocolReader::read(ColumnChunkMetaData *c) return function_builder(this, op); } -bool CompactProtocolReader::read(PageHeader *p) +bool CompactProtocolReader::read(PageHeader* p) { auto op = std::make_tuple(ParquetFieldEnum(1, p->type), ParquetFieldInt32(2, p->uncompressed_page_size), @@ -253,7 +254,7 @@ bool CompactProtocolReader::read(PageHeader *p) return function_builder(this, op); } -bool CompactProtocolReader::read(DataPageHeader *d) +bool CompactProtocolReader::read(DataPageHeader* d) { auto op = std::make_tuple(ParquetFieldInt32(1, d->num_values), ParquetFieldEnum(2, d->encoding), @@ -262,14 +263,14 @@ bool CompactProtocolReader::read(DataPageHeader *d) return function_builder(this, op); } -bool CompactProtocolReader::read(DictionaryPageHeader *d) +bool CompactProtocolReader::read(DictionaryPageHeader* d) { auto op = std::make_tuple(ParquetFieldInt32(1, d->num_values), ParquetFieldEnum(2, d->encoding)); return function_builder(this, op); } -bool CompactProtocolReader::read(KeyValue *k) +bool CompactProtocolReader::read(KeyValue* k) { auto op = std::make_tuple(ParquetFieldString(1, k->key), ParquetFieldString(2, k->value)); return function_builder(this, op); @@ -282,7 +283,7 @@ bool CompactProtocolReader::read(KeyValue *k) * * @return True if schema constructed completely, false otherwise */ -bool CompactProtocolReader::InitSchema(FileMetaData *md) +bool 
CompactProtocolReader::InitSchema(FileMetaData* md) { if (static_cast(WalkSchema(md)) != md->schema.size()) return false; @@ -292,14 +293,14 @@ bool CompactProtocolReader::InitSchema(FileMetaData *md) * schema_idx of each column of each row to it corresonding row_group. This is effectively * mapping the columns to the schema. */ - for (auto &row_group : md->row_groups) { + for (auto& row_group : md->row_groups) { int current_schema_index = 0; - for (auto &column : row_group.columns) { + for (auto& column : row_group.columns) { int parent = 0; // root of schema - for (auto const &path : column.meta_data.path_in_schema) { + for (auto const& path : column.meta_data.path_in_schema) { auto const it = [&] { // find_if starting at (current_schema_index + 1) and then wrapping - auto schema = [&](auto const &e) { return e.parent_idx == parent && e.name == path; }; + auto schema = [&](auto const& e) { return e.parent_idx == parent && e.name == path; }; auto mid = md->schema.cbegin() + current_schema_index + 1; auto it = std::find_if(mid, md->schema.cend(), schema); if (it != md->schema.cend()) return it; @@ -328,10 +329,10 @@ bool CompactProtocolReader::InitSchema(FileMetaData *md) * @return The node index that was populated */ int CompactProtocolReader::WalkSchema( - FileMetaData *md, int idx, int parent_idx, int max_def_level, int max_rep_level) + FileMetaData* md, int idx, int parent_idx, int max_def_level, int max_rep_level) { if (idx >= 0 && (size_t)idx < md->schema.size()) { - SchemaElement *e = &md->schema[idx]; + SchemaElement* e = &md->schema[idx]; if (e->repetition_type == OPTIONAL) { ++max_def_level; } else if (e->repetition_type == REPEATED) { diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index eefff518a9a..391ca61327e 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -166,7 +166,7 @@ struct SchemaElement { int max_repetition_level = 0; int parent_idx = 0; - bool operator==(SchemaElement const &other) const + bool operator==(SchemaElement const& other) const { return type == other.type && converted_type == other.converted_type && type_length == other.type_length && repetition_type == other.repetition_type && @@ -356,8 +356,8 @@ class CompactProtocolReader { static const uint8_t g_list2struct[16]; public: - explicit CompactProtocolReader(const uint8_t *base = nullptr, size_t len = 0) { init(base, len); } - void init(const uint8_t *base, size_t len) + explicit CompactProtocolReader(const uint8_t* base = nullptr, size_t len = 0) { init(base, len); } + void init(const uint8_t* base, size_t len) { m_base = m_cur = base; m_end = base + len; @@ -400,7 +400,7 @@ class CompactProtocolReader { uint64_t u = get_u64(); return (int64_t)((u >> 1u) ^ -(int64_t)(u & 1)); } - int32_t get_listh(uint8_t *el_type) noexcept + int32_t get_listh(uint8_t* el_type) noexcept { uint32_t c = getb(); int32_t sz = c >> 4; @@ -412,40 +412,40 @@ class CompactProtocolReader { public: // Generate Thrift structure parsing routines - bool read(FileMetaData *f); - bool read(SchemaElement *s); - bool read(LogicalType *l); - bool read(DecimalType *d); - bool read(TimeType *t); - bool read(TimeUnit *u); - bool read(TimestampType *t); - bool read(IntType *t); - bool read(RowGroup *r); - bool read(ColumnChunk *c); - bool read(ColumnChunkMetaData *c); - bool read(PageHeader *p); - bool read(DataPageHeader *d); - bool read(DictionaryPageHeader *d); - bool read(KeyValue *k); + bool read(FileMetaData* f); + bool read(SchemaElement* s); + bool read(LogicalType* l); + 
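// A hedged sketch of the wrap-around search InitSchema above uses to match a
// column's path_in_schema against schema nodes: start just past the previous
// match, and wrap to the beginning if nothing is found after it:

#include <algorithm>
#include <cstddef>
#include <vector>

template <typename T, typename Pred>
typename std::vector<T>::const_iterator
circular_find_if(std::vector<T> const& v, std::size_t start, Pred pred)
{
  auto mid = v.cbegin() + start;                 // first candidate to try
  auto it  = std::find_if(mid, v.cend(), pred);  // search the tail...
  if (it != v.cend()) { return it; }
  return std::find_if(v.cbegin(), mid, pred);    // ...then wrap to the head
}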
bool read(DecimalType* d); + bool read(TimeType* t); + bool read(TimeUnit* u); + bool read(TimestampType* t); + bool read(IntType* t); + bool read(RowGroup* r); + bool read(ColumnChunk* c); + bool read(ColumnChunkMetaData* c); + bool read(PageHeader* p); + bool read(DataPageHeader* d); + bool read(DictionaryPageHeader* d); + bool read(KeyValue* k); public: static int NumRequiredBits(uint32_t max_level) noexcept { return 32 - CountLeadingZeros32(max_level); } - bool InitSchema(FileMetaData *md); + bool InitSchema(FileMetaData* md); protected: - int WalkSchema(FileMetaData *md, + int WalkSchema(FileMetaData* md, int idx = 0, int parent_idx = 0, int max_def_level = 0, int max_rep_level = 0); protected: - const uint8_t *m_base = nullptr; - const uint8_t *m_cur = nullptr; - const uint8_t *m_end = nullptr; + const uint8_t* m_base = nullptr; + const uint8_t* m_cur = nullptr; + const uint8_t* m_end = nullptr; friend class ParquetFieldBool; friend class ParquetFieldInt8; @@ -473,12 +473,12 @@ class CompactProtocolReader { */ class ParquetFieldBool { int field_val; - bool &val; + bool& val; public: - ParquetFieldBool(int f, bool &v) : field_val(f), val(v) {} + ParquetFieldBool(int f, bool& v) : field_val(f), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + inline bool operator()(CompactProtocolReader* cpr, int field_type) { return (field_type != ST_FLD_TRUE && field_type != ST_FLD_FALSE) || !(val = (field_type == ST_FLD_TRUE), true); @@ -494,12 +494,12 @@ class ParquetFieldBool { */ class ParquetFieldInt8 { int field_val; - int8_t &val; + int8_t& val; public: - ParquetFieldInt8(int f, int8_t &v) : field_val(f), val(v) {} + ParquetFieldInt8(int f, int8_t& v) : field_val(f), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + inline bool operator()(CompactProtocolReader* cpr, int field_type) { val = cpr->getb(); return (field_type != ST_FLD_BYTE); @@ -515,12 +515,12 @@ class ParquetFieldInt8 { */ class ParquetFieldInt32 { int field_val; - int32_t &val; + int32_t& val; public: - ParquetFieldInt32(int f, int32_t &v) : field_val(f), val(v) {} + ParquetFieldInt32(int f, int32_t& v) : field_val(f), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + inline bool operator()(CompactProtocolReader* cpr, int field_type) { val = cpr->get_i32(); return (field_type != ST_FLD_I32); @@ -536,12 +536,12 @@ class ParquetFieldInt32 { */ class ParquetFieldInt64 { int field_val; - int64_t &val; + int64_t& val; public: - ParquetFieldInt64(int f, int64_t &v) : field_val(f), val(v) {} + ParquetFieldInt64(int f, int64_t& v) : field_val(f), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + inline bool operator()(CompactProtocolReader* cpr, int field_type) { val = cpr->get_i64(); return (field_type < ST_FLD_I16 || field_type > ST_FLD_I64); @@ -559,12 +559,12 @@ class ParquetFieldInt64 { template class ParquetFieldStructListFunctor { int field_val; - std::vector &val; + std::vector& val; public: - ParquetFieldStructListFunctor(int f, std::vector &v) : field_val(f), val(v) {} + ParquetFieldStructListFunctor(int f, std::vector& v) : field_val(f), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + inline bool operator()(CompactProtocolReader* cpr, int field_type) { if (field_type != ST_FLD_LIST) return true; @@ -584,7 +584,7 @@ class ParquetFieldStructListFunctor { }; template -ParquetFieldStructListFunctor ParquetFieldStructList(int f, std::vector &v) +ParquetFieldStructListFunctor 
ParquetFieldStructList(int f, std::vector& v) { return ParquetFieldStructListFunctor(f, v); } @@ -597,17 +597,17 @@ ParquetFieldStructListFunctor ParquetFieldStructList(int f, std::vector &v */ class ParquetFieldString { int field_val; - std::string &val; + std::string& val; public: - ParquetFieldString(int f, std::string &v) : field_val(f), val(v) {} + ParquetFieldString(int f, std::string& v) : field_val(f), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + inline bool operator()(CompactProtocolReader* cpr, int field_type) { if (field_type != ST_FLD_BINARY) return true; uint32_t n = cpr->get_u32(); if (n < (size_t)(cpr->m_end - cpr->m_cur)) { - val.assign((const char *)cpr->m_cur, n); + val.assign((const char*)cpr->m_cur, n); cpr->m_cur += n; return false; } else { @@ -627,12 +627,12 @@ class ParquetFieldString { template class ParquetFieldStructFunctor { int field_val; - T &val; + T& val; public: - ParquetFieldStructFunctor(int f, T &v) : field_val(f), val(v) {} + ParquetFieldStructFunctor(int f, T& v) : field_val(f), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + inline bool operator()(CompactProtocolReader* cpr, int field_type) { return (field_type != ST_FLD_STRUCT || !(cpr->read(&val))); } @@ -641,7 +641,7 @@ class ParquetFieldStructFunctor { }; template -ParquetFieldStructFunctor ParquetFieldStruct(int f, T &v) +ParquetFieldStructFunctor ParquetFieldStruct(int f, T& v) { return ParquetFieldStructFunctor(f, v); } @@ -657,13 +657,13 @@ ParquetFieldStructFunctor ParquetFieldStruct(int f, T &v) template class ParquetFieldUnionFunctor { int field_val; - bool &is_set; - T &val; + bool& is_set; + T& val; public: - ParquetFieldUnionFunctor(int f, bool &b, T &v) : field_val(f), is_set(b), val(v) {} + ParquetFieldUnionFunctor(int f, bool& b, T& v) : field_val(f), is_set(b), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + inline bool operator()(CompactProtocolReader* cpr, int field_type) { if (field_type != ST_FLD_STRUCT) { return true; @@ -679,13 +679,13 @@ class ParquetFieldUnionFunctor { template struct ParquetFieldUnionFunctor { int field_val; - bool &is_set; - T &val; + bool& is_set; + T& val; public: - ParquetFieldUnionFunctor(int f, bool &b, T &v) : field_val(f), is_set(b), val(v) {} + ParquetFieldUnionFunctor(int f, bool& b, T& v) : field_val(f), is_set(b), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + inline bool operator()(CompactProtocolReader* cpr, int field_type) { if (field_type != ST_FLD_STRUCT) { return true; @@ -700,7 +700,7 @@ struct ParquetFieldUnionFunctor { }; template -ParquetFieldUnionFunctor::value> ParquetFieldUnion(int f, bool &b, T &v) +ParquetFieldUnionFunctor::value> ParquetFieldUnion(int f, bool& b, T& v) { return ParquetFieldUnionFunctor::value>(f, b, v); } @@ -713,11 +713,11 @@ ParquetFieldUnionFunctor::value> ParquetFieldUnion(int f, bo template class ParquetFieldEnum { int field_val; - Enum &val; + Enum& val; public: - ParquetFieldEnum(int f, Enum &v) : field_val(f), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + ParquetFieldEnum(int f, Enum& v) : field_val(f), val(v) {} + inline bool operator()(CompactProtocolReader* cpr, int field_type) { val = static_cast(cpr->get_i32()); return (field_type != ST_FLD_I32); @@ -735,11 +735,11 @@ class ParquetFieldEnum { template class ParquetFieldEnumListFunctor { int field_val; - std::vector &val; + std::vector& val; public: - ParquetFieldEnumListFunctor(int 
f, std::vector &v) : field_val(f), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + ParquetFieldEnumListFunctor(int f, std::vector& v) : field_val(f), val(v) {} + inline bool operator()(CompactProtocolReader* cpr, int field_type) { if (field_type != ST_FLD_LIST) return true; int current_byte = cpr->getb(); @@ -747,7 +747,9 @@ class ParquetFieldEnumListFunctor { int n = current_byte >> 4; if (n == 0xf) n = cpr->get_u32(); val.resize(n); - for (int32_t i = 0; i < n; i++) { val[i] = static_cast(cpr->get_i32()); } + for (int32_t i = 0; i < n; i++) { + val[i] = static_cast(cpr->get_i32()); + } return false; } @@ -755,7 +757,7 @@ class ParquetFieldEnumListFunctor { }; template -ParquetFieldEnumListFunctor ParquetFieldEnumList(int field, std::vector &v) +ParquetFieldEnumListFunctor ParquetFieldEnumList(int field, std::vector& v) { return ParquetFieldEnumListFunctor(field, v); } @@ -768,11 +770,11 @@ ParquetFieldEnumListFunctor ParquetFieldEnumList(int field, std::vector &v */ class ParquetFieldStringList { int field_val; - std::vector &val; + std::vector& val; public: - ParquetFieldStringList(int f, std::vector &v) : field_val(f), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + ParquetFieldStringList(int f, std::vector& v) : field_val(f), val(v) {} + inline bool operator()(CompactProtocolReader* cpr, int field_type) { if (field_type != ST_FLD_LIST) return true; int current_byte = cpr->getb(); @@ -783,7 +785,7 @@ class ParquetFieldStringList { for (int32_t i = 0; i < n; i++) { uint32_t l = cpr->get_u32(); if (l < (size_t)(cpr->m_end - cpr->m_cur)) { - val[i].assign((const char *)cpr->m_cur, l); + val[i].assign((const char*)cpr->m_cur, l); cpr->m_cur += l; } else return true; @@ -801,14 +803,14 @@ class ParquetFieldStringList { */ class ParquetFieldStructBlob { int field_val; - std::vector &val; + std::vector& val; public: - ParquetFieldStructBlob(int f, std::vector &v) : field_val(f), val(v) {} - inline bool operator()(CompactProtocolReader *cpr, int field_type) + ParquetFieldStructBlob(int f, std::vector& v) : field_val(f), val(v) {} + inline bool operator()(CompactProtocolReader* cpr, int field_type) { if (field_type != ST_FLD_STRUCT) return true; - const uint8_t *start = cpr->m_cur; + const uint8_t* start = cpr->m_cur; cpr->skip_struct_field(field_type); if (cpr->m_cur > start) { val.assign(start, cpr->m_cur - 1); } return false; diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 1b6bb9ad7ca..abd7ccef523 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -95,15 +95,15 @@ struct PageNestingInfo { int32_t value_count; // total # of values decoded in this page/nesting-level int32_t null_count; // null count int32_t valid_map_offset; // current offset in bits relative to valid_map - uint8_t *data_out; // pointer into output buffer - uint32_t *valid_map; // pointer into output validity buffer + uint8_t* data_out; // pointer into output buffer + uint32_t* valid_map; // pointer into output validity buffer }; /** * @brief Struct describing a particular page of column chunk data */ struct PageInfo { - uint8_t *page_data; // Compressed page data before decompression, or uncompressed data after + uint8_t* page_data; // Compressed page data before decompression, or uncompressed data after // decompression int32_t compressed_page_size; // compressed data size in bytes int32_t uncompressed_page_size; // uncompressed data size in bytes @@ -139,7 +139,7 @@ struct 
PageInfo { // nesting information (input/output) for each page int num_nesting_levels; - PageNestingInfo *nesting; + PageNestingInfo* nesting; }; /** @@ -148,7 +148,7 @@ struct PageInfo { struct ColumnChunkDesc { ColumnChunkDesc() = default; explicit constexpr ColumnChunkDesc(size_t compressed_size_, - uint8_t *compressed_data_, + uint8_t* compressed_data_, size_t num_values_, uint16_t datatype_, uint16_t datatype_length_, @@ -190,7 +190,7 @@ struct ColumnChunkDesc { { } - uint8_t const *compressed_data; // pointer to compressed column chunk data + uint8_t const* compressed_data; // pointer to compressed column chunk data size_t compressed_size; // total compressed data size for this chunk size_t num_values; // total number of values in this column size_t start_row; // starting row of this chunk @@ -204,11 +204,11 @@ struct ColumnChunkDesc { int32_t num_data_pages; // number of data pages int32_t num_dict_pages; // number of dictionary pages int32_t max_num_pages; // size of page_info array - PageInfo *page_info; // output page info for up to num_dict_pages + + PageInfo* page_info; // output page info for up to num_dict_pages + // num_data_pages (dictionary pages first) - string_index_pair *str_dict_index; // index for string dictionary - uint32_t **valid_map_base; // base pointers of valid bit map for this column - void **column_data_base; // base pointers of column data + string_index_pair* str_dict_index; // index for string dictionary + uint32_t** valid_map_base; // base pointers of valid bit map for this column + void** column_data_base; // base pointers of column data int8_t codec; // compressed codec enum int8_t converted_type; // converted type enum int8_t decimal_scale; // decimal scale pow(10, -decimal_scale) @@ -222,21 +222,21 @@ struct ColumnChunkDesc { * @brief Struct describing an encoder column */ struct parquet_column_device_view : stats_column_desc { - uint32_t *dict_index; //!< Dictionary index [row] - uint32_t *dict_data; //!< Dictionary data (unique row indices) + uint32_t* dict_index; //!< Dictionary index [row] + uint32_t* dict_data; //!< Dictionary data (unique row indices) uint8_t physical_type; //!< physical data type uint8_t converted_type; //!< logical data type uint8_t level_bits; //!< bits to encode max definition (lower nibble) & repetition (upper nibble) //!< levels constexpr uint8_t num_def_level_bits() { return level_bits & 0xf; } constexpr uint8_t num_rep_level_bits() { return level_bits >> 4; } - size_type const *const - *nesting_offsets; //!< If column is a nested type, contains offset array of each nesting level + size_type const* const* + nesting_offsets; //!< If column is a nested type, contains offset array of each nesting level - size_type const *level_offsets; //!< Offset array for per-row pre-calculated rep/def level values - uint8_t const *rep_values; //!< Pre-calculated repetition level values - uint8_t const *def_values; //!< Pre-calculated definition level values - uint8_t *nullability; //!< Array of nullability of each nesting level. e.g. nullable[0] is + size_type const* level_offsets; //!< Offset array for per-row pre-calculated rep/def level values + uint8_t const* rep_values; //!< Pre-calculated repetition level values + uint8_t const* def_values; //!< Pre-calculated definition level values + uint8_t* nullability; //!< Array of nullability of each nesting level. e.g. nullable[0] is //!< nullability of parent_column. May be different from col.nullable() in //!< case of chunked writing. 
}; @@ -265,7 +265,7 @@ constexpr size_t kDictScratchSize = (1 << kDictHashBits) * sizeof(uint32_t); /** * @brief Return the byte length of parquet dtypes that are physically represented by INT32 */ -inline uint32_t __device__ GetDtypeLogicalLen(column_device_view *col) +inline uint32_t __device__ GetDtypeLogicalLen(column_device_view* col) { switch (col->type().id()) { case cudf::type_id::INT8: @@ -291,18 +291,18 @@ struct EncPage; * @brief Struct describing an encoder column chunk */ struct EncColumnChunk { - parquet_column_device_view const *col_desc; //!< Column description - PageFragment *fragments; //!< First fragment in chunk - uint8_t *uncompressed_bfr; //!< Uncompressed page data - uint8_t *compressed_bfr; //!< Compressed page data - statistics_chunk const *stats; //!< Fragment statistics + parquet_column_device_view const* col_desc; //!< Column description + PageFragment* fragments; //!< First fragment in chunk + uint8_t* uncompressed_bfr; //!< Uncompressed page data + uint8_t* compressed_bfr; //!< Compressed page data + statistics_chunk const* stats; //!< Fragment statistics uint32_t bfr_size; //!< Uncompressed buffer size uint32_t compressed_size; //!< Compressed buffer size uint32_t start_row; //!< First row of chunk uint32_t num_rows; //!< Number of rows in chunk uint32_t num_values; //!< Number of values in chunk. Different from num_rows for nested types uint32_t first_fragment; //!< First fragment of chunk - EncPage *pages; //!< Ptr to pages that belong to this chunk + EncPage* pages; //!< Ptr to pages that belong to this chunk uint32_t first_page; //!< First page of chunk uint32_t num_pages; //!< Number of pages in chunk uint32_t dictionary_id; //!< Dictionary id for this chunk @@ -318,12 +318,12 @@ struct EncColumnChunk { * @brief Struct describing an encoder data page */ struct EncPage { - uint8_t *page_data; //!< Ptr to uncompressed page - uint8_t *compressed_data; //!< Ptr to compressed page + uint8_t* page_data; //!< Ptr to uncompressed page + uint8_t* compressed_data; //!< Ptr to compressed page uint16_t num_fragments; //!< Number of fragments in page PageType page_type; //!< Page type uint8_t dict_bits_plus1; //!< 0=plain, nonzero:bits to encoding dictionary indices + 1 - EncColumnChunk *chunk; //!< Chunk that this page belongs to + EncColumnChunk* chunk; //!< Chunk that this page belongs to uint32_t chunk_id; //!< Index in chunk array uint32_t hdr_size; //!< Size of page header uint32_t max_hdr_size; //!< Maximum size of page header @@ -333,7 +333,7 @@ struct EncPage { uint32_t num_leaf_values; //!< Values in page. Different from num_rows in case of nested types uint32_t num_values; //!< Number of def/rep level values in page. 
Includes null/empty elements in //!< non-leaf levels - gpu_inflate_status_s *comp_stat; //!< Ptr to compression status + gpu_inflate_status_s* comp_stat; //!< Ptr to compression status }; /** @@ -343,7 +343,7 @@ struct EncPage { * @param[in] num_chunks Number of column chunks * @param[in] stream CUDA stream to use, default 0 */ -void DecodePageHeaders(ColumnChunkDesc *chunks, int32_t num_chunks, rmm::cuda_stream_view stream); +void DecodePageHeaders(ColumnChunkDesc* chunks, int32_t num_chunks, rmm::cuda_stream_view stream); /** * @brief Launches kernel for building the dictionary index for the column @@ -353,7 +353,7 @@ void DecodePageHeaders(ColumnChunkDesc *chunks, int32_t num_chunks, rmm::cuda_st * @param[in] num_chunks Number of column chunks * @param[in] stream CUDA stream to use, default 0 */ -void BuildStringDictionaryIndex(ColumnChunkDesc *chunks, +void BuildStringDictionaryIndex(ColumnChunkDesc* chunks, int32_t num_chunks, rmm::cuda_stream_view stream); @@ -376,14 +376,14 @@ void BuildStringDictionaryIndex(ColumnChunkDesc *chunks, * @param[in] min_rows crop all rows below min_row * @param[in] stream Cuda stream */ -void PreprocessColumnData(hostdevice_vector &pages, - hostdevice_vector const &chunks, - std::vector &input_columns, - std::vector &output_columns, +void PreprocessColumnData(hostdevice_vector& pages, + hostdevice_vector const& chunks, + std::vector& input_columns, + std::vector& output_columns, size_t num_rows, size_t min_row, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr); + rmm::mr::device_memory_resource* mr); /** * @brief Launches kernel for reading the column data stored in the pages @@ -397,8 +397,8 @@ void PreprocessColumnData(hostdevice_vector &pages, * @param[in] min_row Minimum number of rows to read * @param[in] stream CUDA stream to use, default 0 */ -void DecodePageData(hostdevice_vector &pages, - hostdevice_vector const &chunks, +void DecodePageData(hostdevice_vector& pages, + hostdevice_vector const& chunks, size_t num_rows, size_t min_row, rmm::cuda_stream_view stream); @@ -436,8 +436,8 @@ struct dremel_data { * @return A struct containing dremel data */ dremel_data get_dremel_data(column_view h_col, - rmm::device_uvector const &d_nullability, - std::vector const &nullability, + rmm::device_uvector const& d_nullability, + std::vector const& nullability, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** @@ -486,8 +486,8 @@ void InitEncoderPages(cudf::detail::device_2dspan chunks, device_span pages, device_span col_desc, int32_t num_columns, - statistics_merge_group *page_grstats = nullptr, - statistics_merge_group *chunk_grstats = nullptr, + statistics_merge_group* page_grstats = nullptr, + statistics_merge_group* chunk_grstats = nullptr, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** @@ -524,7 +524,7 @@ void DecideCompression(device_span chunks, void EncodePageHeaders(device_span pages, device_span comp_out = {}, device_span page_stats = {}, - const statistics_chunk *chunk_stats = nullptr, + const statistics_chunk* chunk_stats = nullptr, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** @@ -546,7 +546,7 @@ void GatherPages(device_span chunks, * @param[in] stream CUDA stream to use, default 0 */ void BuildChunkDictionaries(device_span chunks, - uint32_t *dev_scratch, + uint32_t* dev_scratch, rmm::cuda_stream_view stream); } // namespace gpu diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 0863bca7b03..3bf11063035 100644 --- 
a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -51,7 +51,7 @@ constexpr uint32_t PARQUET_COLUMN_BUFFER_FLAG_LIST_TERMINATED = (1 << 24); namespace { -parquet::ConvertedType logical_type_to_converted_type(parquet::LogicalType const &logical) +parquet::ConvertedType logical_type_to_converted_type(parquet::LogicalType const& logical) { if (logical.isset.STRING) { return parquet::UTF8; @@ -96,7 +96,7 @@ parquet::ConvertedType logical_type_to_converted_type(parquet::LogicalType const /** * @brief Function that translates Parquet datatype to cuDF type enum */ -type_id to_type_id(SchemaElement const &schema, +type_id to_type_id(SchemaElement const& schema, bool strings_to_categorical, type_id timestamp_type_id, bool strict_decimal_types) @@ -232,7 +232,7 @@ std::tuple conversion_info(type_id column_type_id, } // namespace -std::string name_from_path(const std::vector &path_in_schema) +std::string name_from_path(const std::vector& path_in_schema) { // For the case of lists, we will see a schema that looks like: // a.list.element.list.element @@ -273,16 +273,16 @@ std::string name_from_path(const std::vector &path_in_schema) * @brief Class for parsing dataset metadata */ struct metadata : public FileMetaData { - explicit metadata(datasource *source) + explicit metadata(datasource* source) { constexpr auto header_len = sizeof(file_header_s); constexpr auto ender_len = sizeof(file_ender_s); const auto len = source->size(); const auto header_buffer = source->host_read(0, header_len); - const auto header = reinterpret_cast(header_buffer->data()); + const auto header = reinterpret_cast(header_buffer->data()); const auto ender_buffer = source->host_read(len - ender_len, ender_len); - const auto ender = reinterpret_cast(ender_buffer->data()); + const auto ender = reinterpret_cast(ender_buffer->data()); CUDF_EXPECTS(len > header_len + ender_len, "Incorrect data source"); CUDF_EXPECTS(header->magic == parquet_magic && ender->magic == parquet_magic, "Corrupted header or footer"); @@ -304,11 +304,11 @@ class aggregate_metadata { /** * @brief Create a metadata object from each element in the source vector */ - auto metadatas_from_sources(std::vector> const &sources) + auto metadatas_from_sources(std::vector> const& sources) { std::vector metadatas; std::transform( - sources.cbegin(), sources.cend(), std::back_inserter(metadatas), [](auto const &source) { + sources.cbegin(), sources.cend(), std::back_inserter(metadatas), [](auto const& source) { return metadata(source.get()); }); return metadatas; @@ -321,8 +321,10 @@ class aggregate_metadata { { std::map merged; // merge key/value maps TODO: warn/throw if there are mismatches? 
- for (auto const &pfm : per_file_metadata) { - for (auto const &kv : pfm.key_value_metadata) { merged[kv.key] = kv.value; } + for (auto const& pfm : per_file_metadata) { + for (auto const& kv : pfm.key_value_metadata) { + merged[kv.key] = kv.value; + } } return merged; } @@ -333,7 +335,7 @@ class aggregate_metadata { size_type calc_num_rows() const { return std::accumulate( - per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto &sum, auto &pfm) { + per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) { return sum + pfm.num_rows; }); } @@ -344,13 +346,13 @@ class aggregate_metadata { size_type calc_num_row_groups() const { return std::accumulate( - per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto &sum, auto &pfm) { + per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) { return sum + pfm.row_groups.size(); }); } public: - aggregate_metadata(std::vector> const &sources) + aggregate_metadata(std::vector> const& sources) : per_file_metadata(metadatas_from_sources(sources)), agg_keyval_map(merge_keyval_metadata()), num_rows(calc_num_rows()), @@ -358,7 +360,7 @@ class aggregate_metadata { { // Verify that the input files have matching numbers of columns size_type num_cols = -1; - for (auto const &pfm : per_file_metadata) { + for (auto const& pfm : per_file_metadata) { if (pfm.row_groups.size() != 0) { if (num_cols == -1) num_cols = pfm.row_groups[0].columns.size(); @@ -368,27 +370,27 @@ class aggregate_metadata { } } // Verify that the input files have matching schemas - for (auto const &pfm : per_file_metadata) { + for (auto const& pfm : per_file_metadata) { CUDF_EXPECTS(per_file_metadata[0].schema == pfm.schema, "All sources must have the same schemas"); } } - auto const &get_row_group(size_type row_group_index, size_type src_idx) const + auto const& get_row_group(size_type row_group_index, size_type src_idx) const { CUDF_EXPECTS(src_idx >= 0 && src_idx < static_cast(per_file_metadata.size()), "invalid source index"); return per_file_metadata[src_idx].row_groups[row_group_index]; } - auto const &get_column_metadata(size_type row_group_index, + auto const& get_column_metadata(size_type row_group_index, size_type src_idx, int schema_idx) const { auto col = std::find_if( per_file_metadata[src_idx].row_groups[row_group_index].columns.begin(), per_file_metadata[src_idx].row_groups[row_group_index].columns.end(), - [schema_idx](ColumnChunk const &col) { return col.schema_idx == schema_idx ? true : false; }); + [schema_idx](ColumnChunk const& col) { return col.schema_idx == schema_idx ? 
true : false; }); CUDF_EXPECTS(col != std::end(per_file_metadata[src_idx].row_groups[row_group_index].columns), "Found no metadata for schema index"); return col->meta_data; @@ -398,9 +400,9 @@ class aggregate_metadata { auto get_num_row_groups() const { return num_row_groups; } - auto const &get_schema(int schema_idx) const { return per_file_metadata[0].schema[schema_idx]; } + auto const& get_schema(int schema_idx) const { return per_file_metadata[0].schema[schema_idx]; } - auto const &get_key_value_metadata() const { return agg_keyval_map; } + auto const& get_key_value_metadata() const { return agg_keyval_map; } /** * @brief Gets the concrete nesting depth of output cudf columns @@ -411,7 +413,7 @@ class aggregate_metadata { */ inline int get_output_nesting_depth(int schema_index) const { - auto &pfm = per_file_metadata[0]; + auto& pfm = per_file_metadata[0]; int depth = 0; // walk upwards, skipping repeated fields @@ -462,7 +464,7 @@ class aggregate_metadata { * * @param names List of column names to load, where index column name(s) will be added */ - void add_pandas_index_names(std::vector &names) const + void add_pandas_index_names(std::vector& names) const { auto str = get_pandas_index(); if (str.length() != 0) { @@ -499,9 +501,9 @@ class aggregate_metadata { * * @return List of row group indexes and its starting row */ - auto select_row_groups(std::vector> const &row_groups, - size_type &row_start, - size_type &row_count) const + auto select_row_groups(std::vector> const& row_groups, + size_type& row_start, + size_type& row_count) const { if (!row_groups.empty()) { std::vector selection; @@ -510,7 +512,7 @@ class aggregate_metadata { row_count = 0; for (size_t src_idx = 0; src_idx < row_groups.size(); ++src_idx) { - for (auto const &rowgroup_idx : row_groups[src_idx]) { + for (auto const& rowgroup_idx : row_groups[src_idx]) { CUDF_EXPECTS( rowgroup_idx >= 0 && rowgroup_idx < static_cast(per_file_metadata[src_idx].row_groups.size()), @@ -561,16 +563,16 @@ class aggregate_metadata { * @param[in] strict_decimal_types True if it is an error to load an unsupported decimal type * */ - void build_column_info(int &schema_idx, - std::vector &input_columns, - std::vector &output_columns, - std::deque &nesting, + void build_column_info(int& schema_idx, + std::vector& input_columns, + std::vector& output_columns, + std::deque& nesting, bool strings_to_categorical, type_id timestamp_type_id, bool strict_decimal_types) const { int start_schema_idx = schema_idx; - auto const &schema = get_schema(schema_idx); + auto const& schema = get_schema(schema_idx); schema_idx++; // if I am a stub, continue on @@ -595,7 +597,7 @@ class aggregate_metadata { ? data_type{col_type, numeric::scale_type{-schema.decimal_scale}} : data_type{col_type}; output_columns.emplace_back(dtype, schema.repetition_type == OPTIONAL ? true : false); - column_buffer &output_col = output_columns.back(); + column_buffer& output_col = output_columns.back(); output_col.name = schema.name; // build each child @@ -613,7 +615,7 @@ class aggregate_metadata { // data stored) so add me to the list. 
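    // Illustrative walk for a single LIST<INT32> column "a": the schema chain is
    // a (LIST-annotated group) -> a.list (repeated stub, skipped above) ->
    // a.list.element; only the element node has no children and becomes an input column.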
if (schema.num_children == 0) { input_columns.emplace_back(input_column_info{start_schema_idx, schema.name}); - input_column_info &input_col = input_columns.back(); + input_column_info& input_col = input_columns.back(); std::copy(nesting.begin(), nesting.end(), std::back_inserter(input_col.nesting)); } @@ -631,13 +633,13 @@ class aggregate_metadata { * @return input column information, output column information, list of output column schema * indices */ - auto select_columns(std::vector const &use_names, + auto select_columns(std::vector const& use_names, bool include_index, bool strings_to_categorical, type_id timestamp_type_id, bool strict_decimal_types) const { - auto const &pfm = per_file_metadata[0]; + auto const& pfm = per_file_metadata[0]; // determine the list of output columns // @@ -659,16 +661,16 @@ class aggregate_metadata { if (use_names.empty()) { // walk the schema and choose all top level columns for (size_t schema_idx = 1; schema_idx < pfm.schema.size(); schema_idx++) { - auto const &schema = pfm.schema[schema_idx]; + auto const& schema = pfm.schema[schema_idx]; if (schema.parent_idx == 0) { output_column_schemas.push_back(schema_idx); } } } else { // Load subset of columns; include PANDAS index unless excluded std::vector local_use_names = use_names; if (include_index) { add_pandas_index_names(local_use_names); } - for (const auto &use_name : local_use_names) { + for (const auto& use_name : local_use_names) { for (size_t schema_idx = 1; schema_idx < pfm.schema.size(); schema_idx++) { - auto const &schema = pfm.schema[schema_idx]; + auto const& schema = pfm.schema[schema_idx]; // We select only top level columns by name. Selecting nested columns by name is not // supported. Top level columns are identified by their parent being the root (idx == 0) if (use_name == schema.name and schema.parent_idx == 0) { @@ -711,9 +713,9 @@ class aggregate_metadata { * @param src_col_schema The column schema to generate the new mapping for * @param md File metadata information */ -void generate_depth_remappings(std::map, std::vector>> &remap, +void generate_depth_remappings(std::map, std::vector>>& remap, int src_col_schema, - aggregate_metadata const &md) + aggregate_metadata const& md) { // already generated for this level if (remap.find(src_col_schema) != remap.end()) { return; } @@ -724,11 +726,11 @@ void generate_depth_remappings(std::map, std::ve "Attempting to remap a schema more than once"); auto inserted = remap.insert(std::pair, std::vector>>{src_col_schema, {}}); - auto &depth_remap = inserted.first->second; + auto& depth_remap = inserted.first->second; - std::vector &rep_depth_remap = (depth_remap.first); + std::vector& rep_depth_remap = (depth_remap.first); rep_depth_remap.resize(schema.max_repetition_level + 1); - std::vector &def_depth_remap = (depth_remap.second); + std::vector& def_depth_remap = (depth_remap.second); def_depth_remap.resize(schema.max_definition_level + 1); // the key: @@ -822,12 +824,12 @@ void generate_depth_remappings(std::map, std::ve * @copydoc cudf::io::detail::parquet::read_column_chunks */ void reader::impl::read_column_chunks( - std::vector> &page_data, - hostdevice_vector &chunks, // TODO const? + std::vector>& page_data, + hostdevice_vector& chunks, // TODO const? 
size_t begin_chunk, size_t end_chunk, - const std::vector &column_chunk_offsets, - std::vector const &chunk_source_map, + const std::vector& column_chunk_offsets, + std::vector const& chunk_source_map, rmm::cuda_stream_view stream) { // Transfer chunk data, coalescing adjacent chunks @@ -850,7 +852,7 @@ void reader::impl::read_column_chunks( next_chunk++; } if (io_size != 0) { - auto &source = _sources[chunk_source_map[chunk]]; + auto& source = _sources[chunk_source_map[chunk]]; if (source->is_device_read_preferred(io_size)) { page_data[chunk] = source->device_read(io_offset, io_size, stream); } else { @@ -872,7 +874,7 @@ void reader::impl::read_column_chunks( /** * @copydoc cudf::io::detail::parquet::count_page_headers */ -size_t reader::impl::count_page_headers(hostdevice_vector &chunks, +size_t reader::impl::count_page_headers(hostdevice_vector& chunks, rmm::cuda_stream_view stream) { size_t total_pages = 0; @@ -891,8 +893,8 @@ size_t reader::impl::count_page_headers(hostdevice_vector /** * @copydoc cudf::io::detail::parquet::decode_page_headers */ -void reader::impl::decode_page_headers(hostdevice_vector &chunks, - hostdevice_vector &pages, +void reader::impl::decode_page_headers(hostdevice_vector& chunks, + hostdevice_vector& pages, rmm::cuda_stream_view stream) { // IMPORTANT : if you change how pages are stored within a chunk (dist pages, then data pages), @@ -912,15 +914,17 @@ void reader::impl::decode_page_headers(hostdevice_vector & * @copydoc cudf::io::detail::parquet::decompress_page_data */ rmm::device_buffer reader::impl::decompress_page_data( - hostdevice_vector &chunks, - hostdevice_vector &pages, + hostdevice_vector& chunks, + hostdevice_vector& pages, rmm::cuda_stream_view stream) { - auto for_each_codec_page = [&](parquet::Compression codec, const std::function &f) { + auto for_each_codec_page = [&](parquet::Compression codec, const std::function& f) { for (size_t c = 0, page_count = 0; c < chunks.size(); c++) { const auto page_stride = chunks[c].max_num_pages; if (chunks[c].codec == codec) { - for (int k = 0; k < page_stride; k++) { f(page_count + k); } + for (int k = 0; k < page_stride; k++) { + f(page_count + k); + } } page_count += page_stride; } @@ -936,7 +940,7 @@ rmm::device_buffer reader::impl::decompress_page_data( std::make_pair(parquet::SNAPPY, 0), std::make_pair(parquet::BROTLI, 0)}; - for (auto &codec : codecs) { + for (auto& codec : codecs) { for_each_codec_page(codec.first, [&](size_t page) { total_decomp_size += pages[page].uncompressed_page_size; codec.second++; @@ -954,12 +958,12 @@ rmm::device_buffer reader::impl::decompress_page_data( size_t decomp_offset = 0; int32_t argc = 0; - for (const auto &codec : codecs) { + for (const auto& codec : codecs) { if (codec.second > 0) { int32_t start_pos = argc; for_each_codec_page(codec.first, [&](size_t page) { - auto dst_base = static_cast(decomp_pages.data()); + auto dst_base = static_cast(decomp_pages.data()); inflate_in[argc].srcDevice = pages[page].page_data; inflate_in[argc].srcSize = pages[page].compressed_page_size; inflate_in[argc].dstDevice = dst_base + decomp_offset; @@ -969,7 +973,7 @@ rmm::device_buffer reader::impl::decompress_page_data( inflate_out[argc].status = static_cast(-1000); inflate_out[argc].reserved = 0; - pages[page].page_data = static_cast(inflate_in[argc].dstDevice); + pages[page].page_data = static_cast(inflate_in[argc].dstDevice); decomp_offset += inflate_in[argc].dstSize; argc++; }); @@ -1027,17 +1031,17 @@ rmm::device_buffer reader::impl::decompress_page_data( /** * @copydoc 
cudf::io::detail::parquet::allocate_nesting_info
 */
-void reader::impl::allocate_nesting_info(hostdevice_vector<gpu::ColumnChunkDesc> const &chunks,
-                                         hostdevice_vector<gpu::PageInfo> &pages,
-                                         hostdevice_vector<gpu::PageNestingInfo> &page_nesting_info,
+void reader::impl::allocate_nesting_info(hostdevice_vector<gpu::ColumnChunkDesc> const& chunks,
+                                         hostdevice_vector<gpu::PageInfo>& pages,
+                                         hostdevice_vector<gpu::PageNestingInfo>& page_nesting_info,
                                          rmm::cuda_stream_view stream)
 {
   // compute total # of page_nesting infos needed and allocate space. doing this in one
   // buffer to keep it to a single gpu allocation
   size_t const total_page_nesting_infos = std::accumulate(
-    chunks.host_ptr(), chunks.host_ptr() + chunks.size(), 0, [&](int total, auto &chunk) {
+    chunks.host_ptr(), chunks.host_ptr() + chunks.size(), 0, [&](int total, auto& chunk) {
       // the schema of the input column
-      auto const &schema = _metadata->get_schema(chunk.src_col_schema);
+      auto const& schema = _metadata->get_schema(chunk.src_col_schema);
       auto const per_page_nesting_info_size = max(
         schema.max_definition_level + 1, _metadata->get_output_nesting_depth(chunk.src_col_schema));
       return total + (per_page_nesting_info_size * chunk.num_data_pages);
@@ -1053,7 +1057,7 @@ void reader::impl::allocate_nesting_info(hostdevice_vector
   int src_info_index = 0;
   for (size_t idx = 0; idx < chunks.size(); idx++) {
     int src_col_schema = chunks[idx].src_col_schema;
-    auto &schema = _metadata->get_schema(src_col_schema);
+    auto& schema = _metadata->get_schema(src_col_schema);
     auto const per_page_nesting_info_size = std::max(
       schema.max_definition_level + 1, _metadata->get_output_nesting_depth(src_col_schema));
@@ -1078,7 +1082,7 @@ void reader::impl::allocate_nesting_info(hostdevice_vector
     int src_col_schema = chunks[idx].src_col_schema;
     // schema of the input column
-    auto &schema = _metadata->get_schema(src_col_schema);
+    auto& schema = _metadata->get_schema(src_col_schema);
     // real depth of the output cudf column hierarchy (1 == no nesting, 2 == 1 level, etc)
     int max_depth = _metadata->get_output_nesting_depth(src_col_schema);
@@ -1101,7 +1105,7 @@ void reader::impl::allocate_nesting_info(hostdevice_vector
     if (!cur_schema.is_stub()) {
       // initialize each page within the chunk
       for (int p_idx = 0; p_idx < chunks[idx].num_data_pages; p_idx++) {
-        gpu::PageNestingInfo *pni =
+        gpu::PageNestingInfo* pni =
           &page_nesting_info[nesting_info_index + (p_idx * per_page_nesting_info_size)];
         // if we have lists, set our start and end depth remappings
         if (schema.max_repetition_level > 0) {
           auto remap = depth_remapping.find(src_col_schema);
           CUDF_EXPECTS(remap != depth_remapping.end(),
                        "Could not find depth remapping for schema");
-          std::vector<int> const &rep_depth_remap = (remap->second.first);
-          std::vector<int> const &def_depth_remap = (remap->second.second);
+          std::vector<int> const& rep_depth_remap = (remap->second.first);
+          std::vector<int> const& def_depth_remap = (remap->second.second);
           for (size_t m = 0; m < rep_depth_remap.size(); m++) {
             pni[m].start_depth = rep_depth_remap[m];
@@ -1145,8 +1149,8 @@ void reader::impl::allocate_nesting_info(hostdevice_vector
 /**
  * @copydoc cudf::io::detail::parquet::preprocess_columns
  */
-void reader::impl::preprocess_columns(hostdevice_vector<gpu::ColumnChunkDesc> &chunks,
-                                      hostdevice_vector<gpu::PageInfo> &pages,
+void reader::impl::preprocess_columns(hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
+                                      hostdevice_vector<gpu::PageInfo>& pages,
                                       size_t min_row,
                                       size_t total_rows,
                                       bool has_lists,
@@ -1158,10 +1162,10 @@ void reader::impl::preprocess_columns(hostdevice_vector<gpu::ColumnChunkDesc> &c
   // if there are no lists, simply allocate every output
   // column to be of size num_rows
   if
(!has_lists) { - std::function &)> create_columns = - [&](std::vector &cols) { + std::function&)> create_columns = + [&](std::vector& cols) { for (size_t idx = 0; idx < cols.size(); idx++) { - auto &col = cols[idx]; + auto& col = cols[idx]; col.create(total_rows, stream, _mr); create_columns(col.children); } @@ -1178,14 +1182,14 @@ void reader::impl::preprocess_columns(hostdevice_vector &c /** * @copydoc cudf::io::detail::parquet::decode_page_data */ -void reader::impl::decode_page_data(hostdevice_vector &chunks, - hostdevice_vector &pages, - hostdevice_vector &page_nesting, +void reader::impl::decode_page_data(hostdevice_vector& chunks, + hostdevice_vector& pages, + hostdevice_vector& page_nesting, size_t min_row, size_t total_rows, rmm::cuda_stream_view stream) { - auto is_dict_chunk = [](const gpu::ColumnChunkDesc &chunk) { + auto is_dict_chunk = [](const gpu::ColumnChunkDesc& chunk) { return (chunk.data_type & 0x7) == BYTE_ARRAY && chunk.num_dict_pages > 0; }; @@ -1207,20 +1211,20 @@ void reader::impl::decode_page_data(hostdevice_vector &chu std::accumulate(chunks.host_ptr(), chunks.host_ptr(chunks.size()), 0, - [&](size_t cursum, gpu::ColumnChunkDesc const &chunk) { + [&](size_t cursum, gpu::ColumnChunkDesc const& chunk) { return cursum + _metadata->get_output_nesting_depth(chunk.src_col_schema); }); // In order to reduce the number of allocations of hostdevice_vector, we allocate a single vector // to store all per-chunk pointers to nested data/nullmask. `chunk_offsets[i]` will store the // offset into `chunk_nested_data`/`chunk_nested_valids` for the array of pointers for chunk `i` - auto chunk_nested_valids = hostdevice_vector(sum_max_depths); - auto chunk_nested_data = hostdevice_vector(sum_max_depths); + auto chunk_nested_valids = hostdevice_vector(sum_max_depths); + auto chunk_nested_data = hostdevice_vector(sum_max_depths); auto chunk_offsets = std::vector(); // Update chunks with pointers to column data. for (size_t c = 0, page_count = 0, str_ofs = 0, chunk_off = 0; c < chunks.size(); c++) { - input_column_info const &input_col = _input_columns[chunks[c].src_col_index]; + input_column_info const& input_col = _input_columns[chunks[c].src_col_index]; CUDF_EXPECTS(input_col.schema_idx == chunks[c].src_col_schema, "Column/page schema index mismatch"); @@ -1275,9 +1279,9 @@ void reader::impl::decode_page_data(hostdevice_vector &chu // // we do this by only handing out the pointers to the first child we come across. // - auto *cols = &_output_columns; + auto* cols = &_output_columns; for (size_t idx = 0; idx < max_depth; idx++) { - auto &out_buf = (*cols)[input_col.nesting[idx]]; + auto& out_buf = (*cols)[input_col.nesting[idx]]; cols = &out_buf.children; int owning_schema = out_buf.user_data & PARQUET_COLUMN_BUFFER_SCHEMA_MASK; @@ -1317,11 +1321,11 @@ void reader::impl::decode_page_data(hostdevice_vector &chu // last value that should then be followed by a terminator (because rows can span // page boundaries). 
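  // Illustrative example: a LIST<INT32> column holding {{7, 8}, {9}, {}} has
  // child values {7, 8, 9} and offsets {0, 2, 3, _}; the loop below fills the
  // last slot with the final offset, which equals the size of the child (3).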
  for (size_t idx = 0; idx < _input_columns.size(); idx++) {
-    input_column_info const &input_col = _input_columns[idx];
+    input_column_info const& input_col = _input_columns[idx];
-    auto *cols = &_output_columns;
+    auto* cols = &_output_columns;
     for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) {
-      auto &out_buf = (*cols)[input_col.nesting[l_idx]];
+      auto& out_buf = (*cols)[input_col.nesting[l_idx]];
       cols = &out_buf.children;
       if (out_buf.type.id() != type_id::LIST ||
@@ -1329,11 +1333,11 @@ void reader::impl::decode_page_data(hostdevice_vector<gpu::ColumnChunkDesc> &chu
         continue;
       }
       CUDF_EXPECTS(l_idx < input_col.nesting_depth() - 1, "Encountered a leaf list column");
-      auto &child = (*cols)[input_col.nesting[l_idx + 1]];
+      auto& child = (*cols)[input_col.nesting[l_idx + 1]];
       // the final offset for a list at level N is the size of its child
       int offset = child.type.id() == type_id::LIST ? child.size - 1 : child.size;
-      cudaMemcpyAsync(static_cast<int32_t *>(out_buf.data()) + (out_buf.size - 1),
+      cudaMemcpyAsync(static_cast<int32_t*>(out_buf.data()) + (out_buf.size - 1),
                       &offset,
                       sizeof(offset),
                       cudaMemcpyHostToDevice,
@@ -1344,17 +1348,17 @@ void reader::impl::decode_page_data(hostdevice_vector<gpu::ColumnChunkDesc> &chu
   // update null counts in the final column buffers
   for (size_t idx = 0; idx < pages.size(); idx++) {
-    gpu::PageInfo *pi = &pages[idx];
+    gpu::PageInfo* pi = &pages[idx];
     if (pi->flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { continue; }
-    gpu::ColumnChunkDesc *col = &chunks[pi->chunk_idx];
-    input_column_info const &input_col = _input_columns[col->src_col_index];
+    gpu::ColumnChunkDesc* col = &chunks[pi->chunk_idx];
+    input_column_info const& input_col = _input_columns[col->src_col_index];
     int index = pi->nesting - page_nesting.device_ptr();
-    gpu::PageNestingInfo *pni = &page_nesting[index];
+    gpu::PageNestingInfo* pni = &page_nesting[index];
-    auto *cols = &_output_columns;
+    auto* cols = &_output_columns;
     for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) {
-      auto &out_buf = (*cols)[input_col.nesting[l_idx]];
+      auto& out_buf = (*cols)[input_col.nesting[l_idx]];
       cols = &out_buf.children;
       // if I wasn't the one who wrote out the validity bits, skip it
@@ -1368,9 +1372,9 @@ void reader::impl::decode_page_data(hostdevice_vector<gpu::ColumnChunkDesc> &chu
   stream.synchronize();
 }
-reader::impl::impl(std::vector<std::unique_ptr<datasource>> &&sources,
-                   parquet_reader_options const &options,
-                   rmm::mr::device_memory_resource *mr)
+reader::impl::impl(std::vector<std::unique_ptr<datasource>>&& sources,
+                   parquet_reader_options const& options,
+                   rmm::mr::device_memory_resource* mr)
   : _mr(mr), _sources(std::move(sources))
 {
   // Open and parse the source dataset metadata
@@ -1397,7 +1401,7 @@ reader::impl::impl(std::vector<std::unique_ptr<datasource>> &&sources,
 table_with_metadata reader::impl::read(size_type skip_rows,
                                        size_type num_rows,
-                                       std::vector<std::vector<size_type>> const &row_group_list,
+                                       std::vector<std::vector<size_type>> const& row_group_list,
                                        rmm::cuda_stream_view stream)
 {
   // Select only row groups required
@@ -1431,8 +1435,8 @@ table_with_metadata reader::impl::read(size_type skip_rows,
   // Initialize column chunk information
   size_t total_decompressed_size = 0;
   auto remaining_rows = num_rows;
-  for (const auto &rg : selected_row_groups) {
-    const auto &row_group = _metadata->get_row_group(rg.index, rg.source_index);
+  for (const auto& rg : selected_row_groups) {
+    const auto& row_group = _metadata->get_row_group(rg.index, rg.source_index);
     auto const row_group_start = rg.start_row;
     auto const row_group_source = rg.source_index;
     auto const row_group_rows = std::min(remaining_rows, row_group.num_rows);
@@ -1442,8 +1446,8 @@ table_with_metadata
reader::impl::read(size_type skip_rows, for (size_t i = 0; i < num_input_columns; ++i) { auto col = _input_columns[i]; // look up metadata - auto &col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); - auto &schema = _metadata->get_schema(col.schema_idx); + auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); + auto& schema = _metadata->get_schema(col.schema_idx); // this column contains repetition levels and will require a preprocess if (schema.max_repetition_level > 0) { has_lists = true; } @@ -1579,7 +1583,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, // Return column names (must match order of returned columns) out_metadata.column_names.resize(_output_columns.size()); for (size_t i = 0; i < _output_column_schemas.size(); i++) { - auto const &schema = _metadata->get_schema(_output_column_schemas[i]); + auto const& schema = _metadata->get_schema(_output_column_schemas[i]); out_metadata.column_names[i] = schema.name; } @@ -1590,19 +1594,19 @@ table_with_metadata reader::impl::read(size_type skip_rows, } // Forward to implementation -reader::reader(std::vector const &filepaths, - parquet_reader_options const &options, +reader::reader(std::vector const& filepaths, + parquet_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : _impl(std::make_unique(datasource::create(filepaths), options, mr)) { } // Forward to implementation -reader::reader(std::vector> &&sources, - parquet_reader_options const &options, +reader::reader(std::vector>&& sources, + parquet_reader_options const& options, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : _impl(std::make_unique(std::move(sources), options, mr)) { } @@ -1611,7 +1615,7 @@ reader::reader(std::vector> &&sources, reader::~reader() = default; // Forward to implementation -table_with_metadata reader::read(parquet_reader_options const &options, +table_with_metadata reader::read(parquet_reader_options const& options, rmm::cuda_stream_view stream) { return _impl->read( diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index ffd8975a8d2..b93107aa9b2 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -60,9 +60,9 @@ class reader::impl { * @param options Settings for controlling reading behavior * @param mr Device memory resource to use for device memory allocation */ - explicit impl(std::vector> &&sources, - parquet_reader_options const &options, - rmm::mr::device_memory_resource *mr); + explicit impl(std::vector>&& sources, + parquet_reader_options const& options, + rmm::mr::device_memory_resource* mr); /** * @brief Read an entire set or a subset of data and returns a set of columns @@ -76,7 +76,7 @@ class reader::impl { */ table_with_metadata read(size_type skip_rows, size_type num_rows, - std::vector> const &row_group_indices, + std::vector> const& row_group_indices, rmm::cuda_stream_view stream); private: @@ -91,12 +91,12 @@ class reader::impl { * @param stream CUDA stream used for device memory operations and kernel launches. 
* */ - void read_column_chunks(std::vector> &page_data, - hostdevice_vector &chunks, + void read_column_chunks(std::vector>& page_data, + hostdevice_vector& chunks, size_t begin_chunk, size_t end_chunk, - const std::vector &column_chunk_offsets, - std::vector const &chunk_source_map, + const std::vector& column_chunk_offsets, + std::vector const& chunk_source_map, rmm::cuda_stream_view stream); /** @@ -107,7 +107,7 @@ class reader::impl { * * @return The total number of pages */ - size_t count_page_headers(hostdevice_vector &chunks, + size_t count_page_headers(hostdevice_vector& chunks, rmm::cuda_stream_view stream); /** @@ -117,8 +117,8 @@ class reader::impl { * @param pages List of page information * @param stream CUDA stream used for device memory operations and kernel launches. */ - void decode_page_headers(hostdevice_vector &chunks, - hostdevice_vector &pages, + void decode_page_headers(hostdevice_vector& chunks, + hostdevice_vector& pages, rmm::cuda_stream_view stream); /** @@ -130,8 +130,8 @@ class reader::impl { * * @return Device buffer to decompressed page data */ - rmm::device_buffer decompress_page_data(hostdevice_vector &chunks, - hostdevice_vector &pages, + rmm::device_buffer decompress_page_data(hostdevice_vector& chunks, + hostdevice_vector& pages, rmm::cuda_stream_view stream); /** @@ -149,9 +149,9 @@ class reader::impl { * @param page_nesting_info The allocated nesting info structs. * @param stream CUDA stream used for device memory operations and kernel launches. */ - void allocate_nesting_info(hostdevice_vector const &chunks, - hostdevice_vector &pages, - hostdevice_vector &page_nesting_info, + void allocate_nesting_info(hostdevice_vector const& chunks, + hostdevice_vector& pages, + hostdevice_vector& page_nesting_info, rmm::cuda_stream_view stream); /** @@ -172,8 +172,8 @@ class reader::impl { * a preprocess. * @param[in] stream Cuda stream */ - void preprocess_columns(hostdevice_vector &chunks, - hostdevice_vector &pages, + void preprocess_columns(hostdevice_vector& chunks, + hostdevice_vector& pages, size_t min_row, size_t total_rows, bool has_lists, @@ -189,15 +189,15 @@ class reader::impl { * @param total_rows Number of rows to output * @param stream CUDA stream used for device memory operations and kernel launches. */ - void decode_page_data(hostdevice_vector &chunks, - hostdevice_vector &pages, - hostdevice_vector &page_nesting, + void decode_page_data(hostdevice_vector& chunks, + hostdevice_vector& pages, + hostdevice_vector& page_nesting, size_t min_row, size_t total_rows, rmm::cuda_stream_view stream); private: - rmm::mr::device_memory_resource *_mr = nullptr; + rmm::mr::device_memory_resource* _mr = nullptr; std::vector> _sources; std::unique_ptr _metadata; diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 77210b5a2ab..73924512bce 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -87,14 +87,14 @@ struct linked_column_view : public column_view { // copy of this object. Options: // 1. Inherit from column_view_base. Only lose out on children vector. That is not needed. // 2. Don't inherit at all. 
make linked_column_view keep a reference wrapper to its column_view - linked_column_view(column_view const &col) : column_view(col), parent(nullptr) + linked_column_view(column_view const& col) : column_view(col), parent(nullptr) { for (auto child_it = col.child_begin(); child_it < col.child_end(); ++child_it) { children.push_back(std::make_shared(this, *child_it)); } } - linked_column_view(linked_column_view *parent, column_view const &col) + linked_column_view(linked_column_view* parent, column_view const& col) : column_view(col), parent(parent) { for (auto child_it = col.child_begin(); child_it < col.child_end(); ++child_it) { @@ -102,7 +102,7 @@ struct linked_column_view : public column_view { } } - linked_column_view *parent; //!< Pointer to parent of this column. Nullptr if root + linked_column_view* parent; //!< Pointer to parent of this column. Nullptr if root LinkedColVector children; }; @@ -112,10 +112,10 @@ struct linked_column_view : public column_view { * @param table table of columns to convert * @return Vector of converted linked_column_views */ -LinkedColVector input_table_to_linked_columns(table_view const &table) +LinkedColVector input_table_to_linked_columns(table_view const& table) { LinkedColVector result; - for (column_view const &col : table) { + for (column_view const& col : table) { result.emplace_back(std::make_shared(col)); } @@ -144,9 +144,9 @@ struct schema_tree_node : public SchemaElement { }; struct leaf_schema_fn { - schema_tree_node &col_schema; - LinkedColPtr const &col; - column_in_metadata const &col_meta; + schema_tree_node& col_schema; + LinkedColPtr const& col; + column_in_metadata const& col_meta; bool timestamp_is_int96; template @@ -370,8 +370,8 @@ struct leaf_schema_fn { * Recursively traverses through linked_columns and corresponding metadata to construct schema tree. * The resulting schema tree is stored in a vector in pre-order traversal order. 
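 * For example, a table with columns {a: INT32, b: LIST<FLOAT>} produces the pre-order
 * vector [root, a, b (LIST-annotated group), b.list (repeated group), b.list.element].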
*/ -std::vector construct_schema_tree(LinkedColVector const &linked_columns, - table_input_metadata const &metadata, +std::vector construct_schema_tree(LinkedColVector const& linked_columns, + table_input_metadata const& metadata, bool single_write_mode, bool int96_timestamps) { @@ -384,8 +384,8 @@ std::vector construct_schema_tree(LinkedColVector const &linke root.parent_idx = -1; // root schema has no parent schema.push_back(std::move(root)); - std::function add_schema = - [&](LinkedColPtr const &col, column_in_metadata const &col_meta, size_t parent_idx) { + std::function add_schema = + [&](LinkedColPtr const& col, column_in_metadata const& col_meta, size_t parent_idx) { bool col_nullable = [&]() { if (single_write_mode) { return col->nullable(); @@ -500,8 +500,8 @@ std::vector construct_schema_tree(LinkedColVector const &linke * */ struct parquet_column_view { - parquet_column_view(schema_tree_node const &schema_node, - std::vector const &schema_tree, + parquet_column_view(schema_tree_node const& schema_node, + std::vector const& schema_tree, rmm::cuda_stream_view stream); column_view leaf_column_view() const; @@ -510,7 +510,7 @@ struct parquet_column_view { column_view cudf_column_view() const { return cudf_col; } parquet::Type physical_type() const { return schema_node.type; } - std::vector const &get_path_in_schema() { return path_in_schema; } + std::vector const& get_path_in_schema() { return path_in_schema; } // LIST related member functions uint8_t max_def_level() const noexcept { return _max_def_level; } @@ -518,8 +518,8 @@ struct parquet_column_view { bool is_list() const noexcept { return _is_list; } // Dictionary related member functions - uint32_t *get_dict_data() { return (_dict_data.size()) ? _dict_data.data() : nullptr; } - uint32_t *get_dict_index() { return (_dict_index.size()) ? _dict_index.data() : nullptr; } + uint32_t* get_dict_data() { return (_dict_data.size()) ? _dict_data.data() : nullptr; } + uint32_t* get_dict_index() { return (_dict_index.size()) ? _dict_index.data() : nullptr; } void use_dictionary(bool use_dict) { _dictionary_used = use_dict; } void alloc_dictionary(size_t max_num_rows, rmm::cuda_stream_view stream) { @@ -563,8 +563,8 @@ struct parquet_column_view { rmm::device_uvector _dict_index; }; -parquet_column_view::parquet_column_view(schema_tree_node const &schema_node, - std::vector const &schema_tree, +parquet_column_view::parquet_column_view(schema_tree_node const& schema_node, + std::vector const& schema_tree, rmm::cuda_stream_view stream) : schema_node(schema_node), _d_nullability(0, stream), @@ -578,7 +578,7 @@ parquet_column_view::parquet_column_view(schema_tree_node const &schema_node, auto curr_col = schema_node.leaf_column.get(); column_view single_inheritance_cudf_col = *curr_col; while (curr_col->parent) { - auto const &parent = *curr_col->parent; + auto const& parent = *curr_col->parent; // For list columns, we still need to retain the offset child column. 
auto children = @@ -718,7 +718,7 @@ gpu::parquet_column_device_view parquet_column_view::get_device_view(rmm::cuda_s return desc; } -void writer::impl::init_page_fragments(cudf::detail::hostdevice_2dvector &frag, +void writer::impl::init_page_fragments(cudf::detail::hostdevice_2dvector& frag, device_span col_desc, uint32_t num_rows, uint32_t fragment_size) @@ -745,7 +745,7 @@ void writer::impl::gather_fragment_statistics( } void writer::impl::build_chunk_dictionaries( - hostdevice_2dvector &chunks, + hostdevice_2dvector& chunks, device_span col_desc, uint32_t num_columns, uint32_t num_dictionaries) @@ -762,11 +762,11 @@ void writer::impl::build_chunk_dictionaries( chunks.device_to_host(stream, true); } -void writer::impl::init_encoder_pages(hostdevice_2dvector &chunks, +void writer::impl::init_encoder_pages(hostdevice_2dvector& chunks, device_span col_desc, device_span pages, - statistics_chunk *page_stats, - statistics_chunk *frag_stats, + statistics_chunk* page_stats, + statistics_chunk* frag_stats, uint32_t num_columns, uint32_t num_pages, uint32_t num_stats_bfr) @@ -795,14 +795,14 @@ void writer::impl::init_encoder_pages(hostdevice_2dvector & stream.synchronize(); } -void writer::impl::encode_pages(hostdevice_2dvector &chunks, +void writer::impl::encode_pages(hostdevice_2dvector& chunks, device_span pages, uint32_t pages_in_batch, uint32_t first_page_in_batch, uint32_t rowgroups_in_batch, uint32_t first_rowgroup, - const statistics_chunk *page_stats, - const statistics_chunk *chunk_stats) + const statistics_chunk* page_stats, + const statistics_chunk* chunk_stats) { auto batch_pages = pages.subspan(first_page_in_batch, pages_in_batch); @@ -844,10 +844,10 @@ void writer::impl::encode_pages(hostdevice_2dvector &chunks } writer::impl::impl(std::unique_ptr sink, - parquet_writer_options const &options, + parquet_writer_options const& options, SingleWriteMode mode, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : _mr(mr), stream(stream), compression_(to_parquet_compression(options.get_compression())), @@ -863,10 +863,10 @@ writer::impl::impl(std::unique_ptr sink, } writer::impl::impl(std::unique_ptr sink, - chunked_parquet_writer_options const &options, + chunked_parquet_writer_options const& options, SingleWriteMode mode, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : _mr(mr), stream(stream), compression_(to_parquet_compression(options.get_compression())), @@ -892,7 +892,7 @@ void writer::impl::init_state() current_chunk_offset = sizeof(file_header_s); } -void writer::impl::write(table_view const &table) +void writer::impl::write(table_view const& table) { CUDF_EXPECTS(not closed, "Data has already been flushed to out and closed"); @@ -901,8 +901,8 @@ void writer::impl::write(table_view const &table) if (not table_meta) { table_meta = std::make_unique(table); } // Fill unnamed columns' names in table_meta - std::function add_default_name = - [&](column_in_metadata &col_meta, std::string default_name) { + std::function add_default_name = + [&](column_in_metadata& col_meta, std::string default_name) { if (col_meta.get_name().empty()) col_meta.set_name(default_name); for (size_type i = 0; i < col_meta.num_children(); ++i) { add_default_name(col_meta.child(i), col_meta.get_name() + "_" + std::to_string(i)); @@ -917,14 +917,16 @@ void writer::impl::write(table_view const &table) // Construct parquet_column_views from the schema tree leaf nodes. 
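  // (Only schema nodes with a leaf_column carry data of their own; interior struct and
  // list levels exist purely for structure, so the loop below skips them.)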
std::vector parquet_columns; - for (schema_tree_node const &schema_node : schema_tree) { + for (schema_tree_node const& schema_node : schema_tree) { if (schema_node.leaf_column) { parquet_columns.emplace_back(schema_node, schema_tree, stream); } } // Mass allocation of column_device_views for each parquet_column_view std::vector cudf_cols; cudf_cols.reserve(parquet_columns.size()); - for (auto const &parq_col : parquet_columns) { cudf_cols.push_back(parq_col.cudf_column_view()); } + for (auto const& parq_col : parquet_columns) { + cudf_cols.push_back(parq_col.cudf_column_view()); + } table_view single_streams_table(cudf_cols); size_type num_columns = single_streams_table.num_columns(); @@ -938,7 +940,7 @@ void writer::impl::write(table_view const &table) std::transform(table_meta->user_data.begin(), table_meta->user_data.end(), std::back_inserter(md.key_value_metadata), - [](auto const &kv) { + [](auto const& kv) { return KeyValue{kv.first, kv.second}; }); md.schema = this_table_schema; @@ -960,7 +962,7 @@ void writer::impl::write(table_view const &table) // This should've been `auto const&` but isn't since dictionary space is allocated when calling // get_device_view(). Fix during dictionary refactor. std::transform( - parquet_columns.begin(), parquet_columns.end(), col_desc.host_ptr(), [&](auto &pcol) { + parquet_columns.begin(), parquet_columns.end(), col_desc.host_ptr(), [&](auto& pcol) { return pcol.get_device_view(stream); }); @@ -1039,7 +1041,7 @@ void writer::impl::write(table_view const &table) md.row_groups[global_r].total_byte_size = 0; md.row_groups[global_r].columns.resize(num_columns); for (int i = 0; i < num_columns; i++) { - gpu::EncColumnChunk *ck = &chunks[r][i]; + gpu::EncColumnChunk* ck = &chunks[r][i]; bool dict_enable = false; *ck = {}; @@ -1088,7 +1090,9 @@ void writer::impl::write(table_view const &table) } // Free unused dictionaries - for (auto &col : parquet_columns) { col.check_dictionary_used(stream); } + for (auto& col : parquet_columns) { + col.check_dictionary_used(stream); + } // Build chunk dictionaries and count pages if (num_chunks != 0) { @@ -1107,7 +1111,7 @@ void writer::impl::write(table_view const &table) size_t rowgroup_size = 0; if (r < num_rowgroups) { for (int i = 0; i < num_columns; i++) { - gpu::EncColumnChunk *ck = &chunks[r][i]; + gpu::EncColumnChunk* ck = &chunks[r][i]; ck->first_page = num_pages; num_pages += ck->num_pages; pages_in_batch += ck->num_pages; @@ -1146,11 +1150,11 @@ void writer::impl::write(table_view const &table) // This contains stats for both the pages and the rowgroups. TODO: make them separate. 
rmm::device_uvector page_stats(num_stats_bfr, stream); for (uint32_t b = 0, r = 0; b < (uint32_t)batch_list.size(); b++) { - uint8_t *bfr = static_cast(uncomp_bfr.data()); - uint8_t *bfr_c = static_cast(comp_bfr.data()); + uint8_t* bfr = static_cast(uncomp_bfr.data()); + uint8_t* bfr_c = static_cast(comp_bfr.data()); for (uint32_t j = 0; j < batch_list[b]; j++, r++) { for (int i = 0; i < num_columns; i++) { - gpu::EncColumnChunk *ck = &chunks[r][i]; + gpu::EncColumnChunk* ck = &chunks[r][i]; ck->uncompressed_bfr = bfr; ck->compressed_bfr = bfr_c; bfr += ck->bfr_size; @@ -1194,8 +1198,8 @@ void writer::impl::write(table_view const &table) : nullptr); for (; r < rnext; r++, global_r++) { for (auto i = 0; i < num_columns; i++) { - gpu::EncColumnChunk *ck = &chunks[r][i]; - uint8_t *dev_bfr; + gpu::EncColumnChunk* ck = &chunks[r][i]; + uint8_t* dev_bfr; if (ck->is_compressed) { md.row_groups[global_r].columns[i].meta_data.codec = compression_; dev_bfr = ck->compressed_bfr; @@ -1220,7 +1224,7 @@ void writer::impl::write(table_view const &table) } else { if (!host_bfr) { host_bfr = pinned_buffer{[](size_t size) { - uint8_t *ptr = nullptr; + uint8_t* ptr = nullptr; CUDA_TRY(cudaMallocHost(&ptr, size)); return ptr; }(max_chunk_bfr_size), @@ -1255,7 +1259,7 @@ void writer::impl::write(table_view const &table) } std::unique_ptr> writer::impl::close( - std::string const &column_chunks_file_path) + std::string const& column_chunks_file_path) { if (closed) { return nullptr; } closed = true; @@ -1273,15 +1277,17 @@ std::unique_ptr> writer::impl::close( file_header_s fhdr = {parquet_magic}; buffer_.resize(0); buffer_.insert(buffer_.end(), - reinterpret_cast(&fhdr), - reinterpret_cast(&fhdr) + sizeof(fhdr)); - for (auto &rowgroup : md.row_groups) { - for (auto &col : rowgroup.columns) { col.file_path = column_chunks_file_path; } + reinterpret_cast(&fhdr), + reinterpret_cast(&fhdr) + sizeof(fhdr)); + for (auto& rowgroup : md.row_groups) { + for (auto& col : rowgroup.columns) { + col.file_path = column_chunks_file_path; + } } fendr.footer_len = static_cast(cpw.write(md)); buffer_.insert(buffer_.end(), - reinterpret_cast(&fendr), - reinterpret_cast(&fendr) + sizeof(fendr)); + reinterpret_cast(&fendr), + reinterpret_cast(&fendr) + sizeof(fendr)); return std::make_unique>(std::move(buffer_)); } else { return {nullptr}; @@ -1290,19 +1296,19 @@ std::unique_ptr> writer::impl::close( // Forward to implementation writer::writer(std::unique_ptr sink, - parquet_writer_options const &options, + parquet_writer_options const& options, SingleWriteMode mode, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : _impl(std::make_unique(std::move(sink), options, mode, stream, mr)) { } writer::writer(std::unique_ptr sink, - chunked_parquet_writer_options const &options, + chunked_parquet_writer_options const& options, SingleWriteMode mode, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : _impl(std::make_unique(std::move(sink), options, mode, stream, mr)) { } @@ -1311,23 +1317,23 @@ writer::writer(std::unique_ptr sink, writer::~writer() = default; // Forward to implementation -void writer::write(table_view const &table) { _impl->write(table); } +void writer::write(table_view const& table) { _impl->write(table); } // Forward to implementation -std::unique_ptr> writer::close(std::string const &column_chunks_file_path) +std::unique_ptr> writer::close(std::string const& column_chunks_file_path) { return 
_impl->close(column_chunks_file_path); } std::unique_ptr> writer::merge_rowgroup_metadata( - const std::vector>> &metadata_list) + const std::vector>>& metadata_list) { std::vector output; CompactProtocolWriter cpw(&output); FileMetaData md; md.row_groups.reserve(metadata_list.size()); - for (const auto &blob : metadata_list) { + for (const auto& blob : metadata_list) { CompactProtocolReader cpreader( blob.get()->data(), std::max(blob.get()->size(), sizeof(file_ender_s)) - sizeof(file_ender_s)); @@ -1356,13 +1362,13 @@ std::unique_ptr> writer::merge_rowgroup_metadata( file_ender_s fendr; fhdr.magic = parquet_magic; output.insert(output.end(), - reinterpret_cast(&fhdr), - reinterpret_cast(&fhdr) + sizeof(fhdr)); + reinterpret_cast(&fhdr), + reinterpret_cast(&fhdr) + sizeof(fhdr)); fendr.footer_len = static_cast(cpw.write(md)); fendr.magic = parquet_magic; output.insert(output.end(), - reinterpret_cast(&fendr), - reinterpret_cast(&fendr) + sizeof(fendr)); + reinterpret_cast(&fendr), + reinterpret_cast(&fendr) + sizeof(fendr)); return std::make_unique>(std::move(output)); } diff --git a/cpp/src/io/statistics/column_statistics.cuh b/cpp/src/io/statistics/column_statistics.cuh index fd148724712..333f0e1aae7 100644 --- a/cpp/src/io/statistics/column_statistics.cuh +++ b/cpp/src/io/statistics/column_statistics.cuh @@ -60,21 +60,21 @@ using block_reduce_storage = detail::block_reduce_storage; */ template struct calculate_group_statistics_functor { - block_reduce_storage &temp_storage; + block_reduce_storage& temp_storage; /** * @brief Construct a statistics calculator * * @param d_temp_storage Temporary storage to be used by cub calls */ - __device__ calculate_group_statistics_functor(block_reduce_storage &d_temp_storage) + __device__ calculate_group_statistics_functor(block_reduce_storage& d_temp_storage) : temp_storage(d_temp_storage) { } template ::is_ignored> * = nullptr> - __device__ void operator()(stats_state_s &s, uint32_t t) + std::enable_if_t::is_ignored>* = nullptr> + __device__ void operator()(stats_state_s& s, uint32_t t) { // No-op for unsupported aggregation types } @@ -88,8 +88,8 @@ struct calculate_group_statistics_functor { * @param t thread id */ template ::is_ignored> * = nullptr> - __device__ void operator()(stats_state_s &s, uint32_t t) + std::enable_if_t::is_ignored>* = nullptr> + __device__ void operator()(stats_state_s& s, uint32_t t) { detail::storage_wrapper storage(temp_storage); @@ -123,17 +123,17 @@ struct calculate_group_statistics_functor { */ template struct merge_group_statistics_functor { - block_reduce_storage &temp_storage; + block_reduce_storage& temp_storage; - __device__ merge_group_statistics_functor(block_reduce_storage &d_temp_storage) + __device__ merge_group_statistics_functor(block_reduce_storage& d_temp_storage) : temp_storage(d_temp_storage) { } template ::is_ignored> * = nullptr> - __device__ void operator()(merge_state_s &s, - const statistics_chunk *chunks, + std::enable_if_t::is_ignored>* = nullptr> + __device__ void operator()(merge_state_s& s, + const statistics_chunk* chunks, const uint32_t num_chunks, uint32_t t) { @@ -141,9 +141,9 @@ struct merge_group_statistics_functor { } template ::is_ignored> * = nullptr> - __device__ void operator()(merge_state_s &s, - const statistics_chunk *chunks, + std::enable_if_t::is_ignored>* = nullptr> + __device__ void operator()(merge_state_s& s, + const statistics_chunk* chunks, const uint32_t num_chunks, uint32_t t) { @@ -151,7 +151,9 @@ struct merge_group_statistics_functor { 
typed_statistics_chunk::is_aggregated> chunk; - for (uint32_t i = t; i < num_chunks; i += block_size) { chunk.reduce(chunks[i]); } + for (uint32_t i = t; i < num_chunks; i += block_size) { + chunk.reduce(chunks[i]); + } chunk.has_minmax = (chunk.minimum_value <= chunk.maximum_value); chunk = block_reduce(chunk, storage); @@ -170,17 +172,16 @@ struct merge_group_statistics_functor { * @tparam T Type of object */ template -__device__ void cooperative_load(T &destination, const T *source = nullptr) +__device__ void cooperative_load(T& destination, const T* source = nullptr) { using load_type = std::conditional_t<((sizeof(T) % sizeof(uint32_t)) == 0), uint32_t, uint8_t>; if (source == nullptr) { for (auto i = threadIdx.x; i < (sizeof(T) / sizeof(load_type)); i += blockDim.x) { - reinterpret_cast(&destination)[i] = load_type{0}; + reinterpret_cast(&destination)[i] = load_type{0}; } } else { for (auto i = threadIdx.x; i < sizeof(T) / sizeof(load_type); i += blockDim.x) { - reinterpret_cast(&destination)[i] = - reinterpret_cast(source)[i]; + reinterpret_cast(&destination)[i] = reinterpret_cast(source)[i]; } } } @@ -195,7 +196,7 @@ __device__ void cooperative_load(T &destination, const T *source = nullptr) */ template __global__ void __launch_bounds__(block_size, 1) - gpu_calculate_group_statistics(statistics_chunk *chunks, const statistics_group *groups) + gpu_calculate_group_statistics(statistics_chunk* chunks, const statistics_group* groups) { __shared__ __align__(8) stats_state_s state; __shared__ block_reduce_storage storage; @@ -229,8 +230,8 @@ namespace detail { * @tparam IO File format for which statistics calculation is being done */ template -void calculate_group_statistics(statistics_chunk *chunks, - const statistics_group *groups, +void calculate_group_statistics(statistics_chunk* chunks, + const statistics_group* groups, uint32_t num_chunks, rmm::cuda_stream_view stream) { @@ -250,9 +251,9 @@ void calculate_group_statistics(statistics_chunk *chunks, */ template __global__ void __launch_bounds__(block_size, 1) - gpu_merge_group_statistics(statistics_chunk *chunks_out, - const statistics_chunk *chunks_in, - const statistics_merge_group *groups) + gpu_merge_group_statistics(statistics_chunk* chunks_out, + const statistics_chunk* chunks_in, + const statistics_merge_group* groups) { __shared__ __align__(8) merge_state_s state; __shared__ block_reduce_storage storage; @@ -284,9 +285,9 @@ __global__ void __launch_bounds__(block_size, 1) * @tparam IO File format for which statistics calculation is being done */ template -void merge_group_statistics(statistics_chunk *chunks_out, - const statistics_chunk *chunks_in, - const statistics_merge_group *groups, +void merge_group_statistics(statistics_chunk* chunks_out, + const statistics_chunk* chunks_in, + const statistics_merge_group* groups, uint32_t num_chunks, rmm::cuda_stream_view stream) { diff --git a/cpp/src/io/statistics/orc_column_statistics.cu b/cpp/src/io/statistics/orc_column_statistics.cu index ad8a05a56f5..9e0dc1c1b7d 100644 --- a/cpp/src/io/statistics/orc_column_statistics.cu +++ b/cpp/src/io/statistics/orc_column_statistics.cu @@ -26,14 +26,14 @@ namespace io { namespace detail { template <> -void merge_group_statistics(statistics_chunk *chunks_out, - const statistics_chunk *chunks_in, - const statistics_merge_group *groups, +void merge_group_statistics(statistics_chunk* chunks_out, + const statistics_chunk* chunks_in, + const statistics_merge_group* groups, uint32_t num_chunks, rmm::cuda_stream_view stream); template <> -void 
calculate_group_statistics(statistics_chunk *chunks, - const statistics_group *groups, +void calculate_group_statistics(statistics_chunk* chunks, + const statistics_group* groups, uint32_t num_chunks, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/statistics/parquet_column_statistics.cu b/cpp/src/io/statistics/parquet_column_statistics.cu index ad067cd4aad..525065576de 100644 --- a/cpp/src/io/statistics/parquet_column_statistics.cu +++ b/cpp/src/io/statistics/parquet_column_statistics.cu @@ -26,14 +26,14 @@ namespace io { namespace detail { template <> -void merge_group_statistics(statistics_chunk *chunks_out, - const statistics_chunk *chunks_in, - const statistics_merge_group *groups, +void merge_group_statistics(statistics_chunk* chunks_out, + const statistics_chunk* chunks_in, + const statistics_merge_group* groups, uint32_t num_chunks, rmm::cuda_stream_view stream); template <> -void calculate_group_statistics(statistics_chunk *chunks, - const statistics_group *groups, +void calculate_group_statistics(statistics_chunk* chunks, + const statistics_group* groups, uint32_t num_chunks, rmm::cuda_stream_view stream); diff --git a/cpp/src/io/statistics/statistics.cuh b/cpp/src/io/statistics/statistics.cuh index f7bf6e407c1..c60e4eebaa0 100644 --- a/cpp/src/io/statistics/statistics.cuh +++ b/cpp/src/io/statistics/statistics.cuh @@ -53,15 +53,15 @@ struct stats_column_desc { //!< nested columns int32_t ts_scale; //!< timestamp scale (>0: multiply by scale, <0: divide by -scale) - column_device_view *leaf_column; //!< Pointer to leaf column - column_device_view *parent_column; //!< Pointer to parent column. Is nullptr if not list type. + column_device_view* leaf_column; //!< Pointer to leaf column + column_device_view* parent_column; //!< Pointer to parent column. Is nullptr if not list type. }; struct string_stats { - const char *ptr; //!< ptr to character data + const char* ptr; //!< ptr to character data uint32_t length; //!< length of string - __host__ __device__ __forceinline__ volatile string_stats &operator=( - const string_view &val) volatile + __host__ __device__ __forceinline__ volatile string_stats& operator=( + const string_view& val) volatile { ptr = val.data(); length = val.size_bytes(); @@ -99,13 +99,13 @@ struct statistics_chunk { }; struct statistics_group { - const stats_column_desc *col; //!< Column information + const stats_column_desc* col; //!< Column information uint32_t start_row; //!< Start row of this group uint32_t num_rows; //!< Number of rows in group }; struct statistics_merge_group { - const stats_column_desc *col; //!< Column information + const stats_column_desc* col; //!< Column information uint32_t start_chunk; //!< Start chunk of this group uint32_t num_chunks; //!< Number of chunks in group }; diff --git a/cpp/src/io/utilities/block_utils.cuh b/cpp/src/io/utilities/block_utils.cuh index 759aa2517b6..2b4f69df10f 100644 --- a/cpp/src/io/utilities/block_utils.cuh +++ b/cpp/src/io/utilities/block_utils.cuh @@ -124,18 +124,18 @@ inline __device__ double Int128ToDouble_rn(uint64_t lo, int64_t hi) return sign * __fma_rn(__ll2double_rn(hi), 4294967296.0 * 4294967296.0, __ull2double_rn(lo)); } -inline __device__ uint32_t unaligned_load32(const uint8_t *p) +inline __device__ uint32_t unaligned_load32(const uint8_t* p) { uint32_t ofs = 3 & reinterpret_cast(p); - const uint32_t *p32 = reinterpret_cast(p - ofs); + const uint32_t* p32 = reinterpret_cast(p - ofs); uint32_t v = p32[0]; return (ofs) ? 
__funnelshift_r(v, p32[1], ofs * 8) : v; } -inline __device__ uint64_t unaligned_load64(const uint8_t *p) +inline __device__ uint64_t unaligned_load64(const uint8_t* p) { uint32_t ofs = 3 & reinterpret_cast(p); - const uint32_t *p32 = reinterpret_cast(p - ofs); + const uint32_t* p32 = reinterpret_cast(p - ofs); uint32_t v0 = p32[0]; uint32_t v1 = p32[1]; if (ofs) { @@ -146,10 +146,10 @@ inline __device__ uint64_t unaligned_load64(const uint8_t *p) } template -inline __device__ void memcpy_block(void *dstv, const void *srcv, uint32_t len, uint32_t t) +inline __device__ void memcpy_block(void* dstv, const void* srcv, uint32_t len, uint32_t t) { - uint8_t *dst = static_cast(dstv); - const uint8_t *src = static_cast(srcv); + uint8_t* dst = static_cast(dstv); + const uint8_t* src = static_cast(srcv); uint32_t dst_align_bytes, src_align_bytes, src_align_bits; // Align output to 32-bit dst_align_bytes = 3 & -reinterpret_cast(dst); @@ -166,7 +166,7 @@ inline __device__ void memcpy_block(void *dstv, const void *srcv, uint32_t len, src_align_bytes = (uint32_t)(3 & reinterpret_cast(src)); src_align_bits = src_align_bytes * 8; while (len >= 4) { - const uint32_t *src32 = reinterpret_cast(src - src_align_bytes); + const uint32_t* src32 = reinterpret_cast(src - src_align_bytes); uint32_t copy_cnt = min(len >> 2, nthreads); uint32_t v; if (t < copy_cnt) { @@ -174,7 +174,7 @@ inline __device__ void memcpy_block(void *dstv, const void *srcv, uint32_t len, if (src_align_bits != 0) { v = __funnelshift_r(v, src32[t + 1], src_align_bits); } } if (sync_before_store) { __syncthreads(); } - if (t < copy_cnt) { reinterpret_cast(dst)[t] = v; } + if (t < copy_cnt) { reinterpret_cast(dst)[t] = v; } src += copy_cnt * 4; dst += copy_cnt * 4; len -= copy_cnt * 4; diff --git a/cpp/src/io/utilities/column_utils.cuh b/cpp/src/io/utilities/column_utils.cuh index c08f42583ef..03ea041706a 100644 --- a/cpp/src/io/utilities/column_utils.cuh +++ b/cpp/src/io/utilities/column_utils.cuh @@ -49,7 +49,7 @@ namespace io { template rmm::device_uvector create_leaf_column_device_views( typename cudf::device_span col_desc, - const table_device_view &parent_table_device_view, + const table_device_view& parent_table_device_view, rmm::cuda_stream_view stream) { rmm::device_uvector leaf_column_views(parent_table_device_view.num_columns(), @@ -71,7 +71,7 @@ rmm::device_uvector create_leaf_column_device_views( : col.child(0); } // Store leaf_column to device storage - column_device_view *leaf_col_ptr = leaf_columns.begin() + index; + column_device_view* leaf_col_ptr = leaf_columns.begin() + index; *leaf_col_ptr = col; col_desc[index].leaf_column = leaf_col_ptr; }); diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index ac8deccd078..4b23d008344 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -32,7 +32,7 @@ namespace { */ class file_source : public datasource { public: - explicit file_source(const char *filepath) + explicit file_source(const char* filepath) : _file(filepath, O_RDONLY), _cufile_in(detail::make_cufile_input(filepath)) { } @@ -58,7 +58,7 @@ class file_source : public datasource { size_t device_read(size_t offset, size_t size, - uint8_t *dst, + uint8_t* dst, rmm::cuda_stream_view stream) override { CUDF_EXPECTS(supports_device_read(), "Device reads are not supported for this file."); @@ -84,7 +84,7 @@ class file_source : public datasource { */ class memory_mapped_source : public file_source { public: - explicit memory_mapped_source(const char 
*filepath, size_t offset, size_t size) + explicit memory_mapped_source(const char* filepath, size_t offset, size_t size) : file_source(filepath) { if (_file.size() != 0) map(_file.desc(), offset, size); @@ -103,17 +103,17 @@ class memory_mapped_source : public file_source { auto const read_size = std::min(size, _map_size - (offset - _map_offset)); return std::make_unique( - static_cast(_map_addr) + (offset - _map_offset), read_size); + static_cast(_map_addr) + (offset - _map_offset), read_size); } - size_t host_read(size_t offset, size_t size, uint8_t *dst) override + size_t host_read(size_t offset, size_t size, uint8_t* dst) override { CUDF_EXPECTS(offset >= _map_offset, "Requested offset is outside mapping"); // Clamp length to available data in the mapped region auto const read_size = std::min(size, _map_size - (offset - _map_offset)); - auto const src = static_cast(_map_addr) + (offset - _map_offset); + auto const src = static_cast(_map_addr) + (offset - _map_offset); std::memcpy(dst, src, read_size); return read_size; } @@ -139,7 +139,7 @@ class memory_mapped_source : public file_source { private: size_t _map_size = 0; size_t _map_offset = 0; - void *_map_addr = nullptr; + void* _map_addr = nullptr; }; /** @@ -150,7 +150,7 @@ class memory_mapped_source : public file_source { */ class direct_read_source : public file_source { public: - explicit direct_read_source(const char *filepath) : file_source(filepath) {} + explicit direct_read_source(const char* filepath) : file_source(filepath) {} std::unique_ptr host_read(size_t offset, size_t size) override { @@ -164,7 +164,7 @@ class direct_read_source : public file_source { return buffer::create(std::move(v)); } - size_t host_read(size_t offset, size_t size, uint8_t *dst) override + size_t host_read(size_t offset, size_t size, uint8_t* dst) override { lseek(_file.desc(), offset, SEEK_SET); @@ -186,9 +186,9 @@ class direct_read_source : public file_source { */ class user_datasource_wrapper : public datasource { public: - explicit user_datasource_wrapper(datasource *const source) : source(source) {} + explicit user_datasource_wrapper(datasource* const source) : source(source) {} - size_t host_read(size_t offset, size_t size, uint8_t *dst) override + size_t host_read(size_t offset, size_t size, uint8_t* dst) override { return source->host_read(offset, size, dst); } @@ -202,7 +202,7 @@ class user_datasource_wrapper : public datasource { size_t device_read(size_t offset, size_t size, - uint8_t *dst, + uint8_t* dst, rmm::cuda_stream_view stream) override { return source->device_read(offset, size, dst, stream); @@ -218,12 +218,12 @@ class user_datasource_wrapper : public datasource { size_t size() const override { return source->size(); } private: - datasource *const source; ///< A non-owning pointer to the user-implemented datasource + datasource* const source; ///< A non-owning pointer to the user-implemented datasource }; } // namespace -std::unique_ptr datasource::create(const std::string &filepath, +std::unique_ptr datasource::create(const std::string& filepath, size_t offset, size_t size) { @@ -237,14 +237,14 @@ std::unique_ptr datasource::create(const std::string &filepath, return std::make_unique(filepath.c_str(), offset, size); } -std::unique_ptr datasource::create(host_buffer const &buffer) +std::unique_ptr datasource::create(host_buffer const& buffer) { // Use Arrow IO buffer class for zero-copy reads of host memory return std::make_unique(std::make_shared( - reinterpret_cast(buffer.data), buffer.size)); + reinterpret_cast(buffer.data), 
buffer.size)); } -std::unique_ptr datasource::create(datasource *source) +std::unique_ptr datasource::create(datasource* source) { // instantiate a wrapper that forwards the calls to the user implementation return std::make_unique(source); diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index abf3a3fdef0..b5fb9fb51bc 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -32,13 +32,13 @@ size_t get_file_size(int file_descriptor) return static_cast(st.st_size); } -file_wrapper::file_wrapper(std::string const &filepath, int flags) +file_wrapper::file_wrapper(std::string const& filepath, int flags) : fd(open(filepath.c_str(), flags)), _size{get_file_size(fd)} { CUDF_EXPECTS(fd != -1, "Cannot open file " + filepath); } -file_wrapper::file_wrapper(std::string const &filepath, int flags, mode_t mode) +file_wrapper::file_wrapper(std::string const& filepath, int flags, mode_t mode) : fd(open(filepath.c_str(), flags, mode)), _size{get_file_size(fd)} { CUDF_EXPECTS(fd != -1, "Cannot open file " + filepath); @@ -46,7 +46,7 @@ file_wrapper::file_wrapper(std::string const &filepath, int flags, mode_t mode) file_wrapper::~file_wrapper() { close(fd); } -std::string getenv_or(std::string const &env_var_name, std::string const &default_val) +std::string getenv_or(std::string const& env_var_name, std::string const& default_val) { auto const env_val = std::getenv(env_var_name.c_str()); return (env_val == nullptr) ? default_val : std::string(env_val); @@ -81,7 +81,7 @@ cufile_config::cufile_config() : policy{getenv_or("LIBCUDF_CUFILE_POLICY", defau } } } -cufile_config const *cufile_config::instance() +cufile_config const* cufile_config::instance() { static cufile_config _instance; return &_instance; @@ -94,18 +94,18 @@ class cufile_shim { private: cufile_shim(); - void *cf_lib = nullptr; - decltype(cuFileDriverOpen) *driver_open = nullptr; - decltype(cuFileDriverClose) *driver_close = nullptr; + void* cf_lib = nullptr; + decltype(cuFileDriverOpen)* driver_open = nullptr; + decltype(cuFileDriverClose)* driver_close = nullptr; std::unique_ptr init_error; auto is_valid() const noexcept { return init_error == nullptr; } public: - cufile_shim(cufile_shim const &) = delete; - cufile_shim &operator=(cufile_shim const &) = delete; + cufile_shim(cufile_shim const&) = delete; + cufile_shim& operator=(cufile_shim const&) = delete; - static cufile_shim const *instance(); + static cufile_shim const* instance(); ~cufile_shim() { @@ -113,10 +113,10 @@ class cufile_shim { dlclose(cf_lib); } - decltype(cuFileHandleRegister) *handle_register = nullptr; - decltype(cuFileHandleDeregister) *handle_deregister = nullptr; - decltype(cuFileRead) *read = nullptr; - decltype(cuFileWrite) *write = nullptr; + decltype(cuFileHandleRegister)* handle_register = nullptr; + decltype(cuFileHandleDeregister)* handle_deregister = nullptr; + decltype(cuFileRead)* read = nullptr; + decltype(cuFileWrite)* write = nullptr; }; cufile_shim::cufile_shim() @@ -140,12 +140,12 @@ cufile_shim::cufile_shim() CUDF_EXPECTS(write != nullptr, "could not find cuFile cuFileWrite symbol"); CUDF_EXPECTS(driver_open().err == CU_FILE_SUCCESS, "Failed to initialize cuFile driver"); - } catch (cudf::logic_error const &err) { + } catch (cudf::logic_error const& err) { init_error = std::make_unique(err); } } -cufile_shim const *cufile_shim::instance() +cufile_shim const* cufile_shim::instance() { static cufile_shim _instance; // Defer throwing to avoid repeated 
attempts to load the library @@ -165,7 +165,7 @@ void cufile_registered_file::register_handle() cufile_registered_file::~cufile_registered_file() { shim->handle_deregister(cf_handle); } -cufile_input_impl::cufile_input_impl(std::string const &filepath) +cufile_input_impl::cufile_input_impl(std::string const& filepath) : shim{cufile_shim::instance()}, cf_file(shim, filepath, O_RDONLY | O_DIRECT) { } @@ -183,7 +183,7 @@ std::unique_ptr cufile_input_impl::read(size_t offset, size_t cufile_input_impl::read(size_t offset, size_t size, - uint8_t *dst, + uint8_t* dst, rmm::cuda_stream_view stream) { CUDF_EXPECTS(shim->read(cf_file.handle(), dst, size, offset, 0) != -1, @@ -192,19 +192,19 @@ size_t cufile_input_impl::read(size_t offset, return size; } -cufile_output_impl::cufile_output_impl(std::string const &filepath) +cufile_output_impl::cufile_output_impl(std::string const& filepath) : shim{cufile_shim::instance()}, cf_file(shim, filepath, O_CREAT | O_RDWR | O_DIRECT, 0664) { } -void cufile_output_impl::write(void const *data, size_t offset, size_t size) +void cufile_output_impl::write(void const* data, size_t offset, size_t size) { CUDF_EXPECTS(shim->write(cf_file.handle(), data, size, offset, 0) != -1, "cuFile error writing to a file"); } #endif -std::unique_ptr make_cufile_input(std::string const &filepath) +std::unique_ptr make_cufile_input(std::string const& filepath) { #ifdef CUFILE_FOUND if (cufile_config::instance()->is_enabled()) { @@ -218,7 +218,7 @@ std::unique_ptr make_cufile_input(std::string const &filepath return nullptr; } -std::unique_ptr make_cufile_output(std::string const &filepath) +std::unique_ptr make_cufile_output(std::string const& filepath) { #ifdef CUFILE_FOUND if (cufile_config::instance()->is_enabled()) { diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp index 8a742076338..e92191095e3 100644 --- a/cpp/src/io/utilities/file_io_utilities.hpp +++ b/cpp/src/io/utilities/file_io_utilities.hpp @@ -40,8 +40,8 @@ class file_wrapper { size_t _size; public: - explicit file_wrapper(std::string const &filepath, int flags); - explicit file_wrapper(std::string const &filepath, int flags, mode_t mode); + explicit file_wrapper(std::string const& filepath, int flags); + explicit file_wrapper(std::string const& filepath, int flags, mode_t mode); ~file_wrapper(); auto size() const { return _size; } auto desc() const { return fd; } @@ -105,7 +105,7 @@ class cufile_input : public cufile_io_base { * * @return The number of bytes read */ - virtual size_t read(size_t offset, size_t size, uint8_t *dst, rmm::cuda_stream_view stream) = 0; + virtual size_t read(size_t offset, size_t size, uint8_t* dst, rmm::cuda_stream_view stream) = 0; }; /** @@ -122,7 +122,7 @@ class cufile_output : public cufile_io_base { * @param offset Number of bytes from the start * @param size Number of bytes to write */ - virtual void write(void const *data, size_t offset, size_t size) = 0; + virtual void write(void const* data, size_t offset, size_t size) = 0; }; #ifdef CUFILE_FOUND @@ -152,7 +152,7 @@ class cufile_config { */ bool is_required() const { return policy == "ALWAYS"; } - static cufile_config const *instance(); + static cufile_config const* instance(); }; /** @@ -162,14 +162,14 @@ struct cufile_registered_file { void register_handle(); public: - cufile_registered_file(cufile_shim const *shim, std::string const &filepath, int flags) + cufile_registered_file(cufile_shim const* shim, std::string const& filepath, int flags) : _file(filepath, flags), 
shim{shim} { register_handle(); } - cufile_registered_file(cufile_shim const *shim, - std::string const &filepath, + cufile_registered_file(cufile_shim const* shim, + std::string const& filepath, int flags, mode_t mode) : _file(filepath, flags, mode), shim{shim} @@ -177,14 +177,14 @@ struct cufile_registered_file { register_handle(); } - auto const &handle() const noexcept { return cf_handle; } + auto const& handle() const noexcept { return cf_handle; } ~cufile_registered_file(); private: file_wrapper const _file; CUfileHandle_t cf_handle = nullptr; - cufile_shim const *shim = nullptr; + cufile_shim const* shim = nullptr; }; /** @@ -194,16 +194,16 @@ struct cufile_registered_file { */ class cufile_input_impl final : public cufile_input { public: - cufile_input_impl(std::string const &filepath); + cufile_input_impl(std::string const& filepath); std::unique_ptr read(size_t offset, size_t size, rmm::cuda_stream_view stream) override; - size_t read(size_t offset, size_t size, uint8_t *dst, rmm::cuda_stream_view stream) override; + size_t read(size_t offset, size_t size, uint8_t* dst, rmm::cuda_stream_view stream) override; private: - cufile_shim const *shim = nullptr; + cufile_shim const* shim = nullptr; cufile_registered_file const cf_file; }; @@ -214,12 +214,12 @@ class cufile_input_impl final : public cufile_input { */ class cufile_output_impl final : public cufile_output { public: - cufile_output_impl(std::string const &filepath); + cufile_output_impl(std::string const& filepath); - void write(void const *data, size_t offset, size_t size) override; + void write(void const* data, size_t offset, size_t size) override; private: - cufile_shim const *shim = nullptr; + cufile_shim const* shim = nullptr; cufile_registered_file const cf_file; }; #else @@ -233,7 +233,7 @@ class cufile_input_impl final : public cufile_input { CUDF_FAIL("Only used to compile without cufile library, should not be called"); } - size_t read(size_t offset, size_t size, uint8_t *dst, rmm::cuda_stream_view stream) override + size_t read(size_t offset, size_t size, uint8_t* dst, rmm::cuda_stream_view stream) override { CUDF_FAIL("Only used to compile without cufile library, should not be called"); } @@ -241,7 +241,7 @@ class cufile_input_impl final : public cufile_input { class cufile_output_impl final : public cufile_output { public: - void write(void const *data, size_t offset, size_t size) override + void write(void const* data, size_t offset, size_t size) override { CUDF_FAIL("Only used to compile without cufile library, should not be called"); } @@ -254,7 +254,7 @@ class cufile_output_impl final : public cufile_output { * Returns a null pointer if an exception occurs in the `cufile_input_impl` constructor, or if the * cuFile library is not installed. */ -std::unique_ptr make_cufile_input(std::string const &filepath); +std::unique_ptr make_cufile_input(std::string const& filepath); /** * @brief Creates a `cufile_output_impl` object @@ -262,7 +262,7 @@ std::unique_ptr make_cufile_input(std::string const &filepath * Returns a null pointer if an exception occurs in the `cufile_output_impl` constructor, or if the * cuFile library is not installed. 
*/ -std::unique_ptr make_cufile_output(std::string const &filepath); +std::unique_ptr make_cufile_output(std::string const& filepath); } // namespace detail } // namespace io diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index 8758042241f..147e53ba32b 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -38,8 +38,8 @@ class hostdevice_vector { hostdevice_vector() {} - hostdevice_vector(hostdevice_vector &&v) { move(std::move(v)); } - hostdevice_vector &operator=(hostdevice_vector &&v) + hostdevice_vector(hostdevice_vector&& v) { move(std::move(v)); } + hostdevice_vector& operator=(hostdevice_vector&& v) { move(std::move(v)); return *this; @@ -70,7 +70,7 @@ class hostdevice_vector { } } - bool insert(const T &data) + bool insert(const T& data) { if (num_elements < max_elements) { h_data[num_elements] = data; @@ -84,12 +84,12 @@ class hostdevice_vector { size_t size() const noexcept { return num_elements; } size_t memory_size() const noexcept { return sizeof(T) * num_elements; } - T &operator[](size_t i) const { return h_data[i]; } - T *host_ptr(size_t offset = 0) const { return h_data + offset; } - T *device_ptr(size_t offset = 0) { return reinterpret_cast(d_data.data()) + offset; } - T const *device_ptr(size_t offset = 0) const + T& operator[](size_t i) const { return h_data[i]; } + T* host_ptr(size_t offset = 0) const { return h_data + offset; } + T* device_ptr(size_t offset = 0) { return reinterpret_cast(d_data.data()) + offset; } + T const* device_ptr(size_t offset = 0) const { - return reinterpret_cast(d_data.data()) + offset; + return reinterpret_cast(d_data.data()) + offset; } operator cudf::device_span() { return {device_ptr(), max_elements}; } @@ -113,7 +113,7 @@ class hostdevice_vector { } private: - void move(hostdevice_vector &&v) + void move(hostdevice_vector&& v) { stream = v.stream; max_elements = v.max_elements; @@ -129,7 +129,7 @@ class hostdevice_vector { rmm::cuda_stream_view stream{}; size_t max_elements{}; size_t num_elements{}; - T *h_data{}; + T* h_data{}; rmm::device_buffer d_data{}; }; @@ -175,12 +175,12 @@ class hostdevice_2dvector { auto size() const noexcept { return _size; } - T *base_host_ptr(size_t offset = 0) { return _data.host_ptr(offset); } - T *base_device_ptr(size_t offset = 0) { return _data.device_ptr(offset); } + T* base_host_ptr(size_t offset = 0) { return _data.host_ptr(offset); } + T* base_device_ptr(size_t offset = 0) { return _data.device_ptr(offset); } - T const *base_host_ptr(size_t offset = 0) const { return _data.host_ptr(offset); } + T const* base_host_ptr(size_t offset = 0) const { return _data.host_ptr(offset); } - T const *base_device_ptr(size_t offset = 0) const { return _data.device_ptr(offset); } + T const* base_device_ptr(size_t offset = 0) const { return _data.device_ptr(offset); } size_t memory_size() const noexcept { return _data.memory_size(); } diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh index c7eae48cbbc..a6b4978aeab 100644 --- a/cpp/src/io/utilities/parsing_utils.cuh +++ b/cpp/src/io/utilities/parsing_utils.cuh @@ -335,7 +335,9 @@ __device__ __inline__ cudf::size_type* infer_integral_field_counter(char const* // Remove preceding zeros if (digit_count >= (sizeof(int64_max_abs) - 1)) { // Trim zeros at the beginning of raw_data - while (*data_begin == '0' && (data_begin < data_end)) { data_begin++; } + while (*data_begin == '0' && (data_begin < data_end)) { + data_begin++; 
+ } } digit_count = data_end - data_begin; diff --git a/cpp/src/io/utilities/trie.cu b/cpp/src/io/utilities/trie.cu index 82d8f5e8336..bf03d6a6a89 100644 --- a/cpp/src/io/utilities/trie.cu +++ b/cpp/src/io/utilities/trie.cu @@ -33,7 +33,7 @@ namespace cudf { namespace detail { -rmm::device_uvector create_serialized_trie(const std::vector &keys, +rmm::device_uvector create_serialized_trie(const std::vector& keys, rmm::cuda_stream_view stream) { static constexpr int alphabet_size = std::numeric_limits::max() + 1; @@ -47,8 +47,8 @@ rmm::device_uvector create_serialized_trie(const std::vectorchildren[character] == nullptr) @@ -61,9 +61,9 @@ rmm::device_uvector create_serialized_trie(const std::vector 0 && is_white(no_comments[stop])) { stop--; } + while (stop > 0 && is_white(no_comments[stop])) { + stop--; + } CUDF_EXPECTS(stop != 0 || !is_white(no_comments[0]), "No CUDA device function name found in the input CUDA code.\n"); start = stop; - while (start > 0 && !is_white(no_comments[start])) { start--; } + while (start > 0 && !is_white(no_comments[start])) { + start--; + } start++; stop++; CUDF_EXPECTS(start < stop, "No CUDA device function name found in the input CUDA code.\n"); diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index dfe3231e897..66bc508f1ce 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -33,14 +33,14 @@ namespace cudf { namespace detail { std::pair, std::unique_ptr
<table>> get_empty_joined_table( - table_view const &probe, table_view const &build) + table_view const& probe, table_view const& build) { std::unique_ptr<table>
 empty_probe = empty_like(probe); std::unique_ptr<table>
empty_build = empty_like(build); return std::make_pair(std::move(empty_probe), std::move(empty_build)); } -VectorPair concatenate_vector_pairs(VectorPair &a, VectorPair &b, rmm::cuda_stream_view stream) +VectorPair concatenate_vector_pairs(VectorPair& a, VectorPair& b, rmm::cuda_stream_view stream) { CUDF_EXPECTS((a.first->size() == a.second->size()), "Mismatch between sizes of vectors in vector pair"); @@ -91,11 +91,11 @@ struct valid_range { std::pair>, std::unique_ptr>> get_left_join_indices_complement( - std::unique_ptr> &right_indices, + std::unique_ptr>& right_indices, size_type left_table_row_count, size_type right_table_row_count, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { // Get array of indices that do not appear in right_indices @@ -169,8 +169,8 @@ get_left_join_indices_complement( * * @return Built hash table. */ -std::unique_ptr> build_join_hash_table( - cudf::table_view const &build, null_equality compare_nulls, rmm::cuda_stream_view stream) +std::unique_ptr> build_join_hash_table( + cudf::table_view const& build, null_equality compare_nulls, rmm::cuda_stream_view stream) { auto build_device_table = cudf::table_device_view::create(build, stream); @@ -198,7 +198,7 @@ std::unique_ptr> build_join_ *hash_table, hash_build, build_table_num_rows, - static_cast(row_bitmask.data()), + static_cast(row_bitmask.data()), failure.data()); // Check error code from the kernel if (failure.value(stream) == 1) { CUDF_FAIL("Hash Table insert failure."); } @@ -228,11 +228,11 @@ std::pair>, std::unique_ptr>> probe_join_hash_table(cudf::table_device_view build_table, cudf::table_device_view probe_table, - multimap_type const &hash_table, + multimap_type const& hash_table, null_equality compare_nulls, std::optional output_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { // Use the output size directly if provided. 
Otherwise, compute the exact output size constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN) @@ -308,10 +308,10 @@ probe_join_hash_table(cudf::table_device_view build_table, */ std::size_t get_full_join_size(cudf::table_device_view build_table, cudf::table_device_view probe_table, - multimap_type const &hash_table, + multimap_type const& hash_table, null_equality compare_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { std::size_t join_size = compute_join_output_size( build_table, probe_table, hash_table, compare_nulls, stream); @@ -383,8 +383,8 @@ std::size_t get_full_join_size(cudf::table_device_view build_table, return join_size + left_join_complement_size; } -std::unique_ptr combine_table_pair(std::unique_ptr &&left, - std::unique_ptr &&right) +std::unique_ptr combine_table_pair(std::unique_ptr&& left, + std::unique_ptr&& right) { auto joined_cols = left->release(); auto right_cols = right->release(); @@ -398,7 +398,7 @@ std::unique_ptr combine_table_pair(std::unique_ptr &&l hash_join::hash_join_impl::~hash_join_impl() = default; -hash_join::hash_join_impl::hash_join_impl(cudf::table_view const &build, +hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, null_equality compare_nulls, rmm::cuda_stream_view stream) : _hash_table(nullptr) @@ -421,11 +421,11 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const &build, std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::inner_join(cudf::table_view const &probe, +hash_join::hash_join_impl::inner_join(cudf::table_view const& probe, null_equality compare_nulls, std::optional output_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) const + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); return compute_hash_join( @@ -434,11 +434,11 @@ hash_join::hash_join_impl::inner_join(cudf::table_view const &probe, std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::left_join(cudf::table_view const &probe, +hash_join::hash_join_impl::left_join(cudf::table_view const& probe, null_equality compare_nulls, std::optional output_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) const + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); return compute_hash_join( @@ -447,18 +447,18 @@ hash_join::hash_join_impl::left_join(cudf::table_view const &probe, std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::full_join(cudf::table_view const &probe, +hash_join::hash_join_impl::full_join(cudf::table_view const& probe, null_equality compare_nulls, std::optional output_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) const + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); return compute_hash_join( probe, compare_nulls, output_size, stream, mr); } -std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const &probe, +std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const& probe, null_equality compare_nulls, rmm::cuda_stream_view stream) const { @@ -472,7 +472,7 @@ std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const &p *build_table, *probe_table, *_hash_table, compare_nulls, stream); } -std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const &probe, +std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const& probe, null_equality compare_nulls, rmm::cuda_stream_view stream) const { @@ 
-488,10 +488,10 @@ std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const &pr *build_table, *probe_table, *_hash_table, compare_nulls, stream); } -std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const &probe, +std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const& probe, null_equality compare_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) const + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); @@ -507,11 +507,11 @@ std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const &pr template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe, +hash_join::hash_join_impl::compute_hash_join(cudf::table_view const& probe, null_equality compare_nulls, std::optional output_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) const + rmm::mr::device_memory_resource* mr) const { CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty"); CUDF_EXPECTS(probe.num_rows() < cudf::detail::MAX_JOIN_SIZE, @@ -533,7 +533,7 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe, std::cend(_build), std::cbegin(flattened_probe_table), std::cend(flattened_probe_table), - [](const auto &b, const auto &p) { return b.type() == p.type(); }), + [](const auto& b, const auto& p) { return b.type() == p.type(); }), "Mismatch in joining column data types"); return probe_join_indices( @@ -543,11 +543,11 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe, template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe, +hash_join::hash_join_impl::probe_join_indices(cudf::table_view const& probe, null_equality compare_nulls, std::optional output_size, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) const + rmm::mr::device_memory_resource* mr) const { // Trivial left join case - exit early if (!_hash_table && JoinKind != cudf::detail::join_kind::INNER_JOIN) { diff --git a/cpp/src/lists/explode.cu b/cpp/src/lists/explode.cu index 3ce0f91fd71..55a6523ebdd 100644 --- a/cpp/src/lists/explode.cu +++ b/cpp/src/lists/explode.cu @@ -251,9 +251,9 @@ std::unique_ptr
explode_outer(table_view const& input_table, } } if (null_or_empty[idx]) { - auto invalid_index = null_or_empty_offset_p[idx] == 0 - ? offsets[idx] - : offsets[idx] + null_or_empty_offset_p[idx] - 1; + auto invalid_index = null_or_empty_offset_p[idx] == 0 + ? offsets[idx] + : offsets[idx] + null_or_empty_offset_p[idx] - 1; gather_map_p[invalid_index] = idx; explode_col_gather_map_p[invalid_index] = InvalidIndex; diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu index c99b366c2dd..5baef2c7639 100644 --- a/cpp/src/reductions/minmax.cu +++ b/cpp/src/reductions/minmax.cu @@ -103,7 +103,7 @@ rmm::device_scalar reduce_device(InputIterator d_in, template struct minmax_binary_op : public thrust::binary_function, minmax_pair, minmax_pair> { - __device__ minmax_pair operator()(minmax_pair const &lhs, minmax_pair const &rhs) const + __device__ minmax_pair operator()(minmax_pair const& lhs, minmax_pair const& rhs) const { return minmax_pair{thrust::min(lhs.min_val, rhs.min_val), thrust::max(lhs.max_val, rhs.max_val)}; @@ -148,7 +148,7 @@ struct minmax_functor { } template - auto reduce(column_view const &col, rmm::cuda_stream_view stream) + auto reduce(column_view const& col, rmm::cuda_stream_view stream) { auto device_col = column_device_view::create(col, stream); // compute minimum and maximum values @@ -174,16 +174,16 @@ struct minmax_functor { *max_data = result->max_val; } - ResultType *result; - T *min_data; - T *max_data; + ResultType* result; + T* min_data; + T* max_data; }; template () and !std::is_same::value and - !cudf::is_dictionary()> * = nullptr> + !cudf::is_dictionary()>* = nullptr> std::pair, std::unique_ptr> operator()( - cudf::column_view const &col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) + cudf::column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { // compute minimum and maximum values auto dev_result = reduce(col, stream); @@ -200,9 +200,9 @@ struct minmax_functor { /** * @brief Specialization for strings column. */ - template ::value> * = nullptr> + template ::value>* = nullptr> std::pair, std::unique_ptr> operator()( - cudf::column_view const &col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) + cudf::column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { // compute minimum and maximum values auto dev_result = reduce(col, stream); @@ -219,9 +219,9 @@ struct minmax_functor { /** * @brief Specialization for dictionary column. 
*/ - template ()> * = nullptr> + template ()>* = nullptr> std::pair, std::unique_ptr> operator()( - cudf::column_view const &col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) + cudf::column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { // compute minimum and maximum values auto dev_result = reduce(col, stream); @@ -236,9 +236,9 @@ struct minmax_functor { get_element(keys, static_cast(host_result.max_val), stream, mr)}; } - template ()> * = nullptr> + template ()>* = nullptr> std::pair, std::unique_ptr> operator()( - cudf::column_view const &, rmm::cuda_stream_view, rmm::mr::device_memory_resource *) + cudf::column_view const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { CUDF_FAIL("type not supported for minmax() operation"); } @@ -247,7 +247,7 @@ struct minmax_functor { } // namespace std::pair, std::unique_ptr> minmax( - cudf::column_view const &col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) + cudf::column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { if (col.null_count() == col.size()) { // this handles empty and all-null columns @@ -264,7 +264,7 @@ std::pair, std::unique_ptr> minmax( * @copydoc cudf::minmax */ std::pair, std::unique_ptr> minmax( - const column_view &col, rmm::mr::device_memory_resource *mr) + const column_view& col, rmm::mr::device_memory_resource* mr) { return detail::minmax(col, rmm::cuda_stream_default, mr); } diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 00539b6d7a5..a8117373ca4 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -32,19 +32,19 @@ namespace detail { struct reduce_dispatch_functor { column_view const col; data_type output_dtype; - rmm::mr::device_memory_resource *mr; + rmm::mr::device_memory_resource* mr; rmm::cuda_stream_view stream; - reduce_dispatch_functor(column_view const &col, + reduce_dispatch_functor(column_view const& col, data_type output_dtype, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) : col(col), output_dtype(output_dtype), mr(mr), stream(stream) { } template - std::unique_ptr operator()(std::unique_ptr const &agg) + std::unique_ptr operator()(std::unique_ptr const& agg) { switch (k) { case aggregation::SUM: return reduction::sum(col, output_dtype, stream, mr); break; @@ -58,11 +58,11 @@ struct reduce_dispatch_functor { break; case aggregation::MEAN: return reduction::mean(col, output_dtype, stream, mr); break; case aggregation::VARIANCE: { - auto var_agg = dynamic_cast(agg.get()); + auto var_agg = dynamic_cast(agg.get()); return reduction::variance(col, output_dtype, var_agg->_ddof, stream, mr); } break; case aggregation::STD: { - auto var_agg = dynamic_cast(agg.get()); + auto var_agg = dynamic_cast(agg.get()); return reduction::standard_deviation(col, output_dtype, var_agg->_ddof, stream, mr); } break; case aggregation::MEDIAN: { @@ -73,7 +73,7 @@ struct reduce_dispatch_functor { return get_element(*col_ptr, 0, stream, mr); } break; case aggregation::QUANTILE: { - auto quantile_agg = dynamic_cast(agg.get()); + auto quantile_agg = dynamic_cast(agg.get()); CUDF_EXPECTS(quantile_agg->_quantiles.size() == 1, "Reduction quantile accepts only one quantile value"); auto sorted_indices = sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream, mr); @@ -89,7 +89,7 @@ struct reduce_dispatch_functor { return get_element(*col_ptr, 0, stream, mr); } break; 
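// A minimal, self-contained sketch with hypothetical names; it is not part of
// this patch. The surrounding reduce_dispatch_functor switches on the
// aggregation kind and then downcasts the type-erased aggregation object to
// its concrete derived class to read per-operator parameters (ddof,
// quantiles, _n, ...). The downcast pattern in isolation:
//
//   #include <memory>
//   struct agg_base { virtual ~agg_base() = default; };   // hypothetical
//   struct var_agg final : agg_base { int ddof = 1; };    // hypothetical
//
//   int read_ddof(std::unique_ptr<agg_base> const& agg)
//   {
//     // dynamic_cast returns nullptr when the runtime type does not match,
//     // i.e. if the kind tag and the stored object ever disagreed.
//     auto const* p = dynamic_cast<var_agg const*>(agg.get());
//     return p != nullptr ? p->ddof : 1;
//   }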
case aggregation::NUNIQUE: { - auto nunique_agg = dynamic_cast(agg.get()); + auto nunique_agg = dynamic_cast(agg.get()); return make_fixed_width_scalar( detail::distinct_count( col, nunique_agg->_null_handling, nan_policy::NAN_IS_VALID, stream), @@ -97,7 +97,7 @@ struct reduce_dispatch_functor { mr); } break; case aggregation::NTH_ELEMENT: { - auto nth_agg = dynamic_cast(agg.get()); + auto nth_agg = dynamic_cast(agg.get()); return reduction::nth_element(col, nth_agg->_n, nth_agg->_null_handling, stream, mr); } break; default: CUDF_FAIL("Unsupported reduction operator"); @@ -106,11 +106,11 @@ struct reduce_dispatch_functor { }; std::unique_ptr reduce( - column_view const &col, - std::unique_ptr const &agg, + column_view const& col, + std::unique_ptr const& agg, data_type output_dtype, rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { std::unique_ptr result = make_default_constructed_scalar(output_dtype, stream, mr); result->set_valid_async(false, stream); @@ -124,10 +124,10 @@ std::unique_ptr reduce( } } // namespace detail -std::unique_ptr reduce(column_view const &col, - std::unique_ptr const &agg, +std::unique_ptr reduce(column_view const& col, + std::unique_ptr const& agg, data_type output_dtype, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::reduce(col, agg, output_dtype, rmm::cuda_stream_default, mr); diff --git a/cpp/src/reshape/tile.cu b/cpp/src/reshape/tile.cu index 2f19c8158c5..fa12fabffdc 100644 --- a/cpp/src/reshape/tile.cu +++ b/cpp/src/reshape/tile.cu @@ -40,10 +40,10 @@ struct tile_functor { } // anonymous namespace namespace detail { -std::unique_ptr
<table> tile(const table_view &in, +std::unique_ptr<table>
 tile(const table_view& in, size_type count, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(count >= 0, "Count cannot be negative"); @@ -59,9 +59,9 @@ std::unique_ptr<table>
 tile(const table_view &in, } } // namespace detail -std::unique_ptr<table>
 tile(const table_view &in, +std::unique_ptr<table>
tile(const table_view& in, size_type count, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); return detail::tile(in, count, rmm::cuda_stream_default, mr); diff --git a/cpp/src/rolling/rolling_detail.hpp b/cpp/src/rolling/rolling_detail.hpp index bd64cc39f47..5fabcf5b14e 100644 --- a/cpp/src/rolling/rolling_detail.hpp +++ b/cpp/src/rolling/rolling_detail.hpp @@ -29,30 +29,30 @@ namespace detail { // store functor template struct rolling_store_output_functor { - CUDA_HOST_DEVICE_CALLABLE void operator()(T &out, T &val, size_type count) { out = val; } + CUDA_HOST_DEVICE_CALLABLE void operator()(T& out, T& val, size_type count) { out = val; } }; // Specialization for MEAN template struct rolling_store_output_functor<_T, true> { // SFINAE for non-bool types - template () || cudf::is_timestamp())> * = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(T &out, T &val, size_type count) + template () || cudf::is_timestamp())>* = nullptr> + CUDA_HOST_DEVICE_CALLABLE void operator()(T& out, T& val, size_type count) { out = val / count; } // SFINAE for bool type - template ()> * = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(T &out, T &val, size_type count) + template ()>* = nullptr> + CUDA_HOST_DEVICE_CALLABLE void operator()(T& out, T& val, size_type count) { out = static_cast(val) / count; } // SFINAE for timestamp types - template ()> * = nullptr> - CUDA_HOST_DEVICE_CALLABLE void operator()(T &out, T &val, size_type count) + template ()>* = nullptr> + CUDA_HOST_DEVICE_CALLABLE void operator()(T& out, T& val, size_type count) { out = static_cast(val.time_since_epoch() / count); } diff --git a/cpp/src/rolling/rolling_jit_detail.hpp b/cpp/src/rolling/rolling_jit_detail.hpp index bba82f4d669..7fe9b68103e 100644 --- a/cpp/src/rolling/rolling_jit_detail.hpp +++ b/cpp/src/rolling/rolling_jit_detail.hpp @@ -30,8 +30,8 @@ T minimum(T a, T b) } struct preceding_window_wrapper { - const cudf::size_type *d_group_offsets; - const cudf::size_type *d_group_labels; + const cudf::size_type* d_group_offsets; + const cudf::size_type* d_group_labels; cudf::size_type preceding_window; cudf::size_type operator[](cudf::size_type idx) @@ -43,8 +43,8 @@ struct preceding_window_wrapper { }; struct following_window_wrapper { - const cudf::size_type *d_group_offsets; - const cudf::size_type *d_group_labels; + const cudf::size_type* d_group_offsets; + const cudf::size_type* d_group_labels; cudf::size_type following_window; cudf::size_type operator[](cudf::size_type idx) diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index 66548ac1e73..c8a908e44cd 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -101,7 +101,7 @@ template void tie_break_ranks_transform(cudf::device_span dense_rank_sorted, TieIterator tie_iter, - column_view const &sorted_order_view, + column_view const& sorted_order_view, outputIterator rank_iter, TieBreaker tie_breaker, Transformer transformer, @@ -227,18 +227,18 @@ void rank_average(cudf::device_span group_keys, } // anonymous namespace -std::unique_ptr rank(column_view const &input, +std::unique_ptr rank(column_view const& input, rank_method method, order column_order, null_policy null_handling, null_order null_precedence, bool percentage, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { - data_type const output_type = (percentage or method == rank_method::AVERAGE) - ? 
data_type(type_id::FLOAT64) - : data_type(type_to_id()); + data_type const output_type = (percentage or method == rank_method::AVERAGE) + ? data_type(type_id::FLOAT64) + : data_type(type_to_id()); std::unique_ptr rank_column = [&null_handling, &output_type, &input, &stream, &mr] { // na_option=keep assign NA to NA values if (null_handling == null_policy::EXCLUDE) @@ -329,13 +329,13 @@ std::unique_ptr rank(column_view const &input, } } // namespace detail -std::unique_ptr rank(column_view const &input, +std::unique_ptr rank(column_view const& input, rank_method method, order column_order, null_policy null_handling, null_order null_precedence, bool percentage, - rmm::mr::device_memory_resource *mr) + rmm::mr::device_memory_resource* mr) { return detail::rank(input, method, diff --git a/cpp/src/strings/combine/join.cu b/cpp/src/strings/combine/join.cu index 5a69ac7b3d5..ccbedf99bc2 100644 --- a/cpp/src/strings/combine/join.cu +++ b/cpp/src/strings/combine/join.cu @@ -94,9 +94,9 @@ std::unique_ptr join_strings(strings_column_view const& strings, // only one entry so it is either all valid or all null auto const null_count = static_cast(strings.null_count() == strings_count && !narep.is_valid()); - auto null_mask = null_count - ? cudf::detail::create_null_mask(1, cudf::mask_state::ALL_NULL, stream, mr) - : rmm::device_buffer{0, stream, mr}; + auto null_mask = null_count + ? cudf::detail::create_null_mask(1, cudf::mask_state::ALL_NULL, stream, mr) + : rmm::device_buffer{0, stream, mr}; auto chars_column = create_chars_child_column(bytes, stream, mr); auto d_chars = chars_column->mutable_view().data(); thrust::for_each_n( diff --git a/cpp/src/strings/combine/join_list_elements.cu b/cpp/src/strings/combine/join_list_elements.cu index c012663794b..2ef27759124 100644 --- a/cpp/src/strings/combine/join_list_elements.cu +++ b/cpp/src/strings/combine/join_list_elements.cu @@ -135,8 +135,8 @@ struct compute_size_and_concatenate_fn { struct scalar_separator_fn { string_scalar_device_view const d_separator; - __device__ bool is_null_list(column_device_view const& lists_dv, size_type const idx) const - noexcept + __device__ bool is_null_list(column_device_view const& lists_dv, + size_type const idx) const noexcept { return lists_dv.is_null(idx); } @@ -202,8 +202,8 @@ struct column_separators_fn { column_device_view const separators_dv; string_scalar_device_view const sep_narep_dv; - __device__ bool is_null_list(column_device_view const& lists_dv, size_type const idx) const - noexcept + __device__ bool is_null_list(column_device_view const& lists_dv, + size_type const idx) const noexcept { return lists_dv.is_null(idx) || (separators_dv.is_null(idx) && !sep_narep_dv.is_valid()); } diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu index e2188365785..628dbcb8755 100644 --- a/cpp/src/strings/contains.cu +++ b/cpp/src/strings/contains.cu @@ -56,8 +56,8 @@ struct contains_fn { if (d_strings.is_null(idx)) return 0; string_view d_str = d_strings.element(idx); int32_t begin = 0; - int32_t end = bmatch ? 1 // match only the beginning of the string; - : -1; // this handles empty strings too + int32_t end = bmatch ? 
1 // match only the beginning of the string; + : -1; // this handles empty strings too return static_cast(prog.find(idx, d_str, begin, end)); } }; diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index 0ec13b3648b..d804ac66961 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -796,7 +796,8 @@ struct datetime_formatter { val = val / 10; } ptr = tmpl + bytes - 1; - while (bytes-- > 0) *str++ = *ptr--; + while (bytes-- > 0) + *str++ = *ptr--; return str; } diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu index 7e6769a869b..aaee8c45169 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -267,7 +267,8 @@ struct duration_to_string_fn : public duration_to_string_size_fn { } digits_idx = std::max(digits_idx, min_digits); // digits are backwards, reverse the string into the output - while (digits_idx-- > 0) *str++ = digits[digits_idx]; + while (digits_idx-- > 0) + *str++ = digits[digits_idx]; return str; } diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index d4d6974cef5..708bb387c5a 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -252,7 +252,8 @@ struct ftos_converter { *ptr++ = (char)('0' + (value % 10)); value /= 10; } - while (ptr != buffer) *output++ = *--ptr; // 54321 -> 12345 + while (ptr != buffer) + *output++ = *--ptr; // 54321 -> 12345 return output; } diff --git a/cpp/src/strings/convert/convert_hex.cu b/cpp/src/strings/convert/convert_hex.cu index 7043174f5bf..c624819999f 100644 --- a/cpp/src/strings/convert/convert_hex.cu +++ b/cpp/src/strings/convert/convert_hex.cu @@ -154,7 +154,9 @@ struct integer_to_hex_fn { // compute the number of output bytes int bytes = sizeof(IntegerType); int byte_index = sizeof(IntegerType); - while ((--byte_index > 0) && (value_bytes[byte_index] & 0xFF) == 0) { --bytes; } + while ((--byte_index > 0) && (value_bytes[byte_index] & 0xFF) == 0) { + --bytes; + } // create output byte_index = bytes - 1; diff --git a/cpp/src/strings/convert/convert_ipv4.cu b/cpp/src/strings/convert/convert_ipv4.cu index d7b79547f29..4e323b98a2e 100644 --- a/cpp/src/strings/convert/convert_ipv4.cu +++ b/cpp/src/strings/convert/convert_ipv4.cu @@ -146,7 +146,8 @@ struct integers_to_ipv4_fn { else { char digits[3]; int num_digits = convert(value, digits); - while (num_digits-- > 0) *out_ptr++ = digits[num_digits]; + while (num_digits-- > 0) + *out_ptr++ = digits[num_digits]; } if ((n + 1) < 4) *out_ptr++ = '.'; shift_bits -= 8; diff --git a/cpp/src/strings/convert/convert_urls.cu b/cpp/src/strings/convert/convert_urls.cu index 33647c7b22f..abf2dc25097 100644 --- a/cpp/src/strings/convert/convert_urls.cu +++ b/cpp/src/strings/convert/convert_urls.cu @@ -335,9 +335,9 @@ std::unique_ptr url_decode( size_type chars_start = (strings.offset() == 0) ? 0 : cudf::detail::get_value( strings.offsets(), strings.offset(), stream); - size_type chars_end = (offset_count == strings.offsets().size()) - ? strings.chars_size() - : cudf::detail::get_value( + size_type chars_end = (offset_count == strings.offsets().size()) + ? 
strings.chars_size() + : cudf::detail::get_value( strings.offsets(), strings.offset() + strings_count, stream); size_type chars_bytes = chars_end - chars_start; diff --git a/cpp/src/strings/convert/utilities.cuh b/cpp/src/strings/convert/utilities.cuh index 75ae7b3af6c..746923526a1 100644 --- a/cpp/src/strings/convert/utilities.cuh +++ b/cpp/src/strings/convert/utilities.cuh @@ -81,7 +81,8 @@ __device__ inline size_type integer_to_string(IntegerType value, char* d_buffer) char* ptr = d_buffer; if (is_negative) *ptr++ = '-'; // digits are backwards, reverse the string into the output - while (digits_idx-- > 0) *ptr++ = digits[digits_idx]; + while (digits_idx-- > 0) + *ptr++ = digits[digits_idx]; return bytes; } diff --git a/cpp/src/strings/padding.cu b/cpp/src/strings/padding.cu index 253bf846993..6fee47ea225 100644 --- a/cpp/src/strings/padding.cu +++ b/cpp/src/strings/padding.cu @@ -95,7 +95,8 @@ std::unique_ptr pad( string_view d_str = d_strings.element(idx); auto length = d_str.length(); char* ptr = d_chars + d_offsets[idx]; - while (length++ < width) ptr += from_char_utf8(d_fill_char, ptr); + while (length++ < width) + ptr += from_char_utf8(d_fill_char, ptr); copy_string(ptr, d_str); }); } else if (side == pad_side::RIGHT) { @@ -109,7 +110,8 @@ std::unique_ptr pad( auto length = d_str.length(); char* ptr = d_chars + d_offsets[idx]; ptr = copy_string(ptr, d_str); - while (length++ < width) ptr += from_char_utf8(d_fill_char, ptr); + while (length++ < width) + ptr += from_char_utf8(d_fill_char, ptr); }); } else if (side == pad_side::BOTH) { thrust::for_each_n( @@ -124,9 +126,11 @@ std::unique_ptr pad( auto right_pad = (width & 1) ? pad / 2 : (pad - pad / 2); // odd width = right-justify auto left_pad = pad - right_pad; // e.g. width=7 gives "++foxx+" while width=6 gives "+fox++" - while (left_pad-- > 0) ptr += from_char_utf8(d_fill_char, ptr); + while (left_pad-- > 0) + ptr += from_char_utf8(d_fill_char, ptr); ptr = copy_string(ptr, d_str); - while (right_pad-- > 0) ptr += from_char_utf8(d_fill_char, ptr); + while (right_pad-- > 0) + ptr += from_char_utf8(d_fill_char, ptr); }); } @@ -181,7 +185,8 @@ std::unique_ptr zfill( string_view d_str = d_strings.element(idx); auto length = d_str.length(); char* out_ptr = d_chars + d_offsets[idx]; - while (length++ < width) *out_ptr++ = '0'; // prepend zero char + while (length++ < width) + *out_ptr++ = '0'; // prepend zero char copy_string(out_ptr, d_str); }); diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp index 6cac49d3c26..0e00221dabf 100644 --- a/cpp/src/strings/regex/regcomp.cpp +++ b/cpp/src/strings/regex/regcomp.cpp @@ -701,11 +701,13 @@ class regex_compiler { regex_parser::Item item = in[i]; if (item.d.yycount.n <= 0) { // need to erase - for (std::size_t j = 0; j < i - rep_start; j++) out.pop_back(); + for (std::size_t j = 0; j < i - rep_start; j++) + out.pop_back(); } else { // repeat for (int j = 1; j < item.d.yycount.n; j++) - for (std::size_t k = rep_start; k < i; k++) out.push_back(in[k]); + for (std::size_t k = rep_start; k < i; k++) + out.push_back(in[k]); } // optional repeats @@ -715,7 +717,8 @@ class regex_compiler { o_item.t = LBRA_NC; o_item.d.yy = 0; out.push_back(o_item); - for (std::size_t k = rep_start; k < i; k++) out.push_back(in[k]); + for (std::size_t k = rep_start; k < i; k++) + out.push_back(in[k]); } for (int j = item.d.yycount.n; j < item.d.yycount.m; j++) { regex_parser::Item o_item; @@ -746,7 +749,8 @@ class regex_compiler { } } else // copy it once then put '*' { - for 
(std::size_t k = rep_start; k < i; k++) out.push_back(in[k]); + for (std::size_t k = rep_start; k < i; k++) + out.push_back(in[k]); if (item.t == COUNTED) { o_item.t = STAR; @@ -841,12 +845,14 @@ void reprog::optimize1() if (_insts[i].type != NOP) { { int target_id = _insts[i].u2.next_id; - while (_insts[target_id].type == NOP) target_id = _insts[target_id].u2.next_id; + while (_insts[target_id].type == NOP) + target_id = _insts[target_id].u2.next_id; _insts[i].u2.next_id = target_id; } if (_insts[i].type == OR) { int target_id = _insts[i].u1.right_id; - while (_insts[target_id].type == NOP) target_id = _insts[target_id].u2.next_id; + while (_insts[target_id].type == NOP) + target_id = _insts[target_id].u2.next_id; _insts[i].u1.right_id = target_id; } } @@ -854,7 +860,8 @@ void reprog::optimize1() // skip NOPs from the beginning { int target_id = _startinst_id; - while (_insts[target_id].type == NOP) target_id = _insts[target_id].u2.next_id; + while (_insts[target_id].type == NOP) + target_id = _insts[target_id].u2.next_id; _startinst_id = target_id; } // actually remove the no-ops @@ -950,7 +957,8 @@ void reprog::print() printf("startinst_id=%d\n", _startinst_id); if (_startinst_ids.size() > 0) { printf("startinst_ids:"); - for (size_t i = 0; i < _startinst_ids.size(); i++) printf(" %d", _startinst_ids[i]); + for (size_t i = 0; i < _startinst_ids.size(); i++) + printf(" %d", _startinst_ids[i]); printf("\n"); } diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index eddda3fe0eb..854fce15fd4 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -231,7 +231,8 @@ __device__ inline int32_t reprog_device::regexec( if (((eos < 0) || (pos < eos)) && match == 0) { int32_t i = 0; auto ids = startinst_ids(); - while (ids[i] >= 0) jnk.list1->activate(ids[i++], (group_id == 0 ? pos : -1), -1); + while (ids[i] >= 0) + jnk.list1->activate(ids[i++], (group_id == 0 ? pos : -1), -1); } c = static_cast(pos >= txtlen ? 0 : *itr); diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index 2d9d40e2d68..ec3ef4d94ae 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -105,8 +105,8 @@ struct replace_multi_regex_fn { size_type end = d_ranges[ptn_idx].second; string_view d_repl = d_repls.size() > 1 ? d_repls.element(ptn_idx) : d_repls.element(0); - auto spos = d_str.byte_offset(begin); - auto epos = d_str.byte_offset(end); + auto spos = d_str.byte_offset(begin); + auto epos = d_str.byte_offset(end); nbytes += d_repl.size_bytes() - (epos - spos); if (out_ptr) { // copy unmodified content plus new replacement string out_ptr = copy_and_increment(out_ptr, in_ptr + lpos, spos - lpos); diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index 4185e6db685..979974a2fdb 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -568,9 +568,9 @@ std::unique_ptr replace(strings_column_view con (strings.offset() == 0) ? 0 : cudf::detail::get_value(strings.offsets(), strings.offset(), stream); - size_type const chars_end = (offset_count == strings.offsets().size()) - ? strings.chars_size() - : cudf::detail::get_value( + size_type const chars_end = (offset_count == strings.offsets().size()) + ? 
strings.chars_size() + : cudf::detail::get_value( strings.offsets(), strings.offset() + strings_count, stream); size_type const chars_bytes = chars_end - chars_start; @@ -604,11 +604,11 @@ std::unique_ptr replace( auto const offset_count = strings_count + 1; auto const d_offsets = strings.offsets().data() + strings.offset(); size_type chars_start = (strings.offset() == 0) ? 0 - : cudf::detail::get_value( + : cudf::detail::get_value( strings.offsets(), strings.offset(), stream); - size_type chars_end = (offset_count == strings.offsets().size()) - ? strings.chars_size() - : cudf::detail::get_value( + size_type chars_end = (offset_count == strings.offsets().size()) + ? strings.chars_size() + : cudf::detail::get_value( strings.offsets(), strings.offset() + strings_count, stream); return replace_char_parallel( strings, chars_start, chars_end, d_target, d_repl, maxrepl, stream, mr); diff --git a/cpp/src/strings/split/split.cu b/cpp/src/strings/split/split.cu index ae0ea4b90e6..9c5be1c9ca3 100644 --- a/cpp/src/strings/split/split.cu +++ b/cpp/src/strings/split/split.cu @@ -154,10 +154,10 @@ struct split_tokenizer_fn : base_split_tokenizer { auto next_delim = ((idx + col) < positions_count) // boundary check for delims in last string ? (base_ptr + d_positions[idx + col]) // start of next delimiter : str_end_ptr; // or end of this string - auto eptr = (next_delim < str_end_ptr) // make sure delimiter is inside this string + auto eptr = (next_delim < str_end_ptr) // make sure delimiter is inside this string && (col + 1 < token_count) // and this is not the last token - ? next_delim - : str_end_ptr; + ? next_delim + : str_end_ptr; // store the token into the output vector d_tokens[col * d_strings.size()] = string_index_pair{str_ptr, static_cast(eptr - str_ptr)}; @@ -281,10 +281,10 @@ struct rsplit_tokenizer_fn : base_split_tokenizer { auto prev_delim = (idx >= col) // boundary check for delims in first string ? (base_ptr + d_positions[idx - col] + 1) // end of prev delimiter : str_begin_ptr; // or the start of this string - auto sptr = (prev_delim > str_begin_ptr) // make sure delimiter is inside the string + auto sptr = (prev_delim > str_begin_ptr) // make sure delimiter is inside the string && (col + 1 < token_count) // and this is not the last token - ? prev_delim - : str_begin_ptr; + ? 
prev_delim + : str_begin_ptr; // store the token into the output -- building the array backwards d_tokens[d_strings.size() * (token_count - 1 - col)] = string_index_pair{sptr, static_cast(str_ptr - sptr)}; diff --git a/cpp/src/table/table.cpp b/cpp/src/table/table.cpp index 4cd85fc5e7e..904ce5470ce 100644 --- a/cpp/src/table/table.cpp +++ b/cpp/src/table/table.cpp @@ -28,7 +28,9 @@ table::table(table const& other) : _num_rows{other.num_rows()} { CUDF_FUNC_RANGE(); _columns.reserve(other._columns.size()); - for (auto const& c : other._columns) { _columns.emplace_back(std::make_unique(*c)); } + for (auto const& c : other._columns) { + _columns.emplace_back(std::make_unique(*c)); + } } // Move the contents of a vector `unique_ptr` @@ -53,7 +55,9 @@ table::table(table_view view, rmm::cuda_stream_view stream, rmm::mr::device_memo { CUDF_FUNC_RANGE(); _columns.reserve(view.num_columns()); - for (auto const& c : view) { _columns.emplace_back(std::make_unique(c, stream, mr)); } + for (auto const& c : view) { + _columns.emplace_back(std::make_unique(c, stream, mr)); + } } // Create immutable view @@ -61,7 +65,9 @@ table_view table::view() const { std::vector views; views.reserve(_columns.size()); - for (auto const& c : _columns) { views.push_back(c->view()); } + for (auto const& c : _columns) { + views.push_back(c->view()); + } return table_view{views}; } @@ -70,7 +76,9 @@ mutable_table_view table::mutable_view() { std::vector views; views.reserve(_columns.size()); - for (auto const& c : _columns) { views.push_back(c->mutable_view()); } + for (auto const& c : _columns) { + views.push_back(c->mutable_view()); + } return mutable_table_view{views}; } diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index c64bf5b2823..abd909f8cfc 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -43,7 +43,9 @@ auto concatenate_column_views(std::vector const& views) { using ColumnView = typename ViewType::ColumnView; std::vector concat_cols; - for (auto& view : views) { concat_cols.insert(concat_cols.end(), view.begin(), view.end()); } + for (auto& view : views) { + concat_cols.insert(concat_cols.end(), view.begin(), view.end()); + } return concat_cols; } diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index aca93c17b9f..d82ff7f2ac4 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -270,7 +270,9 @@ TEST_F(CountUnsetBitsTest, NullMask) std::vector indices = {0, 32, 7, 25}; auto counts = cudf::segmented_count_unset_bits(nullptr, indices); EXPECT_EQ(indices.size(), counts.size() * 2); - for (size_t i = 0; i < counts.size(); i++) { EXPECT_EQ(0, counts[i]); } + for (size_t i = 0; i < counts.size(); i++) { + EXPECT_EQ(0, counts[i]); + } } TEST_F(CountUnsetBitsTest, SingleWordAllBits) @@ -377,9 +379,9 @@ struct CopyBitmaskTest : public cudf::test::BaseFixture, cudf::test::UniformRand CopyBitmaskTest() : cudf::test::UniformRandomGenerator{0, 1} {} }; -void cleanEndWord(rmm::device_buffer &mask, int begin_bit, int end_bit) +void cleanEndWord(rmm::device_buffer& mask, int begin_bit, int end_bit) { - auto ptr = static_cast(mask.data()); + auto ptr = static_cast(mask.data()); auto number_of_mask_words = cudf::num_bitmask_words(static_cast(end_bit - begin_bit)); auto number_of_bits = end_bit - begin_bit; @@ -421,7 +423,9 @@ TEST_F(CopyBitmaskTest, NullPtr) TEST_F(CopyBitmaskTest, TestZeroOffset) { std::vector validity_bit(1000); - for (auto &m : validity_bit) { m = this->generate(); } + for (auto& 
m : validity_bit) { + m = this->generate(); + } auto input_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end()); int begin_bit = 0; @@ -430,7 +434,7 @@ TEST_F(CopyBitmaskTest, TestZeroOffset) validity_bit.begin() + end_bit); auto splice_mask = cudf::copy_bitmask( - static_cast(input_mask.data()), begin_bit, end_bit); + static_cast(input_mask.data()), begin_bit, end_bit); cleanEndWord(splice_mask, begin_bit, end_bit); auto number_of_bits = end_bit - begin_bit; @@ -441,7 +445,9 @@ TEST_F(CopyBitmaskTest, TestZeroOffset) TEST_F(CopyBitmaskTest, TestNonZeroOffset) { std::vector validity_bit(1000); - for (auto &m : validity_bit) { m = this->generate(); } + for (auto& m : validity_bit) { + m = this->generate(); + } auto input_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end()); int begin_bit = 321; @@ -450,7 +456,7 @@ TEST_F(CopyBitmaskTest, TestNonZeroOffset) validity_bit.begin() + end_bit); auto splice_mask = cudf::copy_bitmask( - static_cast(input_mask.data()), begin_bit, end_bit); + static_cast(input_mask.data()), begin_bit, end_bit); cleanEndWord(splice_mask, begin_bit, end_bit); auto number_of_bits = end_bit - begin_bit; @@ -463,7 +469,9 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorContiguous) cudf::data_type t{cudf::type_id::INT32}; cudf::size_type num_elements = 1001; std::vector validity_bit(num_elements); - for (auto &m : validity_bit) { m = this->generate(); } + for (auto& m : validity_bit) { + m = this->generate(); + } auto gold_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end()); rmm::device_buffer copy_mask{gold_mask, rmm::cuda_stream_default}; @@ -501,7 +509,9 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous) cudf::data_type t{cudf::type_id::INT32}; cudf::size_type num_elements = 1001; std::vector validity_bit(num_elements); - for (auto &m : validity_bit) { m = this->generate(); } + for (auto& m : validity_bit) { + m = this->generate(); + } auto gold_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end()); std::vector split{0, 104, 128, 152, 311, 491, 583, 734, 760, num_elements}; diff --git a/cpp/tests/bitmask/set_nullmask_tests.cu b/cpp/tests/bitmask/set_nullmask_tests.cu index 235aec7ddf8..91f72c8de5f 100644 --- a/cpp/tests/bitmask/set_nullmask_tests.cu +++ b/cpp/tests/bitmask/set_nullmask_tests.cu @@ -38,7 +38,8 @@ struct valid_bit_functor { std::ostream& operator<<(std::ostream& stream, thrust::host_vector const& bits) { - for (auto _bit : bits) stream << int(_bit); + for (auto _bit : bits) + stream << int(_bit); return stream; } diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index 2c7984b5f79..bdaa20f63bb 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -530,7 +530,7 @@ TYPED_TEST(ListsDictionaryLeafTest, FromNested) DCW leaf({1, 3, -1, 1, 3, 1, 3, -1, 1, 3}, {1, 1, 0, 1, 1, 1, 1, 0, 1, 1}); offset_t offsets{0, 3, 3, 6, 6, 10}; auto mask = cudf::create_null_mask(5, cudf::mask_state::ALL_VALID); - cudf::set_null_mask(static_cast(mask.data()), 1, 2, false); + cudf::set_null_mask(static_cast(mask.data()), 1, 2, false); auto data = cudf::make_lists_column(5, offsets.release(), leaf.release(), 0, std::move(mask)); auto s = cudf::make_list_scalar(*data); @@ -542,9 +542,9 @@ TYPED_TEST(ListsDictionaryLeafTest, FromNested) {1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1}); offset_t offsets2{0, 3, 3, 6, 6, 10, 13, 
13, 16, 16, 20, 23, 23, 26, 26, 30}; auto mask2 = cudf::create_null_mask(15, cudf::mask_state::ALL_VALID); - cudf::set_null_mask(static_cast(mask2.data()), 1, 2, false); - cudf::set_null_mask(static_cast(mask2.data()), 6, 7, false); - cudf::set_null_mask(static_cast(mask2.data()), 11, 12, false); + cudf::set_null_mask(static_cast(mask2.data()), 1, 2, false); + cudf::set_null_mask(static_cast(mask2.data()), 6, 7, false); + cudf::set_null_mask(static_cast(mask2.data()), 11, 12, false); auto nested = cudf::make_lists_column(15, offsets2.release(), leaf2.release(), 3, std::move(mask2)); @@ -658,7 +658,7 @@ TYPED_TEST(ListsStructsLeafTest, FromNested) LCWinner_t({LCWinner_t{}, LCWinner_t{42}}, valid_t{1, 1}.begin()), valid_t{0, 1}.begin()); auto mask = cudf::create_null_mask(3, cudf::mask_state::ALL_VALID); - cudf::set_null_mask(static_cast(mask.data()), 0, 1, false); + cudf::set_null_mask(static_cast(mask.data()), 0, 1, false); auto data = cudf::make_lists_column(3, offset_t{0, 0, 1, 2}.release(), leaf.release(), 1, std::move(mask)); auto s = cudf::make_list_scalar(*data); @@ -674,9 +674,9 @@ TYPED_TEST(ListsStructsLeafTest, FromNested) valid_t{1, 1, 1, 1, 1, 1}.begin()), valid_t{0, 1, 0, 1, 0, 1}.begin()); auto mask2 = cudf::create_null_mask(9, cudf::mask_state::ALL_VALID); - cudf::set_null_mask(static_cast(mask2.data()), 0, 1, false); - cudf::set_null_mask(static_cast(mask2.data()), 3, 4, false); - cudf::set_null_mask(static_cast(mask2.data()), 6, 7, false); + cudf::set_null_mask(static_cast(mask2.data()), 0, 1, false); + cudf::set_null_mask(static_cast(mask2.data()), 3, 4, false); + cudf::set_null_mask(static_cast(mask2.data()), 6, 7, false); auto data2 = cudf::make_lists_column( 9, offset_t{0, 0, 1, 2, 2, 3, 4, 4, 5, 6}.release(), leaf2.release(), 3, std::move(mask2)); auto expected = cudf::make_lists_column(3, diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index 303f0a00658..c07db17ec15 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -49,7 +49,7 @@ TYPED_TEST(FixedWidthGetValueTest, BasicGet) auto s = get_element(col, 0); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(9), typed_s->value()); @@ -61,7 +61,7 @@ TYPED_TEST(FixedWidthGetValueTest, GetFromNullable) auto s = get_element(col, 1); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(8), typed_s->value()); @@ -91,7 +91,7 @@ TEST_F(StringGetValueTest, BasicGet) strings_column_wrapper col{"this", "is", "a", "test"}; auto s = get_element(col, 3); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ("test", typed_s->to_string()); @@ -102,7 +102,7 @@ TEST_F(StringGetValueTest, GetEmpty) strings_column_wrapper col{"this", "is", "", "test"}; auto s = get_element(col, 2); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ("", typed_s->to_string()); @@ -113,7 +113,7 @@ TEST_F(StringGetValueTest, GetFromNullable) strings_column_wrapper col({"this", "is", "a", "test"}, {0, 1, 0, 1}); auto s = get_element(col, 1); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ("is", typed_s->to_string()); @@ 
-142,7 +142,7 @@ TYPED_TEST(DictionaryGetValueTest, BasicGet) auto s = get_element(*col, 2); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(7), typed_s->value()); @@ -157,7 +157,7 @@ TYPED_TEST(DictionaryGetValueTest, GetFromNullable) auto s = get_element(*col, 3); using ScalarType = scalar_type_t; - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); EXPECT_EQ(cudf::test::make_type_param_scalar(8), typed_s->value()); @@ -204,7 +204,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullNonEmpty) size_type index = 0; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -219,7 +219,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNonNullEmpty) size_type index = 1; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -234,7 +234,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NonNestedGetNull) size_type index = 2; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_FALSE(s->is_valid()); // Test preserve column hierarchy @@ -258,7 +258,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullNonEmpty) size_type index = 3; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -281,7 +281,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullNonEmptyPreserveNull) size_type index = 3; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -303,7 +303,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNonNullEmpty) size_type index = 1; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -328,7 +328,7 @@ TYPED_TEST(ListGetFixedWidthValueTest, NestedGetNull) size_type index = 1; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); auto expected_data = make_lists_column(0, offset_t{}.release(), FCW{}.release(), 0, rmm::device_buffer{}); @@ -358,7 +358,7 @@ TEST_F(ListGetStringValueTest, NonNestedGetNonNullNonEmpty) size_type index = 0; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -373,7 +373,7 @@ TEST_F(ListGetStringValueTest, NonNestedGetNonNullEmpty) size_type index = 1; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -389,7 +389,7 @@ TEST_F(ListGetStringValueTest, NonNestedGetNull) size_type index = 2; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = 
static_cast(s.get()); EXPECT_FALSE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(typed_s->view(), StringCW{}); @@ -411,7 +411,7 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmpty) size_type index = 2; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -435,7 +435,7 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullNonEmptyPreserveNull) size_type index = 2; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_data, typed_s->view()); @@ -457,7 +457,7 @@ TEST_F(ListGetStringValueTest, NestedGetNonNullEmpty) size_type index = 3; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); // Relax to equivalent. `expected_data` leaf string column does not @@ -484,7 +484,7 @@ TEST_F(ListGetStringValueTest, NestedGetNull) size_type index = 0; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); auto expected_data = make_lists_column(0, offset_t{}.release(), StringCW{}.release(), 0, rmm::device_buffer{}); @@ -521,7 +521,7 @@ struct ListGetStructValueTest : public BaseFixture { std::for_each( thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_lists), [&](auto i) { if (*(null_mask.begin() + i)) { - set_null_mask(static_cast(d_null_mask.data()), i, i + 1, true); + set_null_mask(static_cast(d_null_mask.data()), i, i + 1, true); } }); } @@ -553,7 +553,7 @@ struct ListGetStructValueTest : public BaseFixture { { std::vector views; std::transform( - rows.begin(), rows.end(), std::back_inserter(views), [](auto &r) { return column_view(r); }); + rows.begin(), rows.end(), std::back_inserter(views), [](auto& r) { return column_view(r); }); return cudf::concatenate(views); } @@ -616,7 +616,7 @@ TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullNonEmpty) auto expected_data = this->row2(); auto s = get_element(list_column->view(), index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); // Relax to equivalent. The nested list column in struct allocates `null_mask`. @@ -634,7 +634,7 @@ TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullNonEmpty2) auto expected_data = this->concat({this->row0(), this->row1()}); auto s = get_element(list_column->view(), index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_data, typed_s->view()); @@ -654,7 +654,7 @@ TYPED_TEST(ListGetStructValueTest, NonNestedGetNonNullEmpty) auto expected_data = this->zero_length_struct(); auto s = get_element(list_column->view(), index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); // Relax to equivalent. The nested list column in struct allocates `null_mask`. 
@@ -673,7 +673,7 @@ TYPED_TEST(ListGetStructValueTest, NonNestedGetNull) size_type index = 0; auto s = get_element(list_column->view(), index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); auto expected_data = this->make_test_structs_column({}, {}, {}, valid_t{}.begin()); @@ -695,7 +695,7 @@ TYPED_TEST(ListGetStructValueTest, NestedGetNonNullNonEmpty) size_type index = 0; auto s = get_element(list_column_nested->view(), index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_data, typed_s->view()); @@ -716,7 +716,7 @@ TYPED_TEST(ListGetStructValueTest, NestedGetNonNullNonEmpty2) size_type index = 0; auto s = get_element(list_column_nested->view(), index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_data, typed_s->view()); @@ -736,7 +736,7 @@ TYPED_TEST(ListGetStructValueTest, NestedGetNonNullNonEmpty3) size_type index = 1; auto s = get_element(list_column_nested->view(), index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); // Relax to equivalent. For `get_element`, the nested list column in struct @@ -760,7 +760,7 @@ TYPED_TEST(ListGetStructValueTest, NestedGetNonNullEmpty) size_type index = 1; auto s = get_element(list_column_nested->view(), index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_TRUE(s->is_valid()); // Relax to equivalent. The sliced version still has the array for fields @@ -784,7 +784,7 @@ TYPED_TEST(ListGetStructValueTest, NestedGetNull) size_type index = 2; auto s = get_element(list_column_nested->view(), index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); auto nested = this->make_test_structs_column({}, {}, {}, valid_t{}.begin()); auto expected_data = @@ -816,7 +816,7 @@ TYPED_TEST(StructGetValueTestTyped, mixed_types_valid) size_type index = 2; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); // expect fields fixed_width_column_wrapper ef1{3}; @@ -846,7 +846,7 @@ TYPED_TEST(StructGetValueTestTyped, mixed_types_valid_with_nulls) size_type index = 1; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); // expect fields fixed_width_column_wrapper ef1({-1}, {false}); @@ -880,7 +880,7 @@ TYPED_TEST(StructGetValueTestTyped, mixed_types_invalid) size_type index = 0; auto s = get_element(col, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); EXPECT_FALSE(typed_s->is_valid()); @@ -912,7 +912,7 @@ TEST_F(StructGetValueTest, multi_level_nested) size_type index = 0; auto s = get_element(l0, index); - auto typed_s = static_cast(s.get()); + auto typed_s = static_cast(s.get()); // Expect fields column_view cv = column_view(l0); diff --git a/cpp/tests/copying/pack_tests.cpp b/cpp/tests/copying/pack_tests.cpp index 367c6a3ae53..2e7c41333d5 100644 --- a/cpp/tests/copying/pack_tests.cpp +++ b/cpp/tests/copying/pack_tests.cpp @@ -173,7 +173,7 @@ std::vector> generate_structs(bool include_validity) std::vector ages_validity = {1, 1, 1, 1, 0, 1, 0, 0, 1}; auto ages_column = include_validity ? 
fixed_width_column_wrapper( ages.begin(), ages.end(), ages_validity.begin()) - : fixed_width_column_wrapper(ages.begin(), ages.end()); + : fixed_width_column_wrapper(ages.begin(), ages.end()); // 3. Boolean "is_human" column. std::vector is_human{true, true, false, false, false, false, true, true, true}; diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp index 32a7ce1a038..47ffe497ce3 100644 --- a/cpp/tests/copying/split_tests.cpp +++ b/cpp/tests/copying/split_tests.cpp @@ -717,7 +717,9 @@ void split_null_input_strings_column_value(SplitFunc Split, CompareFunc Compare) auto expected = create_expected_string_tables_for_splits(strings, validity_masks, splits); - for (std::size_t i = 0; i < result.size(); ++i) { Compare(expected[i], result[i]); } + for (std::size_t i = 0; i < result.size(); ++i) { + Compare(expected[i], result[i]); + } } // split with strings @@ -903,7 +905,7 @@ void split_structs(bool include_validity, SplitFunc Split, CompareFunc Compare) std::vector ages_validity = {1, 1, 1, 1, 0, 1, 0, 0, 1}; auto ages_column = include_validity ? fixed_width_column_wrapper( ages.begin(), ages.end(), ages_validity.begin()) - : fixed_width_column_wrapper(ages.begin(), ages.end()); + : fixed_width_column_wrapper(ages.begin(), ages.end()); // 3. Boolean "is_human" column. std::vector is_human{true, true, false, false, false, false, true, true, true}; @@ -928,9 +930,9 @@ void split_structs(bool include_validity, SplitFunc Split, CompareFunc Compare) auto expected_names = include_validity ? create_expected_string_columns_for_splits(names, splits, names_validity) : create_expected_string_columns_for_splits(names, splits, false); - auto expected_ages = include_validity - ? create_expected_columns_for_splits(splits, ages, ages_validity) - : create_expected_columns_for_splits(splits, ages, false); + auto expected_ages = include_validity + ? create_expected_columns_for_splits(splits, ages, ages_validity) + : create_expected_columns_for_splits(splits, ages, false); auto expected_is_human = include_validity ? create_expected_columns_for_splits(splits, is_human, is_human_validity) : create_expected_columns_for_splits(splits, is_human, false); diff --git a/cpp/tests/filling/repeat_tests.cpp b/cpp/tests/filling/repeat_tests.cpp index b450015c0c8..730ef8c1d16 100644 --- a/cpp/tests/filling/repeat_tests.cpp +++ b/cpp/tests/filling/repeat_tests.cpp @@ -117,7 +117,9 @@ TYPED_TEST(RepeatTypedTestFixture, RepeatNullable) std::vector input_values(num_values); std::iota(input_values.begin(), input_values.end(), 0); std::vector input_valids(num_values); - for (size_t i{0}; i < input_valids.size(); i++) { input_valids[i] = (i % 2) == 0 ? true : false; } + for (size_t i{0}; i < input_valids.size(); i++) { + input_valids[i] = (i % 2) == 0 ? 
true : false; + } std::vector counts(num_values); std::transform(counts.begin(), counts.end(), counts.begin(), [&](cudf::size_type count) { diff --git a/cpp/tests/groupby/collect_set_tests.cpp b/cpp/tests/groupby/collect_set_tests.cpp index 8ce0380ad66..2f89b04c745 100644 --- a/cpp/tests/groupby/collect_set_tests.cpp +++ b/cpp/tests/groupby/collect_set_tests.cpp @@ -25,11 +25,11 @@ namespace cudf { namespace test { -#define COL_K cudf::test::fixed_width_column_wrapper -#define COL_V cudf::test::fixed_width_column_wrapper -#define COL_S cudf::test::strings_column_wrapper -#define LCL_V cudf::test::lists_column_wrapper -#define LCL_S cudf::test::lists_column_wrapper +#define COL_K cudf::test::fixed_width_column_wrapper +#define COL_V cudf::test::fixed_width_column_wrapper +#define COL_S cudf::test::strings_column_wrapper +#define LCL_V cudf::test::lists_column_wrapper +#define LCL_S cudf::test::lists_column_wrapper #define VALIDITY std::initializer_list struct CollectSetTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 9c3a9a1b015..9a541a8cde0 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -787,7 +787,9 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampSeconds) column_wrapper(input_vals.begin(), input_vals.end()); { std::ofstream outfile(filepath, std::ofstream::out); - for (auto v : input_vals) { outfile << v << "\n"; } + for (auto v : input_vals) { + outfile << v << "\n"; + } } cudf_io::csv_reader_options in_opts = @@ -814,7 +816,9 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampMilliSeconds) input_vals.begin(), input_vals.end()); { std::ofstream outfile(filepath, std::ofstream::out); - for (auto v : input_vals) { outfile << v << "\n"; } + for (auto v : input_vals) { + outfile << v << "\n"; + } } cudf_io::csv_reader_options in_opts = @@ -841,7 +845,9 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampMicroSeconds) input_vals.begin(), input_vals.end()); { std::ofstream outfile(filepath, std::ofstream::out); - for (auto v : input_vals) { outfile << v << "\n"; } + for (auto v : input_vals) { + outfile << v << "\n"; + } } cudf_io::csv_reader_options in_opts = @@ -868,7 +874,9 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampNanoSeconds) input_vals.begin(), input_vals.end()); { std::ofstream outfile(filepath, std::ofstream::out); - for (auto v : input_vals) { outfile << v << "\n"; } + for (auto v : input_vals) { + outfile << v << "\n"; + } } cudf_io::csv_reader_options in_opts = @@ -1189,7 +1197,8 @@ TEST_F(CsvReaderTest, InvalidFloatingPoint) const auto col_data = cudf::test::to_host(view.column(0)); // col_data.first contains the column data - for (const auto& elem : col_data.first) ASSERT_TRUE(std::isnan(elem)); + for (const auto& elem : col_data.first) + ASSERT_TRUE(std::isnan(elem)); // col_data.second contains the bitmasks ASSERT_EQ(0u, col_data.second[0]); } diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index e59a4accf66..ef2ad29fc80 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -1718,7 +1718,9 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullability) // and considers all columns nullable. However cudf::concatenate will not force nulls in case no // columns are nullable. To get the expected result, we tell the writer the nullability of all // columns in advance. 
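The change in the hunk below is the transformation this patch applies throughout: single-statement loops and conditionals that previously sat on one line are expanded into braced, multi-line form. A minimal sketch of the rule's effect, using the loop from this hunk (attributing it to tightened short-block settings such as AllowShortLoopsOnASingleLine/AllowShortBlocksOnASingleLine is an assumption about the underlying .clang-format change, not something stated in the patch):

    // before: the whole loop fits on one line
    for (auto& col_meta : metadata.column_metadata) { col_meta.set_nullability(false); }

    // after: the body gets its own braced lines
    for (auto& col_meta : metadata.column_metadata) {
      col_meta.set_nullability(false);
    }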
- for (auto& col_meta : metadata.column_metadata) { col_meta.set_nullability(false); } + for (auto& col_meta : metadata.column_metadata) { + col_meta.set_nullability(false); + } cudf_io::chunked_parquet_writer_options args = cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 1d3782daac9..95bacd87931 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -36,7 +36,7 @@ using aggregation = cudf::aggregation; template typename std::enable_if::value, std::vector>::type convert_values( - std::vector const &int_values) + std::vector const& int_values) { std::vector v(int_values.size()); std::transform(int_values.begin(), int_values.end(), v.begin(), [](int x) { @@ -48,7 +48,7 @@ typename std::enable_if::value, std::vector>::type c template typename std::enable_if::value, std::vector>::type convert_values( - std::vector const &int_values) + std::vector const& int_values) { std::vector v(int_values.size()); std::transform(int_values.begin(), int_values.end(), v.begin(), [](int x) { @@ -59,16 +59,16 @@ typename std::enable_if::value, std::vector>::type co } template -cudf::test::fixed_width_column_wrapper construct_null_column(std::vector const &values, - std::vector const &bools) +cudf::test::fixed_width_column_wrapper construct_null_column(std::vector const& values, + std::vector const& bools) { if (values.size() > bools.size()) { throw std::logic_error("input vector size mismatch."); } return cudf::test::fixed_width_column_wrapper(values.begin(), values.end(), bools.begin()); } template -std::vector replace_nulls(std::vector const &values, - std::vector const &bools, +std::vector replace_nulls(std::vector const& values, + std::vector const& bools, T identity) { std::vector v(values.size()); @@ -95,7 +95,7 @@ struct ReductionTest : public cudf::test::BaseFixture { void reduction_test(const cudf::column_view underlying_column, T_out expected_value, bool succeeded_condition, - std::unique_ptr const &agg, + std::unique_ptr const& agg, cudf::data_type output_dtype = cudf::data_type{}, bool expected_null = false) { @@ -104,7 +104,7 @@ struct ReductionTest : public cudf::test::BaseFixture { auto statement = [&]() { std::unique_ptr result = cudf::reduce(underlying_column, agg, output_dtype); using ScalarType = cudf::scalar_type_t; - auto result1 = static_cast(result.get()); + auto result1 = static_cast(result.get()); EXPECT_EQ(expected_null, !result1->is_valid()); if (result1->is_valid()) { EXPECT_EQ(expected_value, result1->value()); } }; @@ -148,8 +148,8 @@ TYPED_TEST(MinMaxReductionTest, MinMax) auto res = cudf::minmax(col); using ScalarType = cudf::scalar_type_t; - auto min_result = static_cast(res.first.get()); - auto max_result = static_cast(res.second.get()); + auto min_result = static_cast(res.first.get()); + auto max_result = static_cast(res.second.get()); EXPECT_EQ(min_result->value(), expected_min_result); EXPECT_EQ(max_result->value(), expected_max_result); @@ -170,8 +170,8 @@ TYPED_TEST(MinMaxReductionTest, MinMax) auto null_res = cudf::minmax(col_nulls); using ScalarType = cudf::scalar_type_t; - auto min_null_result = static_cast(null_res.first.get()); - auto max_null_result = static_cast(null_res.second.get()); + auto min_null_result = static_cast(null_res.first.get()); + auto max_null_result = static_cast(null_res.second.get()); EXPECT_EQ(min_null_result->value(), expected_min_null_result); 
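These reduction-test hunks all touch the same value-retrieval pattern: cudf::minmax and cudf::reduce hand back owning cudf::scalar pointers, and the tests downcast to the typed scalar to read the value. A minimal sketch of that pattern; the template arguments are assumptions reconstructed from cudf's public API, since they are not visible in the hunks above:

    using ScalarType = cudf::scalar_type_t<TypeParam>;  // typed scalar for the element type
    auto res        = cudf::minmax(col);                // pair of std::unique_ptr<cudf::scalar>
    auto min_result = static_cast<ScalarType*>(res.first.get());
    auto max_result = static_cast<ScalarType*>(res.second.get());
    EXPECT_EQ(min_result->value(), expected_min_result);

The formatting change itself is only declarator placement: static_cast<ScalarType *> becomes static_cast<ScalarType*>, matching the reference moves (rmm::device_buffer &mask to rmm::device_buffer& mask) seen earlier in the patch.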
EXPECT_EQ(max_null_result->value(), expected_max_null_result); @@ -202,8 +202,8 @@ TYPED_TEST(MinMaxReductionTest, MinMax) auto all_null_res = cudf::minmax(col_all_nulls); using ScalarType = cudf::scalar_type_t; - auto min_all_null_result = static_cast(all_null_res.first.get()); - auto max_all_null_result = static_cast(all_null_res.second.get()); + auto min_all_null_result = static_cast(all_null_res.first.get()); + auto max_all_null_result = static_cast(all_null_res.second.get()); EXPECT_EQ(min_all_null_result->is_valid(), false); EXPECT_EQ(max_all_null_result->is_valid(), false); } @@ -244,7 +244,7 @@ TYPED_TEST(ReductionTest, Product) std::vector host_bools({1, 1, 0, 0, 1, 1, 1}); std::vector v = convert_values(int_values); - auto calc_prod = [](std::vector &v) { + auto calc_prod = [](std::vector& v) { T expected_value = std::accumulate(v.begin(), v.end(), T{1}, [](T acc, T i) { return acc * i; }); return expected_value; @@ -273,7 +273,7 @@ TYPED_TEST(ReductionTest, SumOfSquare) std::vector host_bools({1, 1, 0, 0, 1, 1, 1, 1}); std::vector v = convert_values(int_values); - auto calc_reduction = [](std::vector &v) { + auto calc_reduction = [](std::vector& v) { T value = std::accumulate(v.begin(), v.end(), T{0}, [](T acc, T i) { return acc + i * i; }); return value; }; @@ -373,7 +373,7 @@ TYPED_TEST(MultiStepReductionTest, Mean) std::vector int_values({-3, 2, 1, 0, 5, -3, -2, 28}); std::vector host_bools({1, 1, 0, 1, 1, 1, 0, 1}); - auto calc_mean = [](std::vector &v, cudf::size_type valid_count) { + auto calc_mean = [](std::vector& v, cudf::size_type valid_count) { double sum = std::accumulate(v.begin(), v.end(), double{0}); return sum / valid_count; }; @@ -414,7 +414,7 @@ TYPED_TEST(MultiStepReductionTest, DISABLED_var_std) std::vector int_values({-3, 2, 1, 0, 5, -3, -2, 28}); std::vector host_bools({1, 1, 0, 1, 1, 1, 0, 1}); - auto calc_var = [](std::vector &v, cudf::size_type valid_count) { + auto calc_var = [](std::vector& v, cudf::size_type valid_count) { double mean = std::accumulate(v.begin(), v.end(), double{0}); mean /= valid_count; @@ -459,9 +459,9 @@ TYPED_TEST(MultiStepReductionTest, DISABLED_var_std) template struct ReductionMultiStepErrorCheck : public ReductionTest { - void reduction_error_check(cudf::test::fixed_width_column_wrapper &col, + void reduction_error_check(cudf::test::fixed_width_column_wrapper& col, bool succeeded_condition, - std::unique_ptr const &agg, + std::unique_ptr const& agg, cudf::data_type output_dtype) { const cudf::column_view underlying_column = col; @@ -528,10 +528,10 @@ TYPED_TEST(ReductionMultiStepErrorCheck, DISABLED_ErrorHandling) struct ReductionDtypeTest : public cudf::test::BaseFixture { template - void reduction_test(std::vector &int_values, + void reduction_test(std::vector& int_values, T_out expected_value, bool succeeded_condition, - std::unique_ptr const &agg, + std::unique_ptr const& agg, cudf::data_type out_dtype, bool expected_overflow = false) { @@ -542,7 +542,7 @@ struct ReductionDtypeTest : public cudf::test::BaseFixture { auto statement = [&]() { std::unique_ptr result = cudf::reduce(col, agg, out_dtype); using ScalarType = cudf::scalar_type_t; - auto result1 = static_cast(result.get()); + auto result1 = static_cast(result.get()); if (result1->is_valid() && !expected_overflow) { EXPECT_EQ(expected_value, result1->value()); } @@ -724,7 +724,7 @@ TEST_P(ReductionParamTest, DISABLED_std_var) std::vector int_values({-3, 2, 1, 0, 5, -3, -2, 28}); std::vector host_bools({1, 1, 0, 1, 1, 1, 0, 1}); - auto calc_var = [ddof](std::vector 
&v, cudf::size_type valid_count) { + auto calc_var = [ddof](std::vector& v, cudf::size_type valid_count) { double mean = std::accumulate(v.begin(), v.end(), double{0}); mean /= valid_count; @@ -772,7 +772,7 @@ struct StringReductionTest : public cudf::test::BaseFixture, void reduction_test(const cudf::column_view underlying_column, std::string expected_value, bool succeeded_condition, - std::unique_ptr const &agg, + std::unique_ptr const& agg, cudf::data_type output_dtype = cudf::data_type{}) { if (cudf::data_type{} == output_dtype) output_dtype = underlying_column.type(); @@ -780,7 +780,7 @@ struct StringReductionTest : public cudf::test::BaseFixture, auto statement = [&]() { std::unique_ptr result = cudf::reduce(underlying_column, agg, output_dtype); using ScalarType = cudf::scalar_type_t; - auto result1 = static_cast(result.get()); + auto result1 = static_cast(result.get()); EXPECT_TRUE(result1->is_valid()); if (!result1->is_valid()) std::cout << "expected=" << expected_value << ",got=" << result1->to_string() << std::endl; @@ -844,14 +844,14 @@ TEST_P(StringReductionTest, MinMax) // MINMAX auto result = cudf::minmax(col); - EXPECT_EQ(static_cast(result.first.get())->to_string(), + EXPECT_EQ(static_cast(result.first.get())->to_string(), expected_min_result); - EXPECT_EQ(static_cast(result.second.get())->to_string(), + EXPECT_EQ(static_cast(result.second.get())->to_string(), expected_max_result); result = cudf::minmax(col_nulls); - EXPECT_EQ(static_cast(result.first.get())->to_string(), + EXPECT_EQ(static_cast(result.first.get())->to_string(), expected_min_null_result); - EXPECT_EQ(static_cast(result.second.get())->to_string(), + EXPECT_EQ(static_cast(result.second.get())->to_string(), expected_max_null_result); } @@ -865,9 +865,9 @@ TEST_P(StringReductionTest, DictionaryMinMax) std::string expected_max_result = *(std::max_element(host_strings.begin(), host_strings.end())); auto result = cudf::minmax(col); - EXPECT_EQ(static_cast(result.first.get())->to_string(), + EXPECT_EQ(static_cast(result.first.get())->to_string(), expected_min_result); - EXPECT_EQ(static_cast(result.second.get())->to_string(), + EXPECT_EQ(static_cast(result.second.get())->to_string(), expected_max_result); // column with nulls @@ -885,9 +885,9 @@ TEST_P(StringReductionTest, DictionaryMinMax) expected_max_result = *(std::max_element(r_strings.begin(), r_strings.end())); result = cudf::minmax(col_nulls); - EXPECT_EQ(static_cast(result.first.get())->to_string(), + EXPECT_EQ(static_cast(result.first.get())->to_string(), expected_min_result); - EXPECT_EQ(static_cast(result.second.get())->to_string(), + EXPECT_EQ(static_cast(result.second.get())->to_string(), expected_max_result); // test sliced column @@ -895,9 +895,9 @@ TEST_P(StringReductionTest, DictionaryMinMax) // 3->2 and 7->5 because r_strings contains no null entries expected_min_result = *(std::min_element(r_strings.begin() + 2, r_strings.begin() + 5)); expected_max_result = *(std::max_element(r_strings.begin() + 2, r_strings.begin() + 5)); - EXPECT_EQ(static_cast(result.first.get())->to_string(), + EXPECT_EQ(static_cast(result.first.get())->to_string(), expected_min_result); - EXPECT_EQ(static_cast(result.second.get())->to_string(), + EXPECT_EQ(static_cast(result.second.get())->to_string(), expected_max_result); } @@ -1063,7 +1063,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionProductZeroScale) auto const out_type = static_cast(column).type(); auto const result = cudf::reduce(column, cudf::make_product_aggregation(), out_type); - auto const 
result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); auto const result_fp = decimalXX{result_scalar->value()}; EXPECT_EQ(result_fp, expected); @@ -1084,7 +1084,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionProduct) auto const expected = decimalXX{scaled_integer{36, scale_type{i * 6}}}; auto const result = cudf::reduce(column, cudf::make_product_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1104,7 +1104,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionProductWithNulls) auto const expected = decimalXX{scaled_integer{6, scale_type{i * 3}}}; auto const result = cudf::reduce(column, cudf::make_product_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1125,7 +1125,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSum) auto const out_type = static_cast(column).type(); auto const result = cudf::reduce(column, cudf::make_sum_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1149,7 +1149,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumAlternate) auto const out_type = static_cast(column).type(); auto const result = cudf::reduce(column, cudf::make_sum_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); EXPECT_EQ(result_scalar->fixed_point_value(), TEN); @@ -1169,7 +1169,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumFractional) auto const expected = decimalXX{scaled_integer{666, scale}}; auto const result = cudf::reduce(column, cudf::make_sum_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1192,7 +1192,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumLarge) auto const expected = decimalXX{scaled_integer{expected_value, scale}}; auto const result = cudf::reduce(column, cudf::make_sum_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1212,7 +1212,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMin) auto const out_type = static_cast(column).type(); auto const result = cudf::reduce(column, cudf::make_min_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), ONE); } @@ -1233,7 +1233,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMinLarge) auto const expected = decimalXX{0, scale}; auto const result = cudf::reduce(column, cudf::make_min_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1253,7 +1253,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMax) 
auto const out_type = static_cast(column).type(); auto const result = cudf::reduce(column, cudf::make_max_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), FOUR); } @@ -1274,7 +1274,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMaxLarge) auto const expected = decimalXX{scaled_integer{42, scale}}; auto const result = cudf::reduce(column, cudf::make_max_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1293,7 +1293,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionNUnique) auto const out_type = static_cast(column).type(); auto const result = cudf::reduce(column, cudf::make_nunique_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->value(), 4); } @@ -1313,7 +1313,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionSumOfSquares) auto const expected = decimalXX{scaled_integer{30, scale_type{i * 2}}}; auto const result = cudf::reduce(column, cudf::make_sum_of_squares_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1333,7 +1333,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMedianOddNumberOfElements) auto const expected = decimalXX{scaled_integer{2, scale}}; auto const result = cudf::reduce(column, cudf::make_median_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1353,7 +1353,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionMedianEvenNumberOfElements auto const expected = decimalXX{scaled_integer{25, scale}}; auto const result = cudf::reduce(column, cudf::make_median_aggregation(), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } @@ -1375,7 +1375,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionQuantile) auto const expected = decimalXX{scaled_integer{i + 1, scale}}; auto const result = cudf::reduce( column, cudf::make_quantile_aggregation({i / 4.0}, cudf::interpolation::LINEAR), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } } @@ -1398,7 +1398,7 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointReductionNthElement) auto const expected = decimalXX{scaled_integer{values[i], scale}}; auto const result = cudf::reduce( column, cudf::make_nth_element_aggregation(i, cudf::null_policy::INCLUDE), out_type); - auto const result_scalar = static_cast *>(result.get()); + auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); } } @@ -1611,7 +1611,7 @@ TYPED_TEST(DictionaryReductionTest, Product) std::vector v = convert_values(int_values); cudf::data_type output_type{cudf::type_to_id()}; - auto calc_prod = [](std::vector const &v) { + auto calc_prod = [](std::vector const& v) { return 
std::accumulate(v.cbegin(), v.cend(), T{1}, [](T acc, T i) { return acc * i; }); }; @@ -1642,7 +1642,7 @@ TYPED_TEST(DictionaryReductionTest, SumOfSquare) std::vector v = convert_values(int_values); cudf::data_type output_type{cudf::type_to_id()}; - auto calc_reduction = [](std::vector const &v) { + auto calc_reduction = [](std::vector const& v) { return std::accumulate(v.cbegin(), v.cend(), T{0}, [](T acc, T i) { return acc + i * i; }); }; @@ -1673,7 +1673,7 @@ TYPED_TEST(DictionaryReductionTest, Mean) std::vector v = convert_values(int_values); cudf::data_type output_type{cudf::type_to_id()}; - auto calc_mean = [](std::vector const &v, cudf::size_type valid_count) { + auto calc_mean = [](std::vector const& v, cudf::size_type valid_count) { double sum = std::accumulate(v.cbegin(), v.cend(), double{0}); return sum / valid_count; }; @@ -1710,7 +1710,7 @@ TYPED_TEST(DictionaryReductionTest, DISABLED_VarStd) std::vector v = convert_values(int_values); cudf::data_type output_type{cudf::type_to_id()}; - auto calc_var = [](std::vector const &v, cudf::size_type valid_count) { + auto calc_var = [](std::vector const& v, cudf::size_type valid_count) { double mean = std::accumulate(v.cbegin(), v.cend(), double{0}); mean /= valid_count; double sum_of_sq = std::accumulate( diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp index 2dfa28c5364..0ed4c186c92 100644 --- a/cpp/tests/replace/replace_nulls_tests.cpp +++ b/cpp/tests/replace/replace_nulls_tests.cpp @@ -335,11 +335,14 @@ TYPED_TEST(ReplaceNullsTest, ReplacementHasNulls) TYPED_TEST(ReplaceNullsTest, LargeScale) { std::vector inputColumn(10000); - for (size_t i = 0; i < inputColumn.size(); i++) inputColumn[i] = i % 2; + for (size_t i = 0; i < inputColumn.size(); i++) + inputColumn[i] = i % 2; std::vector inputValid(10000); - for (size_t i = 0; i < inputValid.size(); i++) inputValid[i] = i % 2; + for (size_t i = 0; i < inputValid.size(); i++) + inputValid[i] = i % 2; std::vector expectedColumn(10000); - for (size_t i = 0; i < expectedColumn.size(); i++) expectedColumn[i] = 1; + for (size_t i = 0; i < expectedColumn.size(); i++) + expectedColumn[i] = 1; ReplaceNullsColumn( cudf::test::fixed_width_column_wrapper( @@ -352,11 +355,14 @@ TYPED_TEST(ReplaceNullsTest, LargeScale) TYPED_TEST(ReplaceNullsTest, LargeScaleScalar) { std::vector inputColumn(10000); - for (size_t i = 0; i < inputColumn.size(); i++) inputColumn[i] = i % 2; + for (size_t i = 0; i < inputColumn.size(); i++) + inputColumn[i] = i % 2; std::vector inputValid(10000); - for (size_t i = 0; i < inputValid.size(); i++) inputValid[i] = i % 2; + for (size_t i = 0; i < inputValid.size(); i++) + inputValid[i] = i % 2; std::vector expectedColumn(10000); - for (size_t i = 0; i < expectedColumn.size(); i++) expectedColumn[i] = 1; + for (size_t i = 0; i < expectedColumn.size(); i++) + expectedColumn[i] = 1; cudf::numeric_scalar replacement(1); ReplaceNullsScalar(cudf::test::fixed_width_column_wrapper( diff --git a/cpp/tests/search/search_test.cpp b/cpp/tests/search/search_test.cpp index bf52c2609c4..38fc5abb250 100644 --- a/cpp/tests/search/search_test.cpp +++ b/cpp/tests/search/search_test.cpp @@ -644,8 +644,8 @@ TEST_F(SearchTest, contains_nullable_column_false) TEST_F(SearchTest, empty_table_string) { - std::vector h_col_strings{}; - std::vector h_val_strings{"0", "10", "11", "30", "32", "40", "47", "50", "7", "90"}; + std::vector h_col_strings{}; + std::vector h_val_strings{"0", "10", "11", "30", "32", "40", "47", "50", "7", "90"}; 
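The string-search hunks in this file change nothing but pointer alignment: the * now binds to the type instead of floating before the name. A minimal before/after sketch; the element type const char* is an assumption inferred from the nullptr entries these host vectors carry, since the template arguments are not visible in the hunks themselves:

    std::vector<const char *> h_col_strings{nullptr, "10", "20"};  // old: '*' separated from the type
    std::vector<const char*> h_col_strings{nullptr, "10", "20"};   // new: '*' attaches to the type

With nullptr used as the in-band marker for a null row, const char* is the natural element type for strings_column_wrapper input, which is why only the declaration spacing, not the data, differs between the removed and added lines.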
cudf::test::strings_column_wrapper column( h_col_strings.begin(), @@ -673,8 +673,8 @@ TEST_F(SearchTest, empty_table_string) TEST_F(SearchTest, empty_values_string) { - std::vector h_col_strings{"10", "20", "30", "40", "50"}; - std::vector h_val_strings{}; + std::vector h_col_strings{"10", "20", "30", "40", "50"}; + std::vector h_val_strings{}; cudf::test::strings_column_wrapper column( h_col_strings.begin(), @@ -702,8 +702,8 @@ TEST_F(SearchTest, empty_values_string) TEST_F(SearchTest, non_null_column__find_first_string) { - std::vector h_col_strings{"10", "20", "30", "40", "50"}; - std::vector h_val_strings{ + std::vector h_col_strings{"10", "20", "30", "40", "50"}; + std::vector h_val_strings{ "00", "07", "10", "11", "30", "32", "40", "47", "50", "90"}; cudf::test::strings_column_wrapper column( @@ -732,8 +732,8 @@ TEST_F(SearchTest, non_null_column__find_first_string) TEST_F(SearchTest, non_null_column__find_last_string) { - std::vector h_col_strings{"10", "20", "30", "40", "50"}; - std::vector h_val_strings{ + std::vector h_col_strings{"10", "20", "30", "40", "50"}; + std::vector h_val_strings{ "00", "07", "10", "11", "30", "32", "40", "47", "50", "90"}; cudf::test::strings_column_wrapper column( @@ -762,8 +762,8 @@ TEST_F(SearchTest, non_null_column__find_last_string) TEST_F(SearchTest, non_null_column_desc__find_first_string) { - std::vector h_col_strings{"50", "40", "30", "20", "10"}; - std::vector h_val_strings{ + std::vector h_col_strings{"50", "40", "30", "20", "10"}; + std::vector h_val_strings{ "00", "07", "10", "11", "30", "32", "40", "47", "50", "90"}; cudf::test::strings_column_wrapper column( @@ -792,8 +792,8 @@ TEST_F(SearchTest, non_null_column_desc__find_first_string) TEST_F(SearchTest, non_null_column_desc__find_last_string) { - std::vector h_col_strings{"50", "40", "30", "20", "10"}; - std::vector h_val_strings{ + std::vector h_col_strings{"50", "40", "30", "20", "10"}; + std::vector h_val_strings{ "00", "07", "10", "11", "30", "32", "40", "47", "50", "90"}; cudf::test::strings_column_wrapper column( @@ -822,8 +822,8 @@ TEST_F(SearchTest, non_null_column_desc__find_last_string) TEST_F(SearchTest, nullable_column__find_last__nulls_as_smallest_string) { - std::vector h_col_strings{nullptr, nullptr, "10", "20", "30", "40", "50"}; - std::vector h_val_strings{ + std::vector h_col_strings{nullptr, nullptr, "10", "20", "30", "40", "50"}; + std::vector h_val_strings{ nullptr, "08", "10", "11", "30", "32", "40", "47", "50", "90"}; cudf::test::strings_column_wrapper column( @@ -852,8 +852,8 @@ TEST_F(SearchTest, nullable_column__find_last__nulls_as_smallest_string) TEST_F(SearchTest, nullable_column__find_first__nulls_as_smallest_string) { - std::vector h_col_strings{nullptr, nullptr, "10", "20", "30", "40", "50"}; - std::vector h_val_strings{ + std::vector h_col_strings{nullptr, nullptr, "10", "20", "30", "40", "50"}; + std::vector h_val_strings{ nullptr, "08", "10", "11", "30", "32", "40", "47", "50", "90"}; cudf::test::strings_column_wrapper column( @@ -882,8 +882,8 @@ TEST_F(SearchTest, nullable_column__find_first__nulls_as_smallest_string) TEST_F(SearchTest, nullable_column__find_last__nulls_as_largest_string) { - std::vector h_col_strings{"10", "20", "30", "40", "50", nullptr, nullptr}; - std::vector h_val_strings{ + std::vector h_col_strings{"10", "20", "30", "40", "50", nullptr, nullptr}; + std::vector h_val_strings{ "08", "10", "11", "30", "32", "40", "47", "50", "90", nullptr}; cudf::test::strings_column_wrapper column( @@ -931,8 +931,8 @@ TEST_F(SearchTest, 
non_null_column__nullable_values__find_last__nulls_as_largest TEST_F(SearchTest, nullable_column__find_first__nulls_as_largest_string) { - std::vector h_col_strings{"10", "20", "30", "40", "50", nullptr, nullptr}; - std::vector h_val_strings{ + std::vector h_col_strings{"10", "20", "30", "40", "50", nullptr, nullptr}; + std::vector h_val_strings{ "08", "10", "11", "30", "32", "40", "47", "50", "90", nullptr}; cudf::test::strings_column_wrapper column( @@ -961,15 +961,15 @@ TEST_F(SearchTest, nullable_column__find_first__nulls_as_largest_string) TEST_F(SearchTest, table__find_first_string) { - std::vector h_col_0_strings{"10", "20", "20", "20", "20", "20", "50"}; - std::vector h_col_2_strings{"90", "77", "78", "61", "62", "63", "41"}; + std::vector h_col_0_strings{"10", "20", "20", "20", "20", "20", "50"}; + std::vector h_col_2_strings{"90", "77", "78", "61", "62", "63", "41"}; - std::vector h_val_0_strings{"0", "0", "0", "0", "10", "10", "10", "10", "10", - "10", "10", "10", "11", "20", "20", "20", "20", "20", - "20", "20", "20", "20", "20", "20", "30", "50", "60"}; - std::vector h_val_2_strings{"0", "91", "0", "91", "0", "79", "90", "91", "77", - "80", "90", "91", "91", "00", "76", "77", "78", "30", - "65", "77", "78", "80", "62", "78", "64", "41", "20"}; + std::vector h_val_0_strings{"0", "0", "0", "0", "10", "10", "10", "10", "10", + "10", "10", "10", "11", "20", "20", "20", "20", "20", + "20", "20", "20", "20", "20", "20", "30", "50", "60"}; + std::vector h_val_2_strings{"0", "91", "0", "91", "0", "79", "90", "91", "77", + "80", "90", "91", "91", "00", "76", "77", "78", "30", + "65", "77", "78", "80", "62", "78", "64", "41", "20"}; fixed_width_column_wrapper column_1{5.0, .5, .5, .7, .7, .7, .7}; fixed_width_column_wrapper values_1{0., 0., 6., 5., 0., 5., 5., 5., 5., 6., 6., 6., 9., 0., @@ -1030,15 +1030,15 @@ TEST_F(SearchTest, table__find_first_string) TEST_F(SearchTest, table__find_last_string) { - std::vector h_col_0_strings{"10", "20", "20", "20", "20", "20", "50"}; - std::vector h_col_2_strings{"90", "77", "78", "61", "62", "63", "41"}; + std::vector h_col_0_strings{"10", "20", "20", "20", "20", "20", "50"}; + std::vector h_col_2_strings{"90", "77", "78", "61", "62", "63", "41"}; - std::vector h_val_0_strings{"0", "0", "0", "0", "10", "10", "10", "10", "10", - "10", "10", "10", "11", "20", "20", "20", "20", "20", - "20", "20", "20", "20", "20", "20", "30", "50", "60"}; - std::vector h_val_2_strings{"0", "91", "0", "91", "0", "79", "90", "91", "77", - "80", "90", "91", "91", "00", "76", "77", "78", "30", - "65", "77", "78", "80", "62", "78", "64", "41", "20"}; + std::vector h_val_0_strings{"0", "0", "0", "0", "10", "10", "10", "10", "10", + "10", "10", "10", "11", "20", "20", "20", "20", "20", + "20", "20", "20", "20", "20", "20", "30", "50", "60"}; + std::vector h_val_2_strings{"0", "91", "0", "91", "0", "79", "90", "91", "77", + "80", "90", "91", "91", "00", "76", "77", "78", "30", + "65", "77", "78", "80", "62", "78", "64", "41", "20"}; fixed_width_column_wrapper column_1{5.0, .5, .5, .7, .7, .7, .7}; fixed_width_column_wrapper values_1{0., 0., 6., 5., 0., 5., 5., 5., 5., 6., 6., 6., 9., 0., @@ -1099,15 +1099,15 @@ TEST_F(SearchTest, table__find_last_string) TEST_F(SearchTest, table_partial_desc__find_first_string) { - std::vector h_col_0_strings{"50", "20", "20", "20", "20", "20", "10"}; - std::vector h_col_2_strings{"41", "78", "77", "63", "62", "61", "90"}; + std::vector h_col_0_strings{"50", "20", "20", "20", "20", "20", "10"}; + std::vector h_col_2_strings{"41", "78", 
"77", "63", "62", "61", "90"}; - std::vector h_val_0_strings{"0", "0", "0", "0", "10", "10", "10", "10", "10", - "10", "10", "10", "11", "20", "20", "20", "20", "20", - "20", "20", "20", "20", "20", "20", "30", "50", "60"}; - std::vector h_val_2_strings{"0", "91", "0", "91", "0", "79", "90", "91", "77", - "80", "90", "91", "91", "00", "76", "77", "78", "30", - "65", "77", "78", "80", "62", "78", "64", "41", "20"}; + std::vector h_val_0_strings{"0", "0", "0", "0", "10", "10", "10", "10", "10", + "10", "10", "10", "11", "20", "20", "20", "20", "20", + "20", "20", "20", "20", "20", "20", "30", "50", "60"}; + std::vector h_val_2_strings{"0", "91", "0", "91", "0", "79", "90", "91", "77", + "80", "90", "91", "91", "00", "76", "77", "78", "30", + "65", "77", "78", "80", "62", "78", "64", "41", "20"}; fixed_width_column_wrapper column_1{.7, .5, .5, .7, .7, .7, 5.0}; fixed_width_column_wrapper values_1{0., 0., 6., 5., 0., 5., 5., 5., 5., 6., 6., 6., 9., 0., @@ -1168,15 +1168,15 @@ TEST_F(SearchTest, table_partial_desc__find_first_string) TEST_F(SearchTest, table_partial_desc__find_last_string) { - std::vector h_col_0_strings{"50", "20", "20", "20", "20", "20", "10"}; - std::vector h_col_2_strings{"41", "78", "77", "63", "62", "61", "90"}; + std::vector h_col_0_strings{"50", "20", "20", "20", "20", "20", "10"}; + std::vector h_col_2_strings{"41", "78", "77", "63", "62", "61", "90"}; - std::vector h_val_0_strings{"0", "0", "0", "0", "10", "10", "10", "10", "10", - "10", "10", "10", "11", "20", "20", "20", "20", "20", - "20", "20", "20", "20", "20", "20", "30", "50", "60"}; - std::vector h_val_2_strings{"0", "91", "0", "91", "0", "79", "90", "91", "77", - "80", "90", "91", "91", "00", "76", "77", "78", "30", - "65", "77", "78", "80", "62", "78", "64", "41", "20"}; + std::vector h_val_0_strings{"0", "0", "0", "0", "10", "10", "10", "10", "10", + "10", "10", "10", "11", "20", "20", "20", "20", "20", + "20", "20", "20", "20", "20", "20", "30", "50", "60"}; + std::vector h_val_2_strings{"0", "91", "0", "91", "0", "79", "90", "91", "77", + "80", "90", "91", "91", "00", "76", "77", "78", "30", + "65", "77", "78", "80", "62", "78", "64", "41", "20"}; fixed_width_column_wrapper column_1{.7, .5, .5, .7, .7, .7, 5.0}; @@ -1238,13 +1238,13 @@ TEST_F(SearchTest, table_partial_desc__find_last_string) TEST_F(SearchTest, table__find_first__nulls_as_smallest_string) { - std::vector h_col_0_strings{ + std::vector h_col_0_strings{ nullptr, "10", "10", "20", "20", "20", "20", "20", "20", "20", "50"}; - std::vector h_col_2_strings{ + std::vector h_col_2_strings{ "50", "95", "90", nullptr, nullptr, "77", "78", "61", "62", "63", "41"}; - std::vector h_val_0_strings{"10", nullptr, "20"}; - std::vector h_val_2_strings{"95", "50", nullptr}; + std::vector h_val_0_strings{"10", nullptr, "20"}; + std::vector h_val_2_strings{"95", "50", nullptr}; fixed_width_column_wrapper column_1{{.5, 6.0, 5.0, .5, .5, .5, .5, .7, .7, .7, .7}, {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; @@ -1305,13 +1305,13 @@ TEST_F(SearchTest, table__find_first__nulls_as_smallest_string) TEST_F(SearchTest, table__find_last__nulls_as_smallest_string) { - std::vector h_col_0_strings{ + std::vector h_col_0_strings{ nullptr, "10", "10", "20", "20", "20", "20", "20", "20", "20", "50"}; - std::vector h_col_2_strings{ + std::vector h_col_2_strings{ "50", "90", "95", nullptr, nullptr, "77", "78", "61", "62", "63", "41"}; - std::vector h_val_0_strings{"10", nullptr, "20"}; - std::vector h_val_2_strings{"95", "50", nullptr}; + std::vector h_val_0_strings{"10", nullptr, "20"}; 
+ std::vector h_val_2_strings{"95", "50", nullptr}; fixed_width_column_wrapper column_1{{.5, 6.0, 5.0, .5, .5, .5, .5, .7, .7, .7, .7}, {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; @@ -1372,13 +1372,13 @@ TEST_F(SearchTest, table__find_last__nulls_as_smallest_string) TEST_F(SearchTest, table__find_first__nulls_as_largest_string) { - std::vector h_col_0_strings{ + std::vector h_col_0_strings{ "10", "10", "20", "20", "20", "20", "20", "20", "20", "50", nullptr}; - std::vector h_col_2_strings{ + std::vector h_col_2_strings{ "90", "95", "77", "78", nullptr, nullptr, "61", "62", "63", "41", "50"}; - std::vector h_val_0_strings{"10", nullptr, "20"}; - std::vector h_val_2_strings{"95", "50", nullptr}; + std::vector h_val_0_strings{"10", nullptr, "20"}; + std::vector h_val_2_strings{"95", "50", nullptr}; fixed_width_column_wrapper column_1{{5.0, 6.0, .5, .5, .5, .5, .7, .7, .7, .7, .5}, {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; @@ -1439,13 +1439,13 @@ TEST_F(SearchTest, table__find_first__nulls_as_largest_string) TEST_F(SearchTest, table__find_last__nulls_as_largest_string) { - std::vector h_col_0_strings{ + std::vector h_col_0_strings{ "10", "10", "20", "20", "20", "20", "20", "20", "20", "50", nullptr}; - std::vector h_col_2_strings{ + std::vector h_col_2_strings{ "90", "95", "77", "78", nullptr, nullptr, "61", "62", "63", "41", "50"}; - std::vector h_val_0_strings{"10", nullptr, "20"}; - std::vector h_val_2_strings{"95", "50", nullptr}; + std::vector h_val_0_strings{"10", nullptr, "20"}; + std::vector h_val_2_strings{"95", "50", nullptr}; fixed_width_column_wrapper column_1{{5.0, 6.0, .5, .5, .5, .5, .7, .7, .7, .7, .5}, {1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; @@ -1506,7 +1506,7 @@ TEST_F(SearchTest, table__find_last__nulls_as_largest_string) TEST_F(SearchTest, contains_true_string) { - std::vector h_col_strings{"00", "01", "17", "19", "23", "29", "71"}; + std::vector h_col_strings{"00", "01", "17", "19", "23", "29", "71"}; string_scalar scalar{"23"}; cudf::test::strings_column_wrapper column( @@ -1525,7 +1525,7 @@ TEST_F(SearchTest, contains_true_string) TEST_F(SearchTest, contains_false_string) { - std::vector h_col_strings{"0", "1", "17", "19", "23", "29", "71"}; + std::vector h_col_strings{"0", "1", "17", "19", "23", "29", "71"}; string_scalar scalar{"24"}; cudf::test::strings_column_wrapper column( @@ -1544,7 +1544,7 @@ TEST_F(SearchTest, contains_false_string) TEST_F(SearchTest, contains_empty_value_string) { - std::vector h_col_strings{"0", "1", "17", "19", "23", "29", "71"}; + std::vector h_col_strings{"0", "1", "17", "19", "23", "29", "71"}; string_scalar scalar{"23", false}; cudf::test::strings_column_wrapper column( @@ -1563,7 +1563,7 @@ TEST_F(SearchTest, contains_empty_value_string) TEST_F(SearchTest, contains_empty_column_string) { - std::vector h_col_strings{}; + std::vector h_col_strings{}; string_scalar scalar{"24"}; cudf::test::strings_column_wrapper column( @@ -1582,7 +1582,7 @@ TEST_F(SearchTest, contains_empty_column_string) TEST_F(SearchTest, contains_nullable_column_true_string) { - std::vector h_col_strings{nullptr, nullptr, "17", "19", "23", "29", "71"}; + std::vector h_col_strings{nullptr, nullptr, "17", "19", "23", "29", "71"}; string_scalar scalar{"23"}; cudf::test::strings_column_wrapper column( @@ -1601,7 +1601,7 @@ TEST_F(SearchTest, contains_nullable_column_true_string) TEST_F(SearchTest, contains_nullable_column_false_string) { - std::vector h_col_strings{nullptr, nullptr, "17", "19", nullptr, "29", "71"}; + std::vector h_col_strings{nullptr, nullptr, "17", "19", nullptr, "29", 
"71"}; string_scalar scalar{"23"}; cudf::test::strings_column_wrapper column( @@ -1648,8 +1648,8 @@ TEST_F(SearchTest, multi_contains_none) TEST_F(SearchTest, multi_contains_some_string) { - std::vector h_haystack_strings{"0", "1", "17", "19", "23", "29", "71"}; - std::vector h_needles_strings{"17", "19", "45", "72"}; + std::vector h_haystack_strings{"0", "1", "17", "19", "23", "29", "71"}; + std::vector h_needles_strings{"17", "19", "45", "72"}; cudf::test::strings_column_wrapper haystack(h_haystack_strings.begin(), h_haystack_strings.end()); @@ -1664,8 +1664,8 @@ TEST_F(SearchTest, multi_contains_some_string) TEST_F(SearchTest, multi_contains_none_string) { - std::vector h_haystack_strings{"0", "1", "17", "19", "23", "29", "71"}; - std::vector h_needles_strings{"2", "3"}; + std::vector h_haystack_strings{"0", "1", "17", "19", "23", "29", "71"}; + std::vector h_needles_strings{"2", "3"}; cudf::test::strings_column_wrapper haystack(h_haystack_strings.begin(), h_haystack_strings.end()); @@ -1710,8 +1710,8 @@ TEST_F(SearchTest, multi_contains_none_with_nulls) TEST_F(SearchTest, multi_contains_some_string_with_nulls) { - std::vector h_haystack_strings{"0", "1", nullptr, "19", "23", "29", "71"}; - std::vector h_needles_strings{"17", "23", nullptr, "72"}; + std::vector h_haystack_strings{"0", "1", nullptr, "19", "23", "29", "71"}; + std::vector h_needles_strings{"17", "23", nullptr, "72"}; fixed_width_column_wrapper expect{{0, 0, 0, 0, 1, 0, 0}, {1, 1, 0, 1, 1, 1, 1}}; @@ -1734,8 +1734,8 @@ TEST_F(SearchTest, multi_contains_some_string_with_nulls) TEST_F(SearchTest, multi_contains_none_string_with_nulls) { - std::vector h_haystack_strings{"0", "1", nullptr, "19", "23", "29", "71"}; - std::vector h_needles_strings{"2", nullptr}; + std::vector h_haystack_strings{"0", "1", nullptr, "19", "23", "29", "71"}; + std::vector h_needles_strings{"2", nullptr}; fixed_width_column_wrapper expect{{0, 0, 0, 0, 0, 0, 0}, {1, 1, 0, 1, 1, 1, 1}}; @@ -1772,8 +1772,8 @@ TEST_F(SearchTest, multi_contains_empty_column) TEST_F(SearchTest, multi_contains_empty_column_string) { - std::vector h_haystack_strings{}; - std::vector h_needles_strings{"17", "19", "45", "72"}; + std::vector h_haystack_strings{}; + std::vector h_needles_strings{"17", "19", "45", "72"}; cudf::test::strings_column_wrapper haystack(h_haystack_strings.begin(), h_haystack_strings.end()); @@ -1802,8 +1802,8 @@ TEST_F(SearchTest, multi_contains_empty_input_set) TEST_F(SearchTest, multi_contains_empty_input_set_string) { - std::vector h_haystack_strings{"0", "1", "17", "19", "23", "29", "71"}; - std::vector h_needles_strings{}; + std::vector h_haystack_strings{"0", "1", "17", "19", "23", "29", "71"}; + std::vector h_needles_strings{}; cudf::test::strings_column_wrapper haystack(h_haystack_strings.begin(), h_haystack_strings.end()); diff --git a/cpp/tests/sort/rank_test.cpp b/cpp/tests/sort/rank_test.cpp index 88c0f699252..dfae68f77cb 100644 --- a/cpp/tests/sort/rank_test.cpp +++ b/cpp/tests/sort/rank_test.cpp @@ -40,7 +40,7 @@ void run_rank_test(table_view input, bool debug = false) { int i = 0; - for (auto &&input_column : input) { + for (auto&& input_column : input) { // Rank auto got_rank_column = cudf::rank(input_column, method, column_order, null_handling, null_precedence, percentage); @@ -77,7 +77,7 @@ struct Rank : public BaseFixture { bool percentage = false) { if (std::is_same::value) return; - for (auto const &test_case : { + for (auto const& test_case : { // Non-null column test_case_t{table_view{{col1}}, table_view{{col1_rank}}}, // Null 
column diff --git a/cpp/tests/strings/chars_types_tests.cpp b/cpp/tests/strings/chars_types_tests.cpp index 702329edaba..17e08bd21c5 100644 --- a/cpp/tests/strings/chars_types_tests.cpp +++ b/cpp/tests/strings/chars_types_tests.cpp @@ -73,7 +73,8 @@ TEST_P(StringsCharsTestTypes, AllTypes) int x = static_cast(is_parm); int index = 0; int strings_count = static_cast(h_strings.size()); - while (x >>= 1) ++index; + while (x >>= 1) + ++index; bool* sub_expected = &expecteds[index * strings_count]; cudf::test::fixed_width_column_wrapper expected( diff --git a/cpp/tests/strings/hash_string.cu b/cpp/tests/strings/hash_string.cu index 023d648cfdf..b5298d39bda 100644 --- a/cpp/tests/strings/hash_string.cu +++ b/cpp/tests/strings/hash_string.cu @@ -74,5 +74,6 @@ TEST_F(StringsHashTest, HashTest) uint32_t h_expected[] = { 2739798893, 2739798893, 3506676360, 1891213601, 3778137224, 0, 0, 1551088011}; auto h_values = cudf::detail::make_host_vector_sync(d_values); - for (uint32_t idx = 0; idx < h_values.size(); ++idx) EXPECT_EQ(h_values[idx], h_expected[idx]); + for (uint32_t idx = 0; idx < h_values.size(); ++idx) + EXPECT_EQ(h_values[idx], h_expected[idx]); } diff --git a/cpp/tests/text/subword_tests.cpp b/cpp/tests/text/subword_tests.cpp index d23ca3033cc..65cc466fee7 100644 --- a/cpp/tests/text/subword_tests.cpp +++ b/cpp/tests/text/subword_tests.cpp @@ -46,7 +46,8 @@ void create_hashed_vocab(std::string const& hash_file) std::vector> coefficients(23, {65559, 0}); std::ofstream outfile(hash_file, std::ofstream::out); outfile << "1\n0\n" << coefficients.size() << "\n"; - for (auto c : coefficients) outfile << c.first << " " << c.second << "\n"; + for (auto c : coefficients) + outfile << c.first << " " << c.second << "\n"; std::vector hash_table(23, 0); outfile << hash_table.size() << "\n"; hash_table[0] = 3015668L; // based on values @@ -54,7 +55,8 @@ void create_hashed_vocab(std::string const& hash_file) hash_table[5] = 6358029; // bert_hash_table.txt hash_table[16] = 451412625363L; // file for the test hash_table[20] = 6206321707968235495L; // words above - for (auto h : hash_table) outfile << h << "\n"; + for (auto h : hash_table) + outfile << h << "\n"; outfile << "100\n101\n102\n\n"; } diff --git a/cpp/tests/transpose/transpose_test.cpp b/cpp/tests/transpose/transpose_test.cpp index ec15a1b8423..8d81f4dce84 100644 --- a/cpp/tests/transpose/transpose_test.cpp +++ b/cpp/tests/transpose/transpose_test.cpp @@ -51,7 +51,9 @@ auto transpose_vectors(std::vector> const& input) transposed.begin(), transposed.end(), [=](std::vector& col) { col.resize(ncols); }); for (size_t col = 0; col < input.size(); ++col) { - for (size_t row = 0; row < nrows; ++row) { transposed[row][col] = input[col][row]; } + for (size_t row = 0; row < nrows; ++row) { + transposed[row][col] = input[col][row]; + } } return transposed; @@ -63,7 +65,9 @@ auto make_columns(std::vector> const& values) std::vector> columns; columns.reserve(values.size()); - for (auto const& value_col : values) { columns.emplace_back(value_col.begin(), value_col.end()); } + for (auto const& value_col : values) { + columns.emplace_back(value_col.begin(), value_col.end()); + } return columns; } diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu index 22e15809a2d..a9a5151e7c3 100644 --- a/cpp/tests/utilities_tests/span_tests.cu +++ b/cpp/tests/utilities_tests/span_tests.cu @@ -45,7 +45,9 @@ template void expect_match(Iterator1 expected, size_t expected_size, host_span input) { EXPECT_EQ(expected_size, input.size()); - 
for (size_t i = 0; i < expected_size; i++) { EXPECT_EQ(*(expected + i), *(input.begin() + i)); } + for (size_t i = 0; i < expected_size; i++) { + EXPECT_EQ(*(expected + i), *(input.begin() + i)); + } } template diff --git a/cpp/tests/utilities_tests/type_list_tests.cpp b/cpp/tests/utilities_tests/type_list_tests.cpp index 0953c02078a..d0b10b774eb 100644 --- a/cpp/tests/utilities_tests/type_list_tests.cpp +++ b/cpp/tests/utilities_tests/type_list_tests.cpp @@ -60,7 +60,7 @@ template std::string type_name() { int status; - char *realname; + char* realname; realname = abi::__cxa_demangle(typeid(T).name(), 0, 0, &status); std::string name{realname}; free(realname); @@ -86,15 +86,14 @@ TEST(TypeList, GetType) TEST(TypeList, Concat) { EXPECT_SAME_TYPE(Concat<>, Types<>); - EXPECT_SAME_TYPE((Concat>), (Types)); + EXPECT_SAME_TYPE((Concat>), (Types)); - EXPECT_SAME_TYPE((Concat, Types>), - (Types)); + EXPECT_SAME_TYPE((Concat, Types>), + (Types)); - EXPECT_SAME_TYPE((Concat, - Types, - Types>), - (Types)); + EXPECT_SAME_TYPE( + (Concat, Types, Types>), + (Types)); } TEST(TypeList, Flatten) From 718eccf1eb10f9f56b8f7e8be78262e6835f3345 Mon Sep 17 00:00:00 2001 From: Jordan Jacobelli Date: Wed, 7 Jul 2021 19:10:14 +0200 Subject: [PATCH 31/54] Remove flatbuffers dependency (#8671) As discussed with @jakirkham and @trxcllnt, cudf does not depend directly on `flatbuffers` so we can remove the dependency Authors: - Jordan Jacobelli (https://github.com/Ethyling) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - https://github.com/jakirkham - Paul Taylor (https://github.com/trxcllnt) URL: https://github.com/rapidsai/cudf/pull/8671 --- conda/environments/cudf_dev_cuda11.0.yml | 1 - conda/environments/cudf_dev_cuda11.2.yml | 1 - python/cudf/requirements/cuda-11.0/dev_requirements.txt | 1 - python/cudf/requirements/cuda-11.2/dev_requirements.txt | 1 - 4 files changed, 4 deletions(-) diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index d50dde7ff0e..79fd94a3644 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -49,7 +49,6 @@ dependencies: - arrow-cpp-proc * cuda - double-conversion - rapidjson - - flatbuffers - hypothesis - sphinx-markdown-tables - sphinx-copybutton diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index 31118fdbdbc..b1a4c52c882 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -49,7 +49,6 @@ dependencies: - arrow-cpp-proc * cuda - double-conversion - rapidjson - - flatbuffers - hypothesis - sphinx-markdown-tables - sphinx-copybutton diff --git a/python/cudf/requirements/cuda-11.0/dev_requirements.txt b/python/cudf/requirements/cuda-11.0/dev_requirements.txt index 455258d2e2e..87b3a36f865 100644 --- a/python/cudf/requirements/cuda-11.0/dev_requirements.txt +++ b/python/cudf/requirements/cuda-11.0/dev_requirements.txt @@ -10,7 +10,6 @@ cmake-setuptools>=0.1.3 cython>=0.29,<0.30 dlpack fastavro>=0.22.9 -flatbuffers fsspec>=0.6.0 hypothesis mimesis diff --git a/python/cudf/requirements/cuda-11.2/dev_requirements.txt b/python/cudf/requirements/cuda-11.2/dev_requirements.txt index a7e5f1c0993..4e9a399f471 100644 --- a/python/cudf/requirements/cuda-11.2/dev_requirements.txt +++ b/python/cudf/requirements/cuda-11.2/dev_requirements.txt @@ -10,7 +10,6 @@ cmake-setuptools>=0.1.3 cython>=0.29,<0.30 dlpack fastavro>=0.22.9 -flatbuffers fsspec>=0.6.0 hypothesis mimesis From 
f0f2170972fd9f8c9570b95c9d84e4dbf714b3e3 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 7 Jul 2021 14:22:20 -0400 Subject: [PATCH 32/54] Add ConfigureNVBench to avoid concurrent main() entry points (#8662) This PR fixes a bug where both `nvbench::main` and `benchmark_main` are trying to insert `main()` entry points. It creates a new `ConfigureNVBench` macro to build nvbench benchmarks in a separate directory. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Robert Maynard (https://github.com/robertmaynard) URL: https://github.com/rapidsai/cudf/pull/8662 --- cpp/benchmarks/CMakeLists.txt | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index f8107d526c1..cf681a96cbd 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -29,7 +29,6 @@ target_link_libraries(cudf_datagen GTest::gmock_main GTest::gtest_main benchmark::benchmark - nvbench::main Threads::Threads cudf) @@ -51,11 +50,19 @@ target_link_libraries(cudf_benchmark_common PRIVATE cudf_datagen) function(ConfigureBench CMAKE_BENCH_NAME) add_executable(${CMAKE_BENCH_NAME} ${ARGN}) set_target_properties(${CMAKE_BENCH_NAME} - PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") target_link_libraries(${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen benchmark::benchmark_main) endfunction() +function(ConfigureNVBench CMAKE_BENCH_NAME) + add_executable(${CMAKE_BENCH_NAME} ${ARGN}) + set_target_properties(${CMAKE_BENCH_NAME} + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") + target_link_libraries(${CMAKE_BENCH_NAME} + PRIVATE cudf_benchmark_common cudf_datagen nvbench::main) +endfunction() + ################################################################################################### # - column benchmarks ----------------------------------------------------------------------------- ConfigureBench(COLUMN_CONCAT_BENCH column/concatenate_benchmark.cpp) From 53b3c1696ba7e545e0f67566415259362ace9276 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 7 Jul 2021 18:41:25 -0600 Subject: [PATCH 33/54] Implement groupby aggregations `M2` and `MERGE_M2` (#8605) This PR adds the following new groupby aggregations: * `M2`: sum of squares of differences from group mean, which is essentially an unscaled version of variance * `MERGE_M2` for merging distributedly computed `M2` These are auxiliary aggregations used for generating intermediate results for distributed computing of variance and standard deviation, with numerical stability. Below is an overview of the algorithm for distributed computing of `VARIANCE` aggregation: * Partition data into batches and compute (partial results) groupby `COUNT_VALID`, `MEAN` and `M2` aggregations for each batch. * Vertically concatenate results of `COUNT_VALID`, `MEAN` and `M2`, then assemble a structs column with these values in separate children columns. * The new structs column is given as the input values column to `MERGE_M2` aggregation to produce new merged values for those partial results, following the algorithm described here: https://www.wikiwand.com/en/Algorithms_for_calculating_variance#/Parallel_algorithm. As such, the output of `MERGE_M2` is also a structs column similar to its input. * Those partial results (`COUNT_VALID`, `MEAN`, and `M2`) may be merged in several merging steps. * After the last merging step, we have the final `M2` values. 
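A minimal host-side sketch of the pairwise merge described above (names are illustrative only, not cudf APIs or the actual kernel):

```cpp
// Merge per-batch (count, mean, M2) partial results for one group.
#include <cstddef>
#include <iostream>

struct partial {
  std::size_t count;
  double mean;
  double M2;
};

// Chan et al.'s parallel update: combine two partial results into one.
partial merge(partial const& a, partial const& b)
{
  if (a.count == 0) { return b; }
  if (b.count == 0) { return a; }
  auto const n     = static_cast<double>(a.count + b.count);
  auto const delta = b.mean - a.mean;
  return {a.count + b.count,
          (a.mean * a.count + b.mean * b.count) / n,
          a.M2 + b.M2 + delta * delta * a.count * b.count / n};
}

int main()
{
  partial const a{3, 2.0, 2.0};  // batch {1, 2, 3}
  partial const b{2, 4.5, 0.5};  // batch {4, 5}
  auto const m = merge(a, b);
  // Prints "5 3 10", identical to a single pass over {1, 2, 3, 4, 5};
  // the variance then follows as M2 / (count - ddof).
  std::cout << m.count << ' ' << m.mean << ' ' << m.M2 << '\n';
}
```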
Then, a finalizing step will compute variance/std for each key group using cudf binops (`variance = M2 / group_size`, and ``variance_pop = M2 / (group_size - 1)`). Reference: https://www.wikiwand.com/en/Algorithms_for_calculating_variance#/Parallel_algorithm There are also some other clean-up and re-order code in several related files. Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - Jake Hemstad (https://github.com/jrhemstad) - Vukasin Milovanovic (https://github.com/vuule) URL: https://github.com/rapidsai/cudf/pull/8605 --- cpp/CMakeLists.txt | 8 +- cpp/include/cudf/aggregation.hpp | 77 ++- .../cudf/detail/aggregation/aggregation.hpp | 217 +++++--- cpp/src/aggregation/aggregation.cpp | 136 +++-- cpp/src/groupby/sort/aggregate.cpp | 44 ++ cpp/src/groupby/sort/group_m2.cu | 141 ++++++ cpp/src/groupby/sort/group_merge_m2.cu | 196 +++++++ cpp/src/groupby/sort/group_reductions.hpp | 50 ++ cpp/tests/CMakeLists.txt | 2 + cpp/tests/groupby/m2_tests.cpp | 243 +++++++++ cpp/tests/groupby/merge_m2_tests.cpp | 479 ++++++++++++++++++ 11 files changed, 1439 insertions(+), 154 deletions(-) create mode 100644 cpp/src/groupby/sort/group_m2.cu create mode 100644 cpp/src/groupby/sort/group_merge_m2.cu create mode 100644 cpp/tests/groupby/m2_tests.cpp create mode 100644 cpp/tests/groupby/merge_m2_tests.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d0eabd1e5cd..3582f29bf11 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -194,14 +194,16 @@ add_library(cudf src/filling/sequence.cu src/groupby/groupby.cu src/groupby/hash/groupby.cu + src/groupby/sort/aggregate.cpp src/groupby/sort/group_argmax.cu src/groupby/sort/group_argmin.cu - src/groupby/sort/aggregate.cpp src/groupby/sort/group_collect.cu - src/groupby/sort/group_merge_lists.cu src/groupby/sort/group_count.cu + src/groupby/sort/group_m2.cu src/groupby/sort/group_max.cu src/groupby/sort/group_min.cu + src/groupby/sort/group_merge_lists.cu + src/groupby/sort/group_merge_m2.cu src/groupby/sort/group_nth_element.cu src/groupby/sort/group_nunique.cu src/groupby/sort/group_product.cu @@ -272,7 +274,7 @@ add_library(cudf src/join/join.cu src/join/semi_join.cu src/lists/contains.cu - src/lists/combine/concatenate_list_elements.cu + src/lists/combine/concatenate_list_elements.cu src/lists/combine/concatenate_rows.cu src/lists/copying/concatenate.cu src/lists/copying/copying.cu diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 5fab284d506..a2f59de54db 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -67,8 +67,9 @@ class aggregation { ALL, ///< all reduction SUM_OF_SQUARES, ///< sum of squares reduction MEAN, ///< arithmetic mean reduction - VARIANCE, ///< groupwise variance - STD, ///< groupwise standard deviation + M2, ///< sum of squares of differences from the mean + VARIANCE, ///< variance + STD, ///< standard deviation MEDIAN, ///< median reduction QUANTILE, ///< compute specified quantile(s) ARGMAX, ///< Index of max element @@ -78,12 +79,13 @@ class aggregation { ROW_NUMBER, ///< get row-number of current index (relative to rolling window) COLLECT_LIST, ///< collect values into a list COLLECT_SET, ///< collect values into a list without duplicate entries - MERGE_LISTS, ///< merge multiple lists values into one list - MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries LEAD, ///< window function, accesses row at specified offset following current row LAG, ///< window function, accesses row at 
specified offset preceding current row PTX, ///< PTX UDF based reduction - CUDA ///< CUDA UDF based reduction + CUDA, ///< CUDA UDF based reduction + MERGE_LISTS, ///< merge multiple lists values into one list + MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries + MERGE_M2 ///< merge partial values of M2 aggregation }; aggregation() = delete; @@ -159,6 +161,20 @@ std::unique_ptr make_sum_of_squares_aggregation(); template std::unique_ptr make_mean_aggregation(); +/** + * @brief Factory to create a M2 aggregation + * + * A M2 aggregation is sum of squares of differences from the mean. That is: + * `M2 = SUM((x - MEAN) * (x - MEAN))`. + * + * This aggregation produces the intermediate values that are used to compute variance and standard + * deviation across multiple discrete sets. See + * `https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm` for more + * detail. + */ +template +std::unique_ptr make_m2_aggregation(); + /** * @brief Factory to create a VARIANCE aggregation * @@ -271,11 +287,33 @@ std::unique_ptr make_collect_set_aggregation(null_policy null_handling = n null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::UNEQUAL); +/// Factory to create a LAG aggregation +template +std::unique_ptr make_lag_aggregation(size_type offset); + +/// Factory to create a LEAD aggregation +template +std::unique_ptr make_lead_aggregation(size_type offset); + +/** + * @brief Factory to create an aggregation base on UDF for PTX or CUDA + * + * @param[in] type: either udf_type::PTX or udf_type::CUDA + * @param[in] user_defined_aggregator A string containing the aggregator code + * @param[in] output_type expected output type + * + * @return aggregation unique pointer housing user_defined_aggregator string. + */ +template +std::unique_ptr make_udf_aggregation(udf_type type, + std::string const& user_defined_aggregator, + data_type output_type); + /** * @brief Factory to create a MERGE_LISTS aggregation. * * Given a lists column, this aggregation merges all the lists corresponding to the same key value - * into one list. It is designed specificly to merge the partial results of multiple (distributed) + * into one list. It is designed specifically to merge the partial results of multiple (distributed) * groupby `COLLECT_LIST` aggregations into a final `COLLECT_LIST` result. As such, it requires the * input lists column to be non-nullable (the child column containing list entries is not subjected * to this requirement). @@ -290,7 +328,7 @@ std::unique_ptr make_merge_lists_aggregation(); * value into one list, then it drops all the duplicate entries in each lists, producing a lists * column containing non-repeated entries. * - * This aggregation is designed specificly to merge the partial results of multiple (distributed) + * This aggregation is designed specifically to merge the partial results of multiple (distributed) * groupby `COLLECT_LIST` or `COLLECT_SET` aggregations into a final `COLLECT_SET` result. As such, * it requires the input lists column to be non-nullable (the child column containing list entries * is not subjected to this requirement). 
@@ -308,27 +346,20 @@ template std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::UNEQUAL); -/// Factory to create a LAG aggregation -template -std::unique_ptr make_lag_aggregation(size_type offset); - -/// Factory to create a LEAD aggregation -template -std::unique_ptr make_lead_aggregation(size_type offset); - /** - * @brief Factory to create an aggregation base on UDF for PTX or CUDA + * @brief Factory to create a MERGE_M2 aggregation * - * @param[in] type: either udf_type::PTX or udf_type::CUDA - * @param[in] user_defined_aggregator A string containing the aggregator code - * @param[in] output_type expected output type + * Merges the results of `M2` aggregations on independent sets into a new `M2` value equivalent to + * if a single `M2` aggregation was done across all of the sets at once. This aggregation is only + * valid on structs whose members are the result of the `COUNT_VALID`, `MEAN`, and `M2` aggregations + * on the same sets. The output of this aggregation is a struct containing the merged `COUNT_VALID`, + * `MEAN`, and `M2` aggregations. * - * @return aggregation unique pointer housing user_defined_aggregator string. + * The input `M2` aggregation values are expected to be all non-negative numbers, since they + * were output from `M2` aggregation. */ template -std::unique_ptr make_udf_aggregation(udf_type type, - std::string const& user_defined_aggregator, - data_type output_type); +std::unique_ptr make_merge_m2_aggregation(); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 373d695a5b5..10d9d8c1b92 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -53,6 +53,8 @@ class simple_aggregations_collector { // Declares the interface for the simple data_type col_type, class sum_of_squares_aggregation const& agg); virtual std::vector> visit(data_type col_type, class mean_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class m2_aggregation const& agg); virtual std::vector> visit(data_type col_type, class var_aggregation const& agg); virtual std::vector> visit(data_type col_type, @@ -75,14 +77,16 @@ class simple_aggregations_collector { // Declares the interface for the simple data_type col_type, class collect_list_aggregation const& agg); virtual std::vector> visit(data_type col_type, class collect_set_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class lead_lag_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class udf_aggregation const& agg); virtual std::vector> visit(data_type col_type, class merge_lists_aggregation const& agg); virtual std::vector> visit(data_type col_type, class merge_sets_aggregation const& agg); virtual std::vector> visit(data_type col_type, - class lead_lag_aggregation const& agg); - virtual std::vector> visit(data_type col_type, - class udf_aggregation const& agg); + class merge_m2_aggregation const& agg); }; class aggregation_finalizer { // Declares the interface for the finalizer @@ -98,6 +102,7 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class all_aggregation const& agg); virtual void visit(class sum_of_squares_aggregation const& agg); virtual void visit(class mean_aggregation const& agg); + virtual void visit(class m2_aggregation const& agg); 
virtual void visit(class var_aggregation const& agg); virtual void visit(class std_aggregation const& agg); virtual void visit(class median_aggregation const& agg); @@ -109,10 +114,11 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class row_number_aggregation const& agg); virtual void visit(class collect_list_aggregation const& agg); virtual void visit(class collect_set_aggregation const& agg); - virtual void visit(class merge_lists_aggregation const& agg); - virtual void visit(class merge_sets_aggregation const& agg); virtual void visit(class lead_lag_aggregation const& agg); virtual void visit(class udf_aggregation const& agg); + virtual void visit(class merge_lists_aggregation const& agg); + virtual void visit(class merge_sets_aggregation const& agg); + virtual void visit(class merge_m2_aggregation const& agg); }; /** @@ -286,6 +292,25 @@ class mean_aggregation final : public rolling_aggregation { void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; +/** + * @brief Derived class for specifying a m2 aggregation + */ +class m2_aggregation : public aggregation { + public: + m2_aggregation() : aggregation{M2} {} + + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + /** * @brief Derived class for specifying a standard deviation/variance aggregation */ @@ -633,66 +658,6 @@ class collect_set_aggregation final : public rolling_aggregation { } }; -/** - * @brief Derived aggregation class for specifying MERGE_LISTs aggregation - */ -class merge_lists_aggregation final : public aggregation { - public: - explicit merge_lists_aggregation() : aggregation{MERGE_LISTS} {} - - std::unique_ptr clone() const override - { - return std::make_unique(*this); - } - std::vector> get_simple_aggregations( - data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override - { - return collector.visit(col_type, *this); - } - void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } -}; - -/** - * @brief Derived aggregation class for specifying MERGE_SETs aggregation - */ -class merge_sets_aggregation final : public aggregation { - public: - explicit merge_sets_aggregation(null_equality nulls_equal, nan_equality nans_equal) - : aggregation{MERGE_SETS}, _nulls_equal(nulls_equal), _nans_equal(nans_equal) - { - } - - null_equality _nulls_equal; ///< whether to consider nulls as equal value - nan_equality _nans_equal; ///< whether to consider NaNs as equal value (applicable only to - ///< floating point types) - - bool is_equal(aggregation const& _other) const override - { - if (!this->aggregation::is_equal(_other)) { return false; } - auto const& other = dynamic_cast(_other); - return (_nulls_equal == other._nulls_equal && _nans_equal == other._nans_equal); - } - - size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } - - std::unique_ptr clone() const override - { - return std::make_unique(*this); - } - std::vector> get_simple_aggregations( - data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override - { - return collector.visit(col_type, *this); - } - void finalize(aggregation_finalizer& finalizer) 
const override { finalizer.visit(*this); } - - protected: - size_t hash_impl() const - { - return std::hash{}(static_cast(_nulls_equal) ^ static_cast(_nans_equal)); - } -}; - /** * @brief Derived aggregation class for specifying LEAD/LAG window aggregations */ @@ -783,6 +748,85 @@ class udf_aggregation final : public rolling_aggregation { } }; +/** + * @brief Derived aggregation class for specifying MERGE_LISTS aggregation + */ +class merge_lists_aggregation final : public aggregation { + public: + explicit merge_lists_aggregation() : aggregation{MERGE_LISTS} {} + + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + +/** + * @brief Derived aggregation class for specifying MERGE_SETS aggregation + */ +class merge_sets_aggregation final : public aggregation { + public: + explicit merge_sets_aggregation(null_equality nulls_equal, nan_equality nans_equal) + : aggregation{MERGE_SETS}, _nulls_equal(nulls_equal), _nans_equal(nans_equal) + { + } + + null_equality _nulls_equal; ///< whether to consider nulls as equal value + nan_equality _nans_equal; ///< whether to consider NaNs as equal value (applicable only to + ///< floating point types) + + bool is_equal(aggregation const& _other) const override + { + if (!this->aggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); + return (_nulls_equal == other._nulls_equal && _nans_equal == other._nans_equal); + } + + size_t do_hash() const override { return this->aggregation::do_hash() ^ hash_impl(); } + + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } + + protected: + size_t hash_impl() const + { + return std::hash{}(static_cast(_nulls_equal) ^ static_cast(_nans_equal)); + } +}; + +/** + * @brief Derived aggregation class for specifying MERGE_M2 aggregation + */ +class merge_m2_aggregation final : public aggregation { + public: + explicit merge_m2_aggregation() : aggregation{MERGE_M2} {} + + std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, cudf::detail::simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + /** * @brief Sentinel value used for `ARGMAX` aggregation. 
* @@ -904,6 +948,12 @@ struct target_type_impl() && is_su using type = Source; }; +// Always use `double` for M2 +template +struct target_type_impl { + using type = double; +}; + // Always use `double` for VARIANCE template struct target_type_impl { @@ -970,6 +1020,18 @@ struct target_type_impl { using type = cudf::list_view; }; +// Always use Source for LEAD +template +struct target_type_impl { + using type = Source; +}; + +// Always use Source for LAG +template +struct target_type_impl { + using type = Source; +}; + // Always use list for MERGE_LISTS template struct target_type_impl { @@ -982,16 +1044,10 @@ struct target_type_impl { using type = cudf::list_view; }; -// Always use Source for LEAD -template -struct target_type_impl { - using type = Source; -}; - -// Always use Source for LAG -template -struct target_type_impl { - using type = Source; +// Always use struct for MERGE_M2 +template +struct target_type_impl { + using type = cudf::struct_view; }; /** @@ -1061,6 +1117,7 @@ CUDA_HOST_DEVICE_CALLABLE decltype(auto) aggregation_dispatcher(aggregation::Kin return f.template operator()(std::forward(args)...); case aggregation::MEAN: return f.template operator()(std::forward(args)...); + case aggregation::M2: return f.template operator()(std::forward(args)...); case aggregation::VARIANCE: return f.template operator()(std::forward(args)...); case aggregation::STD: @@ -1083,14 +1140,16 @@ CUDA_HOST_DEVICE_CALLABLE decltype(auto) aggregation_dispatcher(aggregation::Kin return f.template operator()(std::forward(args)...); case aggregation::COLLECT_SET: return f.template operator()(std::forward(args)...); - case aggregation::MERGE_LISTS: - return f.template operator()(std::forward(args)...); - case aggregation::MERGE_SETS: - return f.template operator()(std::forward(args)...); case aggregation::LEAD: return f.template operator()(std::forward(args)...); case aggregation::LAG: return f.template operator()(std::forward(args)...); + case aggregation::MERGE_LISTS: + return f.template operator()(std::forward(args)...); + case aggregation::MERGE_SETS: + return f.template operator()(std::forward(args)...); + case aggregation::MERGE_M2: + return f.template operator()(std::forward(args)...); default: { #ifndef __CUDA_ARCH__ CUDF_FAIL("Unsupported aggregation."); diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index f0fd865f685..53a55351f8e 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -88,6 +88,12 @@ std::vector> simple_aggregations_collector::visit( return visit(col_type, static_cast(agg)); } +std::vector> simple_aggregations_collector::visit( + data_type col_type, m2_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + std::vector> simple_aggregations_collector::visit( data_type col_type, var_aggregation const& agg) { @@ -155,25 +161,31 @@ std::vector> simple_aggregations_collector::visit( } std::vector> simple_aggregations_collector::visit( - data_type col_type, merge_lists_aggregation const& agg) + data_type col_type, lead_lag_aggregation const& agg) { return visit(col_type, static_cast(agg)); } std::vector> simple_aggregations_collector::visit( - data_type col_type, merge_sets_aggregation const& agg) + data_type col_type, udf_aggregation const& agg) { return visit(col_type, static_cast(agg)); } std::vector> simple_aggregations_collector::visit( - data_type col_type, lead_lag_aggregation const& agg) + data_type col_type, merge_lists_aggregation const& agg) { return visit(col_type, 
static_cast(agg)); } std::vector> simple_aggregations_collector::visit( - data_type col_type, udf_aggregation const& agg) + data_type col_type, merge_sets_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + +std::vector> simple_aggregations_collector::visit( + data_type col_type, merge_m2_aggregation const& agg) { return visit(col_type, static_cast(agg)); } @@ -227,6 +239,11 @@ void aggregation_finalizer::visit(mean_aggregation const& agg) visit(static_cast(agg)); } +void aggregation_finalizer::visit(m2_aggregation const& agg) +{ + visit(static_cast(agg)); +} + void aggregation_finalizer::visit(var_aggregation const& agg) { visit(static_cast(agg)); @@ -282,22 +299,27 @@ void aggregation_finalizer::visit(collect_set_aggregation const& agg) visit(static_cast(agg)); } -void aggregation_finalizer::visit(merge_lists_aggregation const& agg) +void aggregation_finalizer::visit(lead_lag_aggregation const& agg) { visit(static_cast(agg)); } -void aggregation_finalizer::visit(merge_sets_aggregation const& agg) +void aggregation_finalizer::visit(udf_aggregation const& agg) { visit(static_cast(agg)); } -void aggregation_finalizer::visit(lead_lag_aggregation const& agg) +void aggregation_finalizer::visit(merge_lists_aggregation const& agg) { visit(static_cast(agg)); } -void aggregation_finalizer::visit(udf_aggregation const& agg) +void aggregation_finalizer::visit(merge_sets_aggregation const& agg) +{ + visit(static_cast(agg)); +} + +void aggregation_finalizer::visit(merge_m2_aggregation const& agg) { visit(static_cast(agg)); } @@ -311,7 +333,7 @@ std::vector> aggregation::get_simple_aggregations( } /// Factory to create a SUM aggregation -template +template std::unique_ptr make_sum_aggregation() { return std::make_unique(); @@ -320,7 +342,7 @@ template std::unique_ptr make_sum_aggregation(); template std::unique_ptr make_sum_aggregation(); /// Factory to create a PRODUCT aggregation -template +template std::unique_ptr make_product_aggregation() { return std::make_unique(); @@ -328,7 +350,7 @@ std::unique_ptr make_product_aggregation() template std::unique_ptr make_product_aggregation(); /// Factory to create a MIN aggregation -template +template std::unique_ptr make_min_aggregation() { return std::make_unique(); @@ -337,7 +359,7 @@ template std::unique_ptr make_min_aggregation(); template std::unique_ptr make_min_aggregation(); /// Factory to create a MAX aggregation -template +template std::unique_ptr make_max_aggregation() { return std::make_unique(); @@ -346,7 +368,7 @@ template std::unique_ptr make_max_aggregation(); template std::unique_ptr make_max_aggregation(); /// Factory to create a COUNT aggregation -template +template std::unique_ptr make_count_aggregation(null_policy null_handling) { auto kind = @@ -359,7 +381,7 @@ template std::unique_ptr make_count_aggregation +template std::unique_ptr make_any_aggregation() { return std::make_unique(); @@ -367,7 +389,7 @@ std::unique_ptr make_any_aggregation() template std::unique_ptr make_any_aggregation(); /// Factory to create a ALL aggregation -template +template std::unique_ptr make_all_aggregation() { return std::make_unique(); @@ -375,7 +397,7 @@ std::unique_ptr make_all_aggregation() template std::unique_ptr make_all_aggregation(); /// Factory to create a SUM_OF_SQUARES aggregation -template +template std::unique_ptr make_sum_of_squares_aggregation() { return std::make_unique(); @@ -383,7 +405,7 @@ std::unique_ptr make_sum_of_squares_aggregation() template std::unique_ptr make_sum_of_squares_aggregation(); /// Factory to 
create a MEAN aggregation -template +template std::unique_ptr make_mean_aggregation() { return std::make_unique(); @@ -391,8 +413,16 @@ std::unique_ptr make_mean_aggregation() template std::unique_ptr make_mean_aggregation(); template std::unique_ptr make_mean_aggregation(); +/// Factory to create a M2 aggregation +template +std::unique_ptr make_m2_aggregation() +{ + return std::make_unique(); +} +template std::unique_ptr make_m2_aggregation(); + /// Factory to create a VARIANCE aggregation -template +template std::unique_ptr make_variance_aggregation(size_type ddof) { return std::make_unique(ddof); @@ -400,7 +430,7 @@ std::unique_ptr make_variance_aggregation(size_type ddof) template std::unique_ptr make_variance_aggregation(size_type ddof); /// Factory to create a STD aggregation -template +template std::unique_ptr make_std_aggregation(size_type ddof) { return std::make_unique(ddof); @@ -408,7 +438,7 @@ std::unique_ptr make_std_aggregation(size_type ddof) template std::unique_ptr make_std_aggregation(size_type ddof); /// Factory to create a MEDIAN aggregation -template +template std::unique_ptr make_median_aggregation() { return std::make_unique(); @@ -416,7 +446,7 @@ std::unique_ptr make_median_aggregation() template std::unique_ptr make_median_aggregation(); /// Factory to create a QUANTILE aggregation -template +template std::unique_ptr make_quantile_aggregation(std::vector const& q, interpolation i) { return std::make_unique(q, i); @@ -425,7 +455,7 @@ template std::unique_ptr make_quantile_aggregation( std::vector const& q, interpolation i); /// Factory to create an ARGMAX aggregation -template +template std::unique_ptr make_argmax_aggregation() { return std::make_unique(); @@ -434,7 +464,7 @@ template std::unique_ptr make_argmax_aggregation(); template std::unique_ptr make_argmax_aggregation(); /// Factory to create an ARGMIN aggregation -template +template std::unique_ptr make_argmin_aggregation() { return std::make_unique(); @@ -443,7 +473,7 @@ template std::unique_ptr make_argmin_aggregation(); template std::unique_ptr make_argmin_aggregation(); /// Factory to create an NUNIQUE aggregation -template +template std::unique_ptr make_nunique_aggregation(null_policy null_handling) { return std::make_unique(null_handling); @@ -452,7 +482,7 @@ template std::unique_ptr make_nunique_aggregation( null_policy null_handling); /// Factory to create an NTH_ELEMENT aggregation -template +template std::unique_ptr make_nth_element_aggregation(size_type n, null_policy null_handling) { return std::make_unique(n, null_handling); @@ -461,7 +491,7 @@ template std::unique_ptr make_nth_element_aggregation( size_type n, null_policy null_handling); /// Factory to create a ROW_NUMBER aggregation -template +template std::unique_ptr make_row_number_aggregation() { return std::make_unique(); @@ -470,7 +500,7 @@ template std::unique_ptr make_row_number_aggregation() template std::unique_ptr make_row_number_aggregation(); /// Factory to create a COLLECT_LIST aggregation -template +template std::unique_ptr make_collect_list_aggregation(null_policy null_handling) { return std::make_unique(null_handling); @@ -481,7 +511,7 @@ template std::unique_ptr make_collect_list_aggregation +template std::unique_ptr make_collect_set_aggregation(null_policy null_handling, null_equality nulls_equal, nan_equality nans_equal) @@ -493,26 +523,8 @@ template std::unique_ptr make_collect_set_aggregation( template std::unique_ptr make_collect_set_aggregation( null_policy null_handling, null_equality nulls_equal, nan_equality 
nans_equal); -/// Factory to create a MERGE_LISTS aggregation -template -std::unique_ptr make_merge_lists_aggregation() -{ - return std::make_unique(); -} -template std::unique_ptr make_merge_lists_aggregation(); - -/// Factory to create a MERGE_SETS aggregation -template -std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal, - nan_equality nans_equal) -{ - return std::make_unique(nulls_equal, nans_equal); -} -template std::unique_ptr make_merge_sets_aggregation(null_equality, - nan_equality); - /// Factory to create a LAG aggregation -template +template std::unique_ptr make_lag_aggregation(size_type offset) { return std::make_unique(aggregation::LAG, offset); @@ -522,7 +534,7 @@ template std::unique_ptr make_lag_aggregation +template std::unique_ptr make_lead_aggregation(size_type offset) { return std::make_unique(aggregation::LEAD, offset); @@ -532,7 +544,7 @@ template std::unique_ptr make_lead_aggregation +template std::unique_ptr make_udf_aggregation(udf_type type, std::string const& user_defined_aggregator, data_type output_type) @@ -548,6 +560,32 @@ template std::unique_ptr make_udf_aggregation( template std::unique_ptr make_udf_aggregation( udf_type type, std::string const& user_defined_aggregator, data_type output_type); +/// Factory to create a MERGE_LISTS aggregation +template +std::unique_ptr make_merge_lists_aggregation() +{ + return std::make_unique(); +} +template std::unique_ptr make_merge_lists_aggregation(); + +/// Factory to create a MERGE_SETS aggregation +template +std::unique_ptr make_merge_sets_aggregation(null_equality nulls_equal, + nan_equality nans_equal) +{ + return std::make_unique(nulls_equal, nans_equal); +} +template std::unique_ptr make_merge_sets_aggregation(null_equality, + nan_equality); + +/// Factory to create a MERGE_M2 aggregation +template +std::unique_ptr make_merge_m2_aggregation() +{ + return std::make_unique(); +} +template std::unique_ptr make_merge_m2_aggregation(); + namespace detail { namespace { struct target_type_functor { diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 5e202b9ef3f..4e60d8d3f7d 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -238,6 +238,21 @@ void aggregate_result_functor::operator()(aggregation const& cache.add_result(col_idx, agg, std::move(result)); }; +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(col_idx, agg)) return; + + auto const mean_agg = make_mean_aggregation(); + operator()(*mean_agg); + auto const mean_result = cache.get_result(col_idx, *mean_agg); + + cache.add_result( + col_idx, + agg, + detail::group_m2(get_grouped_values(), mean_result, helper.group_labels(stream), stream, mr)); +}; + template <> void aggregate_result_functor::operator()(aggregation const& agg) { @@ -474,6 +489,35 @@ void aggregate_result_functor::operator()(aggregation c mr)); }; +/** + * @brief Perform merging for the M2 values that correspond to the same key value. + * + * The partial results input to this aggregation is a structs column with children are columns + * generated by three other groupby aggregations: `COUNT_VALID`, `MEAN`, and `M2` that were + * performed on partitioned datasets. After distributedly computed, the results output from these + * aggregations are (vertically) concatenated before assembling into a structs column given as the + * values column for this aggregation. 
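+ *
+ * For example (an illustrative merge of two partial results, not test data from this PR):
+ * @code{.pseudo}
+ * partial_1 = (count=3, mean=2.0, M2=2.0)   // from batch {1, 2, 3}
+ * partial_2 = (count=2, mean=4.5, M2=0.5)   // from batch {4, 5}
+ * merged    = (count=5, mean=3.0, M2=10.0)  // same as one pass over {1, 2, 3, 4, 5}
+ * @endcode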
+ * + * For recursive merging of `M2` values, the aggregations values of all input (`COUNT_VALID`, + * `MEAN`, and `M2`) are all merged and stored in the output of this aggregation. As such, the + * output will be a structs column containing children columns of merged `COUNT_VALID`, `MEAN`, and + * `M2` values. + * + * The values of M2 are merged following the parallel algorithm described here: + * https://www.wikiwand.com/en/Algorithms_for_calculating_variance#/Parallel_algorithm + */ +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(col_idx, agg)) { return; } + + cache.add_result( + col_idx, + agg, + detail::group_merge_m2( + get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); +}; + } // namespace detail // Sort-based groupby diff --git a/cpp/src/groupby/sort/group_m2.cu b/cpp/src/groupby/sort/group_m2.cu new file mode 100644 index 00000000000..a72f6c6f647 --- /dev/null +++ b/cpp/src/groupby/sort/group_m2.cu @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace cudf { +namespace groupby { +namespace detail { +namespace { + +template +struct m2_transform { + column_device_view const d_values; + Iterator const values_iter; + ResultType const* d_means; + size_type const* d_group_labels; + + __device__ ResultType operator()(size_type const idx) const noexcept + { + if (d_values.is_null(idx)) { return 0.0; } + + auto const x = static_cast(values_iter[idx]); + auto const group_idx = d_group_labels[idx]; + auto const mean = d_means[group_idx]; + auto const diff = x - mean; + return diff * diff; + } +}; + +template +void compute_m2_fn(column_device_view const& values, + Iterator values_iter, + cudf::device_span group_labels, + ResultType const* d_means, + ResultType* d_result, + rmm::cuda_stream_view stream) +{ + auto const var_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, + m2_transform{ + values, values_iter, d_means, group_labels.data()}); + + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + var_iter, + thrust::make_discard_iterator(), + d_result); +} + +struct m2_functor { + template + std::enable_if_t::value, std::unique_ptr> operator()( + column_view const& values, + column_view const& group_means, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + using result_type = cudf::detail::target_type_t; + auto result = make_numeric_column(data_type(type_to_id()), + group_means.size(), + mask_state::UNALLOCATED, + stream, + mr); + + auto const values_dv_ptr = column_device_view::create(values, stream); + auto const d_values = *values_dv_ptr; + auto const d_means = group_means.data(); + auto const d_result = result->mutable_view().data(); + + if 
(!cudf::is_dictionary(values.type())) { + auto const values_iter = d_values.begin(); + compute_m2_fn(d_values, values_iter, group_labels, d_means, d_result, stream); + } else { + auto const values_iter = + cudf::dictionary::detail::make_dictionary_iterator(*values_dv_ptr); + compute_m2_fn(d_values, values_iter, group_labels, d_means, d_result, stream); + } + + // M2 column values should have the same bitmask as means's. + if (group_means.nullable()) { + result->set_null_mask(cudf::detail::copy_bitmask(group_means, stream, mr), + group_means.null_count()); + } + + return result; + } + + template + std::enable_if_t::value, std::unique_ptr> operator()(Args&&...) + { + CUDF_FAIL("Only numeric types are supported in M2 groupby aggregation"); + } +}; + +} // namespace + +std::unique_ptr group_m2(column_view const& values, + column_view const& group_means, + cudf::device_span group_labels, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto values_type = cudf::is_dictionary(values.type()) + ? dictionary_column_view(values).keys().type() + : values.type(); + + return type_dispatcher(values_type, m2_functor{}, values, group_means, group_labels, stream, mr); +} + +} // namespace detail +} // namespace groupby +} // namespace cudf diff --git a/cpp/src/groupby/sort/group_merge_m2.cu b/cpp/src/groupby/sort/group_merge_m2.cu new file mode 100644 index 00000000000..4e2a5b68abc --- /dev/null +++ b/cpp/src/groupby/sort/group_merge_m2.cu @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace cudf { +namespace groupby { +namespace detail { +namespace { +/** + * @brief Struct to store partial results for merging. + */ +template +struct partial_result { + size_type count; + result_type mean; + result_type M2; +}; + +/** + * @brief Functor to accumulate (merge) all partial results corresponding to the same key into a + * final result storing in a member variable. It performs merging for the partial results of + * `COUNT_VALID`, `MEAN`, and `M2` at the same time. + */ +template +struct accumulate_fn { + partial_result merge_vals; + + void __device__ operator()(partial_result const& partial_vals) noexcept + { + if (partial_vals.count == 0) { return; } + + auto const n_ab = merge_vals.count + partial_vals.count; + auto const delta = partial_vals.mean - merge_vals.mean; + merge_vals.M2 += partial_vals.M2 + (delta * delta) * + static_cast(merge_vals.count) * + static_cast(partial_vals.count) / n_ab; + merge_vals.mean = + (merge_vals.mean * merge_vals.count + partial_vals.mean * partial_vals.count) / n_ab; + merge_vals.count = n_ab; + } +}; + +/** + * @brief Functor to merge partial results of `COUNT_VALID`, `MEAN`, and `M2` aggregations + * for a given group (key) index. 
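+ *
+ * Partial results are folded in one at a time via `accumulate_fn`; merging
+ * (n_a, mean_a, M2_a) with (n_b, mean_b, M2_b) computes:
+ *   n_ab    = n_a + n_b
+ *   delta   = mean_b - mean_a
+ *   M2_ab   = M2_a + M2_b + delta * delta * n_a * n_b / n_ab
+ *   mean_ab = (mean_a * n_a + mean_b * n_b) / n_ab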
+ */ +template +struct merge_fn { + size_type const* const d_offsets; + size_type const* const d_counts; + result_type const* const d_means; + result_type const* const d_M2s; + + auto __device__ operator()(size_type const group_idx) noexcept + { + auto const start_idx = d_offsets[group_idx], end_idx = d_offsets[group_idx + 1]; + + // This case should never happen, because all groups are non-empty as the results of + // aggregation. Here we just to make sure we cover this case. + if (start_idx == end_idx) { + return thrust::make_tuple(size_type{0}, result_type{0}, result_type{0}, int8_t{0}); + } + + // If `(n = d_counts[idx]) > 0` then `d_means[idx] != null` and `d_M2s[idx] != null`. + // Otherwise (`n == 0`), these value (mean and M2) will always be nulls. + // In such cases, reading `mean` and `M2` from memory will return garbage values. + // By setting these values to zero when `n == 0`, we can safely merge the all-zero tuple without + // affecting the final result. + auto get_partial_result = [&] __device__(size_type idx) { + { + auto const n = d_counts[idx]; + return n > 0 ? partial_result{n, d_means[idx], d_M2s[idx]} + : partial_result{size_type{0}, result_type{0}, result_type{0}}; + }; + }; + + // Firstly, store tuple(count, mean, M2) of the first partial result in an accumulator. + auto accumulator = accumulate_fn{get_partial_result(start_idx)}; + + // Then, accumulate (merge) the remaining partial results into that accumulator. + for (auto idx = start_idx + 1; idx < end_idx; ++idx) { + accumulator(get_partial_result(idx)); + } + + // Get the final result after merging. + auto const& merge_vals = accumulator.merge_vals; + + // If there are all nulls in the partial results (i.e., sum of all valid counts is + // zero), then the output is a null. + auto const is_valid = int8_t{merge_vals.count > 0}; + + return thrust::make_tuple(merge_vals.count, merge_vals.mean, merge_vals.M2, is_valid); + } +}; + +} // namespace + +std::unique_ptr group_merge_m2(column_view const& values, + cudf::device_span group_offsets, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(values.type().id() == type_id::STRUCT, + "Input to `group_merge_m2` must be a structs column."); + CUDF_EXPECTS(values.num_children() == 3, + "Input to `group_merge_m2` must be a structs column having 3 children columns."); + + using result_type = id_to_type; + static_assert( + std::is_same_v, result_type>); + CUDF_EXPECTS(values.child(0).type().id() == type_id::INT32 && + values.child(1).type().id() == type_to_id() && + values.child(2).type().id() == type_to_id(), + "Input to `group_merge_m2` must be a structs column having children columns " + "containing tuples of (M2_value, mean, valid_count)."); + + auto result_counts = make_numeric_column( + data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); + auto result_means = make_numeric_column( + data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); + auto result_M2s = make_numeric_column( + data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); + auto validities = rmm::device_uvector(num_groups, stream); + + // Perform merging for all the aggregations. Their output (and their validity data) are written + // out concurrently through an output zip iterator. 
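+  // Each output tuple holds (count, mean, M2, validity), one element per group.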
+  using iterator_tuple  = thrust::tuple<size_type*, result_type*, result_type*, int8_t*>;
+  using output_iterator = thrust::zip_iterator<iterator_tuple>;
+  auto const out_iter =
+    output_iterator{thrust::make_tuple(result_counts->mutable_view().template data<size_type>(),
+                                       result_means->mutable_view().template data<result_type>(),
+                                       result_M2s->mutable_view().template data<result_type>(),
+                                       validities.begin())};
+
+  auto const count_valid = values.child(0);
+  auto const mean_values = values.child(1);
+  auto const M2_values   = values.child(2);
+  auto const iter        = thrust::make_counting_iterator<size_type>(0);
+
+  auto const fn = merge_fn<result_type>{group_offsets.begin(),
+                                        count_valid.template begin<size_type>(),
+                                        mean_values.template begin<result_type>(),
+                                        M2_values.template begin<result_type>()};
+  thrust::transform(rmm::exec_policy(stream), iter, iter + num_groups, out_iter, fn);
+
+  // Generate bitmask for the output.
+  // Only mean and M2 values can be nullable. The count column must be non-nullable.
+  auto [null_mask, null_count] = cudf::detail::valid_if(
+    validities.begin(), validities.end(), thrust::identity<int8_t>{}, stream, mr);
+  if (null_count > 0) {
+    result_means->set_null_mask(null_mask, null_count);           // copy null_mask
+    result_M2s->set_null_mask(std::move(null_mask), null_count);  // take over null_mask
+  }
+
+  // Output is a structs column containing the merged values of `COUNT_VALID`, `MEAN`, and `M2`.
+  std::vector<std::unique_ptr<column>> out_columns;
+  out_columns.emplace_back(std::move(result_counts));
+  out_columns.emplace_back(std::move(result_means));
+  out_columns.emplace_back(std::move(result_M2s));
+  auto result = cudf::make_structs_column(
+    num_groups, std::move(out_columns), 0, rmm::device_buffer{0, stream, mr}, stream, mr);
+
+  return result;
+}
+
+}  // namespace detail
+}  // namespace groupby
+}  // namespace cudf
diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index 3390af29330..2770162da2d 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -217,6 +217,30 @@ std::unique_ptr<column> group_count_all(cudf::device_span<size_type const> group
                                         rmm::cuda_stream_view stream,
                                         rmm::mr::device_memory_resource* mr);
 
+/**
+ * @brief Internal API to calculate the sum of squares of differences from the mean.
+ *
+ * If there are only nulls in the group, the output value of that group will be null.
+ *
+ * @code{.pseudo}
+ * values        = [2, 1, 4, -1, -2, <NA>, 4, <NA>]
+ * group_labels  = [0, 0, 0,  1,  1,    2, 2,    3]
+ * group_means   = [2.333333, -1.5, 4.0, <NA>]
+ * group_m2(...) = [4.666666, 0.5, 0.0, <NA>]
+ * @endcode
+ *
+ * @param values Grouped values for which to compute M2
+ * @param group_means Pre-computed groupwise MEAN
+ * @param group_labels ID of the group that the corresponding value in @p values belongs to
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<column> group_m2(column_view const& values,
+                                 column_view const& group_means,
+                                 cudf::device_span<size_type const> group_labels,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr);
+
 /**
  * @brief Internal API to calculate groupwise variance
  *
@@ -392,6 +416,32 @@ std::unique_ptr<column> group_merge_lists(column_view const& values,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr);
 
+/**
+ * @brief Internal API to merge grouped M2 values corresponding to the same key.
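+ *
+ * For example (illustrative values only):
+ *
+ * @code{.pseudo}
+ * keys                = [1, 1, 2]
+ * values (structs)    = [(2, 1.5, 4.5), (1, 6.0, 0.0), (1, 4.0, 0.0)]
+ * group_merge_m2(...) = [(3, 3.0, 18.0), (1, 4.0, 0.0)]
+ * @endcode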
+ *
+ * The values of M2 are merged following the parallel algorithm described here:
+ * `https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm`
+ *
+ * Merging M2 values requires access to the partial M2 values, means, and valid counts. Thus, the
+ * input to this aggregation needs to be a structs column containing tuples of 3 values
+ * `(valid_count, mean, M2)`.
+ *
+ * This aggregation not only merges the partial results of `M2` but also merges all the partial
+ * results of the input aggregations (`COUNT_VALID`, `MEAN`, and `M2`). As such, the output will be
+ * a structs column containing children columns of merged `COUNT_VALID`, `MEAN`, and `M2` values.
+ *
+ * @param values Grouped values (tuples of values `(valid_count, mean, M2)`) to merge.
+ * @param group_offsets Offsets of groups' starting points within @p values.
+ * @param num_groups Number of groups.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ */
+std::unique_ptr<column> group_merge_m2(column_view const& values,
+                                       cudf::device_span<size_type const> group_offsets,
+                                       size_type num_groups,
+                                       rmm::cuda_stream_view stream,
+                                       rmm::mr::device_memory_resource* mr);
+
 /** @endinternal
  *
  */
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index f844f708562..75fad739534 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -62,11 +62,13 @@ ConfigureTest(GROUPBY_TEST
     groupby/count_tests.cpp
     groupby/groups_tests.cpp
     groupby/keys_tests.cpp
+    groupby/m2_tests.cpp
     groupby/min_tests.cpp
     groupby/max_scan_tests.cpp
     groupby/max_tests.cpp
     groupby/mean_tests.cpp
     groupby/median_tests.cpp
+    groupby/merge_m2_tests.cpp
     groupby/merge_lists_tests.cpp
     groupby/merge_sets_tests.cpp
     groupby/min_scan_tests.cpp
diff --git a/cpp/tests/groupby/m2_tests.cpp b/cpp/tests/groupby/m2_tests.cpp
new file mode 100644
index 00000000000..7611dce2271
--- /dev/null
+++ b/cpp/tests/groupby/m2_tests.cpp
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+
+#include <cudf/copying.hpp>
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/groupby.hpp>
+
+using namespace cudf::test::iterators;
+
+namespace {
+constexpr bool print_all{false};  // For debugging
+constexpr int32_t null{0};        // Mark for null elements
+constexpr double NaN{std::numeric_limits<double>::quiet_NaN()};  // Mark for NaN double elements
+
+template <typename T>
+using keys_col = cudf::test::fixed_width_column_wrapper<T, int32_t>;
+
+template <typename T>
+using vals_col = cudf::test::fixed_width_column_wrapper<T>;
+
+template <typename T>
+using M2s_col = cudf::test::fixed_width_column_wrapper<T>;
+
+auto compute_M2(cudf::column_view const& keys, cudf::column_view const& values)
+{
+  std::vector<cudf::groupby::aggregation_request> requests;
+  requests.emplace_back(cudf::groupby::aggregation_request());
+  requests[0].values = values;
+  requests[0].aggregations.emplace_back(cudf::make_m2_aggregation());
+
+  auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys}));
+  auto result = gb_obj.aggregate(requests);
+  return std::make_pair(std::move(result.first->release()[0]),
+                        std::move(result.second[0].results[0]));
+}
+}  // namespace
+
+template <typename T>
+struct GroupbyM2TypedTest : public cudf::test::BaseFixture {
+};
+
+using TestTypes = cudf::test::Concat<cudf::test::Types<int8_t, int16_t, int32_t, int64_t>,
+                                     cudf::test::FloatingPointTypes>;
+TYPED_TEST_SUITE(GroupbyM2TypedTest, TestTypes);
+
+TYPED_TEST(GroupbyM2TypedTest, EmptyInput)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  auto const keys = keys_col<int32_t>{};
+  auto const vals = vals_col<T>{};
+
+  auto const [out_keys, out_M2s] = compute_M2(keys, vals);
+  auto const expected_M2s        = M2s_col<R>{};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(keys, *out_keys, print_all);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all);
+}
+
+TYPED_TEST(GroupbyM2TypedTest, AllNullKeysInput)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  auto const keys = keys_col<int32_t>{{1, 2, 3}, all_nulls()};
+  auto const vals = vals_col<T>{3, 4, 5};
+
+  auto const [out_keys, out_M2s] = compute_M2(keys, vals);
+  auto const expected_keys       = keys_col<int32_t>{};
+  auto const expected_M2s        = M2s_col<R>{};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all);
+}
+
+TYPED_TEST(GroupbyM2TypedTest, AllNullValuesInput)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  auto const keys = keys_col<int32_t>{1, 2, 3};
+  auto const vals = vals_col<T>{{3, 4, 5}, all_nulls()};
+
+  auto const [out_keys, out_M2s] = compute_M2(keys, vals);
+  auto const expected_M2s        = M2s_col<R>{{null, null, null}, all_nulls()};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(keys, *out_keys, print_all);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all);
+}
+
+TYPED_TEST(GroupbyM2TypedTest, SimpleInput)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  // key = 1: vals = [0, 3, 6]
+  // key = 2: vals = [1, 4, 5, 9]
+  // key = 3: vals = [2, 7, 8]
+  auto const keys = keys_col<int32_t>{1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+  auto const vals = vals_col<T>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+  auto const [out_keys, out_M2s] = compute_M2(keys, vals);
+  auto const expected_keys       = keys_col<int32_t>{1, 2, 3};
+  auto const expected_M2s        = M2s_col<R>{18.0, 32.75, 20.0 + 2.0 / 3.0};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all);
+}
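+
+// Hand-checking the SimpleInput expectations above (illustrative arithmetic only):
+//   key = 1: vals = [0, 3, 6],    mean = 3.0,  M2 = 9 + 0 + 9                           = 18.0
+//   key = 2: vals = [1, 4, 5, 9], mean = 4.75, M2 = 14.0625 + 0.5625 + 0.0625 + 18.0625 = 32.75
+//   key = 3: vals = [2, 7, 8],    mean = 17/3, M2 = (121 + 16 + 49) / 9                 = 20 + 2/3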
+
+TYPED_TEST(GroupbyM2TypedTest, SimpleInputHavingNegativeValues)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  // key = 1: vals = [0, 3, -6]
+  // key = 2: vals = [1, -4, -5, 9]
+  // key = 3: vals = [-2, 7, -8]
+  auto const keys = keys_col<int32_t>{1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+  auto const vals = vals_col<T>{0, 1, -2, 3, -4, -5, -6, 7, -8, 9};
+
+  auto const [out_keys, out_M2s] = compute_M2(keys, vals);
+  auto const expected_keys       = keys_col<int32_t>{1, 2, 3};
+  auto const expected_M2s        = M2s_col<R>{42.0, 122.75, 114.0};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all);
+}
+
+TYPED_TEST(GroupbyM2TypedTest, ValuesHaveNulls)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  auto const keys = keys_col<int32_t>{1, 2, 3, 4, 5, 2, 3, 2};
+  auto const vals = vals_col<T>{{0, null, 2, 3, null, 5, 6, 7}, nulls_at({1, 4})};
+
+  auto const [out_keys, out_M2s] = compute_M2(keys, vals);
+  auto const expected_keys       = keys_col<int32_t>{1, 2, 3, 4, 5};
+  auto const expected_M2s        = M2s_col<R>{{0.0, 2.0, 8.0, 0.0, 0.0 /*NULL*/}, null_at(4)};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all);
+}
+
+TYPED_TEST(GroupbyM2TypedTest, KeysAndValuesHaveNulls)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  // key = 1: vals = [null, 3, 6]
+  // key = 2: vals = [1, 4, null, 9]
+  // key = 3: vals = [2, 8]
+  // key = 4: vals = [null]
+  auto const keys = keys_col<int32_t>{{1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4}, null_at(7)};
+  auto const vals = vals_col<T>{{null, 1, 2, 3, 4, null, 6, 7, 8, 9, null}, nulls_at({0, 5, 10})};
+
+  auto const [out_keys, out_M2s] = compute_M2(keys, vals);
+  auto const expected_keys       = keys_col<int32_t>{1, 2, 3, 4};
+  auto const expected_M2s = M2s_col<R>{{4.5, 32.0 + 2.0 / 3.0, 18.0, 0.0 /*NULL*/}, null_at(3)};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all);
+}
+
+TYPED_TEST(GroupbyM2TypedTest, InputHaveNullsAndNaNs)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  // key = 1: vals = [0, 3, 6]
+  // key = 2: vals = [1, 4, NaN, 9]
+  // key = 3: vals = [null, 2, 8]
+  // key = 4: vals = [null, 10, NaN]
+  auto const keys = keys_col<int32_t>{{4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4}, null_at(9)};
+  auto const vals = vals_col<double>{
+    {0.0 /*NULL*/, 0.0 /*NULL*/, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN},
+    nulls_at({0, 1})};
+
+  auto const [out_keys, out_M2s] = compute_M2(keys, vals);
+  auto const expected_keys       = keys_col<int32_t>{1, 2, 3, 4};
+  auto const expected_M2s        = M2s_col<R>{18.0, NaN, 18.0, NaN};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all);
+}
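+
+// Note for the test above: unlike nulls, NaN inputs are *valid* elements here. They are counted
+// and participate in the mean/M2 arithmetic, so any group containing a NaN yields a NaN M2
+// rather than a null.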
+
+TYPED_TEST(GroupbyM2TypedTest, SlicedColumnsInput)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  // This test should compute M2 aggregation on the same dataset as the InputHaveNullsAndNaNs test,
+  // i.e.:
+  //
+  // key = 1: vals = [0, 3, 6]
+  // key = 2: vals = [1, 4, NaN, 9]
+  // key = 3: vals = [null, 2, 8]
+  // key = 4: vals = [null, 10, NaN]
+
+  auto const keys_original =
+    keys_col<int32_t>{{
+                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,                 // will not use, don't care
+                        4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4,  // use this
+                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5                  // will not use, don't care
+                      },
+                      null_at(19)};
+  auto const vals_original = vals_col<double>{
+    {
+      3.0, 2.0, 5.0, 4.0, 6.0, 9.0, 1.0, 0.0, 1.0, 7.0,  // will not use, don't care
+      0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN,  // use this
+      9.0, 10.0, 11.0, 12.0, 0.0, 5.0, 1.0, 20.0, 19.0, 15.0  // will not use, don't care
+    },
+    nulls_at({10, 11})};
+
+  auto const keys = cudf::slice(keys_original, {10, 24})[0];
+  auto const vals = cudf::slice(vals_original, {10, 24})[0];
+
+  auto const [out_keys, out_M2s] = compute_M2(keys, vals);
+  auto const expected_keys       = keys_col<int32_t>{1, 2, 3, 4};
+  auto const expected_M2s        = M2s_col<R>{18.0, NaN, 18.0, NaN};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *out_keys, print_all);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, *out_M2s, print_all);
+}
diff --git a/cpp/tests/groupby/merge_m2_tests.cpp b/cpp/tests/groupby/merge_m2_tests.cpp
new file mode 100644
index 00000000000..63451f9612d
--- /dev/null
+++ b/cpp/tests/groupby/merge_m2_tests.cpp
@@ -0,0 +1,479 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+
+#include <cudf/column/column_factories.hpp>
+#include <cudf/concatenate.hpp>
+#include <cudf/copying.hpp>
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/groupby.hpp>
+
+using namespace cudf::test::iterators;
+
+namespace {
+constexpr bool print_all{false};  // For debugging
+constexpr int32_t null{0};        // Mark for null elements
+constexpr double NaN{std::numeric_limits<double>::quiet_NaN()};  // Mark for NaN double elements
+
+template <typename T>
+using keys_col = cudf::test::fixed_width_column_wrapper<T, int32_t>;
+
+template <typename T>
+using vals_col = cudf::test::fixed_width_column_wrapper<T>;
+
+using counts_col = cudf::test::fixed_width_column_wrapper<int32_t>;
+
+template <typename T>
+using means_col = cudf::test::fixed_width_column_wrapper<T>;
+
+template <typename T>
+using M2s_col = cudf::test::fixed_width_column_wrapper<T>;
+
+using structs_col = cudf::test::structs_column_wrapper;
+using vcol_views  = std::vector<cudf::column_view>;
+
+/**
+ * @brief Compute `COUNT_VALID`, `MEAN`, `M2` aggregations for the given values columns.
+ * @return A pair of unique keys column and a structs column containing the computed values of
+ * (`COUNT_VALID`, `MEAN`, `M2`).
+ */
+auto compute_partial_results(cudf::column_view const& keys, cudf::column_view const& values)
+{
+  std::vector<cudf::groupby::aggregation_request> requests;
+  requests.emplace_back(cudf::groupby::aggregation_request());
+  requests[0].values = values;
+  requests[0].aggregations.emplace_back(cudf::make_count_aggregation());
+  requests[0].aggregations.emplace_back(cudf::make_mean_aggregation());
+  requests[0].aggregations.emplace_back(cudf::make_m2_aggregation());
+
+  auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys}));
+  auto [out_keys, out_results] = gb_obj.aggregate(requests);
+
+  auto const num_output_rows = out_keys->num_rows();
+  return std::make_pair(
+    std::move(out_keys->release()[0]),
+    cudf::make_structs_column(
+      num_output_rows, std::move(out_results[0].results), 0, rmm::device_buffer{}));
+}
+
+/**
+ * @brief Perform merging for partial results of M2 aggregations.
+ *
+ * @return A pair of unique keys column and a structs column containing the merged values of
+ * (`COUNT_VALID`, `MEAN`, `M2`).
+ */
+auto merge_M2(vcol_views const& keys_cols, vcol_views const& values_cols)
+{
+  // Append all the keys and values together.
+  auto const keys   = cudf::concatenate(keys_cols);
+  auto const values = cudf::concatenate(values_cols);
+
+  std::vector<cudf::groupby::aggregation_request> requests;
+  requests.emplace_back(cudf::groupby::aggregation_request());
+  requests[0].values = *values;
+  requests[0].aggregations.emplace_back(cudf::make_merge_m2_aggregation());
+
+  auto gb_obj = cudf::groupby::groupby(cudf::table_view({*keys}));
+  auto result = gb_obj.aggregate(requests);
+  return std::make_pair(std::move(result.first->release()[0]),
+                        std::move(result.second[0].results[0]));
+}
+}  // namespace
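+
+// The two helpers above mirror a distributed aggregation: each partition is first reduced
+// locally to (COUNT_VALID, MEAN, M2) partial results, which are then combined with the
+// MERGE_M2 aggregation; the result should match a single global M2 computation.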
+
+template <typename T>
+struct GroupbyMergeM2TypedTest : public cudf::test::BaseFixture {
+};
+
+using TestTypes = cudf::test::Concat<cudf::test::Types<int8_t, int16_t, int32_t, int64_t>,
+                                     cudf::test::FloatingPointTypes>;
+TYPED_TEST_SUITE(GroupbyMergeM2TypedTest, TestTypes);
+
+TYPED_TEST(GroupbyMergeM2TypedTest, InvalidInput)
+{
+  using T = TypeParam;
+
+  auto const keys = keys_col<int32_t>{1, 2, 3};
+
+  // The input column must be a structs column.
+  {
+    auto const values = keys_col<T>{1, 2, 3};
+    EXPECT_THROW(merge_M2({keys}, {values}), cudf::logic_error);
+  }
+
+  // The input column must be a structs column having 3 children.
+  {
+    auto vals1      = keys_col<T>{1, 2, 3};
+    auto vals2      = vals_col<double>{1.0, 2.0, 3.0};
+    auto const vals = structs_col{vals1, vals2};
+    EXPECT_THROW(merge_M2({keys}, {vals}), cudf::logic_error);
+  }
+
+  // The input column must be a structs column having types (int32_t, double, double).
+  {
+    auto vals1      = keys_col<T>{1, 2, 3};
+    auto vals2      = keys_col<T>{1, 2, 3};
+    auto vals3      = keys_col<T>{1, 2, 3};
+    auto const vals = structs_col{vals1, vals2, vals3};
+    EXPECT_THROW(merge_M2({keys}, {vals}), cudf::logic_error);
+  }
+}
+
+TYPED_TEST(GroupbyMergeM2TypedTest, EmptyInput)
+{
+  using T      = TypeParam;
+  using M2_t   = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+  using mean_t = cudf::detail::target_type_t<T, cudf::aggregation::MEAN>;
+
+  auto const keys = keys_col<int32_t>{};
+  auto vals_count = counts_col{};
+  auto vals_mean  = means_col<mean_t>{};
+  auto vals_M2    = M2s_col<M2_t>{};
+  auto const vals = structs_col{vals_count, vals_mean, vals_M2};
+
+  auto const [out_keys, out_vals] = merge_M2({keys}, {vals});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(keys, *out_keys, print_all);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(vals, *out_vals, print_all);
+}
+
+TYPED_TEST(GroupbyMergeM2TypedTest, SimpleInput)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  // Full dataset:
+  //
+  // keys = [1, 2, 3, 1, 2, 2, 1, 3, 3, 2]
+  // vals = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+  //
+  // key = 1: vals = [0, 3, 6]
+  // key = 2: vals = [1, 4, 5, 9]
+  // key = 3: vals = [2, 7, 8]
+
+  // Partitioned datasets:
+  auto const keys1 = keys_col<int32_t>{1, 2, 3};
+  auto const keys2 = keys_col<int32_t>{1, 2, 2};
+  auto const keys3 = keys_col<int32_t>{1, 3, 3, 2};
+
+  auto const vals1 = vals_col<T>{0, 1, 2};
+  auto const vals2 = vals_col<T>{3, 4, 5};
+  auto const vals3 = vals_col<T>{6, 7, 8, 9};
+
+  // The expected results to validate.
+  auto const expected_keys = keys_col<int32_t>{1, 2, 3};
+  auto const expected_M2s  = M2s_col<R>{18.0, 32.75, 20.0 + 2.0 / 3.0};
+
+  // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset.
+  // The partial results are also assembled into a structs column.
+  auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1);
+  auto const [out2_keys, out2_vals] = compute_partial_results(keys2, vals2);
+  auto const [out3_keys, out3_vals] = compute_partial_results(keys3, vals3);
+
+  // Merge the partial results to the final results.
+  // Merging can be done in just one merge step, or in multiple steps.
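+  // For example, for key = 1 the three partial results are (1, 0.0, 0.0), (1, 3.0, 0.0) and
+  // (1, 6.0, 0.0); merging the first two gives (2, 1.5, 4.5), and merging in the third gives
+  // (3, 3.0, 18.0), matching expected_M2s (illustrative arithmetic only).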
+
+  // Multiple steps merging:
+  {
+    auto const [out4_keys, out4_vals] =
+      merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals});
+    auto const [final_keys, final_vals] =
+      merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals});
+
+    auto const out_M2s = final_vals->child(2);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all);
+  }
+
+  // One step merging:
+  {
+    auto const [final_keys, final_vals] = merge_M2(vcol_views{*out1_keys, *out2_keys, *out3_keys},
+                                                   vcol_views{*out1_vals, *out2_vals, *out3_vals});
+
+    auto const out_M2s = final_vals->child(2);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all);
+  }
+}
+
+TYPED_TEST(GroupbyMergeM2TypedTest, SimpleInputHavingNegativeValues)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  // Full dataset:
+  //
+  // keys = [1, 2, 3, 1, 2, 2, 1, 3, 3, 2]
+  // vals = [0, 1, -2, 3, -4, -5, -6, 7, -8, 9]
+  //
+  // key = 1: vals = [0, 3, -6]
+  // key = 2: vals = [1, -4, -5, 9]
+  // key = 3: vals = [-2, 7, -8]
+
+  // Partitioned datasets:
+  auto const keys1 = keys_col<int32_t>{1, 2, 3};
+  auto const keys2 = keys_col<int32_t>{1, 2, 2};
+  auto const keys3 = keys_col<int32_t>{1, 3, 3, 2};
+
+  auto const vals1 = vals_col<T>{0, 1, -2};
+  auto const vals2 = vals_col<T>{3, -4, -5};
+  auto const vals3 = vals_col<T>{-6, 7, -8, 9};
+
+  // The expected results to validate.
+  auto const expected_keys = keys_col<int32_t>{1, 2, 3};
+  auto const expected_M2s  = M2s_col<R>{42.0, 122.75, 114.0};
+
+  // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset.
+  // The partial results are also assembled into a structs column.
+  auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1);
+  auto const [out2_keys, out2_vals] = compute_partial_results(keys2, vals2);
+  auto const [out3_keys, out3_vals] = compute_partial_results(keys3, vals3);
+
+  // Merge the partial results to the final results.
+  // Merging can be done in just one merge step, or in multiple steps.
+
+  // Multiple steps merging:
+  {
+    auto const [out4_keys, out4_vals] =
+      merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals});
+    auto const [final_keys, final_vals] =
+      merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals});
+
+    auto const out_M2s = final_vals->child(2);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all);
+  }
+
+  // One step merging:
+  {
+    auto const [final_keys, final_vals] = merge_M2(vcol_views{*out1_keys, *out2_keys, *out3_keys},
+                                                   vcol_views{*out1_vals, *out2_vals, *out3_vals});
+
+    auto const out_M2s = final_vals->child(2);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all);
+  }
+}
+
+TYPED_TEST(GroupbyMergeM2TypedTest, InputHasNulls)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  // Full dataset:
+  //
+  // keys = [1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4]
+  // vals = [null, 1, 2, 3, 4, null, 6, 7, 8, 9, null]
+  //
+  // key = 1: vals = [null, 3, 6]
+  // key = 2: vals = [1, 4, null, 9]
+  // key = 3: vals = [2, 8]
+  // key = 4: vals = [null]
+
+  // Partitioned datasets:
+  auto const keys1 = keys_col<int32_t>{1, 2, 3, 1};
+  auto const keys2 = keys_col<int32_t>{{2, 2, 1, null}, null_at(3)};
+  auto const keys3 = keys_col<int32_t>{3, 2, 4};
+
+  auto const vals1 = vals_col<T>{{null, 1, 2, 3}, null_at(0)};
+  auto const vals2 = vals_col<T>{{4, null, 6, 7}, null_at(1)};
+  auto const vals3 = vals_col<T>{{8, 9, null}, null_at(2)};
+
+  // The expected results to validate.
+  auto const expected_keys = keys_col<int32_t>{1, 2, 3, 4};
+  auto const expected_M2s  = M2s_col<R>{{4.5, 32.0 + 2.0 / 3.0, 18.0, 0.0 /*NULL*/}, null_at(3)};
+
+  // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset.
+  // The partial results are also assembled into a structs column.
+  auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1);
+  auto const [out2_keys, out2_vals] = compute_partial_results(keys2, vals2);
+  auto const [out3_keys, out3_vals] = compute_partial_results(keys3, vals3);
+
+  // Merge the partial results to the final results.
+  // Merging can be done in just one merge step, or in multiple steps.
+
+  // Multiple steps merging:
+  {
+    auto const [out4_keys, out4_vals] =
+      merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals});
+    auto const [final_keys, final_vals] =
+      merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals});
+
+    auto const out_M2s = final_vals->child(2);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all);
+  }
+
+  // One step merging:
+  {
+    auto const [final_keys, final_vals] = merge_M2(vcol_views{*out1_keys, *out2_keys, *out3_keys},
+                                                   vcol_views{*out1_vals, *out2_vals, *out3_vals});
+
+    auto const out_M2s = final_vals->child(2);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all);
+  }
+}
+
+TYPED_TEST(GroupbyMergeM2TypedTest, InputHaveNullsAndNaNs)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  // Full dataset:
+  //
+  // keys = [4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4]
+  // vals = [null, null, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN]
+  //
+  // key = 1: vals = [0, 3, 6]
+  // key = 2: vals = [1, 4, NaN, 9]
+  // key = 3: vals = [null, 2, 8]
+  // key = 4: vals = [null, 10, NaN]
+
+  // Partitioned datasets:
+  auto const keys1 = keys_col<int32_t>{4, 3, 1, 2};
+  auto const keys2 = keys_col<int32_t>{3, 1, 2};
+  auto const keys3 = keys_col<int32_t>{{2, 1, null}, null_at(2)};
+  auto const keys4 = keys_col<int32_t>{3, 2, 4, 4};
+
+  auto const vals1 = vals_col<double>{{0.0 /*NULL*/, 0.0 /*NULL*/, 0.0, 1.0}, nulls_at({0, 1})};
+  auto const vals2 = vals_col<double>{2.0, 3.0, 4.0};
+  auto const vals3 = vals_col<double>{NaN, 6.0, 7.0};
+  auto const vals4 = vals_col<double>{8.0, 9.0, 10.0, NaN};
+
+  // The expected results to validate.
+  auto const expected_keys = keys_col<int32_t>{1, 2, 3, 4};
+  auto const expected_M2s  = M2s_col<R>{18.0, NaN, 18.0, NaN};
+
+  // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset.
+  // The partial results are also assembled into a structs column.
+  auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1);
+  auto const [out2_keys, out2_vals] = compute_partial_results(keys2, vals2);
+  auto const [out3_keys, out3_vals] = compute_partial_results(keys3, vals3);
+  auto const [out4_keys, out4_vals] = compute_partial_results(keys4, vals4);
+
+  // Merge the partial results to the final results.
+  // Merging can be done in just one merge step, or in multiple steps.
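+  // Partial results with count == 0 (all-null partitions) are skipped during merging, while NaN
+  // means/M2s from counted NaN inputs propagate through the merge arithmetic; hence keys 2 and 4
+  // expect NaN rather than null.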
+
+  // Multiple steps merging:
+  {
+    auto const [out5_keys, out5_vals] =
+      merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals});
+    auto const [out6_keys, out6_vals] =
+      merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals});
+    auto const [final_keys, final_vals] =
+      merge_M2(vcol_views{*out5_keys, *out6_keys}, vcol_views{*out5_vals, *out6_vals});
+
+    auto const out_M2s = final_vals->child(2);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all);
+  }
+
+  // One step merging:
+  {
+    auto const [final_keys, final_vals] =
+      merge_M2(vcol_views{*out1_keys, *out2_keys, *out3_keys, *out4_keys},
+               vcol_views{*out1_vals, *out2_vals, *out3_vals, *out4_vals});
+
+    auto const out_M2s = final_vals->child(2);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all);
+  }
+}
+
+TYPED_TEST(GroupbyMergeM2TypedTest, SlicedColumnsInput)
+{
+  using T = TypeParam;
+  using R = cudf::detail::target_type_t<T, cudf::aggregation::M2>;
+
+  // This test should compute M2 aggregation on the same dataset as the InputHaveNullsAndNaNs test,
+  // i.e.:
+  //
+  // keys = [4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4]
+  // vals = [null, null, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN]
+  //
+  // key = 1: vals = [0, 3, 6]
+  // key = 2: vals = [1, 4, NaN, 9]
+  // key = 3: vals = [null, 2, 8]
+  // key = 4: vals = [null, 10, NaN]
+
+  auto const keys_original =
+    keys_col<int32_t>{{
+                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,                 // will not use, don't care
+                        4, 3, 1, 2, 3, 1, 2, 2, 1, null, 3, 2, 4, 4,  // use this
+                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5                  // will not use, don't care
+                      },
+                      null_at(19)};
+  auto const vals_original = vals_col<double>{
+    {
+      3.0, 2.0, 5.0, 4.0, 6.0, 9.0, 1.0, 0.0, 1.0, 7.0,  // will not use, don't care
+      0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, NaN, 6.0, 7.0, 8.0, 9.0, 10.0, NaN,  // use this
+      9.0, 10.0, 11.0, 12.0, 0.0, 5.0, 1.0, 20.0, 19.0, 15.0  // will not use, don't care
+    },
+    nulls_at({10, 11})};
+
+  // Partitioned datasets, taken from the original dataset in the range [10, 24).
+  auto const keys1 = cudf::slice(keys_original, {10, 14})[0];  // {4, 3, 1, 2}
+  auto const keys2 = cudf::slice(keys_original, {14, 17})[0];  // {3, 1, 2}
+  auto const keys3 = cudf::slice(keys_original, {17, 20})[0];  // {2, 1, null}
+  auto const keys4 = cudf::slice(keys_original, {20, 24})[0];  // {3, 2, 4, 4}
+
+  auto const vals1 = cudf::slice(vals_original, {10, 14})[0];  // {null, null, 0.0, 1.0}
+  auto const vals2 = cudf::slice(vals_original, {14, 17})[0];  // {2.0, 3.0, 4.0}
+  auto const vals3 = cudf::slice(vals_original, {17, 20})[0];  // {NaN, 6.0, 7.0}
+  auto const vals4 = cudf::slice(vals_original, {20, 24})[0];  // {8.0, 9.0, 10.0, NaN}
+
+  // The expected results to validate.
+  auto const expected_keys = keys_col<int32_t>{1, 2, 3, 4};
+  auto const expected_M2s  = M2s_col<R>{18.0, NaN, 18.0, NaN};
+
+  // Compute partial results (`COUNT_VALID`, `MEAN`, `M2`) of each dataset.
+  // The partial results are also assembled into a structs column.
+  auto const [out1_keys, out1_vals] = compute_partial_results(keys1, vals1);
+  auto const [out2_keys, out2_vals] = compute_partial_results(keys2, vals2);
+  auto const [out3_keys, out3_vals] = compute_partial_results(keys3, vals3);
+  auto const [out4_keys, out4_vals] = compute_partial_results(keys4, vals4);
+
+  // Merge the partial results to the final results.
+ // Merging can be done in just one merge step, or in multiple steps. + + // Multiple steps merging: + { + auto const [out5_keys, out5_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys}, vcol_views{*out1_vals, *out2_vals}); + auto const [out6_keys, out6_vals] = + merge_M2(vcol_views{*out3_keys, *out4_keys}, vcol_views{*out3_vals, *out4_vals}); + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out5_keys, *out6_keys}, vcol_views{*out5_vals, *out6_vals}); + + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } + + // One step merging: + { + auto const [final_keys, final_vals] = + merge_M2(vcol_views{*out1_keys, *out2_keys, *out3_keys, *out4_keys}, + vcol_views{*out1_vals, *out2_vals, *out3_vals, *out4_vals}); + + auto const out_M2s = final_vals->child(2); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_keys, *final_keys, print_all); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_M2s, out_M2s, print_all); + } +} From b123eb847f7571c12519cd069daf774124df93a0 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Thu, 8 Jul 2021 09:47:37 -0500 Subject: [PATCH 34/54] JNI build no longer looks for Arrow in conda environment (#8686) Updates the Java bindings native build to no longer look for the Arrow dependency in the conda environment. It now expects to find it in the libcudf build area or under the `ARROW_ROOT` environment variable if the user has specified it. Authors: - Jason Lowe (https://github.com/jlowe) Approvers: - Kuhu Shukla (https://github.com/kuhushukla) - MithunR (https://github.com/mythrocks) - Paul Taylor (https://github.com/trxcllnt) URL: https://github.com/rapidsai/cudf/pull/8686 --- java/src/main/native/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 84b44b546a3..e779a74290d 100755 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -173,7 +173,6 @@ message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}") find_path(ARROW_INCLUDE "arrow" HINTS "$ENV{ARROW_ROOT}/include" - "$ENV{CONDA_PREFIX}/include" "${CUDF_CPP_BUILD_DIR}/_deps/arrow-src/cpp/src") message(STATUS "ARROW: ARROW_INCLUDE set to ${ARROW_INCLUDE}") @@ -187,7 +186,6 @@ endif(CUDF_JNI_ARROW_STATIC) find_library(ARROW_LIBRARY ${CUDF_JNI_ARROW_LIBNAME} REQUIRED HINTS "$ENV{ARROW_ROOT}/lib" - "$ENV{CONDA_PREFIX}/lib" "${CUDF_CPP_BUILD_DIR}/_deps/arrow-build/release") if(NOT ARROW_LIBRARY) From a5427d2658b8e323b702a1f5e11a1512f2bab459 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Thu, 8 Jul 2021 09:13:30 -0700 Subject: [PATCH 35/54] Add DeprecationWarning to `ser.str.subword_tokenize` (#8603) This PR adds deprecationWarning to `ser.str.subword_tokenize` . 
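After this change, calling the old API emits a `FutureWarning` that points users at
`cudf.core.subword_tokenizer.SubwordTokenizer` as the replacement.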
Check out the related issue for details: https://github.com/rapidsai/cudf/issues/8604

Authors:
  - Vibhu Jawa (https://github.com/VibhuJawa)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/8603
---
 python/cudf/cudf/core/column/string.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 0902167be8b..e17ecec766a 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -4620,6 +4620,9 @@ def subword_tokenize(
         This function requires about 21x the number of character bytes
         in the input strings column as working memory.
 
+        ``ser.str.subword_tokenize`` will be deprecated in future versions.
+        Use ``cudf.core.subword_tokenizer.SubwordTokenizer`` instead.
+
         Parameters
         ----------
         hash_file : str
@@ -4691,6 +4694,14 @@ def subword_tokenize(
         array([[0, 0, 2],
                [1, 0, 1]], dtype=uint32)
         """
+        warning_message = (
+            "`ser.str.subword_tokenize` API will be deprecated"
+            " in future versions of cudf.\n"
+            "Use `cudf.core.subword_tokenizer.SubwordTokenizer` "
+            "instead"
+        )
+
+        warnings.warn(warning_message, FutureWarning)
         tokens, masks, metadata = cpp_subword_tokenize_vocab_file(
             self._column,
             hash_file,

From 8aaf8e656de3a3269b78d5e0be012f2f9da48fa8 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Fri, 9 Jul 2021 00:20:04 +0530
Subject: [PATCH 36/54] Fixed spelling mistakes in libcudf documentation
 (#8664)

Ran a spell check and made some corrections.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/8664
---
 .../common/generate_benchmark_input.cpp | 3 ++-
 .../common/generate_benchmark_input.hpp | 4 ++--
 cpp/benchmarks/join/generate_input_tables.cuh | 6 +++---
 cpp/docs/DEVELOPER_GUIDE.md | 2 +-
 cpp/docs/TESTING.md | 2 +-
 cpp/include/cudf/column/column.hpp | 2 +-
 .../cudf/column/column_device_view.cuh | 4 ++--
 cpp/include/cudf/column/column_factories.hpp | 6 +++---
 cpp/include/cudf/column/column_view.hpp | 2 +-
 .../cudf/detail/aggregation/aggregation.cuh | 2 +-
 cpp/include/cudf/detail/indexalator.cuh | 6 +++---
 cpp/include/cudf/detail/iterator.cuh | 4 ++--
 cpp/include/cudf/detail/nvtx/nvtx3.hpp | 8 ++++----
 cpp/include/cudf/detail/unary.hpp | 2 +-
 .../cudf/detail/utilities/device_atomics.cuh | 2 +-
 cpp/include/cudf/detail/valid_if.cuh | 2 +-
 cpp/include/cudf/groupby.hpp | 4 ++--
 cpp/include/cudf/io/parquet.hpp | 2 +-
 cpp/include/cudf/scalar/scalar.hpp | 2 +-
 .../cudf/scalar/scalar_device_view.cuh | 2 +-
 cpp/include/cudf/sorting.hpp | 4 ++--
 cpp/include/cudf/strings/combine.hpp | 2 +-
 cpp/include/cudf/strings/repeat_strings.hpp | 6 +++---
 cpp/include/cudf/strings/replace.hpp | 2 +-
 cpp/include/cudf/strings/split/split.hpp | 4 ++--
 cpp/include/cudf/strings/string_view.hpp | 2 +-
 cpp/include/cudf/table/table_device_view.cuh | 2 +-
 cpp/include/cudf/transform.hpp | 6 +++---
 cpp/include/cudf_test/base_fixture.hpp | 2 +-
 cpp/include/cudf_test/column_wrapper.hpp | 4 ++--
 cpp/include/cudf_test/type_list_utilities.hpp | 4 ++--
 cpp/include/cudf_test/type_lists.hpp | 2 +-
 cpp/src/bitmask/null_mask.cu | 2 +-
 cpp/src/copying/contiguous_split.cu | 6 +++---
 cpp/src/dictionary/replace.cu | 2 +-
 .../hash/concurrent_unordered_multimap.cuh | 8 ++++----
 cpp/src/interop/from_arrow.cu | 2 +-
 cpp/src/interop/to_arrow.cu | 2 +-
cpp/src/io/json/json_gpu.cu | 4 ++-- cpp/src/io/json/json_gpu.h | 4 ++-- cpp/src/io/json/reader_impl.cu | 2 +- cpp/src/io/orc/orc.cpp | 2 +- cpp/src/io/orc/stripe_enc.cu | 20 +++++++++---------- cpp/src/io/orc/writer_impl.hpp | 2 +- cpp/src/io/parquet/page_data.cu | 2 +- cpp/src/io/parquet/parquet.cpp | 2 +- cpp/src/io/parquet/parquet.hpp | 2 +- .../io/statistics/conversion_type_select.cuh | 2 +- .../statistics_type_identification.cuh | 4 ++-- cpp/src/jit/parser.hpp | 6 +++--- cpp/src/join/hash_join.cu | 2 +- cpp/src/reshape/byte_cast.cu | 4 ++-- cpp/src/rolling/rolling_detail.cuh | 8 ++++---- .../strings/convert/convert_fixed_point.cu | 2 +- cpp/src/strings/convert/convert_floats.cu | 2 +- cpp/src/strings/json/json_path.cu | 2 +- cpp/src/strings/replace/multi_re.cu | 2 +- cpp/src/text/generate_ngrams.cu | 2 +- cpp/src/transform/row_bit_count.cu | 2 +- cpp/tests/io/csv_test.cpp | 2 +- cpp/tests/io/orc_test.cpp | 4 ++-- cpp/tests/io/parquet_test.cpp | 8 ++++---- cpp/tests/join/join_tests.cpp | 10 +++++----- cpp/tests/sort/segmented_sort_tests.cpp | 2 +- cpp/tests/strings/repeat_strings_tests.cpp | 2 +- cpp/tests/table/table_view_tests.cu | 2 +- cpp/tests/utilities/column_utilities.cu | 2 +- 67 files changed, 120 insertions(+), 119 deletions(-) diff --git a/cpp/benchmarks/common/generate_benchmark_input.cpp b/cpp/benchmarks/common/generate_benchmark_input.cpp index 4280fd0c2ba..ea54d4daf05 100644 --- a/cpp/benchmarks/common/generate_benchmark_input.cpp +++ b/cpp/benchmarks/common/generate_benchmark_input.cpp @@ -108,7 +108,8 @@ size_t avg_element_bytes(data_profile const& profile, cudf::type_id tid) /** * @brief Functor that computes a random column element with the given data profile. * - * The implementation is SFINAEd for diffent type groups. Currently only used for fixed-width types. + * The implementation is SFINAEd for different type groups. Currently only used for fixed-width + * types. */ template struct random_value_fn; diff --git a/cpp/benchmarks/common/generate_benchmark_input.hpp b/cpp/benchmarks/common/generate_benchmark_input.hpp index acb8adc98e9..6c2a43a34e2 100644 --- a/cpp/benchmarks/common/generate_benchmark_input.hpp +++ b/cpp/benchmarks/common/generate_benchmark_input.hpp @@ -137,7 +137,7 @@ struct distribution_params< }; /** - * @brief Boolens are parameterized with the probability of getting `true` value. + * @brief Booleans are parameterized with the probability of getting `true` value. */ template struct distribution_params::value>> { @@ -195,7 +195,7 @@ std::vector get_type_or_group(int32_t id); * * If an element of the input vector is a `cudf::type_id` enumerator, function return value simply * includes this type. If an element of the input vector is a `type_group_id` enumerator, function - * return value includes all types coresponding to the group enumerator. + * return value includes all types corresponding to the group enumerator. * * @param ids Vector of integers equal to either a `cudf::type_id` enumerator or a `type_group_id` * enumerator. diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh index 285a9241a26..d7f64716e58 100644 --- a/cpp/benchmarks/join/generate_input_tables.cuh +++ b/cpp/benchmarks/join/generate_input_tables.cuh @@ -141,7 +141,7 @@ __global__ void init_probe_tbl(key_type* const probe_tbl, * (e.g. device memory, zero copy memory or unified memory). 
Each value in the build table * will be from [0,rand_max] and if uniq_build_tbl_keys is true it is ensured that each value * will be uniq in the build table. Each value in the probe table will be also in the build - * table with a propability of selectivity and a random number from + * table with a probability of selectivity and a random number from * [0,rand_max] \setminus \{build_tbl\} otherwise. * * @param[out] build_tbl The build table to generate. Usually the smaller table used to @@ -150,7 +150,7 @@ __global__ void init_probe_tbl(key_type* const probe_tbl, * @param[out] probe_tbl The probe table to generate. Usually the larger table used to * probe into the hash table created from the build table. * @param[in] build_tbl_size number of keys in the build table - * @param[in] selectivity propability with which an element of the probe table is + * @param[in] selectivity probability with which an element of the probe table is * present in the build table. * @param[in] rand_max maximum random number to generate. I.e. random numbers are * integers from [0,rand_max]. @@ -169,7 +169,7 @@ void generate_input_tables(key_type* const build_tbl, // expense of not being that accurate with applying the selectivity an especially more memory // efficient implementations would be to partition the random numbers into two intervals and then // let one table choose random numbers from only one interval and the other only select with - // selectivity propability from the same interval and from the other in the other cases. + // selective probability from the same interval and from the other in the other cases. static_assert(std::is_signed::value, "key_type needs to be signed for lottery to work"); diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index 8ec111acdb2..9ec64060847 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -470,7 +470,7 @@ libcudf, and you should not use it in new code in libcudf without careful consid use `rmm::device_uvector` along with the utility factories in `device_factories.hpp`. These utilities enable creation of `uvector`s from host-side vectors, or creating zero-initialized `uvector`s, so that they are as convenient to use as `device_vector`. Avoiding `device_vector` has -a number of benefits, as described in the folling section on `rmm::device_uvector`. +a number of benefits, as described in the following section on `rmm::device_uvector`. #### `rmm::device_uvector` diff --git a/cpp/docs/TESTING.md b/cpp/docs/TESTING.md index 2c7b62b8b6d..3c741b5d4e7 100644 --- a/cpp/docs/TESTING.md +++ b/cpp/docs/TESTING.md @@ -67,7 +67,7 @@ not necessary for your test fixtures to inherit from it. Example: ```c++ -class MyTestFiture : public cudf::test::BaseFixture {...}; +class MyTestFixture : public cudf::test::BaseFixture {...}; ``` ## Typed Tests diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index ee367840644..8decce7f260 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -293,7 +293,7 @@ class column { /** * @brief Implicit conversion operator to a `mutable_column_view`. * - * This allows pasing a `column` object into a function that accepts a + * This allows passing a `column` object into a function that accepts a *`mutable_column_view`. The conversion is automatic. 
* @note Creating a mutable view of a `column` invalidates the `column`'s diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 8cb05ca0bad..02e3eee6b43 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -37,7 +37,7 @@ /** * @file column_device_view.cuh - * @brief Column device view class definitons + * @brief Column device view class definitions */ namespace cudf { @@ -541,7 +541,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * * optional_begin with mode `DYNAMIC` defers the assumption of nullability to * runtime, with the user stating on construction of the iterator if column has nulls. - * `DYNAMIC` mode is nice when an algorithm is going to execute on mutliple + * `DYNAMIC` mode is nice when an algorithm is going to execute on multiple * iterators and you don't want to compile all the combinations of iterator types * * Example: diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index e5424f0fc44..bdb7fd48e60 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -399,7 +399,7 @@ std::unique_ptr make_strings_column( * one more than the total number of strings so the `offsets.back()` is the total number of bytes * in the strings array. `offsets.front()` must always be 0 to point to the beginning of `strings`. * @param[in] null_mask Device span containing the null element indicator bitmask. Arrow format for - * nulls is used for interpeting this bitmask. + * nulls is used for interpreting this bitmask. * @param[in] null_count The number of null string entries. If equal to `UNKNOWN_NULL_COUNT`, the * null count will be computed dynamically on the first invocation of `column::null_count()` * @param[in] stream CUDA stream used for device memory operations and kernel launches. @@ -428,7 +428,7 @@ std::unique_ptr make_strings_column( * strings are identified by the offsets and the nullmask. * @param[in] null_count The number of null string entries. * @param[in] null_mask The bits specifying the null strings in device memory. Arrow format for - * nulls is used for interpeting this bitmask. + * nulls is used for interpreting this bitmask. * @param[in] stream CUDA stream used for device memory operations and kernel launches. * @param[in] mr Device memory resource used for allocation of the column's `null_mask` and children * columns' device memory. @@ -491,7 +491,7 @@ std::unique_ptr make_strings_column( * further nested. * @param[in] null_count The number of null list entries. * @param[in] null_mask The bits specifying the null lists in device memory. - * Arrow format for nulls is used for interpeting this bitmask. + * Arrow format for nulls is used for interpreting this bitmask. 
* @param[in] stream Optional stream for use with all memory allocation * and device kernels * @param[in] mr Optional resource to use for device memory diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 82326a21d7d..7ab8cc0f6b1 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -22,7 +22,7 @@ /** * @file column_view.hpp - * @brief column view class definitons + * @brief column view class definitions */ namespace cudf { diff --git a/cpp/include/cudf/detail/aggregation/aggregation.cuh b/cpp/include/cudf/detail/aggregation/aggregation.cuh index 09763d66403..53c1f47c201 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.cuh +++ b/cpp/include/cudf/detail/aggregation/aggregation.cuh @@ -643,7 +643,7 @@ struct identity_initializer { * The `i`th column will be initialized with the identity value of the `i`th * aggregation operation in `aggs`. * - * @throw cudf::logic_error if column type and corresponging agg are incompatible + * @throw cudf::logic_error if column type and corresponding agg are incompatible * @throw cudf::logic_error if column type is not fixed-width * * @param table The table of columns to initialize. diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index 8bbd0d1aada..4a2b40e8be7 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -29,7 +29,7 @@ namespace detail { /** * @brief The base class for the input or output index normalizing iterator. * - * This implementation uses CTRP to define the `input_indexalator` and the + * This implementation uses CRTP to define the `input_indexalator` and the * `output_indexalator` classes. This is so this class can manipulate the * uniquely typed subclass member variable `p_` directly without requiring * virtual functions since iterator instances will be copied to device memory. @@ -241,7 +241,7 @@ struct base_indexalator { */ struct input_indexalator : base_indexalator { friend struct indexalator_factory; - friend struct base_indexalator; // for CTRP + friend struct base_indexalator; // for CRTP using reference = size_type const; // this keeps STL and thrust happy @@ -326,7 +326,7 @@ struct input_indexalator : base_indexalator { */ struct output_indexalator : base_indexalator { friend struct indexalator_factory; - friend struct base_indexalator; // for CTRP + friend struct base_indexalator; // for CRTP using reference = output_indexalator const&; // required for output iterators diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 4cb0c6e1877..deb161fd9c2 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -177,7 +177,7 @@ auto make_null_replacement_iterator(column_device_view const& column, * * make_optional_iterator with mode `DYNAMIC` defers the assumption of nullability to * runtime, with the user stating on construction of the iterator if column has nulls. - * `DYNAMIC` mode is nice when an algorithm is going to execute on mutliple + * `DYNAMIC` mode is nice when an algorithm is going to execute on multiple * iterators and you don't want to compile all the combinations of iterator types * * Example: @@ -819,7 +819,7 @@ auto inline make_pair_iterator(scalar const& scalar_value) * * Else, if the scalar is null, then the value of `p.first` is undefined and `p.second == false`. 
* - * The behaviour is undefined if the scalar is destroyed before iterator dereferencing. + * The behavior is undefined if the scalar is destroyed before iterator dereferencing. * * @throws cudf::logic_error if scalar datatype and Element type mismatch. * @throws cudf::logic_error if the returned iterator is dereferenced in host diff --git a/cpp/include/cudf/detail/nvtx/nvtx3.hpp b/cpp/include/cudf/detail/nvtx/nvtx3.hpp index add5699e34a..0e1a82a0657 100644 --- a/cpp/include/cudf/detail/nvtx/nvtx3.hpp +++ b/cpp/include/cudf/detail/nvtx/nvtx3.hpp @@ -54,7 +54,7 @@ * \code{.cpp} * #include "nvtx3.hpp" * void some_function(){ - * // Begins a NVTX range with the messsage "some_function" + * // Begins a NVTX range with the message "some_function" * // The range ends when some_function() returns and `r` is destroyed * nvtx3::thread_range r{"some_function"}; * @@ -322,7 +322,7 @@ * Example: * \code{.cpp} * // Create an `event_attributes` with the custom message "my message" - * nvtx3::event_attributes attr{nvtx3::Mesage{"my message"}}; + * nvtx3::event_attributes attr{nvtx3::message{"my message"}}; * * // strings and string literals implicitly assumed to be a `nvtx3::message` * nvtx3::event_attributes attr{"my message"}; @@ -1267,7 +1267,7 @@ class registered_message { * nvtx3::thread_range range1{attr1}; * * // `range2` contains message "message 2" - * nvtx3::thread_range range2{nvtx3::Mesage{"message 2"}}; + * nvtx3::thread_range range2{nvtx3::message{"message 2"}}; * * // `std::string` and string literals are implicitly assumed to be * // the contents of an `nvtx3::message` @@ -1525,7 +1525,7 @@ class payload { * * // For convenience, the arguments that can be passed to the * `event_attributes` - * // constructor may be passed to the `domain_thread_range` contructor where + * // constructor may be passed to the `domain_thread_range` constructor where * // they will be forwarded to the `EventAttribute`s constructor * nvtx3::thread_range r{nvtx3::payload{42}, nvtx3::category{1}, "message"}; * \endcode diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp index 0615e502c60..e672cf01488 100644 --- a/cpp/include/cudf/detail/unary.hpp +++ b/cpp/include/cudf/detail/unary.hpp @@ -31,7 +31,7 @@ namespace detail { * doesn't. * * @tparam InputIterator Iterator type for `begin` and `end` - * @tparam Predicate A predicator type which will be evaludated + * @tparam Predicate A predicator type which will be evaluated * @param begin Beginning of the sequence of elements * @param end End of the sequence of elements * @param p Predicate to be applied to each element in `[begin,end)` diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh index b94257daacf..6380e76fdfa 100644 --- a/cpp/include/cudf/detail/utilities/device_atomics.cuh +++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh @@ -161,7 +161,7 @@ struct genericAtomicOperationImpl { // ----------------------------------------------------------------------- // specialized functions for operators -// `atomicAdd` supports int32, float, double (signed int64 is not supproted.) +// `atomicAdd` supports int32, float, double (signed int64 is not supported.) 
// `atomicMin`, `atomicMax` support int32_t, int64_t // `atomicAnd`, `atomicOr`, `atomicXor` support int32_t, int64_t template <> diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index 11ce9199c2d..4a7e9b89c80 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -117,7 +117,7 @@ std::pair valid_if( * input ranges. * Given a set of bitmasks, `masks`, the state of bit `j` in mask `i` is - * determined by `p( *(begin1 + i), *(begin2 + j))`. If the predivate evaluates + * determined by `p( *(begin1 + i), *(begin2 + j))`. If the predicate evaluates * to true, the the bit is set to `1`. If false, set to `0`. * * Example Arguments: diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index 85c469f58f8..5656b38a0ef 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -116,7 +116,7 @@ class groupby { /** * @brief Performs grouped aggregations on the specified values. * - * The values to aggregate and the aggregations to perform are specifed in an + * The values to aggregate and the aggregations to perform are specified in an * `aggregation_request`. Each request contains a `column_view` of values to * aggregate and a set of `aggregation`s to perform on those elements. * @@ -173,7 +173,7 @@ class groupby { /** * @brief Performs grouped scans on the specified values. * - * The values to aggregate and the aggregations to perform are specifed in an + * The values to aggregate and the aggregations to perform are specified in an * `aggregation_request`. Each request contains a `column_view` of values to * aggregate and a set of `aggregation`s to perform on those elements. * diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 2dd123ca2bc..ecd9607a87e 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -973,7 +973,7 @@ class chunked_parquet_writer_options_builder { * @brief Set to true if timestamps should be written as * int96 types instead of int64 types. Even though int96 is deprecated and is * not an internal type for cudf, it needs to be written for backwards - * compatability reasons. + * compatibility reasons. * * @param enabled Boolean value to enable/disable int96 timestamps. * @return this for chaining. diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 2e57e56255d..717cf8ea7b0 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -78,7 +78,7 @@ class scalar { /** * @brief Indicates whether the scalar contains a valid value. * - * @note Using the value when `is_valid() == false` is undefined behaviour. In addition, this + * @note Using the value when `is_valid() == false` is undefined behavior. In addition, this * function does a stream synchronization. * * @param stream CUDA stream used for device memory operations. 
diff --git a/cpp/include/cudf/scalar/scalar_device_view.cuh b/cpp/include/cudf/scalar/scalar_device_view.cuh index d56d5d5eb0d..884b412d3e2 100644 --- a/cpp/include/cudf/scalar/scalar_device_view.cuh +++ b/cpp/include/cudf/scalar/scalar_device_view.cuh @@ -21,7 +21,7 @@ /** * @file scalar_device_view.cuh - * @brief Scalar device view class definitons + * @brief Scalar device view class definitions */ namespace cudf { diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 2454cfe7c7b..36a8131a78e 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -187,7 +187,7 @@ std::unique_ptr rank( /** * @brief Returns sorted order after sorting each segment in the table. * - * If segment_offsets contains values larger than number of rows, behaviour is undefined. + * If segment_offsets contains values larger than number of rows, behavior is undefined. * @throws cudf::logic_error if `segment_offsets` is not `size_type` column. * * @param keys The table that determines the ordering of elements in each segment @@ -214,7 +214,7 @@ std::unique_ptr segmented_sorted_order( /** * @brief Performs a lexicographic segmented sort of a table * - * If segment_offsets contains values larger than number of rows, behaviour is undefined. + * If segment_offsets contains values larger than number of rows, behavior is undefined. * @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`. * @throws cudf::logic_error if `segment_offsets` is not `size_type` column. * diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp index 3e069de2f0f..32f8d482a34 100644 --- a/cpp/include/cudf/strings/combine.hpp +++ b/cpp/include/cudf/strings/combine.hpp @@ -272,7 +272,7 @@ std::unique_ptr join_list_elements( * delimited by the @p separator provided. * * A null list row will always result in a null string in the output row. Any non-null list row - * having a null elenent will result in the corresponding output row to be null unless a + * having a null element will result in the corresponding output row to be null unless a * @p narep string is specified to be used in its place. * * If @p separate_nulls is set to `NO` and @p narep is valid then separators are not added to the diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp index 4023dbc6c84..2b39662456b 100644 --- a/cpp/include/cudf/strings/repeat_strings.hpp +++ b/cpp/include/cudf/strings/repeat_strings.hpp @@ -31,7 +31,7 @@ namespace strings { * @brief Repeat the given string scalar by a given number of times. * * For a given string scalar, an output string scalar is generated by repeating the input string by - * a number of times given by the @p `repeat_times` parameter. If `repeat_times` is not a positve + * a number of times given by the @p `repeat_times` parameter. If `repeat_times` is not a positive * value, an empty (valid) string scalar will be returned. An invalid input scalar will always * result in an invalid output scalar regardless of the value of `repeat_times` parameter. * @@ -42,7 +42,7 @@ namespace strings { * out is '123XYZ-123XYZ-123XYZ-' * @endcode * - * @throw cudf::logic_error if the size of the ouput string scalar exceeds the maximum value that + * @throw cudf::logic_error if the size of the output string scalar exceeds the maximum value that * can be stored by the index type * (i.e., `input.size() * repeat_times > numeric_limits::max()`). 
* @@ -61,7 +61,7 @@ std::unique_ptr repeat_strings( * * For a given strings column, an output strings column is generated by repeating each string from * the input by a number of times given by the @p `repeat_times` parameter. If `repeat_times` is not - * a positve value, all the rows of the output strings column will be an empty string. Any null row + * a positive value, all the rows of the output strings column will be an empty string. Any null row * will result in a null row regardless of the value of `repeat_times` parameter. * * Note that this function cannot handle the cases when the size of the output column exceeds the diff --git a/cpp/include/cudf/strings/replace.hpp b/cpp/include/cudf/strings/replace.hpp index e9091b88b08..40eb796eba7 100644 --- a/cpp/include/cudf/strings/replace.hpp +++ b/cpp/include/cudf/strings/replace.hpp @@ -36,7 +36,7 @@ namespace strings { * input string. If not found, the output entry is just a copy of the * corresponding input string. * - * Specifing an empty string for repl will essentially remove the target + * Specifying an empty string for repl will essentially remove the target * string if found in each string. * * Null string entries will return null output string entries. diff --git a/cpp/include/cudf/strings/split/split.hpp b/cpp/include/cudf/strings/split/split.hpp index 82b191a8e1b..4978bad3bb3 100644 --- a/cpp/include/cudf/strings/split/split.hpp +++ b/cpp/include/cudf/strings/split/split.hpp @@ -139,7 +139,7 @@ std::unique_ptr
rsplit( * * @throw cudf:logic_error if `delimiter` is invalid. * - * @param strings A column of string elements to be splitted. + * @param strings A column of string elements to be split. * @param delimiter The string to identify split points in each string. * Default of empty string indicates split on whitespace. * @param maxsplit Maximum number of splits to perform. @@ -216,7 +216,7 @@ std::unique_ptr split_record( * * @throw cudf:logic_error if `delimiter` is invalid. * - * @param strings A column of string elements to be splitted. + * @param strings A column of string elements to be split. * @param delimiter The string to identify split points in each string. * Default of empty string indicates split on whitespace. * @param maxsplit Maximum number of splits to perform. diff --git a/cpp/include/cudf/strings/string_view.hpp b/cpp/include/cudf/strings/string_view.hpp index 5a3dbd5c1bc..be182cb0e9d 100644 --- a/cpp/include/cudf/strings/string_view.hpp +++ b/cpp/include/cudf/strings/string_view.hpp @@ -410,7 +410,7 @@ CUDA_HOST_DEVICE_CALLABLE size_type to_char_utf8(const char* str, char_utf8& cha * @brief Place a char_utf8 value into a char array. * * @param character Single character - * @param[out] str Allocated char array with enough space to hold the encoded characer. + * @param[out] str Allocated char array with enough space to hold the encoded character. * @return The number of bytes in the character */ CUDA_HOST_DEVICE_CALLABLE size_type from_char_utf8(char_utf8 character, char* str) diff --git a/cpp/include/cudf/table/table_device_view.cuh b/cpp/include/cudf/table/table_device_view.cuh index 7c80c958f92..71e48370ccf 100644 --- a/cpp/include/cudf/table/table_device_view.cuh +++ b/cpp/include/cudf/table/table_device_view.cuh @@ -27,7 +27,7 @@ /** * @file table_device_view.cuh - * @brief Table device view class definitons + * @brief Table device view class definitions */ namespace cudf { diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index e99e0db21c5..460c62e3598 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -40,7 +40,7 @@ namespace cudf { * * @param input An immutable view of the input column to transform * @param unary_udf The PTX/CUDA string of the unary function to apply - * @param outout_type The output type that is compatible with the output type in the UDF + * @param output_type The output type that is compatible with the output type in the UDF * @param is_ptx true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code * @param mr Device memory resource used to allocate the returned column's device memory * @return The column resulting from applying the unary function to @@ -133,7 +133,7 @@ std::pair, std::unique_ptr> encode( * @param bitmask A device pointer to the bitmask which needs to be converted * @param begin_bit position of the bit from which the conversion should start * @param end_bit position of the bit before which the conversion should stop - * @param mr Device memory resource used to allocate the returned columns's device memory + * @param mr Device memory resource used to allocate the returned columns' device memory * @return A boolean column representing the given mask from [begin_bit, end_bit). */ std::unique_ptr mask_to_bools( @@ -164,7 +164,7 @@ std::unique_ptr mask_to_bools( * row_bit_count(column(x)) >= row_bit_count(gather(column(x))) * * @param t The table view to perform the computation on. 
- * @param mr Device memory resource used to allocate the returned columns's device memory + * @param mr Device memory resource used to allocate the returned columns' device memory * @return A 32-bit integer column containing the per-row bit counts. */ std::unique_ptr row_bit_count( diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index 462317ad66b..8502d5832e6 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -252,7 +252,7 @@ inline std::shared_ptr create_memory_resource( /** * @brief Parses the cuDF test command line options. * - * Currently only supports 'rmm_mode' string paramater, which set the rmm + * Currently only supports 'rmm_mode' string parameter, which set the rmm * allocation mode. The default value of the parameter is 'pool'. * * @return Parsing results in the form of unordered map diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 74d22085b26..a4857552831 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -1239,7 +1239,7 @@ class lists_column_wrapper : public detail::column_wrapper { /** * @brief Construct a lists column containing a single list of fixed-width - * type from an interator range. + * type from an iterator range. * * Example: * @code{.cpp} @@ -1621,7 +1621,7 @@ class lists_column_wrapper : public detail::column_wrapper { std::back_inserter(cols), [&](lists_column_wrapper const& l) -> column_view { // depth mismatch. attempt to normalize the short column. - // this function will also catch if this is a legitmately broken + // this function will also catch if this is a legitimately broken // set of input if (l.depth < expected_depth) { if (l.root) { diff --git a/cpp/include/cudf_test/type_list_utilities.hpp b/cpp/include/cudf_test/type_list_utilities.hpp index a3f771c2f72..1588e3c9be9 100644 --- a/cpp/include/cudf_test/type_list_utilities.hpp +++ b/cpp/include/cudf_test/type_list_utilities.hpp @@ -32,7 +32,7 @@ * template * class TestFixture : ::testing::Test { }; * - * TYPED_TEST_CASE(TestFixure, TestTypes); + * TYPED_TEST_CASE(TestFixture, TestTypes); * * TYPED_TEST(TestFixture, mytest){ * using Type0 = GetType; // the first type element @@ -169,7 +169,7 @@ struct ConcatImpl<> { }; /** - * @brief Concantenates compile-time lists of types into a single type list. + * @brief Concatenates compile-time lists of types into a single type list. * * Example: * ``` diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp index aeddafae253..5c1b0c6c458 100644 --- a/cpp/include/cudf_test/type_lists.hpp +++ b/cpp/include/cudf_test/type_lists.hpp @@ -59,7 +59,7 @@ constexpr std::array types_to_ids_impl( * array == {type_id::INT32, type_id::FLOAT}; * ``` * - * @tparam TYPES List of types to conver to `type_id`s + * @tparam TYPES List of types to convert to `type_id`s * @return `std::array` of `type_id`s corresponding to each type in `TYPES` */ template diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index e202a682311..c3add0ea97e 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -513,7 +513,7 @@ std::vector segmented_count_set_bits(bitmask_type const* bitmask, // first_word_indices and last_word_indices to have the same type. 
to_word_index(false, d_last_indices.data())); - // first allocate temporary memroy + // first allocate temporary memory size_t temp_storage_bytes{0}; CUDA_TRY(cub::DeviceSegmentedReduce::Sum(nullptr, diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 4b11382a3f2..d4d54a3f94f 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -53,7 +53,7 @@ inline __device__ std::size_t _round_up_safe(std::size_t number_to_round, std::s * The definition of "buffer" used throughout this module is a component piece of a * cudf column. So for example, a fixed-width column with validity would have 2 associated * buffers : the data itself and the validity buffer. contiguous_split operates by breaking - * each column up into it's individual components and copying each one as a seperate kernel + * each column up into it's individual components and copying each one as a separate kernel * block. */ struct src_buf_info { @@ -188,7 +188,7 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst, } // if we're performing a value shift (offsets), or a bit shift (validity) the # of bytes and - // alignment must be a multiple of 4. value shifting and bit shifting are mututally exclusive + // alignment must be a multiple of 4. value shifting and bit shifting are mutually exclusive // and will never both be true at the same time. if (value_shift || bit_shift) { std::size_t idx = (num_bytes - remainder) / 4; @@ -249,7 +249,7 @@ __device__ void copy_buffer(uint8_t* __restrict__ dst, * * @param num_src_bufs Total number of source buffers (N) * @param src_bufs Input source buffers (N) - * @param dst_bufs Desination buffers (N*M) + * @param dst_bufs Destination buffers (N*M) * @param buf_info Information on the range of values to be copied for each destination buffer. 
*/ template diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index 1dbb844a606..37118779248 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -123,7 +123,7 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, } CUDF_EXPECTS(input.keys().type() == replacement.type(), "keys must match scalar type"); - // first add the replacment to the keys so only the indices need to be processed + // first add the replacement to the keys so only the indices need to be processed auto input_matched = dictionary::detail::add_keys( input, make_column_from_scalar(replacement, 1, stream)->view(), stream, mr); auto const input_view = dictionary_column_view(input_matched->view()); diff --git a/cpp/src/hash/concurrent_unordered_multimap.cuh b/cpp/src/hash/concurrent_unordered_multimap.cuh index 071214e80b0..2b92c9142ca 100644 --- a/cpp/src/hash/concurrent_unordered_multimap.cuh +++ b/cpp/src/hash/concurrent_unordered_multimap.cuh @@ -239,7 +239,7 @@ class concurrent_unordered_multimap { * @param[in] precomputed_hash A flag indicating whether or not a precomputed * hash value is passed in * @param[in] precomputed_hash_value A precomputed hash value to use for - * determing the write location of the key into the hash map instead of + * determining the write location of the key into the hash map instead of * computing the the hash value directly from the key * @tparam hash_value_type The datatype of the hash value * @@ -284,7 +284,7 @@ class concurrent_unordered_multimap { * @param[in] precomputed_hash A flag indicating whether or not a precomputed * hash value is passed in * @param[in] precomputed_hash_value A precomputed hash value to use for - * determing the write location of the key into the hash map instead of + * determining the write location of the key into the hash map instead of * computing the the hash value directly from the key * @param[in] keys_are_equal An optional functor for comparing if two keys are * equal @@ -375,7 +375,7 @@ class concurrent_unordered_multimap { * @param[in] precomputed_hash A flag indicating whether or not a precomputed * hash value is passed in * @param[in] precomputed_hash_value A precomputed hash value to use for - * determing the write location of the key into the hash map instead of + * determining the write location of the key into the hash map instead of * computing the the hash value directly from the key * @param[in] keys_are_equal An optional functor for comparing if two keys are * equal @@ -423,7 +423,7 @@ class concurrent_unordered_multimap { * @param[in] precomputed_hash A flag indicating whether or not a precomputed * hash value is passed in * @param[in] precomputed_hash_value A precomputed hash value to use for - * determing the write location of the key into the hash map instead of + * determining the write location of the key into the hash map instead of * computing the the hash value directly from the key * @param[in] keys_are_equal An optional functor for comparing if two keys are * equal diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu index 28fc2ae9d4f..917a5b1ac9c 100644 --- a/cpp/src/interop/from_arrow.cu +++ b/cpp/src/interop/from_arrow.cu @@ -94,7 +94,7 @@ namespace { */ struct dispatch_to_cudf_column { /** - * @brief Returns mask from an array withut any offsets. + * @brief Returns mask from an array without any offsets. 
*/ std::unique_ptr get_mask_buffer(arrow::Array const& array, rmm::cuda_stream_view stream, diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index f8fcf03a77e..3cd515e9981 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -96,7 +96,7 @@ std::shared_ptr fetch_mask_buffer(column_view input_view, */ struct dispatch_to_arrow { /** - * @brief Creates vector Arrays from given cudf column childrens + * @brief Creates vector Arrays from given cudf column children */ std::vector> fetch_child_array( column_view input_view, diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu index 8451255bfda..ba6bc30e0d4 100644 --- a/cpp/src/io/json/json_gpu.cu +++ b/cpp/src/io/json/json_gpu.cu @@ -416,7 +416,7 @@ struct field_descriptor { * @param[in] end pointer to the first character after the parsing range * @param[in] opts The global parsing behavior options * @param[in] field_idx Index of the current field in the input row - * @param[in] col_map Pointer to the (column name hash -> solumn index) map in device memory. + * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory. * nullptr is passed when the input file does not consist of objects. * @return Descriptor of the parsed field */ @@ -481,7 +481,7 @@ __device__ std::pair get_row_data_range( * @param[in] data The entire data to read * @param[in] row_offsets The offset of each row in the input * @param[in] column_types The data type of each column - * @param[in] col_map Pointer to the (column name hash -> solumn index) map in device memory. + * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory. * nullptr is passed when the input file does not consist of objects. * @param[out] output_columns The output column data * @param[out] valid_fields The bitmaps indicating whether column fields are valid diff --git a/cpp/src/io/json/json_gpu.h b/cpp/src/io/json/json_gpu.h index 4010461da44..7a6bce5e5a5 100644 --- a/cpp/src/io/json/json_gpu.h +++ b/cpp/src/io/json/json_gpu.h @@ -44,7 +44,7 @@ using col_map_type = concurrent_unordered_map; * @param[in] data The entire data to read * @param[in] row_offsets The start of each data record * @param[in] dtypes The data type of each column - * @param[in] col_map Pointer to the (column name hash -> solumn index) map in device memory. + * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory. * nullptr is passed when the input file does not consist of objects. * @param[out] output_columns The output column data * @param[out] valid_fields The bitmaps indicating whether column fields are valid @@ -68,7 +68,7 @@ void convert_json_to_columns(parse_options_view const& options, * @param[in] data Input data buffer * @param[in] row_offsets The offset of each row in the input * @param[in] num_columns The number of columns of input data - * @param[in] col_map Pointer to the (column name hash -> solumn index) map in device memory. + * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory. * nullptr is passed when the input file does not consist of objects. * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
* diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu index 087aad5759f..b4395d6c965 100644 --- a/cpp/src/io/json/reader_impl.cu +++ b/cpp/src/io/json/reader_impl.cu @@ -134,7 +134,7 @@ col_map_ptr_type create_col_names_hash_map(column_view column_name_hashes, * @param[in] row_offsets Device array of row start locations in the input buffer * @param[in] stream CUDA stream used for device memory operations and kernel launches * - * @return std::unique_ptr
cudf table with three columns (offsets, lenghts, hashes) + * @return std::unique_ptr
cudf table with three columns (offsets, lengths, hashes) */ std::unique_ptr
create_json_keys_info_table(const parse_options_view& options, device_span const data, diff --git a/cpp/src/io/orc/orc.cpp b/cpp/src/io/orc/orc.cpp index dc23af594a5..287364c3191 100644 --- a/cpp/src/io/orc/orc.cpp +++ b/cpp/src/io/orc/orc.cpp @@ -459,7 +459,7 @@ metadata::metadata(datasource* const src) : source(src) auto md_data = decompressor->Decompress(buffer->data(), ps.metadataLength, &md_length); orc::ProtobufReader(md_data, md_length).read(md); - // Initilize the column names + // Initialize the column names init_column_names(); } diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index cfc2d54a1b7..e007c49e61c 100644 --- a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -115,9 +115,9 @@ static inline __device__ uint32_t CountLeadingBytes64(uint64_t v) { return __clz /** * @brief Raw data output * - * @param[in] cid stream type (strm_pos[cid] will be updated and output stored at - *streams[cid]+strm_pos[cid]) - * @param[in] inmask input buffer position mask for circular buffers + * @tparam cid stream type (strm_pos[cid] will be updated and output stored at + * streams[cid]+strm_pos[cid]) + * @tparam inmask input buffer position mask for circular buffers * @param[in] s encoder state * @param[in] inbuf base input buffer * @param[in] inpos position in input buffer @@ -143,12 +143,12 @@ static __device__ void StoreBytes( /** * @brief ByteRLE encoder * - * @param[in] cid stream type (strm_pos[cid] will be updated and output stored at - *streams[cid]+strm_pos[cid]) + * @tparam cid stream type (strm_pos[cid] will be updated and output stored at + * streams[cid]+strm_pos[cid]) + * @tparam inmask input buffer position mask for circular buffers * @param[in] s encoder state * @param[in] inbuf base input buffer * @param[in] inpos position in input buffer - * @param[in] inmask input buffer position mask for circular buffers * @param[in] numvals max number of values to encode * @param[in] flush encode all remaining values if nonzero * @param[in] t thread id @@ -324,12 +324,12 @@ static inline __device__ void StoreBitsBigEndian( /** * @brief Integer RLEv2 encoder * - * @param[in] cid stream type (strm_pos[cid] will be updated and output stored at - *streams[cid]+strm_pos[cid]) + * @tparam cid stream type (strm_pos[cid] will be updated and output stored at + * streams[cid]+strm_pos[cid]) + * @tparam inmask input buffer position mask for circular buffers * @param[in] s encoder state * @param[in] inbuf base input buffer * @param[in] inpos position in input buffer - * @param[in] inmask input buffer position mask for circular buffers * @param[in] numvals max number of values to encode * @param[in] flush encode all remaining values if nonzero * @param[in] t thread id @@ -619,7 +619,7 @@ static const __device__ __constant__ int32_t kTimeScale[10] = { * @brief Encode column data * * @param[in] chunks encoder chunks device array [column][rowgroup] - * @param[in, out] chunks cunk streams device array [column][rowgroup] + * @param[in, out] streams chunk streams device array [column][rowgroup] */ // blockDim {512,1,1} template diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index ba8754ce6ca..db5cd349198 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -74,7 +74,7 @@ struct encoder_decimal_info { }; /** - * @brief Returns the total number of rowgroups in the list of contigious stripes. + * @brief Returns the total number of rowgroups in the list of contiguous stripes. 
*/ inline auto stripes_size(host_span stripes) { diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 9e1301d6355..f8158eaa6e9 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -1467,7 +1467,7 @@ __device__ void gpuDecodeLevels(page_state_s* s, int32_t target_leaf_count, int gpuDecodeStream(s->def, s, cur_leaf_count, t, level_type::DEFINITION); __syncwarp(); - // because the rep and def streams are encoded seperately, we cannot request an exact + // because the rep and def streams are encoded separately, we cannot request an exact // # of values to be decoded at once. we can only process the lowest # of decoded rep/def // levels we get. int actual_leaf_count = has_repetition ? min(s->lvl_count[level_type::REPETITION], diff --git a/cpp/src/io/parquet/parquet.cpp b/cpp/src/io/parquet/parquet.cpp index febfdf8b06a..6c658788fa1 100644 --- a/cpp/src/io/parquet/parquet.cpp +++ b/cpp/src/io/parquet/parquet.cpp @@ -290,7 +290,7 @@ bool CompactProtocolReader::InitSchema(FileMetaData* md) /* Inside FileMetaData, there is a std::vector of RowGroups and each RowGroup contains a * a std::vector of ColumnChunks. Each ColumnChunk has a member ColumnMetaData, which contains * a std::vector of std::strings representing paths. The purpose of the code below is to set the - * schema_idx of each column of each row to it corresonding row_group. This is effectively + * schema_idx of each column of each row to it corresponding row_group. This is effectively * mapping the columns to the schema. */ for (auto& row_group : md->row_groups) { diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index 391ca61327e..2232017409d 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -232,7 +232,7 @@ struct ColumnChunkMetaData { * column * * Each column chunk lives in a particular row group and are guaranteed to be - * contiguous in the file. Any mssing or corrupted chunks can be skipped during + * contiguous in the file. Any missing or corrupted chunks can be skipped during * reading. */ struct ColumnChunk { diff --git a/cpp/src/io/statistics/conversion_type_select.cuh b/cpp/src/io/statistics/conversion_type_select.cuh index 225377bfc4b..b76a5fcf3cd 100644 --- a/cpp/src/io/statistics/conversion_type_select.cuh +++ b/cpp/src/io/statistics/conversion_type_select.cuh @@ -70,7 +70,7 @@ template class Detect; /** - * @brief Utility class to detect multiple occurences of a type in the first element of pairs in a + * @brief Utility class to detect multiple occurrences of a type in the first element of pairs in a * tuple For eg. with the following tuple : * * using conversion_types = diff --git a/cpp/src/io/statistics/statistics_type_identification.cuh b/cpp/src/io/statistics/statistics_type_identification.cuh index 84399a307a5..869e2833285 100644 --- a/cpp/src/io/statistics/statistics_type_identification.cuh +++ b/cpp/src/io/statistics/statistics_type_identification.cuh @@ -55,8 +55,8 @@ struct conversion_map { std::pair>; }; -// In Parquet timestamps and durations with second resoluion are converted to -// milliseconds. Timestamps and durations with nanosecond resoluion are +// In Parquet timestamps and durations with second resolution are converted to +// milliseconds. Timestamps and durations with nanosecond resolution are // converted to microseconds. 
template <> struct conversion_map { diff --git a/cpp/src/jit/parser.hpp b/cpp/src/jit/parser.hpp index 61228d7ffce..0b752d77d1f 100644 --- a/cpp/src/jit/parser.hpp +++ b/cpp/src/jit/parser.hpp @@ -106,7 +106,7 @@ class ptx_parser { std::vector parse_function_body(const std::string& src); /** - * @brief Remove leading white chractors and call `parse_instruction`. + * @brief Remove leading white characters and call `parse_instruction`. * * @param src The statement to be parsed. * @return The resulting CUDA statement. @@ -124,8 +124,8 @@ class ptx_parser { * * ---> asm volatile (" fma.rn.f32 _f4, _f3, _f1, _f2;"); * - * If a regiter from the input parameters list is used in an instruction - * its type is inferred from the intruction and saved in the `input_arg_list` + * If a register from the input parameters list is used in an instruction + * its type is inferred from the instruction and saved in the `input_arg_list` * to be used in when parsing the function header. * * See the document at https://github.com/hummingtree/cudf/wiki/PTX-parser diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 66bc508f1ce..1133477669d 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -342,7 +342,7 @@ std::size_t get_full_join_size(cudf::table_device_view build_table, right_indices->data(), write_index.data(), join_size); - // Rlease intermediate memory alloation + // Release intermediate memory allocation left_indices->resize(0, stream); auto const left_table_row_count = probe_table.num_rows(); diff --git a/cpp/src/reshape/byte_cast.cu b/cpp/src/reshape/byte_cast.cu index 5bbdb5988e7..98156224cfe 100644 --- a/cpp/src/reshape/byte_cast.cu +++ b/cpp/src/reshape/byte_cast.cu @@ -108,7 +108,7 @@ std::unique_ptr byte_list_conversion::operator()( } // namespace /** - * @copydoc cudf::byte_cast(input_column,flip_endianess,rmm::mr::device_memory_resource) + * @copydoc cudf::byte_cast(input_column,flip_endianness,rmm::mr::device_memory_resource) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -124,7 +124,7 @@ std::unique_ptr byte_cast(column_view const& input_column, } // namespace detail /** - * @copydoc cudf::byte_cast(input_column,flip_endianess,rmm::mr::device_memory_resource) + * @copydoc cudf::byte_cast(input_column,flip_endianness,rmm::mr::device_memory_resource) */ std::unique_ptr byte_cast(column_view const& input_column, flip_endianness endian_configuration, diff --git a/cpp/src/rolling/rolling_detail.cuh b/cpp/src/rolling/rolling_detail.cuh index d7114608787..862e44a0d2b 100644 --- a/cpp/src/rolling/rolling_detail.cuh +++ b/cpp/src/rolling/rolling_detail.cuh @@ -339,8 +339,8 @@ std::unique_ptr empty_output_for_rolling_aggregation(column_view const& // TODO: // Ideally, for UDF aggregations, the returned column would match // the agg's return type. It currently returns empty_like(input), because: - // 1. This preserves prior behaviour for empty input columns. - // 2. There is insufficient information to construct nested return colums. + // 1. This preserves prior behavior for empty input columns. + // 2. There is insufficient information to construct nested return columns. // `cudf::make_udf_aggregation()` expresses the return type as a `data_type` // which cannot express recursively nested types (e.g. `STRUCT>`.) // 3. In any case, UDFs that return nested types are not currently supported. 
@@ -616,7 +616,7 @@ class rolling_aggregation_preprocessor final : public cudf::detail::simple_aggre return aggs; } - // COLLECT_LIST aggregations do not peform a rolling operation at all. They get processed + // COLLECT_LIST aggregations do not perform a rolling operation at all. They get processed // entirely in the finalize() step. std::vector> visit( data_type, cudf::detail::collect_list_aggregation const&) override @@ -624,7 +624,7 @@ class rolling_aggregation_preprocessor final : public cudf::detail::simple_aggre return {}; } - // COLLECT_SET aggregations do not peform a rolling operation at all. They get processed + // COLLECT_SET aggregations do not perform a rolling operation at all. They get processed // entirely in the finalize() step. std::vector> visit( data_type, cudf::detail::collect_set_aggregation const&) override diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 94c34f92c66..2f57b38249f 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -192,7 +192,7 @@ namespace { * @brief Calculate the size of the each string required for * converting each value in base-10 format. * - * ouput format is [-]integer.fraction + * output format is [-]integer.fraction */ template struct decimal_to_string_size_fn { diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 708bb387c5a..b0910acb2a2 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -234,7 +234,7 @@ struct ftos_converter { static constexpr double upper_limit = 1000000000; // max is 1x10^9 static constexpr double lower_limit = 0.0001; // printf uses scientific notation below this // Tables for doing normalization: converting to exponent form - // IEEE double float has maximum exponent of 305 so these should cover everthing + // IEEE double float has maximum exponent of 305 so these should cover everything const double upper10[9] = {10, 100, 10000, 1e8, 1e16, 1e32, 1e64, 1e128, 1e256}; const double lower10[9] = {.1, .01, .0001, 1e-8, 1e-16, 1e-32, 1e-64, 1e-128, 1e-256}; const double blower10[9] = {1.0, .1, .001, 1e-7, 1e-15, 1e-31, 1e-63, 1e-127, 1e-255}; diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index 0cf08892adc..409e1892c91 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -669,7 +669,7 @@ std::pair>, int> build_comma if (op.type == path_operator_type::ROOT) { CUDF_EXPECTS(h_operators.size() == 0, "Root operator ($) can only exist at the root"); } - // if we havent' gotten a root operator to start, and we're not empty, quietly push a + // if we have not gotten a root operator to start, and we're not empty, quietly push a // root operator now. 
if (h_operators.size() == 0 && op.type != path_operator_type::ROOT && op.type != path_operator_type::END) { diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index ec3ef4d94ae..5b058d7b696 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -56,7 +56,7 @@ struct replace_multi_regex_fn { reprog_device* progs; // array of regex progs size_type number_of_patterns; found_range* d_found_ranges; // working array matched (begin,end) values - column_device_view const d_repls; // replacment strings + column_device_view const d_repls; // replacement strings int32_t* d_offsets{}; // these are null when char* d_chars{}; // only computing size diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index cab5a54a57d..f9b2355b2ff 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -41,7 +41,7 @@ namespace { /** * @brief Generate ngrams from strings column. * - * Adjacent strings are concatented with the provided separator. + * Adjacent strings are concatenated with the provided separator. * The number of adjacent strings join depends on the specified ngrams value. * For example: for bigrams (ngrams=2), pairs of strings are concatenated. */ diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index f99c831e745..e20c7120571 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -380,7 +380,7 @@ __device__ size_type row_size_functor::operator()(column_device_vie /** * @brief Kernel for computing per-row sizes in bits. * - * @param cols An span of column_device_views represeting a column hierarcy + * @param cols An span of column_device_views representing a column hierarchy * @param info An span of column_info structs corresponding the elements in `cols` * @param output Output span of size (# rows) where per-row bit sizes are stored * @param max_branch_depth Maximum depth of the span stack needed per-thread diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 9a541a8cde0..4e1ad57080a 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -171,7 +171,7 @@ void check_float_column(cudf::column_view const& col_lhs, } // timestamp column checker within tolerance -// given by `tol_ms` (miliseconds) +// given by `tol_ms` (milliseconds) void check_timestamp_column(cudf::column_view const& col_lhs, cudf::column_view const& col_rhs, long tol_ms = 1000l) diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 4eed81298a2..56573ddab40 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -526,7 +526,7 @@ TEST_F(OrcWriterTest, Strings) TEST_F(OrcWriterTest, SlicedTable) { - // This test checks for writing zero copy, offseted views into existing cudf tables + // This test checks for writing zero copy, offsetted views into existing cudf tables std::vector strings{ "Monday", "Monday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; @@ -609,7 +609,7 @@ TEST_F(OrcWriterTest, HostBuffer) TEST_F(OrcWriterTest, negTimestampsNano) { // This is a separate test because ORC format has a bug where writing a timestamp between -1 and 0 - // seconds from UNIX epoch is read as that timestamp + 1 second. We mimic that behaviour and so + // seconds from UNIX epoch is read as that timestamp + 1 second. We mimic that behavior and so // this test has to hardcode test values which are < -1 second. 
// Details: https://github.com/rapidsai/cudf/pull/5529#issuecomment-648768925 using namespace cudf::test; diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index ef2ad29fc80..1ad844d6706 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -526,7 +526,7 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls) CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); // TODO: Need to be able to return metadata in tree form from reader so they can be compared. - // Unfortunately the closest thing to a heirarchical schema is column_name_info which does not + // Unfortunately the closest thing to a hierarchical schema is column_name_info which does not // have any tests for it c++ or python. compare_metadata_equality(expected_metadata, result.metadata); } @@ -573,7 +573,7 @@ TEST_F(ParquetWriterTest, Strings) TEST_F(ParquetWriterTest, SlicedTable) { - // This test checks for writing zero copy, offseted views into existing cudf tables + // This test checks for writing zero copy, offsetted views into existing cudf tables std::vector strings{ "Monday", "Wȅdnȅsday", "Friday", "Monday", "Friday", "Friday", "Friday", "Funday"}; @@ -1569,7 +1569,7 @@ TEST_F(ParquetChunkedWriterTest, ReadingUnclosedFile) srand(31337); auto table = create_random_fixed_table(4, 4, true); - auto filepath = temp_env->get_temp_filepath("ReadingUnlosedFile.parquet"); + auto filepath = temp_env->get_temp_filepath("ReadingUnclosedFile.parquet"); cudf_io::chunked_parquet_writer_options args = cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath}); cudf_io::parquet_chunked_writer writer(args); @@ -2288,7 +2288,7 @@ TEST_F(ParquetReaderTest, UserBoundsWithNulls) // skip_rows / num_rows // clang-format off - std::vector> params{ {-1, -1}, {1, 3}, {3, -1}, + std::vector> params{ {-1, -1}, {1, 3}, {3, -1}, {31, -1}, {32, -1}, {33, -1}, {31, 5}, {32, 5}, {33, 5}, {-1, 7}, {-1, 31}, {-1, 32}, {-1, 33}, diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 212458d5118..e468368842a 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -265,7 +265,7 @@ TEST_F(JoinTest, FullJoinOnNulls) cols_gold.push_back(col_gold_3.release()); cols_gold.push_back(col_gold_4.release()); cols_gold.push_back(col_gold_5.release()); - + Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); @@ -549,7 +549,7 @@ TEST_F(JoinTest, LeftJoinOnNulls) { 1, 1, 0}); column_wrapper col_gold_5{{ 2, 8, -1}, { 1, 1, 0}}; - + CVector cols_gold; cols_gold.push_back(col_gold_0.release()); cols_gold.push_back(col_gold_1.release()); @@ -579,7 +579,7 @@ TEST_F(JoinTest, LeftJoinOnNulls) result_sort_order = cudf::sorted_order(result->view()); sorted_result = cudf::gather(result->view(), *result_sort_order); - + col_gold_0 = {{ 3, -1, 2}, { 1, 0, 1}}; col_gold_1 = {{ "s0", "s1", "s2"}, @@ -782,7 +782,7 @@ TEST_F(JoinTest, InnerJoinWithStructsAndNulls) CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } -// // Test to check join behaviour when join keys are null. +// // Test to check join behavior when join keys are null. 
TEST_F(JoinTest, InnerJoinOnNulls) { // clang-format off @@ -826,7 +826,7 @@ TEST_F(JoinTest, InnerJoinOnNulls) cols_gold.push_back(col_gold_3.release()); cols_gold.push_back(col_gold_4.release()); cols_gold.push_back(col_gold_5.release()); - + Table gold(std::move(cols_gold)); auto gold_sort_order = cudf::sorted_order(gold.view()); diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index e907212c9e8..1e5cb941392 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -264,7 +264,7 @@ TEST_F(SegmentedSortInt, ErrorsMismatchArgSizes) {order::ASCENDING, order::ASCENDING}, {null_order::AFTER, null_order::AFTER}), logic_error); - // segmented_offsets beyond num_rows - undefined behaviour, no throw. + // segmented_offsets beyond num_rows - undefined behavior, no throw. CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(input1, input1, col2)); } diff --git a/cpp/tests/strings/repeat_strings_tests.cpp b/cpp/tests/strings/repeat_strings_tests.cpp index a229e1b468a..feca4b25c4d 100644 --- a/cpp/tests/strings/repeat_strings_tests.cpp +++ b/cpp/tests/strings/repeat_strings_tests.cpp @@ -71,7 +71,7 @@ TEST_F(RepeatJoinStringTest, ValidStringScalar) EXPECT_EQ(result->size(), 0); } - // Negatitve repeat times. + // Negative repeat times. { auto const result = cudf::strings::repeat_strings(str, -10); EXPECT_EQ(result->is_valid(), true); diff --git a/cpp/tests/table/table_view_tests.cu b/cpp/tests/table/table_view_tests.cu index 1fb4b88c79e..3a792573108 100644 --- a/cpp/tests/table/table_view_tests.cu +++ b/cpp/tests/table/table_view_tests.cu @@ -31,7 +31,7 @@ #include // Compares two tables row by row, if table1 row is less than table2, then corresponding row value -// in `ouput` would be `true`/1 else `false`/0. +// in `output` would be `true`/1 else `false`/0. struct TableViewTest : public cudf::test::BaseFixture { }; void row_comparison(cudf::table_view input1, diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 2ff06436853..7177f78e652 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -683,7 +683,7 @@ struct column_view_printer { { lists_column_view lcv(col); - // propage slicing to the child if necessary + // propagate slicing to the child if necessary column_view child = lcv.get_sliced_child(rmm::cuda_stream_default); bool const is_sliced = lcv.offset() > 0 || child.offset() > 0; From 569282b16f61957275cbf28ecc88aee55858546f Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Thu, 8 Jul 2021 16:13:34 -0400 Subject: [PATCH 37/54] Update `conda` environment name for CI (#8692) The `gdf` conda environment has been replaced with the `rapids` environment. A symlink was put in place for `gdf` to continue to work, but the symlink will be removed in the near future. This PR updates all scripts to use the `rapids` environment name. Authors: - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Dillon Cullinan (https://github.com/dillon-cullinan) URL: https://github.com/rapidsai/cudf/pull/8692 --- ci/checks/style.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/checks/style.sh b/ci/checks/style.sh index 981e886d31c..8235f9de0e5 100755 --- a/ci/checks/style.sh +++ b/ci/checks/style.sh @@ -11,7 +11,8 @@ LC_ALL=C.UTF-8 LANG=C.UTF-8 # Activate common conda env -source activate gdf +. 
/opt/conda/etc/profile.d/conda.sh +conda activate rapids # Run isort and get results/return code ISORT=`isort --check-only python/**/*.py` From 73df850a81cd82a0cbe69478e2d01edc528b8f24 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Thu, 8 Jul 2021 15:33:27 -0500 Subject: [PATCH 38/54] Add options to build Arrow with Python and Parquet support (#8670) This PR adds two options when CPM builds Arrow during a libcudf source build: `CUDF_ENABLE_ARROW_PYTHON`, and `CUDF_ENABLE_ARROW_PARQUET`. These options enable building `libarrow.so` with Python and Parquet support, so that we can build the pyarrow and cuDF Cython after building libcudf. For example: ```shell export PARALLEL_LEVEL=$(nproc --ignore=2) # Clone cuDF git clone --depth 1 --branch branch-21.08 https://github.com/rapidsai/cudf.git /opt/rapids/cudf # Build and install libcudf (also builds libarrow/libarrow_cuda) cmake -GNinja \ -S /opt/rapids/cudf/cpp \ -B /opt/rapids/cudf/cpp/build \ -D CUDF_ENABLE_ARROW_S3=OFF \ -D CUDF_ENABLE_ARROW_PYTHON=ON \ -D CUDF_ENABLE_ARROW_PARQUET=ON \ && cmake --build /opt/rapids/cudf/cpp/build -j${PARALLEL_LEVEL} -v --target install # Build and install pyarrow cd /opt/rapids/cudf/cpp/build/_deps/arrow-src/python \ && ARROW_HOME=/usr/local \ PYARROW_WITH_S3=OFF \ PYARROW_WITH_ORC=ON \ PYARROW_WITH_CUDA=ON \ PYARROW_WITH_HDFS=OFF \ PYARROW_WITH_FLIGHT=OFF \ PYARROW_WITH_PLASMA=OFF \ PYARROW_WITH_DATASET=ON \ PYARROW_WITH_GANDIVA=OFF \ PYARROW_WITH_PARQUET=ON \ PYARROW_BUILD_TYPE=Release \ PYARROW_CMAKE_GENERATOR=Ninja \ PYARROW_PARALLEL=${PARALLEL_LEVEL} \ ARROW_PYTHON_DIR=/opt/rapids/cudf/cpp/build/_deps/arrow-src/python \ && python setup.py install --single-version-externally-managed --record=record.txt # Build and install cudf python cd /opt/rapids/cudf/python/cudf \ && pip install --upgrade \ "nvtx>=0.2.1" \ "numba>=0.53.1" \ "fsspec>=0.6.0" \ "protobuf>=3.0.0" \ "fastavro>=0.22.9" \ "transformers>=4.8" \ "pandas>=1.0,<1.3.0dev0" \ "cmake-setuptools>=0.1.3" \ "cupy-cuda112>7.1.0,<10.0.0a0" \ "git+https://github.com/dask/dask.git@main" \ "git+https://github.com/dask/distributed.git@main" \ "git+https://github.com/rapidsai/dask-cuda.git@branch-21.08" \ && python setup.py build_ext -j${PARALLEL_LEVEL} --inplace \ && python setup.py install --single-version-externally-managed --record=record.txt # Build and install dask_cudf python cd /opt/rapids/cudf/python/dask_cudf \ && python setup.py build_ext -j${PARALLEL_LEVEL} --inplace \ && python setup.py install --single-version-externally-managed --record=record.txt ``` Authors: - Paul Taylor (https://github.com/trxcllnt) Approvers: - Robert Maynard (https://github.com/robertmaynard) URL: https://github.com/rapidsai/cudf/pull/8670 --- cpp/CMakeLists.txt | 2 ++ cpp/cmake/thirdparty/CUDF_GetArrow.cmake | 39 +++++++++++++++++++----- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3582f29bf11..2f8687eb360 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -44,6 +44,8 @@ option(BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks" option(BUILD_SHARED_LIBS "Build cuDF shared libraries" ON) option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON) option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF) +option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF) +option(CUDF_ENABLE_ARROW_PARQUET "Find (or build) Arrow with Parquet support" OFF) option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem 
support" ON) option(PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF) option(DISABLE_DEPRECATION_WARNING "Disable warnings generated from deprecated declarations." OFF) diff --git a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake index e15f3f7e16d..8cef3e8b9d0 100644 --- a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake +++ b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake @@ -14,11 +14,10 @@ # limitations under the License. #============================================================================= -function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3) +function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_PYTHON ENABLE_PARQUET) set(ARROW_BUILD_SHARED ON) set(ARROW_BUILD_STATIC OFF) - set(ARROW_BUILD_S3 OFF) set(CPMAddOrFindPackage CPMFindPackage) if(NOT ARROW_ARMV8_ARCH) @@ -36,10 +35,23 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3) set(CPMAddOrFindPackage CPMAddPackage) endif() - if(ENABLE_S3) - set(ARROW_BUILD_S3 ON) + set(ARROW_PYTHON_OPTIONS "") + if(ENABLE_PYTHON) + list(APPEND ARROW_PYTHON_OPTIONS "ARROW_PYTHON ON") + # Arrow's logic to build Boost from source is busted, so we have to get it from the system. + list(APPEND ARROW_PYTHON_OPTIONS "BOOST_SOURCE SYSTEM") + # Arrow's logic to find Thrift is busted, so we have to build it from + # source. Why can't we use `THRIFT_SOURCE BUNDLED` you might ask? + # Because that's _also_ busted. The only thing that seems to is to set + # _all_ dependencies to bundled, then optionall un-set BOOST_SOURCE to + # SYSTEM. + list(APPEND ARROW_PYTHON_OPTIONS "ARROW_DEPENDENCY_SOURCE BUNDLED") endif() + # Set this so Arrow correctly finds the CUDA toolkit when the build machine + # does not have the CUDA driver installed. This must be an env var. + set(ENV{CUDA_LIB_PATH} "${CUDAToolkit_LIBRARY_DIR}/stubs") + cmake_language(CALL ${CPMAddOrFindPackage} NAME Arrow VERSION ${VERSION} @@ -55,7 +67,10 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3) "ARROW_WITH_BACKTRACE ON" "ARROW_CXXFLAGS -w" "ARROW_JEMALLOC OFF" - "ARROW_S3 ${ARROW_BUILD_S3}" + "ARROW_S3 ${ENABLE_S3}" + # e.g. needed by blazingsql-io + "ARROW_PARQUET ${ENABLE_PARQUET}" + ${ARROW_PYTHON_OPTIONS} # Arrow modifies CMake's GLOBAL RULE_LAUNCH_COMPILE unless this is off "ARROW_USE_CCACHE OFF" "ARROW_ARMV8_ARCH ${ARROW_ARMV8_ARCH}" @@ -98,13 +113,17 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3) DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/arrow/util") file(INSTALL "${Arrow_BINARY_DIR}/src/arrow/gpu/cuda_version.h" DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/arrow/gpu") + if(ENABLE_PARQUET) + file(INSTALL "${Arrow_BINARY_DIR}/src/parquet/parquet_version.h" + DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/parquet") + endif() ### # This shouldn't be necessary! # # Arrow populates INTERFACE_INCLUDE_DIRECTORIES for the `arrow_static` # and `arrow_shared` targets in FindArrow and FindArrowCUDA respectively, # so for static source-builds, we have to do it after-the-fact. - # + # # This only works because we know exactly which components we're using. # Don't forget to update this list if we add more! 
###
@@ -129,4 +148,10 @@ endfunction()

set(CUDF_VERSION_Arrow 4.0.1)

-find_and_configure_arrow(${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3})
+find_and_configure_arrow(
+    ${CUDF_VERSION_Arrow}
+    ${CUDF_USE_ARROW_STATIC}
+    ${CUDF_ENABLE_ARROW_S3}
+    ${CUDF_ENABLE_ARROW_PYTHON}
+    ${CUDF_ENABLE_ARROW_PARQUET}
+)

From ecbec81da6a0d50125e66bd1bcaceb9815a76ca4 Mon Sep 17 00:00:00 2001
From: Alfred Xu
Date: Fri, 9 Jul 2021 12:13:04 +0800
Subject: [PATCH 39/54] JNI: Hotfix on testGetCudaRuntimeInfo (#8701)

A hotfix for the Java API tests that retrieve CUDA runtime information.

Authors:
  - Alfred Xu (https://github.com/sperlingxx)

Approvers:
  - pxLi (https://github.com/pxLi)
  - https://github.com/GaryShen2008

URL: https://github.com/rapidsai/cudf/pull/8701
---
 java/src/test/java/ai/rapids/cudf/CudaTest.java | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/java/src/test/java/ai/rapids/cudf/CudaTest.java b/java/src/test/java/ai/rapids/cudf/CudaTest.java
index d33785e0577..8905c2edd56 100644
--- a/java/src/test/java/ai/rapids/cudf/CudaTest.java
+++ b/java/src/test/java/ai/rapids/cudf/CudaTest.java
@@ -24,8 +24,11 @@ public class CudaTest {

   @Test
   public void testGetCudaRuntimeInfo() {
-    assert Cuda.getDriverVersion() >= Cuda.getRuntimeVersion();
-    assert Cuda.getRuntimeVersion() > 1000;
+    // The driver version is not necessarily larger than runtime version. Drivers of previous
+    // version are also able to support runtime of later version, only if they support same
+    // kinds of computeModes.
+    assert Cuda.getDriverVersion() >= 1000;
+    assert Cuda.getRuntimeVersion() >= 1000;
     assertEquals(Cuda.getNativeComputeMode(), Cuda.getComputeMode().nativeId);
   }

From 1b34652e2e8119f329a87803b14d456651ed818b Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Fri, 9 Jul 2021 09:14:54 -0400
Subject: [PATCH 40/54] Add strings support to cudf::shift function (#8648)

Closes #3915
Added strings specialization logic to the current `cudf::shift` API. Rows are shifted by running transform functions on the offsets and chars child columns to create the output strings column.
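As a minimal sketch of the resulting behavior (the expected values mirror the example documented in the new `copying.hpp` header below; the wrapper function and the test-utility column wrapper are illustrative only, not part of the change):

```cpp
// Minimal sketch: shifting a strings column with cudf::shift.
// The outputs shown in the comments come from the documentation example
// added by this PR; the surrounding function exists only for illustration.
#include <cudf/copying.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf_test/column_wrapper.hpp>

void shift_strings_example()
{
  cudf::test::strings_column_wrapper input({"a", "b", "c", "d", "e", "f"});
  cudf::string_scalar fill("_");

  // Positive offsets shift rows toward the end, filling vacated rows:
  auto r1 = cudf::shift(input, 2, fill);   // ["_", "_", "a", "b", "c", "d"]

  // Negative offsets shift rows toward the front:
  auto r2 = cudf::shift(input, -2, fill);  // ["c", "d", "e", "f", "_", "_"]
}
```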
The gtests are added to the `copying/shift_tests.cpp` Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Christopher Harris (https://github.com/cwharris) - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/8648 --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/strings/detail/copying.hpp | 28 ++++ cpp/src/copying/shift.cu | 69 +++++++--- cpp/src/strings/copying/shift.cu | 144 ++++++++++++++++++++ cpp/tests/copying/shift_tests.cpp | 32 ++++- 5 files changed, 256 insertions(+), 18 deletions(-) create mode 100644 cpp/src/strings/copying/shift.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2f8687eb360..605b67e77fc 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -358,6 +358,7 @@ add_library(cudf src/strings/convert/convert_urls.cu src/strings/copying/concatenate.cu src/strings/copying/copying.cu + src/strings/copying/shift.cu src/strings/extract.cu src/strings/filling/fill.cu src/strings/filter_chars.cu diff --git a/cpp/include/cudf/strings/detail/copying.hpp b/cpp/include/cudf/strings/detail/copying.hpp index 19dfa193207..6083ebc4a62 100644 --- a/cpp/include/cudf/strings/detail/copying.hpp +++ b/cpp/include/cudf/strings/detail/copying.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -54,6 +55,33 @@ std::unique_ptr copy_slice( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a new strings column created by shifting the rows by a specified offset. + * + * @code{.pseudo} + * Example: + * s = ["a", "b", "c", "d", "e", "f"] + * r1 = shift(s, 2, "_") + * r1 is now ["_", "_", "a", "b", "c", "d"] + * r2 = shift(s, -2, "_") + * r2 is now ["c", "d", "e", "f", "_", "_"] + * @endcode + * + * The caller should set the validity mask in the output column. + * + * @param input Strings instance for this operation. + * @param offset The offset by which to shift the input. + * @param fill_value Fill value for indeterminable outputs. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return New strings column. + */ +std::unique_ptr shift(strings_column_view const& input, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + } // namespace detail } // namespace strings } // namespace cudf diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index ebeaf0e3b20..0b88545ffa5 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -44,13 +45,55 @@ inline bool __device__ out_of_bounds(size_type size, size_type idx) return idx < 0 || idx >= size; } +std::pair create_null_mask(column_device_view const& input, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const size = input.size(); + auto func_validity = + [size, offset, fill = fill_value.validity_data(), input] __device__(size_type idx) { + auto src_idx = idx - offset; + return out_of_bounds(size, src_idx) ? 
*fill : input.is_valid(src_idx); + }; + return detail::valid_if(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size), + func_validity, + stream, + mr); +} + struct shift_functor { template - std::enable_if_t(), std::unique_ptr> operator()(Args&&...) + std::enable_if_t() and not std::is_same_v, + std::unique_ptr> + operator()(Args&&...) { CUDF_FAIL("shift does not support non-fixed-width types."); } + template + std::enable_if_t, std::unique_ptr> operator()( + column_view const& input, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + auto output = cudf::strings::detail::shift( + cudf::strings_column_view(input), offset, fill_value, stream, mr); + + if (input.nullable() || not fill_value.is_valid(stream)) { + auto const d_input = column_device_view::create(input, stream); + auto mask_pair = create_null_mask(*d_input, offset, fill_value, stream, mr); + output->set_null_mask(std::move(std::get<0>(mask_pair))); + output->set_null_count(std::get<1>(mask_pair)); + } + + return output; + } + template std::enable_if_t(), std::unique_ptr> operator()( column_view const& input, @@ -67,29 +110,21 @@ struct shift_functor { detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream, mr); auto device_output = mutable_column_device_view::create(*output); - auto size = input.size(); - auto index_begin = thrust::make_counting_iterator(0); - auto index_end = thrust::make_counting_iterator(size); - - if (input.nullable() || not scalar.is_valid()) { - auto func_validity = [size, - offset, - fill = scalar.validity_data(), - input = *device_input] __device__(size_type idx) { - auto src_idx = idx - offset; - return out_of_bounds(size, src_idx) ? *fill : input.is_valid(src_idx); - }; - - auto mask_pair = detail::valid_if(index_begin, index_end, func_validity, stream, mr); + auto const scalar_is_valid = scalar.is_valid(stream); + if (input.nullable() || not scalar_is_valid) { + auto mask_pair = create_null_mask(*device_input, offset, fill_value, stream, mr); output->set_null_mask(std::move(std::get<0>(mask_pair))); output->set_null_count(std::get<1>(mask_pair)); } - auto data = device_output->data(); + auto const size = input.size(); + auto index_begin = thrust::make_counting_iterator(0); + auto index_end = thrust::make_counting_iterator(size); + auto data = device_output->data(); // avoid assigning elements we know to be invalid. - if (not scalar.is_valid()) { + if (not scalar_is_valid) { if (offset > 0) { index_begin = thrust::make_counting_iterator(offset); data = data + offset; diff --git a/cpp/src/strings/copying/shift.cu b/cpp/src/strings/copying/shift.cu new file mode 100644 index 00000000000..3545ec6d259 --- /dev/null +++ b/cpp/src/strings/copying/shift.cu @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace cudf::strings::detail { + +namespace { + +struct adjust_offsets_fn { + column_device_view const d_column; + string_view const d_filler; + size_type const offset; + + __device__ offset_type operator()(size_type idx) + { + if (offset < 0) { + auto const first = d_column.element(-offset); + auto const last_index = d_column.size() + offset; + if (idx < last_index) { + return d_column.element(idx - offset) - first; + } else { + auto const last = d_column.element(d_column.size() - 1); + return (last - first) + ((idx - last_index + 1) * d_filler.size_bytes()); + } + } else { + if (idx < offset) { + return idx * d_filler.size_bytes(); + } else { + auto const total_filler = d_filler.size_bytes() * offset; + return total_filler + d_column.element(idx - offset); + } + } + } +}; + +struct shift_chars_fn { + column_device_view const d_column; + string_view const d_filler; + size_type const offset; + + __device__ char operator()(size_type idx) + { + if (offset < 0) { + auto const last_index = -offset; + if (idx < last_index) { + auto const first_index = d_column.size() + offset; + return d_column.element(idx + first_index); + } else { + auto const char_index = idx - last_index; + return d_filler.data()[char_index % d_filler.size_bytes()]; + } + } else { + if (idx < offset) { + return d_filler.data()[idx % d_filler.size_bytes()]; + } else { + return d_column.element(idx - offset); + } + } + } +}; + +} // namespace + +std::unique_ptr shift(strings_column_view const& input, + size_type offset, + scalar const& fill_value, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto d_fill_str = static_cast(fill_value).value(stream); + + // output offsets column is the same size as the input + auto const input_offsets = + cudf::slice(input.offsets(), {input.offset(), input.offset() + input.size() + 1}).front(); + auto const offsets_size = input_offsets.size(); + auto offsets_column = cudf::detail::allocate_like( + input_offsets, offsets_size, mask_allocation_policy::NEVER, stream, mr); + + // run kernel to simultaneously shift and adjust the values in the output offsets column + auto d_offsets = mutable_column_device_view::create(offsets_column->mutable_view(), stream); + auto const d_input_offsets = column_device_view::create(input_offsets, stream); + thrust::transform(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(offsets_size), + d_offsets->data(), + adjust_offsets_fn{*d_input_offsets, d_fill_str, offset}); + + // compute the shift-offset for the output characters child column + auto const shift_offset = [&] { + auto const index = (offset >= 0) ? offset : offsets_size - 1 + offset; + return (offset < 0 ? 
+                         -1 : 1) *
+           cudf::detail::get_value<offset_type>(offsets_column->view(), index, stream);
+  }();
+
+  // create output chars child column
+  auto const chars_size =
+    cudf::detail::get_value<offset_type>(offsets_column->view(), offsets_size - 1, stream);
+  auto chars_column = create_chars_child_column(chars_size, stream, mr);
+  auto d_chars      = mutable_column_device_view::create(chars_column->mutable_view(), stream);
+  auto const d_input_chars = column_device_view::create(input.chars(), stream);
+
+  // run kernel to shift the characters
+  thrust::transform(rmm::exec_policy(stream),
+                    thrust::counting_iterator<size_type>(0),
+                    thrust::counting_iterator<size_type>(chars_size),
+                    d_chars->data<char>(),
+                    shift_chars_fn{*d_input_chars, d_fill_str, shift_offset});
+
+  // caller sets the null-mask
+  return make_strings_column(input.size(),
+                             std::move(offsets_column),
+                             std::move(chars_column),
+                             0,
+                             rmm::device_buffer{},
+                             stream,
+                             mr);
+}
+
+}  // namespace cudf::strings::detail
diff --git a/cpp/tests/copying/shift_tests.cpp b/cpp/tests/copying/shift_tests.cpp
index f642ad5bd90..80dda1ab930 100644
--- a/cpp/tests/copying/shift_tests.cpp
+++ b/cpp/tests/copying/shift_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -201,3 +201,33 @@ TYPED_TEST(ShiftTest, MismatchFillValueDtypes)

   EXPECT_THROW(output = cudf::shift(input, 5, *fill), cudf::logic_error);
 }
+
+struct ShiftTestNonFixedWidth : public cudf::test::BaseFixture {
+};
+
+TEST_F(ShiftTestNonFixedWidth, StringsShiftTest)
+{
+  auto input =
+    cudf::test::strings_column_wrapper({"", "bb", "ccc", "ddddddé", ""}, {0, 1, 1, 1, 0});
+
+  auto fill    = cudf::string_scalar("xx");
+  auto results = cudf::shift(input, 2, fill);
+  auto expected_right =
+    cudf::test::strings_column_wrapper({"xx", "xx", "", "bb", "ccc"}, {1, 1, 0, 1, 1});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_right, *results);
+
+  results = cudf::shift(input, -2, fill);
+  auto expected_left =
+    cudf::test::strings_column_wrapper({"ccc", "ddddddé", "", "xx", "xx"}, {1, 1, 0, 1, 1});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_left, *results);
+
+  auto sliced = cudf::slice(input, {1, 4}).front();
+
+  results = cudf::shift(sliced, 1, fill);
+  auto sliced_right = cudf::test::strings_column_wrapper({"xx", "bb", "ccc"});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sliced_right, *results);
+
+  results = cudf::shift(sliced, -1, fill);
+  auto sliced_left = cudf::test::strings_column_wrapper({"ccc", "ddddddé", "xx"});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sliced_left, *results);
+}

From cef51bdc42f341c3e410dc0e0233e2bc6d1853e3 Mon Sep 17 00:00:00 2001
From: shaneding
Date: Fri, 9 Jul 2021 11:29:37 -0400
Subject: [PATCH 41/54] Struct scalar from host dictionary (#8629)

Allows the creation of a `struct_scalar` from a Python dictionary. Partly
addresses #8558.
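To make the new behavior concrete, here is a minimal usage sketch (hedged: it assumes a cudf build containing this patch; the field types are inferred from the dictionary values via pyarrow, as the `_preprocess_host_value` change below shows):

    import cudf

    # Build a struct scalar directly from a host dict; the StructDtype is
    # inferred via pa.infer_type(..., from_pandas=True).
    slr = cudf.Scalar({"a": 1, "b": "rapids", "c": [1, 2, 3, 4]})
    print(slr.dtype)  # StructDtype with fields 'a', 'b', 'c'
    print(slr.value)  # round-trips back to the original dict

    # Per the patch, combining a dict value with an explicit dtype raises:
    # cudf.Scalar({"a": 1}, dtype=...)  # TypeError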
Authors: - https://github.com/shaneding Approvers: - https://github.com/brandon-b-miller - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu) URL: https://github.com/rapidsai/cudf/pull/8629 --- python/cudf/cudf/_lib/cpp/scalar/scalar.pxd | 1 + python/cudf/cudf/_lib/scalar.pyx | 35 +++++++++++++++++++-- python/cudf/cudf/core/scalar.py | 17 +++++++++- python/cudf/cudf/tests/test_struct.py | 21 +++++++++++++ 4 files changed, 71 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd index f0b6ea0b606..771ec9100d1 100644 --- a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd +++ b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd @@ -70,4 +70,5 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: column_view view() except + cdef cppclass struct_scalar(scalar): + struct_scalar(table_view cols, bool valid) except + table_view view() except + diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 9429ab0ee57..9e50f42d625 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -30,7 +30,7 @@ from cudf._lib.column cimport Column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.table cimport Table -from cudf._lib.interop import to_arrow +from cudf._lib.interop import to_arrow, from_arrow from cudf._lib.cpp.wrappers.timestamps cimport ( timestamp_s, @@ -87,6 +87,8 @@ cdef class DeviceScalar: elif isinstance(dtype, cudf.ListDtype): _set_list_from_pylist( self.c_value, value, dtype, valid) + elif isinstance(dtype, cudf.StructDtype): + _set_struct_from_pydict(self.c_value, value, dtype, valid) elif pd.api.types.is_string_dtype(dtype): _set_string_from_np_string(self.c_value, value, valid) elif pd.api.types.is_numeric_dtype(dtype): @@ -172,7 +174,6 @@ cdef class DeviceScalar: s.c_value = move(ptr) cdtype = s.get_raw_ptr()[0].type() - if cdtype.id() == libcudf_types.DECIMAL64 and dtype is None: raise TypeError( "Must pass a dtype when constructing from a fixed-point scalar" @@ -308,6 +309,36 @@ cdef _set_decimal64_from_scalar(unique_ptr[scalar]& s, ) ) +cdef _set_struct_from_pydict(unique_ptr[scalar]& s, + object value, + object dtype, + bool valid=True): + arrow_schema = dtype.to_arrow() + columns = [str(i) for i in range(len(arrow_schema))] + if valid: + pyarrow_table = pa.Table.from_arrays( + [ + pa.array([value[f.name]], from_pandas=True, type=f.type) + for f in arrow_schema + ], + names=columns + ) + else: + pyarrow_table = pa.Table.from_arrays( + [ + pa.array([], from_pandas=True, type=f.type) + for f in arrow_schema + ], + names=columns + ) + + cdef Table table = from_arrow(pyarrow_table, column_names=columns) + cdef table_view struct_view = table.view() + + s.reset( + new struct_scalar(struct_view, valid) + ) + cdef _get_py_dict_from_struct(unique_ptr[scalar]& s): if not s.get()[0].is_valid(): return cudf.NA diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index ad39642cf60..db9bc6d6c85 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -7,7 +7,7 @@ from cudf._lib.scalar import DeviceScalar, _is_null_host_scalar from cudf.core.column.column import ColumnBase -from cudf.core.dtypes import Decimal64Dtype, ListDtype +from cudf.core.dtypes import Decimal64Dtype, ListDtype, StructDtype from cudf.core.index import BaseIndex from cudf.core.series import Series from cudf.utils.dtypes import ( @@ -131,6 
+131,21 @@ def _preprocess_host_value(self, value, dtype): raise ValueError(f"Can not coerce {value} to ListDtype") else: return NA, dtype + + if isinstance(value, dict): + if dtype is not None: + raise TypeError("dict may not be cast to a different dtype") + else: + dtype = StructDtype.from_arrow( + pa.infer_type([value], from_pandas=True) + ) + return value, dtype + elif isinstance(dtype, StructDtype): + if value is not None: + raise ValueError(f"Can not coerce {value} to StructDType") + else: + return NA, dtype + if isinstance(dtype, Decimal64Dtype): value = pa.scalar( value, type=pa.decimal128(dtype.precision, dtype.scale) diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py index da2af1469c0..4f3bb9bda92 100644 --- a/python/cudf/cudf/tests/test_struct.py +++ b/python/cudf/cudf/tests/test_struct.py @@ -6,6 +6,7 @@ import pytest import cudf +from cudf.core.dtypes import StructDtype from cudf.testing._utils import assert_eq @@ -98,3 +99,23 @@ def test_serialize_struct_dtype(fields): def test_struct_getitem(series, expected): sr = cudf.Series(series) assert sr[0] == expected + + +@pytest.mark.parametrize( + "data", + [ + {"a": 1, "b": "rapids", "c": [1, 2, 3, 4]}, + {"a": 1, "b": "rapids", "c": [1, 2, 3, 4], "d": cudf.NA}, + {"a": "Hello"}, + {"b": [], "c": [1, 2, 3]}, + ], +) +def test_struct_scalar_host_construction(data): + slr = cudf.Scalar(data) + assert slr.value == data + assert list(slr.device_value.value.values()) == list(data.values()) + + +def test_struct_scalar_null(): + slr = cudf.Scalar(cudf.NA, dtype=StructDtype) + assert slr.device_value.value is cudf.NA From 214d74a6be161da8c5228e4b2fffa8afea955ac1 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 9 Jul 2021 09:38:17 -0700 Subject: [PATCH 42/54] Add proper support for tolerances in testing methods. (#8649) Resolves #8646 so that testing equality between different types of frames can be based on approximate rather than exact equality. Note that this is a blocker for packages that need to move away from relying on `cudf.tests.utils` for testing functions, since that module is no longer exposed by `cudf`. 
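As a usage sketch of what the tolerance support enables (hedged: this assumes the public `cudf.testing` wrappers, which dispatch to the `assert_column_equal` change below):

    import cudf
    from cudf.testing import assert_series_equal

    a = cudf.Series([0.1, 0.2, None, 0.3])
    b = cudf.Series([0.1, 0.2, None, 0.3 + 1e-12])

    # Null positions must still match exactly; the non-null numeric values
    # are compared with cupy.allclose instead of bit-for-bit equality.
    assert_series_equal(a, b, check_exact=False)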
Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - https://github.com/brandon-b-miller
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/8649
---
 python/cudf/cudf/testing/testing.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py
index 16315bbd7f8..cedf2aac7af 100644
--- a/python/cudf/cudf/testing/testing.py
+++ b/python/cudf/cudf/testing/testing.py
@@ -4,10 +4,12 @@

 from typing import Union

+import cupy as cp
 import numpy as np
 import pandas as pd

 import cudf
+from cudf.api.types import is_numeric_dtype
 from cudf.core._compat import PANDAS_GE_110
 from cudf.utils.dtypes import is_categorical_dtype

@@ -203,7 +205,17 @@ def assert_column_equal(

     columns_equal = False
     try:
-        columns_equal = left.equals(right)
+        columns_equal = (
+            (
+                cp.all(left.isnull().values == right.isnull().values)
+                and cp.allclose(
+                    left[left.isnull().unary_operator("not")].values,
+                    right[right.isnull().unary_operator("not")].values,
+                )
+            )
+            if not check_exact and is_numeric_dtype(left)
+            else left.equals(right)
+        )
     except TypeError as e:
         if str(e) != "Categoricals can only compare with the same type":
             raise e

From 8bd0dfe675249f3a831813745b8ebb5bef1519d4 Mon Sep 17 00:00:00 2001
From: Michael Wang
Date: Fri, 9 Jul 2021 10:21:12 -0700
Subject: [PATCH 43/54] Support Scatter `struct_scalar` (#8630)

Partially addresses #8558.

This PR adds support for scattering struct scalars. Implementation notes:
the current implementation copies the row data of each field into a new
scalar and recursively applies scalar scattering to each field. There may
be an optimization that eliminates this copy, but it would require extra
scaffolding/scatter machinery.

Minor aspects of this PR:

- Refactors `column_scalar_scatterer` to include
  `scatter_scalar_bitmask_inplace` at each level of dispatch. This is
  required because scalar scattering can be nested.
- Adds `count_set_bits` and `count_unset_bits` detail APIs.
- Adds default stream/mr arguments for `detail::get_element`.

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - Conor Hoekstra (https://github.com/codereport)
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Gera Shegalov (https://github.com/gerashegalov)

URL: https://github.com/rapidsai/cudf/pull/8630
---
 cpp/include/cudf/detail/copy.hpp              |   9 +-
 cpp/include/cudf/detail/null_mask.hpp         |  20 ++
 cpp/src/copying/scatter.cu                    | 127 +++++---
 cpp/tests/CMakeLists.txt                      |   1 +
 .../copying/scatter_struct_scalar_tests.cpp   | 270 ++++++++++++++++++
 5 files changed, 385 insertions(+), 42 deletions(-)
 create mode 100644 cpp/tests/copying/scatter_struct_scalar_tests.cpp

diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index aebf0c23469..79da4a997da 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -232,9 +232,10 @@ std::unique_ptr<table> sample(
 *
 * @param[in] stream CUDA stream used for device memory operations and kernel launches.
 */
-std::unique_ptr<scalar> get_element(column_view const& input,
-                                    size_type index,
-                                    rmm::cuda_stream_view stream,
-                                    rmm::mr::device_memory_resource* mr);
+std::unique_ptr<scalar> get_element(
+  column_view const& input,
+  size_type index,
+  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp
index 29d75e466de..f757929d839 100644
--- a/cpp/include/cudf/detail/null_mask.hpp
+++ b/cpp/include/cudf/detail/null_mask.hpp
@@ -47,6 +47,26 @@ void set_null_mask(bitmask_type* bitmask,
                    bool valid,
                    rmm::cuda_stream_view stream = rmm::cuda_stream_default);

+/**
+ * @copydoc cudf::count_set_bits
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+cudf::size_type count_set_bits(bitmask_type const* bitmask,
+                               size_type start,
+                               size_type stop,
+                               rmm::cuda_stream_view stream);
+
+/**
+ * @copydoc cudf::count_unset_bits
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+cudf::size_type count_unset_bits(bitmask_type const* bitmask,
+                                 size_type start,
+                                 size_type stop,
+                                 rmm::cuda_stream_view stream);
+
 /**
  * @copydoc cudf::segmented_count_set_bits
  *
diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu
index a932957ada4..3312316f548 100644
--- a/cpp/src/copying/scatter.cu
+++ b/cpp/src/copying/scatter.cu
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -27,6 +28,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -63,32 +65,32 @@ __global__ void marking_bitmask_kernel(mutable_column_device_view destination,
 }

 template <typename MapIterator>
-void scatter_scalar_bitmask(std::vector<std::reference_wrapper<const scalar>> const& source,
-                            MapIterator scatter_map,
-                            size_type num_scatter_rows,
-                            std::vector<std::unique_ptr<column>>& target,
-                            rmm::cuda_stream_view stream,
-                            rmm::mr::device_memory_resource* mr)
+void scatter_scalar_bitmask_inplace(std::reference_wrapper<const scalar> const& source,
+                                    MapIterator scatter_map,
+                                    size_type num_scatter_rows,
+                                    column& target,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr)
 {
   constexpr size_type block_size = 256;
   size_type const grid_size      = grid_1d(num_scatter_rows, block_size).num_blocks;

-  for (size_t i = 0; i < target.size(); ++i) {
-    auto const source_is_valid = source[i].get().is_valid(stream);
-    if (target[i]->nullable() or not source_is_valid) {
-      if (not target[i]->nullable()) {
-        // Target must have a null mask if the source is not valid
-        auto mask = detail::create_null_mask(target[i]->size(), mask_state::ALL_VALID, stream, mr);
-        target[i]->set_null_mask(std::move(mask), 0);
-      }
-
-      auto target_view = mutable_column_device_view::create(target[i]->mutable_view(), stream);
-
-      auto bitmask_kernel = source_is_valid ? marking_bitmask_kernel<block_size, true>
-                                            : marking_bitmask_kernel<block_size, false>;
-      bitmask_kernel<<<grid_size, block_size, 0, stream.value()>>>(
-        *target_view, scatter_map, num_scatter_rows);
+  auto const source_is_valid = source.get().is_valid(stream);
+  if (target.nullable() or not source_is_valid) {
+    if (not target.nullable()) {
+      // Target must have a null mask if the source is not valid
+      auto mask = detail::create_null_mask(target.size(), mask_state::ALL_VALID, stream, mr);
+      target.set_null_mask(std::move(mask), 0);
     }
+
+    auto target_view = mutable_column_device_view::create(target, stream);
+
+    auto bitmask_kernel = source_is_valid ? marking_bitmask_kernel<block_size, true>
+                                          : marking_bitmask_kernel<block_size, false>;
+    bitmask_kernel<<<grid_size, block_size, 0, stream.value()>>>(
+      *target_view, scatter_map, num_scatter_rows);
+
+    target.set_null_count(count_unset_bits(target.view().null_mask(), 0, target.size(), stream));
   }
 }
@@ -103,6 +105,7 @@ struct column_scalar_scatterer_impl {
   {
     CUDF_EXPECTS(source.get().type() == target.type(), "scalar and column types must match");

+    // make a copy of data and null mask from source
     auto result      = std::make_unique<column>(target, stream, mr);
     auto result_view = result->mutable_view();

@@ -117,6 +120,7 @@ struct column_scalar_scatterer_impl {
                     scatter_iter,
                     result_view.begin<Element>());

+    scatter_scalar_bitmask_inplace(source, scatter_iter, scatter_rows, *result, stream, mr);
     return result;
   }
 };
@@ -136,7 +140,10 @@ struct column_scalar_scatterer_impl<string_view, MapIterator> {
     auto const source_view = string_view(scalar_impl->data(), scalar_impl->size());
     auto const begin       = thrust::make_constant_iterator(source_view);
     auto const end         = begin + scatter_rows;
-    return strings::detail::scatter(begin, end, scatter_iter, target, stream, mr);
+    auto result = strings::detail::scatter(begin, end, scatter_iter, target, stream, mr);
+
+    scatter_scalar_bitmask_inplace(source, scatter_iter, scatter_rows, *result, stream, mr);
+    return result;
   }
 };
@@ -149,17 +156,11 @@ struct column_scalar_scatterer_impl<list_view, MapIterator> {
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr) const
   {
-    return lists::detail::scatter(
-      source, scatter_iter, scatter_iter + scatter_rows, target, stream, mr);
-  }
-};
+    auto result =
+      lists::detail::scatter(source, scatter_iter, scatter_iter + scatter_rows, target, stream, mr);

-template <typename MapIterator>
-struct column_scalar_scatterer_impl<struct_view, MapIterator> {
-  template <typename... Args>
-  std::unique_ptr<column> operator()(Args&&...) const
-  {
-    CUDF_FAIL("scatter scalar to struct_view not implemented");
+    scatter_scalar_bitmask_inplace(source, scatter_iter, scatter_rows, *result, stream, mr);
+    return result;
   }
 };
@@ -200,10 +201,13 @@ struct column_scalar_scatterer_impl<dictionary32, MapIterator> {
     // use the keys from the matched column
     std::unique_ptr<column> keys_column(std::move(dict_target->release().children.back()));
     // create the output column
-    return make_dictionary_column(std::move(keys_column),
-                                  std::move(indices_column),
-                                  std::move(*(contents.null_mask.release())),
-                                  null_count);
+    auto result = make_dictionary_column(std::move(keys_column),
+                                         std::move(indices_column),
+                                         std::move(*(contents.null_mask.release())),
+                                         null_count);
+
+    scatter_scalar_bitmask_inplace(source, scatter_iter, scatter_rows, *result, stream, mr);
+    return result;
   }
 };
@@ -222,6 +226,55 @@ struct column_scalar_scatterer {
   }
 };

+template <typename MapIterator>
+struct column_scalar_scatterer_impl<struct_view, MapIterator> {
+  std::unique_ptr<column> operator()(std::reference_wrapper<const scalar> const& source,
+                                     MapIterator scatter_iter,
+                                     size_type scatter_rows,
+                                     column_view const& target,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource* mr) const
+  {
+    // For each field of `source`, copy construct a scalar from the field
+    // and dispatch to the corresponding scalar scatterer
+
+    auto typed_s             = static_cast<struct_scalar const*>(&source.get());
+    size_type const n_fields = typed_s->view().num_columns();
+    CUDF_EXPECTS(n_fields == target.num_children(), "Mismatched number of fields.");
+
+    auto scatter_functor   = column_scalar_scatterer<MapIterator>{};
+    auto fields_iter_begin = make_counting_transform_iterator(0, [&](auto const& i) {
+      auto row_slr = get_element(typed_s->view().column(i), 0, stream);
+      return type_dispatcher(row_slr->type(),
+                             scatter_functor,
+                             *row_slr,
+                             scatter_iter,
+                             scatter_rows,
+                             target.child(i),
+                             stream,
+                             mr);
+    });
+    std::vector<std::unique_ptr<column>> fields(fields_iter_begin, fields_iter_begin + n_fields);
+
+    // Compute null mask
+    rmm::device_buffer null_mask =
+      target.nullable() ? copy_bitmask(target, stream, mr)
+                        : create_null_mask(target.size(), mask_state::UNALLOCATED, stream, mr);
+    column null_mask_stub(data_type{type_id::STRUCT},
+                          target.size(),
+                          rmm::device_buffer{},
+                          std::move(null_mask),
+                          target.null_count());
+    scatter_scalar_bitmask_inplace(source, scatter_iter, scatter_rows, null_mask_stub, stream, mr);
+    size_type null_count = null_mask_stub.null_count();
+    auto contents        = null_mask_stub.release();
+
+    // Null mask pushdown inside factory method
+    return make_structs_column(
+      target.size(), std::move(fields), null_count, std::move(*contents.null_mask));
+  }
+};
+
 }  // namespace

 std::unique_ptr<table> scatter(table_view const& source,
@@ -305,8 +358,6 @@ std::unique_ptr<table> scatter(std::vector<std::reference_wrapper<const scalar>>
                           mr);
   });

-  scatter_scalar_bitmask(source, scatter_iter, scatter_rows, result, stream, mr);
-
   return std::make_unique<table>(std::move(result));
 }
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 75fad739534..48c96316795 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -221,6 +221,7 @@ ConfigureTest(COPYING_TEST
     copying/scatter_list_tests.cpp
     copying/scatter_list_scalar_tests.cpp
     copying/scatter_struct_tests.cpp
+    copying/scatter_struct_scalar_tests.cpp
    copying/segmented_gather_list_tests.cpp
    copying/shift_tests.cpp
    copying/slice_tests.cpp
diff --git a/cpp/tests/copying/scatter_struct_scalar_tests.cpp b/cpp/tests/copying/scatter_struct_scalar_tests.cpp
new file mode 100644
index 00000000000..c0594b4fd9a
--- /dev/null
+++ b/cpp/tests/copying/scatter_struct_scalar_tests.cpp
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace cudf {
+namespace test {
+
+constexpr bool print_all{true};  // For debugging
+constexpr int32_t null{0};       // Mark for null child elements
+constexpr int32_t XXX{0};        // Mark for null struct elements
+
+using structs_col = cudf::test::structs_column_wrapper;
+
+template <typename T>
+struct TypedStructScalarScatterTest : public cudf::test::BaseFixture {
+};
+
+TYPED_TEST_CASE(TypedStructScalarScatterTest, FixedWidthTypes);
+
+column scatter_single_scalar(scalar const& slr, column_view scatter_map, column_view target)
+{
+  auto result = scatter({slr}, scatter_map, table_view{{target}}, false);
+  return result->get_column(0);
+}
+
+TYPED_TEST(TypedStructScalarScatterTest, Basic)
+{
+  using fixed_width_wrapper = fixed_width_column_wrapper<TypeParam>;
+
+  // Source scalar
+  fixed_width_wrapper slr_f0{777};
+  strings_column_wrapper slr_f1{"hello"};
+  auto slr = make_struct_scalar(table_view{{slr_f0, slr_f1}});
+
+  // Scatter map
+  fixed_width_column_wrapper<int32_t> scatter_map{2};
+
+  // Target column
+  fixed_width_wrapper field0{11, 11, 22, 22, 33};
+  strings_column_wrapper field1{"aa", "aa", "bb", "bb", "cc"};
+  structs_col target{field0, field1};
+
+  // Expect column
+  fixed_width_wrapper ef0{11, 11, 777, 22, 33};
+  strings_column_wrapper ef1{"aa", "aa", "hello", "bb", "cc"};
+  structs_col expected{ef0, ef1};
+
+  auto got = scatter_single_scalar(*slr, scatter_map, target);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, got, print_all);
+}
+
+TYPED_TEST(TypedStructScalarScatterTest, FillNulls)
+{
+  using fixed_width_wrapper = fixed_width_column_wrapper<TypeParam>;
+
+  // Source scalar
+  fixed_width_wrapper slr_f0{777};
+  auto slr = make_struct_scalar(table_view{{slr_f0}});
+
+  // Scatter map
+  fixed_width_column_wrapper<int32_t> scatter_map{3, 4};
+
+  // Target column
+  fixed_width_wrapper field0({11, 11, 22, null, XXX}, iterators::null_at(3));
+  structs_col target({field0}, iterators::null_at(4));
+
+  // Expect column
+  fixed_width_wrapper ef0{11, 11, 22, 777, 777};
+  structs_col expected{ef0};
+
+  auto got = scatter_single_scalar(*slr, scatter_map, target);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, got, print_all);
+}
+
+TYPED_TEST(TypedStructScalarScatterTest, ScatterNullElements)
+{
+  using fixed_width_wrapper = fixed_width_column_wrapper<TypeParam>;
+
+  // Source scalar
+  fixed_width_wrapper slr_f0{777};
+  std::vector<column_view> source_fields{slr_f0};
+  auto slr = std::make_unique<struct_scalar>(source_fields, false);
+
+  // Scatter map
+  fixed_width_column_wrapper<int32_t> scatter_map{0, 3, 4};
+
+  // Target column
+  fixed_width_wrapper field0({11, 11, 22, null, XXX}, iterators::null_at(3));
+  structs_col target({field0}, iterators::null_at(4));
+
+  // Expect column
+  fixed_width_wrapper ef0{XXX, 11, 22, XXX, XXX};
+  structs_col expected({ef0}, {false, true, true, false, false});
+
+  auto got = scatter_single_scalar(*slr, scatter_map, target);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got, print_all);
+}
+
+TYPED_TEST(TypedStructScalarScatterTest, ScatterNullFields)
+{
+  using fixed_width_wrapper = fixed_width_column_wrapper<TypeParam>;
+
+  // Source scalar
+  fixed_width_wrapper slr_f0({null}, {false});
+  auto slr = make_struct_scalar(table_view{{slr_f0}});
+
+  // Scatter map
+  fixed_width_column_wrapper<int32_t> scatter_map{0, 2};
+
+  // Target column
+  fixed_width_wrapper field0({11, 11, 22, null, XXX}, iterators::null_at(3));
+  structs_col target({field0}, iterators::null_at(4));
+
+  // Expect column
+  fixed_width_wrapper ef0({null, 11, null, null, XXX}, {false, true, false, false, true});
+  structs_col expected({ef0}, iterators::null_at(4));
+
+  auto got = scatter_single_scalar(*slr, scatter_map, target);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got, print_all);
+}
+
+TYPED_TEST(TypedStructScalarScatterTest, NegativeIndices)
+{
+  using fixed_width_wrapper = fixed_width_column_wrapper<TypeParam>;
+
+  // Source scalar
+  fixed_width_wrapper slr_f0{777};
+  auto slr = make_struct_scalar(table_view{{slr_f0}});
+
+  // Scatter map
+  fixed_width_column_wrapper<int32_t> scatter_map{-1, -5};
+
+  // Target column
+  fixed_width_wrapper field0({11, 11, 22, null, XXX}, iterators::null_at(3));
+  structs_col target({field0}, iterators::null_at(4));
+
+  // Expect column
+  fixed_width_wrapper ef0({777, 11, 22, null, 777}, iterators::null_at(3));
+  structs_col expected{ef0};
+
+  auto got = scatter_single_scalar(*slr, scatter_map, target);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, got, print_all);
+}
+
+TYPED_TEST(TypedStructScalarScatterTest, EmptyInputTest)
+{
+  using fixed_width_wrapper = fixed_width_column_wrapper<TypeParam>;
+
+  // Source scalar
+  fixed_width_wrapper slr_f0{777};
+  auto slr = make_struct_scalar(table_view{{slr_f0}});
+
+  // Scatter map
+  fixed_width_column_wrapper<int32_t> scatter_map{};
+
+  // Target column
+  fixed_width_wrapper field0{};
+  structs_col target{field0};
+
+  // Expect column
+  fixed_width_wrapper ef0{};
+  structs_col expected{ef0};
+
+  auto got = scatter_single_scalar(*slr, scatter_map, target);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, got, print_all);
+}
+
+TYPED_TEST(TypedStructScalarScatterTest, EmptyScatterMapTest)
+{
+  using fixed_width_wrapper = fixed_width_column_wrapper<TypeParam>;
+
+  // Source scalar
+  fixed_width_wrapper slr_f0{777};
+  auto slr = make_struct_scalar(table_view{{slr_f0}});
+
+  // Scatter map
+  fixed_width_column_wrapper<int32_t> scatter_map{};
+
+  // Target column
+  fixed_width_wrapper field0({11, 11, 22, null, XXX}, iterators::null_at(3));
+  structs_col target({field0}, iterators::null_at(4));
+
+  auto got = scatter_single_scalar(*slr, scatter_map, target);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(target, got, print_all);
+}
+
+TYPED_TEST(TypedStructScalarScatterTest, FixedWidthStringTypes)
+{
+  using fixed_width_wrapper = fixed_width_column_wrapper<TypeParam>;
+
+  // Source scalar
+  fixed_width_wrapper slr_f0{777};
+  strings_column_wrapper slr_f1{"hello"};
+  auto slr = make_struct_scalar(table_view{{slr_f0, slr_f1}});
+
+  // Scatter map
+  fixed_width_column_wrapper<int32_t> scatter_map{0, 2, 4};
+
+  // Target column
+  fixed_width_wrapper field0({11, 11, 22, null, XXX}, iterators::null_at(3));
+  strings_column_wrapper field1({"aa", "null", "ccc", "null", "XXX"},
+                                {true, false, true, false, true});
+  structs_col target({field0, field1}, iterators::null_at(4));
+
+  // Expect column
+  fixed_width_wrapper ef0({777, 11, 777, null, 777}, iterators::null_at(3));
+  strings_column_wrapper ef1({"hello", "null", "hello", "null", "hello"},
+                             {true, false, true, false, true});
+  structs_col expected{ef0, ef1};
+
+  auto got = scatter_single_scalar(*slr, scatter_map, target);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, got, print_all);
+}
+
+TYPED_TEST(TypedStructScalarScatterTest, StructOfLists)
+{
+  using LCW = lists_column_wrapper<TypeParam>;
+
+  // Source scalar
+  LCW slr_f0{777};
+  auto slr = make_struct_scalar(table_view{{slr_f0}});
+
+  // Scatter map
+  fixed_width_column_wrapper<int32_t> scatter_map{0, 1, 4};
+
+  // Target column
+  LCW field0({LCW{XXX}, LCW{22}, LCW{33, 44}, LCW{null}, LCW{55}}, iterators::null_at(3));
+  structs_col target({field0}, iterators::null_at(0));
+
+  // Expect column
+  LCW ef0({LCW{777}, LCW{777}, LCW{33, 44}, LCW{null}, LCW{777}}, iterators::null_at(3));
+  structs_col expected{ef0};
+
+  auto got = scatter_single_scalar(*slr, scatter_map, target);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, got, print_all);
+}
+
+}  // namespace test
+}  // namespace cudf

From 8320a15cda0c2cae51894aab32fcad1daec33039 Mon Sep 17 00:00:00 2001
From: "Ram (Ramakrishna Prabhu)" <42624703+rgsl888prabhu@users.noreply.github.com>
Date: Fri, 9 Jul 2021 17:05:41 -0500
Subject: [PATCH 44/54] Adding fix for skip_rows and crash in orc reader (#8700)

The skip_rows issue was caused by a missing test case: the essential line of
code had been removed by mistake in #8142. The crash was a corner case, which
has been resolved, and a valid test case has been added.
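A rough sketch of the user-visible behavior being fixed (hedged: `multi_stripe.orc` is a hypothetical file large enough to span several stripes):

    import cudf

    # Rows skipped inside unselected stripes are now subtracted from
    # row_start, so the returned window starts at the correct row instead
    # of being misaligned (or crashing on RLEv2 timestamp data, per the
    # stripe_data.cu change below).
    df = cudf.read_orc("multi_stripe.orc", skiprows=100_000, num_rows=10)
    assert len(df) == 10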
closes #8665
closes #8690

Authors:
  - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Devavret Makkar (https://github.com/devavret)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/8700
---
 cpp/src/io/orc/reader_impl.cu                 |  7 +++-
 cpp/src/io/orc/stripe_data.cu                 | 32 +++++++++---------
 .../data/orc/TestOrcFile.timestamp.issue.orc  | Bin 0 -> 53588 bytes
 python/cudf/cudf/tests/test_orc.py            | 13 ++++++-
 4 files changed, 34 insertions(+), 18 deletions(-)
 create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.issue.orc

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index 9d7e82f0281..a5465090c2c 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -407,7 +407,8 @@ class aggregate_orc_metadata {
     CUDF_EXPECTS(row_count >= 0, "Invalid row count");
     CUDF_EXPECTS(row_start <= get_num_rows(), "Invalid row start");

-    size_type count = 0;
+    size_type count            = 0;
+    size_type stripe_skip_rows = 0;
     // Iterate all source files, each source file has corelating metadata
     for (size_t src_file_idx = 0;
          src_file_idx < per_file_metadata.size() && count < row_start + row_count;
@@ -422,11 +423,15 @@ class aggregate_orc_metadata {
         if (count > row_start || count == 0) {
           stripe_infos.push_back(
             std::make_pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr));
+        } else {
+          stripe_skip_rows = count;
         }
       }
       selected_stripes_mapping.push_back({static_cast<int>(src_file_idx), stripe_infos});
     }
+    // Need to remove skipped rows from the stripes which are not selected.
+    row_start -= stripe_skip_rows;
   }

   // Read each stripe's stripefooter metadata
diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu
index 13e606018ce..903f9475e2a 100644
--- a/cpp/src/io/orc/stripe_data.cu
+++ b/cpp/src/io/orc/stripe_data.cu
@@ -651,12 +651,17 @@ static const __device__ __constant__ uint8_t ClosestFixedBitsMap[65] = {
  * @param[in] vals buffer for output values (uint32_t, int32_t, uint64_t or int64_t)
  * @param[in] maxvals maximum number of values to decode
  * @param[in] t thread id
+ * @param[in] has_buffered_values If true, means there are already buffered values
  *
  * @return number of values decoded
  */
 template <class T>
-static __device__ uint32_t Integer_RLEv2(
-  orc_bytestream_s* bs, volatile orc_rlev2_state_s* rle, volatile T* vals, uint32_t maxvals, int t)
+static __device__ uint32_t Integer_RLEv2(orc_bytestream_s* bs,
+                                         volatile orc_rlev2_state_s* rle,
+                                         volatile T* vals,
+                                         uint32_t maxvals,
+                                         int t,
+                                         bool has_buffered_values = false)
 {
   uint32_t numvals, numruns;
   int r, tr;
@@ -704,6 +709,11 @@ static __device__ uint32_t Integer_RLEv2(
       }
     }
     if ((numvals != 0) and (numvals + n > maxvals)) break;
+    // case where there are buffered values and can't consume a whole chunk
+    // from decoded values, so skip adding any more to buffer, work on buffered values and then
+    // start fresh in next iteration with empty buffer.
+    if ((numvals == 0) and (n > maxvals) and (has_buffered_values)) break;
+
     pos += l;
     if (pos > maxpos) break;
     ((numvals == 0) and (n > maxvals)) ? numvals = maxvals : numvals += n;
@@ -1502,9 +1512,11 @@ __global__ void __launch_bounds__(block_size)
           numvals = ofs + Integer_RLEv1(bs, &s->u.rlev1, &s->vals.u32[ofs], numvals - ofs, t);
         } else {
           if (s->chunk.type_kind == TIMESTAMP)
-            numvals = ofs + Integer_RLEv2(bs, &s->u.rlev2, &s->vals.u64[ofs], numvals - ofs, t);
+            numvals =
+              ofs + Integer_RLEv2(bs, &s->u.rlev2, &s->vals.u64[ofs], numvals - ofs, t, ofs > 0);
           else
-            numvals = ofs + Integer_RLEv2(bs, &s->u.rlev2, &s->vals.u32[ofs], numvals - ofs, t);
+            numvals =
+              ofs + Integer_RLEv2(bs, &s->u.rlev2, &s->vals.u32[ofs], numvals - ofs, t, ofs > 0);
         }
         __syncthreads();
         if (numvals <= ofs && t >= ofs && t < s->top.data.max_vals) { s->vals.u32[t] = 0; }
@@ -1571,18 +1583,6 @@
       } else {
         numvals = Integer_RLEv2(&s->bs2, &s->u.rlev2, s->vals.u64, numvals, t);
       }
-      // If we're using an index, we may have to drop values from the initial run
-      uint32_t skip = 0;
-      if (num_rowgroups > 0 and false) {
-        uint32_t run_pos = s->top.data.index.run_pos[CI_DATA2];
-        if (run_pos) {
-          skip = min(numvals, run_pos);
-          __syncthreads();
-          if (t == 0) { s->top.data.index.run_pos[CI_DATA2] = 0; }
-          numvals -= skip;
-        }
-      }
-
       __syncthreads();
     } else if (s->chunk.type_kind == BYTE) {
       numvals = Byte_RLE(&s->bs, &s->u.rle8, s->vals.u8, numvals, t);
diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.issue.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.issue.orc
new file mode 100644
index 0000000000000000000000000000000000000000..554948031962d638225c10d94cf6d5f94b351bdc
GIT binary patch
literal 53588
[53,588 bytes of base85-encoded binary payload omitted]
z(~mKeG&4+{V<;~?FsiY!d~9ekx-m>WAEFcN-$Vb~Z@**wGd#fFDH33IGp0e%s{>j(>hGMQCWGOr#DLD}k zbFC;5Q#5rT21#v6|%IbP3zc6 zoT`KyLYmif0nHZCXL(vE7g%+1^zXXQ@n5fIyx&`2m;)&X)aO214`NbY^Ls@f4CxG& zaf-81qrLTnEhoAJy|dE~!Ui$Qv9%b55J-0dg5!JtYe6MYo}CJ*Jn-Cn z3*(uKqo4=%6mDLMPwIxtf3|)M_R(zZPfe@&N3S{1x*?}9z*-eVxFqaJo5CYQ-gWBoS<2Xr~R+STu4*MpJ@S7Fu8tcK5*IN z!Q%Vb)$#ogTXwA~iQi!n4@Fp*Nj43#e6VWn@7^vt)Un?B2Y#)SGGi?}`ZN@w;5-#n zjXOz%hBbHBPX4ryrJZoK{`5e5ikm)1nI(2&ZXXj$o7rS8__VXFrD^eczgcv%IkoBFG=V`ID34Tt_ zK2aUpp-hM=bZw}z9yX+%8dVUG%Eg*rNK@X_{Qh`9@4YwfJ?Ff8uk}o8ZxHW9zj*7_bXS#*;*BP)X>q*c?JLdNkexI(9p2D>K%2Xc)NKe-esM{ZzxFQk)0CRyHp}C z=cRn0u>^nUq@f00#Xb1Z2n{ZnAi=lVOZ<5I`u0C0c+P4GUU$0$yZ4{I|$ccT41-z7jqDM~OUlop=^im)PFxHD0x&#um@k*v4HN z+fdNbmY|gSe5R$>TS{qh7x#Xy{X)u5Um)4FJvF;#k>;v@t=YdCX=3PRNp#pRiPQjz zZ@X6#*}9UrzrLh1-Q6|zc}btOQqq-(l-j?prMs3%>Dl>Gy5~eG&G}h!4UW<5g2C>0 zo_MNe$3`{#_!pYpUq#E^qEg1+UD#I2J;zEp+FHuXXGrDPmn7BdMom6_ye6ODt;zlS zH2KRtnw-B>lRNrpGP6^Y-I{87Wl}=FwboF-shXH|NWu+{)NpvUhR^;|%dH1V=prAV zL1;=nK954Z<&yvBMkz!$N$#fOGbrSJJf&C9!*VIsi`gjEnN}R(j{FKpG|yGEs1Y@O1uYGt5*kz zcjM*aeW4C7-?59dP~9*22~SFX**?jyc#8MJ$u~*<gwu;O{P}HC z3^&!{!jH9>xK4}F543p0D;ny)TtnA&(a`6gYiRo|8hpNq1gF0*!R@cQwfD+QiFLh5 zW1YuqY|(Y@{@Pm2{+!V4^yfAEz;nvK->sTddgi1upsBk)cv}5GOcwvkkQbX?@)X>in z4c&J@L(l!|UTcjLG=%Gv^_u#iJ3s4Gs*a{IuWRbnEgHMNs@`^pulB}_#(H0)vFLjm zyW>ucy;of`YkrW-zaLBHw~}N}Z>8Bb|7x}v*6cZNXg1kRv+0A})&6l))IYzk_^M|tJ_d=KV>90Z>Z$n?j^ZR6C|JRBl)+ROMdQT$v-$(l66;0(mz8I z-+m#9InPS|`~H&O)L9CJo5k~84e>lXOFZjdkeNQrhvE zl(yY3r4RPA8hXAsL&76^Ncg;l%ANS)uTuQt9xX*jYw?>;w7B?HEmigL?_>6s-BxL^!Icu&_qMQVj&97axl)JyCAoV~S3Ix$olx(tbNQ@`FP7YAVxLcO;tv{M zT$V(wPbE>sC-D#V@;Qg=U!mc%&e!k*Q3>DtpkSvYMh%q2$d!_K;Uq~6KUeTMqB^5n z#9Q{3O1nD+)q%US#-9>>VxdN-9--0MUubm2V2z!9sz!JA)O_3Ol5caJoNNZ_nbB~brw@z3Ze z{df$5gM-ZwniSD zC6Sx65}Ey%L?%oW)YVX%zcrNp$KB7F$BG=feT4$`q;A`!srTPfysy++2-)`K1#1y(9no$d=ubEpO4x_9un){mMbg z+JE~h)_!&lygofiLzmsBp_@0d%S5VvE#a+gC310EB1b*K8WoR>ZvkBJa?QXo&M}8o-sSbbJFjUs2q@F|Hs|F8Tz3nXZ@hbp4Vw|GomKJxOt;O$8(Bj=^YH|2S>Ywtd`YV&w|Iv?Je}T6DxOFjW1ZzSdUPl7Y zyeWb6FO$H%Z%ClWIg+2}k-`c0NukO=l7H?7DJ=^~>5FrubnssZ{rjA&KNlUPp^w5u z4dG6|X*k!D2&6J@y;hd`wK8d$hTlC(!iRpB@PdW{1QMP%SRxm`B;mKOBue&w{i+0x zTPA@9Q^Y^sr@r@hs&CF+3RD}s=4%OF)KiFjM|>lp&(3qGV6ClzeOGDjs&6%SL{f8g zR8zJ3YwGG1nriVWpTTX1QX_BH@KbL~cgfB~IvE>q0*pR1% z2KRd$Mn9{+1~;Fjm7%kx(z?A=IxLp*FE2`^{b^F(^Nob(ektK+j+F41r%3qmTlu|b z>En_;=2eH$J~^IM4tNcy0yxii=DX_s_A~LW42m!RsQRk))Y8edwNzT5`G0Fm_{Oag ze(rfe#|hu}oCM$At+-Ns7ij9tQJRV@cU`x*Uy}!)(yVW#LovXAKlhdF(X|BC4ZFD1 zalaO~Jf)>`TWWFJdM$3KxYe+0nfhu?S6{DV)#p7`eO-ns5Jqg~PK~{o)7Vp|Yoz9> z5((cSiPs{Mkk2J?>oi{MzNH>gp7fiPXDpJ`*Q>NN;~yzkdqARtFOyi+A+D3`K3YRJ z1vK=`DnC5cVXzrP&nmcEx=I*>$bI-jfxtm{h zPY-Cwf1jp0SOu{w(er*uRHgGg4`uk@B4BQmHjw_;+mO#O8L;*vfSpKew6yqsG3k!QYEJkWZzu zL@G_LkzDr)noGa0xzqkoAcx#7Sxv0FRTA4`4t+KCOMHu067dz1xS)}5%R+ejim9WRl8Yf5_i*OF<_OEP^=mrSHR|2-3&AeqzCQusctg)iRIVteB<#%)r~ zTPV55D_bPcw=26!wn0#HhjvKrpO+=KVY}r1%?iF}=RVgE~>e3ZQX>YULlcYwl}87By#S1+-LDBPiVZ`F!xMwKcSg=7oH>DH%5zh-Ob`1|FnPy z`A!G#6DtiWKVRMq7 zYx=iGB)7G_lbxE8cN!G&%81WmRs{Qb;umAaY#(ctv;7>z53*;`0Tj$j@+&BKvRLXP@G^ogCR`cA0Mi3)d%X^c!~PY zpR2xuliV|Sqqlkjjm6XL4u^=cY4Lc6iszo=B(?8=0NJE|F{HC!B)01#r&s)XDUof= zxM*e0C}r0%R9blCEh#p;OAB)iO*P!1xu@1D+1e`?N_k>j%I%F|m{ya!=1$EGwtolL zw|jQ`5Y64UgxUr>(s}bG5V?ta1AC6$@zS3qe8xkr+kw%X^1Kp_AN7k?s-<1e=rKzw zttLq2+KEz$?2t}kam?*9Ahx%z5#{kt)V(B&;P6sfJ~U4x&Z9yF!c(!U&=HWcE)bDiBmJkzR)=ZVwB zGjk#lPUO!YB-(kNL{Gj!a{aDlUCk|M?E1yy6V)@Qy?9cl{a8Ote)1!))RA6IwXUtH zi~eDcB%8Hb3ls*Ff7_w-`Q0Ra+-H({>vK(gdy7JK;uAHzlYRdv&DFn}*d5h8`|vML z(bzgp$v>e!jeo&i?|el*3f}4Wk8~F}U%Qg}=DR(}?aof}_QmJyR5Qn3)u2-oX*^AsPlDTB3mee$yU9%`@t8Xs|Ikrx{m3z~8h{G=JNY~+j!C35@45_vf1 
z@R%VX?Ap?7Kc6(Y|2G53*QsP;ryRT4@u5eKlJb$$B!Bk^$q(8s`E#0*4|?BOA$gNQ!#YUj_MMXG zI8_pFJm|QvAqZetyeH@rs7Txa8~17Hgce$SN& zjI1Yt;#9}`faQnXObnm9?ljF^mLcEHUHp>frl%y=<~@F&pL!N`s{8{r$+Po|K5?JZ z4fQ0k;~|Ft|M*oxUmGvFWweHm8LQ>t=Sz9r?e4;w#ya`YP-Ca@5F} zxQu_JI>r0%Vuy;~aVAxNAm&)OvBnzyuCW2z|LZ!`ea?H6NEL{JeCURowKVxtDTOv_ z>Bf3mn)x(UbMF6MAF8J`UOMfXF4QQjlX>o1ES{EEOTK9S? z|7-fB#_cNIx?W1z0b1%NTDrfp(mS#^TZ1Hc#%~flIwrx> z;}SKt5PdJEv8G>Z^zoB5cEO7pef@1E8nV3`xo3$aTWphL$EzgS@Dxex`BKssJwqmu zPQFEFgWAGxE8P8ug_`>4kmKTacUSJS@8gQU0|e{c{4bSuc2)Zt>xNtts(sx#4ux19 z1t*8z>z~t1{A2$niY+|7RSGYkEQQ5iNMS;E*HNvDLaU-a2tDbO1f343*w~lh>R+g* z{iBjuFiA3Bca(5%b0M(aQ5UeL@wF-QKuWP~-)n5b#fp96ZQ4mZKNf^{o$c-monuqT zekO&R>r26VKnkbSk-|}zNhNi$LmTKF_Pk474rpn3f2Zl9i`)01$69IV%A2Uw*7{_{Fzs&flxyo+Qzv;C%z%+aWS&!kZDK3S2Gf7 zJ4J&pUE|z`&-!WzY~0pN;ICUHXqr1Tc&z>unlc^t@X?FSpU?wCDJ|Blq=| zNMWXAUg~2AAbVn0h034VH&U|I_fo~CN?__#a?fj;ZWMCw_uX(!Urrxc3&Z#4~w`P-8e$bXeW^dhEMCbYk$HE9<9fWo1jPY&%CQ z8v_bOzM_{YSHjmNg_0VI%~!81cf4`rr{bO2g;fW;DqH7(Qg1OnoO%8X$*dhAr305s z*}v6sl6G%NY2|2k{Bp-R#{Ua@p{u=e&w!r7fGhkvD5~@YkfZ$Za&FAw`n!W-xEhp5jq?%lq7%E za3Mm1XE$PjhLZM~O?|`pW7e&`txfSg`*gYtR0OXWrrr-0h_7W8ry%zIOOuh`1?nN* z8vx<8x+4N{h z{5@E_*%KU>#@(*KbqgfW?*R8g^xS(SvSE&JkJ)~W9o3c@&78BwX?G9);xsqx zCunb`L{f=gc8Pcv=f$(*Bk_#?PP~={;66rW#h%`llJW=dO8K>fl>Z(o`3>`=aB*Gw z#`!PSJKq)#pLxj40|3?kk89GyHfI40A$#*y$2N?CWJe#N*^hry&+AR8BYHYtgWD7F$F6F^`b+h_iI2!J{Ae2qKRlLh zD(Y9Z<^s)n&eiN~trePQHaJk}(mraQyE#>{lamgmb-Iw9Ai8C>Mi*Q6g*H<(EfRa< zt@;}I@D-thWC#WM2)K=SP2qi2h&qALgHMi^;O@P2SA$>9Rr=qiI^7ZVczA+QjH}ri)O@iBiHJ7K1fi{h$Qon*uio5A#_4RydXW2bmpo6dCQBf_i-vIFCerA8Ip`mqL&d&udR zpFJpnA9_gOos%T+?a6d3qS<8<4Ll{#oWLHbnYWtZP}5jfP%{2Hxu7Li80 z&npsNzt4TW(lQ3nfbG3fi>nrwWV<1T01_*sf=-q6R$=u>j+vy%T_9ZwT?_-JH@s{f#h6eKGg5)opE&1-3N^bYHl5V`& zrFd%GEU8rwDpfGUx=7a)?iu5hIpd;mFp8zWxT5LD_ZY3I8QnBB>lsZgk8mZ2j`>7` zOMcKqXtbp3|10V17CObx@GAHKFm-CaRd357q$Z!Msr5amdJ|upzfVkNcr1ADP*?rN zyyJ8cc%^-A6>1!&ndCowjlNNS$qCdNkbq$t=q7VDke#ShkznH&h~I*q1aTen1W3%_ zwF7w=hEI((U(Wt6&_u#GDDgk=>hkBDJ7Ld}-4h-EbMyC%tK>fXQ<#de@3AAYN8BK+ zAV7ci^Z&Xd{JnR(v13xp>2Z(xNA_30+(<fXlQn*mXq7$gncncJLyLw8qEDIkJnUqb5`*uE3TC^C&X4IrOr3#G&6!G7oG4}8 z0x2z>Dy0`}Js6@OKmXn5)`t1L`P(}>9~WI6?p5c<*|FiJq7uOkiG49li>5~uK5xu~ zPhrD&4PG|G`P^fB@v~3;wSdnbN;uaEK5uiEP>IpCnNYf*mQ$Sj&lB&Sm2O8`ddb)E zcJ-P1hzf`6us?cOtw&xVzVHU_#P~1kiB`PTMz{R+Z=*fp0Xkf1$|AatA->A2 z4a&QD``J$)W-5&7FuMZJcay;A%N@#GyHEnw-Pp0gL05k$p1sGr%!~Q)sF-+m(HDNv z=vP;2^!HPgYt#~9#V1B6FVlWX$B$y}B;EfA$Ki~>h3@UDA@dePuRK8h7TVXtAsf?L zz$6Z{2vkXU@%b(0xz^|}flL<(Ts?r@i*>+KX2@iK>(ODkzc$@JqF5;Sh7c=xQ+|Q! zs*RHP(|v`X+r$wPezE~k3cJYoJE@ja1Ardz+I@nG4DG>lph*6WTn~^6Y7KHfUjI)_ zjS-a`nos4Pi85UFv*^d?YjnabN=9Sr)!Sn|*B~k^eEYIxZ0OvLIa1R5ujXeHeC7`F zcw`W`S0;^O;>|zwUS@Ipcb!d#6K+l{Fq9fl^uve9=jp8A>t$am-zU8b}5J9TT}8@t##r6D%|L9ft-15$@$X=ywv^@h-?Pu}>l&@IMm9Q}sNT;R2j zs;6)W|8wQgIg!ZA|0S_+KBV3hH>5xp8L7T&-eL-q@Aoj zOki0`B(=J=>sZ!t=~F$nS&=A7JzHOy5(I9urwqQ0?w&t+mgLXqAcaA1GGPH8QRuaq z7t~KCTWVkAvIhe{5M(Q%=%sUqB#m6{Gw(?Ha#Ns81p}JlDn}jwyTX}mfg%T_ z!BmW0zVm-qx(244`l83sH$t)`0qm}~NMillE=_1^d9ZkuQ-CZb6*L}zE`sbZ)NrPA z?mOKm0|_u=0_H%Fe=G6#i#IH%;aE>0n9xsjC*KZhq8 z**Ss^2oh0D<5*&dX&kFy^x!QG2|o*XI8(fqNFuqklO|W5rAef2;P)D*!81ZNqQ0}^ zQP+hG<-yZJ(uxS}!9h}Z@fLwg;2*ZkB?awoA~9HauC){fccT&-*!l~10aIw^mLSbU zkFD++CKQp_L#M%2IQ4ZY*n6U~imz?mFk@@UGWe8qq)?qLSL$K-@-wRn9E`i)3IBK_sLA;V{l1fQqb zsRAE3aN;}(9DT1#utD20Rh9_YF06Z|Y0maYs<_ z^u5qaA*c1>4n1=BLgr->*M7!*m>3dr87|wWL|*2cuxDTn16hB|;xR)8ZPV&GQ{Zso zErDAOJ?I>1XlmxBV~(VFsIzn4#%~ayl2Ra`WM!eGBugDM+2LJ70O0=Et-zR6aIET~ z67X)yi?3-Tmxu$m!LDna)ZqIa-Re4wS1=U;Wyn?=cpQ|M+@}{w?$MkA$1$_pyqV^! 
ze&?`_JX89ms5ItdAu-APCOlYF3-fK5+%j~7t`)qxox7X&56_6J z+)`GD@64%V3I)D4GWP$Q1>n>Zkk#DBCGzVNL|ybx_gcpLyM78an7(||r>T&!a~Y02 zte`=iOMf(7GBv-Ux}JXJH(|CM3O8mjTDO(Z^l2J;`%MjfyhV$%pLAZjdHeV~rFu8I z6$IZO3X`pi@b0T5a`KA;g)nl05(T2F2hMCE{v9Vc7XkU*StIBXdM(k#t_d9dL^Fwx zn!_0mDpZEC@i`ajw3Pg>Q=~9_ixl3pROGU^nT$j#2mWp0_@AY4wsDvX2e89~yX8Ok zQt8jXd5>godC*Nq**ZR)_M{qWosZq-y%*?W0I5QIqCWNOSCaevPRV^d-ucZ~Q&10W zT_O#~eDodPIsL*k3?d{`OUYSg#f8Xf-byzeWZogUF)|v4`3oQ3`v1wI>(8b0QJmX} zvkT>u-gGXExi83sLs_N&2sCIZwBVXO%*%J$xZ16+Q`@;a)*LuJ);2uz-c_A%et%Xt z|AMrWbwRion52T*iisaOG={dRmP0{ACcx$?;1u!e9S^vvGpnX0REpIrr1ZveYA$eG zm@9eA5{1Z>nE%Xt0Jt^W04STL&NGpD{9-M9)r-l@V%0m{97njRG7*MkvAH1RE0`Za zQqX?J^eQS}uU1sinesxFg_8mVjcyIy#`K%R`TXJwbKTmv*%5Si%nxCT4O$q{4O|f* z9Mi*?kG8Bd<`@Fozi>Gp%VQ8t*qX>b*2Zx;tQP&hp1?lwtgdGN^DLzxM|1lr+LG1g_- zYmP@N09v@tUi7JF_Xsb2E!&X-3>LkN@TJJaEyF@3ChChXCuk)NsD533!&% zY76oamHD4Cl?MH&^3HrF0+AVtoMyGl5Civ!y#^@P-c_5N9)q<3E{@q7(=NcDkyQuE z07B#cv)xj=RzJbKnC(KYe%WMdN612P&snye49gH#_W2{7tA)=UJQ@zy;rTL~L*VnU z`Jn6>pOK+4l~=AOg|3$i_7rtD%Ub{e9Ofy5S)jwSv!Bv=5B@s8an3apraf6%OIO& z?q}qxJ0Hau3;=bV2%P( zipV12x!^N`cA9$rP3Kq`6CXV0KcC({6-?wI2aVlB?7m>NOK`x4g%ULX0;Xk%1A+Y> zd5qjPHhi)2|1~e1{w*e&;JaA1AoIH^nufKYY_dyY_Hs>6L7tx--6r0m7xoj*i{8K0 zc`o3Ze|2)6HJ;{1kzC`H=4=-Ba890>+mslpGCxh%+)=-4?vxi5nNnabsEC+|@+6KW z3ShzyewWSra+1pO+vz>00l5%H?@Fd}96jaCO}!;^bjp45=G{?=1J7ILVR8d`YwkA7 zRx`VflwISOTxB$GDgEfL0(FIX0qY=8-Pv7E$x+WhdJQwGoJs@^w`nyb6u}FqGDA(W z+$Ov~bYm)l;F!pI+jJW;v(c(gN_5bC;Y>4>b<2ffuGZhNlOm~t#0jSDGKRDHtW3F0 zPWs5sJT3;c9&_Y+Sz;!sMO5$!aqim3X?B3In@9X z;cbUVrE6WU{w4+lO|T+OH`vUeAO>q--C>j#Kmwq`gf14&_U&Bpw5Tqq9HO_y>}&-M z;TS7ySQ`|>@E>8W#dX<0YTl{;YK(Z8SWbz^qUFH{EOy(`Ptg%0*`#=geZcOuP!qq#r~oWp02Blw z5&Xl*fDKe}(_5&CLjRry6aph3+eo|>3Kks$Cipxk*@61wYmAD3n%BKtAb7$s12~5G zkKil{H?|<(x6vO`6_^a8E)j-yBOj~(lab=j+rS-`7mOf_h6HMzNEwg@1q=%_DQF0x zh$CiVG8+IG)&<4nr;l@&4K2knylcVM!iu3<_`Qi14s546huAeux_hptIa5A!i;D!ji-?VX{#G*lojaRsMl)YxIIgAt`KwJM94h$mpgD3`RHP(g=(NTVFdr)Y^ zAbw?An!&f`BLy)Qlwk_$r(suEW}8|62iGPJjzgz?@(f45+Yb@ z@*a#U#{Lm8xBoZ$`&SCN191hIUpP{L(Q^wzA}+$w9N6pthW0($1*HR%SUF$~^T}cx zk-P6Akmh)g#ZO4Q)||;B4mc_VB8R^VTOWkZEPWCHiyIM0niR<9x4WtqOr~IfcW5h? 
zBaR(dnIc(jc^|SK9yWa(=ZAcUMy@49gbH;fLoM3Ec$6 zlV~C;HQ0c-eb2}YWC=*wh%ni@2g#)wZozsGU+lfZWS%iTtVdg$BzSnH2#N#D-2b_F zwp{8=x`Vsj*NDk2s?LFAg#Du}?X}zSAC)}|Ts#9n7STA1Ay#%7!Ljf>&GG-;A)WB( ziy2R7@_jpY0&UK|r%4HE3pK&sYrOR=;b84)?VMz^;bD$B@%Rce=1OD!rp#1o{<8vD zbMPM^)56Rk@;Ls2M~zrChGQhCX(r=hObu(dSlvSA80iCSfpmyilMUZ@ZZ%LV@BgEf zZ|cy9g@lH`hu0Ca$PNep%@~3iPI=Pc3*||WJ{x|`-#AC|{f?)JLl|J;2}-jd#CQC_ zph0F}H9^=P$BfX*K`7YnAZsw)cxZ#!7Qxt80;wH+3IPNFTtX-u0$@@WL&6|q7K3B7 z$X=fvA#uY@jSZUM0&@`laUn7?XM{VX0;8{>#J}4>B#u6?$UE#pM8GM*m}o`Q=i)pK zAb_;awQhEHs73#2#o`+Bk9f9O>>uFCY)V!vR3B6x$QrntxX7WIjgtg}R59;2v|0HY z5uILhltUiay+9!ljLkSFxw}ILrn@WVsOEhwQ+m^Scu99k_Pup zR!V9ZzO^a?@f?+wgP!OrIL^XEe2%x+Xg|eri}rFX#!NW^Jd8j??89L&CiNgBSbCxb zEppO)*rS6kSJK1*Z%`nl34p^{@Nb}y2s;{;LT4^|*J{CN2g8#lN@Fl7cIqQ8=!h_( zeGM6a!OD}fZYMw*fFxjDlc8S-qBy2tNOfA&sqrCa7~udLj7K9(Xd^|4ZkM+lBRE!r zVYxp(;W#&TF*_8lOVcj6YeQ;@r9GSsfG7;A9=^wAsDPsfKX!G;ZWvud5DtgbaEu5t zk5xe!RTyHfG_5M-Z;iJ8Pw>YuGf^NejPu~KYRXPA?jKB;7uE|06fK^Ca301#fG&WN@L2;fu@jm}M4{Klk3dm5 ziiO}MMzHL&LWhZu*-g4iDsES@Po9VVBvb8#0cAwJaJMy2Ne@B0UCR_w2PGXt7l=wE zM-^yNFrJPh7Az9LKro^Whz5ahEIWeIP4Kpq1at4YL0ZUqAT5Zw;BFm?UChuMN5)oJ zu=amOVRGE}Je&xjxX^L^Xox`)W4y@F2}c>GSe)sHX2PQ?EV9I7S}Y`pz#ydo7{ibT zXImioj%OIi0}|$OCO2IzwA2yThZ$l_#WVa`S4pjkdM>f=A9~EmX*NVh*%jus9kK&S zmO(j$yA5?hI>5nGG#=xR2?OI6N(U*0w9O*~>{tqaajzROvm*^CZ{RW$1@bzN(}B!k zNEzXFhB*yQB}ea4MyJl4&gVmvV(0{sFe3i1ZxKYoP@NfzyWkT_L_~fV3@o2WgCYL+ zJc;k>FT5LRCGkbqz?hH8i2N*Utug?PpstOsg)N%EBMWRu6Jr>t+2Q0VWF;^l8J+rb zI1Mz8W+6O`SOH=(_P6nTSexva5{x_m1s3l+o{z?eOK-d_ z0pxke@{s3kY$_MK!v#n&M0s9gc4mzDAVLDeIB;!UVeN-;ZfuUS4%i_i9Uxh&udog{ zywI=-_a0a!29DWj4YhEP+J+?%8$_rAV?MqCW(a~|^!<~Fl3yF@BT~aLMKI}1ctN0< zqeV6ZjZqhT&t71%Fh>FH&@|Q>$j25vq|swB7a}ejj=+%n;kYE8P5Q0dBz^E0jt4Tl zi_ym^7r9`s)h!HW;P+wV2tAkm+AI?a69a{@7988bP%@4;sPZ|%z$FIb=m-G zuccJ%d?E`m%!?8OSBHdnOy-f3r)VZ2EDqUz$6~N?cPGe)`tP_fkB^Q_?#_0i|wdP~c-!R37 z3)`HvBxd>SjH}>T4@)pPgdPx2q%*h(RSmGUbAx}iGGhv9H?dg&P^knxC+gXKcRqic8>v!y87Om!F)VM4*AkKzEET(^8JXYTCp;AAy|j)Ch4 z0o4D{{eft4&^k{?FNBoWW(Y#+XjQxi&ma z4z$T;9+~$-#?pS4{&bu3F13MA>^Jx~EV?$A%~TeCzcoj= z$Z(0{vc1E`h%6({UOSx7GKz-0Bk3;QFQ!GlX(ybqMdPxJBY-G#yhpWnrx{q&2>v|M zO($5Em-B$2nK-A%VA&p(BGl8GgpNPq@!r=Sq5T4Ukgwo`5} z%Uj-fwVO#dpp|^+QU^#+x=oO#PY$o4Xx^A7CBVh+!E}s$F2xIT$}{C~VJc%;@Th^Uq<8aca!89gjAX@;iVt+D&4Zs3G0WxOw zhsrKIK%`Af1}Yu-xATWUX)mn9Q*n469M0;(G|A_ym8ly@qnPcsQ%jhTz#YjFOwQo6 z49oc=!N8M1Awvw9B=C6&HUaE#nb!<|7H6dKbWF@y8$rt2IRcoArIKxHgA;$2`zNr5 z6ob?oGT=B3WYW)kk5QxMn34f-hfL=G`6rhz##vrCC&?!LFtx^M7n65!h80g>fCB+H z0`u^8DnsauWlC}dpA{2yma5?1yy^_4M~nRmna}nqPyE1?qX8_=p(9s`9Hq^r^VCO3 zxuEPw?m$8SJw^%zwGIHu+zk8-L#jMd{DR3F=UK#!>FF{eWyU(_Z=4#M{fBN&FKM9g;$m*qHROTFu#B0~O3e!xP8d zeDVB!eI-A=7F#*bLO@&3)nMDFl!bE(v5&F0v9D||(LNY$XU?H!0r0;27Nr&DD=alQ zxChQYNz8c5iM-gs&ko`|6*EWpWQ^L9mKO3FZPQ$MBH9-wj$l3&Pt9N};pifJ;yBL} zZaWMI(0n+x05E>#X?Q$0(@rNuK9wi4nXpt|dX;;%m>jorRw zA(Lfiv5=}-29~fFmC34b;f9qi--*mDCe1B_g|l0E9xT!x_8BEnt!x!hoZs_`LNMZM zH2;n>Xt-=uN9W$kT-yHu7ytk}_@9B{;84Kt85jU)0084+<6skDkWl0T(Tox)K$-!_ z=K<4u8U?m2kz4#;=j(oXyN<*Px p%btxtOad4jCirW@ literal 0 HcmV?d00001 diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 894c84eeb3e..1a785d28b48 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -953,7 +953,7 @@ def generate_list_struct_buff(size=28000): ) @pytest.mark.parametrize("num_rows", [0, 15, 1005, 10561, 28000]) @pytest.mark.parametrize("use_index", [True, False]) -@pytest.mark.parametrize("skip_rows", [0, 101, 1007]) +@pytest.mark.parametrize("skip_rows", [0, 101, 1007, 27000]) def test_lists_struct_nests( columns, num_rows, use_index, 
    skip_rows,
 ):
@@ -1034,3 +1034,14 @@ def test_orc_reader_decimal_invalid_column(datadir, data):
     # Since the `decimal_cols_as_float` column name
     # is invalid, this should be a decimal
     assert_eq(pdf, gdf)
+
+
+# This test case validates the issue raised in #8665,
+# please check the issue for more details.
+def test_orc_timestamp_read(datadir):
+    path = datadir / "TestOrcFile.timestamp.issue.orc"
+
+    pdf = pd.read_orc(path)
+    gdf = cudf.read_orc(path)
+
+    assert_eq(pdf, gdf)

From 0b9ea0176c7646219bbbc474c620a902b073f58d Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Mon, 12 Jul 2021 10:32:30 -0400
Subject: [PATCH 45/54] Fix orc reader assert on create data_type in debug (#8706)

The fix from #8174 was reverted in the rework of `orc/reader_impl.cu` in
#8599. This PR reinstates the original fix to prevent an assert in debug
mode in `gtests/ORC_TEST`.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - Conor Hoekstra (https://github.com/codereport)

URL: https://github.com/rapidsai/cudf/pull/8706
---
 cpp/src/io/orc/reader_impl.cu | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index a5465090c2c..b2b4538994e 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -840,8 +840,11 @@ std::unique_ptr<column> reader::impl::create_empty_column(const int32_t orc_col_
       break;

     case type_id::DECIMAL64:
-      scale = -static_cast<int32_t>(_metadata->get_types()[orc_col_id].scale.value_or(0));
-    default: out_col = make_empty_column(data_type(type, scale));
+      scale = -static_cast<int32_t>(_metadata->get_types()[orc_col_id].scale.value_or(0));
+      out_col = make_empty_column(data_type(type, scale));
+      break;
+
+    default: out_col = make_empty_column(data_type(type));
   }

   return out_col;

From 7823a181bfbffcfb63375d8f38506d0a20696a74 Mon Sep 17 00:00:00 2001
From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>
Date: Mon, 12 Jul 2021 12:58:38 -0400
Subject: [PATCH 46/54] Add post-processing steps to `dask_cudf.groupby.CudfSeriesGroupby.aggregate` (#8694)

Closes #8655

Adds some post-processing steps to Dask-cuDF's series groupby when using
the optimized codepaths for aggregations, to match [those done by
Dask](https://github.com/dask/dask/blob/8601b540f8e7eac95fa739a5ca28f1d707299ed0/dask/dataframe/groupby.py#L1917-L1921).
These ensure that a `dask_cudf.Series` is always returned from the
groupby operation, addressing the problem observed in #8655.
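As a minimal sketch of the intended behavior (hypothetical data, not taken
from the linked issue), a series groupby aggregation routed through the
optimized codepath should come back as a series rather than a one-column
frame, thanks to the `[self._slice]` post-processing in the diff below:

```python
import cudf
import dask_cudf

df = cudf.DataFrame({"x": [1, 1, 2], "y": [10, 20, 30]})
ddf = dask_cudf.from_cudf(df, npartitions=2)

# Series groupby aggregation on the optimized codepath: the result is a
# series keyed by the group labels, not a whole frame.
res = ddf.groupby("x").y.agg("sum")
print(res.compute())
```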
Authors: - Charles Blackmon-Luca (https://github.com/charlesbluca) Approvers: - Richard (Rick) Zamora (https://github.com/rjzamora) URL: https://github.com/rapidsai/cudf/pull/8694 --- python/dask_cudf/dask_cudf/groupby.py | 2 +- python/dask_cudf/dask_cudf/tests/test_groupby.py | 16 +++------------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py index 336fdaf009c..2ec457018d9 100644 --- a/python/dask_cudf/dask_cudf/groupby.py +++ b/python/dask_cudf/dask_cudf/groupby.py @@ -143,7 +143,7 @@ def aggregate(self, arg, split_every=None, split_out=1): sep=self.sep, sort=self.sort, as_index=self.as_index, - ) + )[self._slice] return super().aggregate( arg, split_every=split_every, split_out=split_out diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py index 356567fdef0..5cb681fff55 100644 --- a/python/dask_cudf/dask_cudf/tests/test_groupby.py +++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py @@ -48,10 +48,7 @@ def test_groupby_basic_aggs(aggregation): "func", [ lambda df: df.groupby("x").agg({"y": "max"}), - pytest.param( - lambda df: df.groupby("x").y.agg(["sum", "max"]), - marks=pytest.mark.skip, - ), + lambda df: df.groupby("x").y.agg(["sum", "max"]), ], ) def test_groupby_agg(func): @@ -98,7 +95,6 @@ def test_groupby_agg_empty_partition(tmpdir, split_out): dd.assert_eq(gb.compute().sort_index(), expect) -@pytest.mark.xfail(reason="cudf issues") @pytest.mark.parametrize( "func", [lambda df: df.groupby("x").std(), lambda df: df.groupby("x").y.std()], @@ -115,13 +111,9 @@ def test_groupby_std(func): ddf = dask_cudf.from_cudf(gdf, npartitions=5) - a = func(gdf.to_pandas()) + a = func(gdf).to_pandas() b = func(ddf).compute().to_pandas() - a.index.name = None - a.name = None - b.index.name = None - dd.assert_eq(a, b) @@ -129,9 +121,7 @@ def test_groupby_std(func): "func", [ lambda df: df.groupby("x").agg({"y": "collect"}), - pytest.param( - lambda df: df.groupby("x").y.agg("collect"), marks=pytest.mark.skip - ), + lambda df: df.groupby("x").y.agg("collect"), ], ) def test_groupby_collect(func): From 9016a70618c642ae2a9251ab812ac1faf3aeffa7 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 12 Jul 2021 16:06:03 -0700 Subject: [PATCH 47/54] Translate Markdown documentation to rST and remove recommonmark (#8698) Resolves #8636 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Ashwin Srinath (https://github.com/shwina) - Michael Wang (https://github.com/isVoid) URL: https://github.com/rapidsai/cudf/pull/8698 --- conda/environments/cudf_dev_cuda11.0.yml | 1 - conda/environments/cudf_dev_cuda11.2.yml | 1 - docs/cudf/source/conf.py | 18 +- docs/cudf/source/dask-cudf.md | 78 -------- docs/cudf/source/dask-cudf.rst | 107 ++++++++++ docs/cudf/source/groupby.md | 200 ------------------- docs/cudf/source/groupby.rst | 237 +++++++++++++++++++++++ docs/cudf/source/index.rst | 6 +- docs/cudf/source/internals.md | 194 ------------------- docs/cudf/source/internals.rst | 216 +++++++++++++++++++++ 10 files changed, 565 insertions(+), 493 deletions(-) delete mode 100644 docs/cudf/source/dask-cudf.md create mode 100644 docs/cudf/source/dask-cudf.rst delete mode 100644 docs/cudf/source/groupby.md create mode 100644 docs/cudf/source/groupby.rst delete mode 100644 docs/cudf/source/internals.md create mode 100644 docs/cudf/source/internals.rst diff --git 
a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 79fd94a3644..30586c91351 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -31,7 +31,6 @@ dependencies: - nbsphinx - numpydoc - ipython - - recommonmark - pandoc=<2.0.0 - cudatoolkit=11.0 - pip diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index b1a4c52c882..f2bc5a21079 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -31,7 +31,6 @@ dependencies: - nbsphinx - numpydoc - ipython - - recommonmark - pandoc=<2.0.0 - cudatoolkit=11.2 - pip diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index f14a31325d8..c949a491d3b 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -23,7 +23,6 @@ import sys from docutils.nodes import Text -from recommonmark.transform import AutoStructify from sphinx.addnodes import pending_xref sys.path.insert(0, os.path.abspath("../..")) @@ -47,7 +46,6 @@ "IPython.sphinxext.ipython_console_highlighting", "IPython.sphinxext.ipython_directive", "nbsphinx", - "recommonmark", ] @@ -77,9 +75,9 @@ # built documents. # # The short X.Y version. -version = '21.08' +version = "21.08" # The full version, including alpha/beta/rc tags. -release = '21.08.00' +release = "21.08.00" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -207,9 +205,6 @@ autoclass_content = "init" -# Config AutoStructify -github_doc_root = "https://github.com/rtfd/recommonmark/tree/master/doc/" - # Replace API shorthands with fullname _reftarget_aliases = { "cudf.Series": ("cudf.core.series.Series", "cudf.Series"), @@ -245,14 +240,5 @@ def setup(app): app.add_js_file("copybutton_pydocs.js") app.add_css_file("params.css") app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") - app.add_config_value( - "recommonmark_config", - { - "url_resolver": lambda url: github_doc_root + url, - "auto_toc_tree_section": "Contents", - }, - True, - ) - app.add_transform(AutoStructify) app.connect("doctree-read", resolve_aliases) app.connect("missing-reference", ignore_internal_references) diff --git a/docs/cudf/source/dask-cudf.md b/docs/cudf/source/dask-cudf.md deleted file mode 100644 index 92ef4eb1c46..00000000000 --- a/docs/cudf/source/dask-cudf.md +++ /dev/null @@ -1,78 +0,0 @@ -Multi-GPU with Dask-cuDF -======================== - -cuDF is a single-GPU library. For Multi-GPU cuDF solutions we use [Dask](https://dask.org/) and the [dask-cudf package](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf), which is able to scale cuDF across multiple GPUs on a single machine, or multiple GPUs across many machines in a cluster. - -[Dask DataFrame](http://docs.dask.org/en/latest/dataframe.html) was originally designed to scale Pandas, orchestrating many Pandas DataFrames spread across many CPUs into a cohesive parallel DataFrame. Because cuDF currently implements only a subset of Pandas’s API, not all Dask DataFrame operations work with cuDF. 
- -The following is tested and expected to work: - -What works ----------- - -- Data ingestion - - ``dask_cudf.read_csv`` - - Use standard Dask ingestion with Pandas, then convert to cuDF (For - Parquet and other formats this is often decently fast) -- Linear operations - - Element-wise operations: ``df.x + df.y``, ``df ** 2`` - - Assignment: ``df['z'] = df.x + df.y`` - - Row-wise selections: ``df[df.x > 0]`` - - Loc: ``df.loc['2001-01-01': '2005-02-02']`` - - Date time/string accessors: ``df.timestamp.dt.dayofweek`` - - ... and most similar operations in this category that are already implemented in cuDF -- Reductions - - Like ``sum``, ``mean``, ``max``, ``count``, and so on on ``Series`` objects - - Support for reductions on full dataframes - - ``std`` - - Custom reductions with [dask.dataframe.reduction](http://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.Series.reduction) -- Groupby aggregations - - On single columns: ``df.groupby('x').y.max()`` - - With custom aggregations: - - groupby standard deviation - - grouping on multiple columns - - groupby agg for multiple outputs -- Joins: - - On full unsorted columns: ``left.merge(right, on='id')`` (expensive) - - On sorted indexes: ``left.merge(right, left_index=True, right_index=True)`` (fast) - - On large and small dataframes: ``left.merge(cudf_df, on='id')`` (fast) -- Rolling operations -- Converting to and from other forms - - Dask + Pandas to Dask + cuDF ``df.map_partitions(cudf.from_pandas)`` - - Dask + cuDF to Dask + Pandas ``df.map_partitions(lambda df: df.to_pandas())`` - - cuDF to Dask + cuDF: ``dask.dataframe.from_pandas(df, npartitions=20)`` - - Dask + cuDF to cuDF: ``df.compute()`` - -Additionally all generic Dask operations, like ``compute``, ``persist``, -``visualize`` and so on work regardless. - - -Developing the API ------------------- - -Above we mention the following: - -> and most similar operations in this category that are already implemented in cuDF - -This is because it is difficult to create a comprehensive list of operations in -the cuDF and Pandas libraries. The API is large enough to be difficult to track -effectively. For any operation that operates row-wise like ``fillna`` or -``query`` things will likely, but not certainly work. If operations don't work -it is often due to a slight inconsistency between Pandas and cuDF that is -generally easy to fix. We encourage users to look at the [cuDF issue -tracker](https://github.com/rapidsai/cudf/issues) to see if their issue has -already been reported and, if not, -[raise a new issue](https://github.com/rapidsai/cudf/issues/new). - - -Navigating the API ------------------- - -This project reuses the -[Dask DataFrame](https://docs.dask.org/en/latest/dataframe.html) project, which -was originally designed for Pandas, with the newer library cuDF. Because we use -the same Dask classes for both projects there are often methods that are -implemented for Pandas, but not yet for cuDF. As a result users looking at the -full Dask DataFrame API can be misleading, and often lead to frustration when -operations that are advertised in the Dask API do not work as expected with -cuDF. We apologize for this in advance. diff --git a/docs/cudf/source/dask-cudf.rst b/docs/cudf/source/dask-cudf.rst new file mode 100644 index 00000000000..1c8e5ebda43 --- /dev/null +++ b/docs/cudf/source/dask-cudf.rst @@ -0,0 +1,107 @@ +Multi-GPU with Dask-cuDF +======================== + +cuDF is a single-GPU library. 
For Multi-GPU cuDF solutions we use
+`Dask <https://dask.org/>`__ and the `dask-cudf
+package <https://github.com/rapidsai/cudf/tree/main/python/dask_cudf>`__,
+which is able to scale cuDF across multiple GPUs on a single machine, or
+multiple GPUs across many machines in a cluster.
+
+`Dask DataFrame <http://docs.dask.org/en/latest/dataframe.html>`__ was
+originally designed to scale Pandas, orchestrating many Pandas
+DataFrames spread across many CPUs into a cohesive parallel DataFrame.
+Because cuDF currently implements only a subset of Pandas’s API, not all
+Dask DataFrame operations work with cuDF.
+
+The following is tested and expected to work:
+
+What works
+----------
+
+- Data ingestion
+
+   - ``dask_cudf.read_csv``
+   - Use standard Dask ingestion with Pandas, then convert to cuDF (for
+     Parquet and other formats this is often decently fast)
+
+- Linear operations
+
+   - Element-wise operations: ``df.x + df.y``, ``df ** 2``
+   - Assignment: ``df['z'] = df.x + df.y``
+   - Row-wise selections: ``df[df.x > 0]``
+   - Loc: ``df.loc['2001-01-01': '2005-02-02']``
+   - Date time/string accessors: ``df.timestamp.dt.dayofweek``
+   - ... and most similar operations in this category that are already
+     implemented in cuDF
+
+- Reductions
+
+   - Like ``sum``, ``mean``, ``max``, ``count``, and so on, on
+     ``Series`` objects
+   - Support for reductions on full dataframes
+   - ``std``
+   - Custom reductions with
+     `dask.dataframe.reduction <http://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.Series.reduction>`__
+
+- Groupby aggregations
+
+   - On single columns: ``df.groupby('x').y.max()``
+   - With custom aggregations:
+
+      - groupby standard deviation
+      - grouping on multiple columns
+      - groupby agg for multiple outputs
+
+- Joins:
+
+   - On full unsorted columns: ``left.merge(right, on='id')``
+     (expensive)
+   - On sorted indexes:
+     ``left.merge(right, left_index=True, right_index=True)`` (fast)
+   - On large and small dataframes: ``left.merge(cudf_df, on='id')``
+     (fast)
+
+- Rolling operations
+- Converting to and from other forms
+
+   - Dask + Pandas to Dask + cuDF
+     ``df.map_partitions(cudf.from_pandas)``
+   - Dask + cuDF to Dask + Pandas
+     ``df.map_partitions(lambda df: df.to_pandas())``
+   - cuDF to Dask + cuDF:
+     ``dask.dataframe.from_pandas(df, npartitions=20)``
+   - Dask + cuDF to cuDF: ``df.compute()``
+
+Additionally all generic Dask operations, like ``compute``, ``persist``,
+``visualize`` and so on work regardless.
+
+Developing the API
+------------------
+
+Above we mention the following:
+
+   and most similar operations in this category that are already
+   implemented in cuDF
+
+This is because it is difficult to create a comprehensive list of
+operations in the cuDF and Pandas libraries. The API is large enough to
+be difficult to track effectively. For any operation that operates
+row-wise like ``fillna`` or ``query`` things will likely, but not
+certainly, work. If operations don't work it is often due to a slight
+inconsistency between Pandas and cuDF that is generally easy to fix. We
+encourage users to look at the `cuDF issue
+tracker <https://github.com/rapidsai/cudf/issues>`__ to see if their
+issue has already been reported and, if not, `raise a new
+issue <https://github.com/rapidsai/cudf/issues/new>`__.
+
+Navigating the API
+------------------
+
+This project reuses the `Dask
+DataFrame <https://docs.dask.org/en/latest/dataframe.html>`__ project,
+which was originally designed for Pandas, with the newer library cuDF.
+Because we use the same Dask classes for both projects there are often
+methods that are implemented for Pandas, but not yet for cuDF. As a
+result, the full Dask DataFrame API can be misleading for users, and
+often leads to frustration when operations that are advertised in the
+Dask API do not work as expected with cuDF. We apologize for this in
+advance.
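A quick sketch of the conversion paths listed in the document above
(hypothetical data, assuming a working dask-cudf installation):

```python
import cudf
import dask.dataframe as dd

gdf = cudf.DataFrame({"id": [1, 2, 1], "x": [0.1, 0.2, 0.3]})

ddf = dd.from_pandas(gdf, npartitions=2)              # cuDF to Dask + cuDF
pddf = ddf.map_partitions(lambda df: df.to_pandas())  # Dask + cuDF to Dask + Pandas
gddf = pddf.map_partitions(cudf.from_pandas)          # Dask + Pandas to Dask + cuDF
print(gddf.groupby("id").x.max().compute())           # back to a single cuDF result
```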
diff --git a/docs/cudf/source/groupby.md b/docs/cudf/source/groupby.md deleted file mode 100644 index 8a0e5dddba0..00000000000 --- a/docs/cudf/source/groupby.md +++ /dev/null @@ -1,200 +0,0 @@ -GroupBy -======= - -cuDF supports a small (but important) subset of -Pandas' [groupby API](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html). - -## Summary of supported operations - -1. Grouping by one or more columns -1. Basic aggregations such as "sum", "mean", etc. -1. Quantile aggregation -1. A "collect" or `list` aggregation for collecting values in a group into lists -1. Automatic exclusion of columns with unsupported dtypes ("nuisance" columns) when aggregating -1. Iterating over the groups of a GroupBy object -1. `GroupBy.groups` API that returns a mapping of group keys to row labels -1. `GroupBy.apply` API for performing arbitrary operations on each group. Note that - this has very limited functionality compared to the equivalent Pandas function. - See the section on [apply](#groupby-apply) for more details. -1. `GroupBy.pipe` similar to [Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls). - -## Grouping - -A GroupBy object is created by grouping the values of a `Series` or `DataFrame` -by one or more columns: - -```python -import cudf - ->>> df = cudf.DataFrame({'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]}) ->>> df ->>> gb1 = df.groupby('a') # grouping by a single column ->>> gb2 = df.groupby(['a', 'b']) # grouping by multiple columns ->>> gb3 = df.groupby(cudf.Series(['a', 'a', 'b', 'b', 'b'])) # grouping by an external column -``` - -``` warning:: - cuDF uses `sort=False` by default to achieve better performance, which provides no gaurentee to the group order in outputs. This deviates from Pandas default behavior. - - For example: - - .. code-block:: python - - >>> df = cudf.DataFrame({'a' : [2, 2, 1], 'b' : [42, 21, 11]}) - >>> df.groupby('a').sum() - b - a - 2 63 - 1 11 - >>> df.to_pandas().groupby('a').sum() - b - a - 1 11 - 2 63 - - Setting `sort=True` will produce Pandas-like output, but with some performance penalty: - - .. code-block:: python - - >>> df.groupby('a', sort=True).sum() - b - a - 1 11 - 2 63 - -``` - -### Grouping by index levels - -You can also group by one or more levels of a MultiIndex: - -```python ->>> df = cudf.DataFrame( -... {'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]} -... ).set_index(['a', 'b']) -... 
->>> df.groupby(level='a') -``` - -### The `Grouper` object - -A `Grouper` can be used to disambiguate between columns and levels when they have the same name: - -```python ->>> df - b c -b -1 1 1 -1 1 2 -1 2 3 -2 2 4 -2 3 5 ->>> df.groupby('b', level='b') # ValueError: Cannot specify both by and level ->>> df.groupby([cudf.Grouper(key='b'), cudf.Grouper(level='b')]) # OK -``` - -## Aggregation - -Aggregations on groups is supported via the `agg` method: - -```python ->>> df - a b c -0 1 1 1 -1 1 1 2 -2 1 2 3 -3 2 2 4 -4 2 3 5 ->>> df.groupby('a').agg('sum') - b c -a -1 4 6 -2 5 9 ->>> df.groupby('a').agg({'b': ['sum', 'min'], 'c': 'mean'}) - b c - sum min mean -a -1 4 1 2.0 -2 5 2 4.5 -``` - -The following table summarizes the available aggregations and the types that support them: - -| Aggregations\dtypes | Numeric | Datetime | String | Categorical | List | Struct | Interval | Decimal | -| ------------------- | -------- | ------- | -------- | ----------- | ---- | ------ | -------- | ------- | -| count | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -| size | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -| sum | ✅ | ✅ | | | | | | ✅ | -| idxmin | ✅ | ✅ | | | | | | ✅ | -| idxmax | ✅ | ✅ | | | | | | ✅ | -| min | ✅ | ✅ | ✅ | | | | | ✅ | -| max | ✅ | ✅ | ✅ | | | | | ✅ | -| mean | ✅ | ✅ | | | | | | | -| var | ✅ | ✅ | | | | | | | -| std | ✅ | ✅ | | | | | | | -| quantile | ✅ | ✅ | | | | | | | -| median | ✅ | ✅ | | | | | | | -| nunique | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -| nth | ✅ | ✅ | ✅ | | | | | ✅ | -| collect | ✅ | ✅ | ✅ | | ✅ | | | ✅ | -| unique | ✅ | ✅ | ✅ | ✅ | | | | | - -## GroupBy apply - -To apply function on each group, use the `GroupBy.apply()` method: - -```python ->>> df - a b c -0 1 1 1 -1 1 1 2 -2 1 2 3 -3 2 2 4 -4 2 3 5 ->>> df.groupby('a').apply(lambda x: x.max() - x.min()) - a b c -a -0 0 1 2 -1 0 1 1 -``` - -### Limitations - -* `apply` works by applying the provided function to each group sequentially, - and concatenating the results together. **This can be very slow**, especially - for a large number of small groups. For a small number of large groups, it - can give acceptable performance - -* The results may not always match Pandas exactly. For example, cuDF may return - a `DataFrame` containing a single column where Pandas returns a `Series`. - Some post-processing may be required to match Pandas behavior. - -* cuDF does not support some of the exceptional cases that Pandas supports with - `apply`, such as [`describe`](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#flexible-apply). - -## Rolling window calculations - -Use the `GroupBy.rolling()` method to perform rolling window calculations on each group: - -```python ->>> df - a b c -0 1 1 1 -1 1 1 2 -2 1 2 3 -3 2 2 4 -4 2 3 5 -``` - -Rolling window sum on each group with a window size of 2: - -```python ->>> df.groupby('a').rolling(2).sum() - a b c -a -1 0 - 1 2 2 3 - 2 2 3 5 -2 3 - 4 4 5 9 -``` diff --git a/docs/cudf/source/groupby.rst b/docs/cudf/source/groupby.rst new file mode 100644 index 00000000000..a6ce9db6817 --- /dev/null +++ b/docs/cudf/source/groupby.rst @@ -0,0 +1,237 @@ +GroupBy +======= + +cuDF supports a small (but important) subset of Pandas' `groupby +API `__. + +Summary of supported operations +------------------------------- + +1. Grouping by one or more columns +2. Basic aggregations such as "sum", "mean", etc. +3. Quantile aggregation +4. A "collect" or ``list`` aggregation for collecting values in a group + into lists +5. Automatic exclusion of columns with unsupported dtypes ("nuisance" + columns) when aggregating +6. 
Iterating over the groups of a GroupBy object +7. ``GroupBy.groups`` API that returns a mapping of group keys to row + labels +8. ``GroupBy.apply`` API for performing arbitrary operations on each + group. Note that this has very limited functionality compared to the + equivalent Pandas function. See the section on + `apply <#groupby-apply>`__ for more details. +9. ``GroupBy.pipe`` similar to + `Pandas `__. + +Grouping +-------- + +A GroupBy object is created by grouping the values of a ``Series`` or +``DataFrame`` by one or more columns: + +.. code:: python + + import cudf + + >>> df = cudf.DataFrame({'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]}) + >>> df + >>> gb1 = df.groupby('a') # grouping by a single column + >>> gb2 = df.groupby(['a', 'b']) # grouping by multiple columns + >>> gb3 = df.groupby(cudf.Series(['a', 'a', 'b', 'b', 'b'])) # grouping by an external column + +.. warning:: + + cuDF uses `sort=False` by default to achieve better performance, which provides no gaurentee to the group order in outputs. This deviates from Pandas default behavior. + + For example: + + .. code-block:: python + + >>> df = cudf.DataFrame({'a' : [2, 2, 1], 'b' : [42, 21, 11]}) + >>> df.groupby('a').sum() + b + a + 2 63 + 1 11 + >>> df.to_pandas().groupby('a').sum() + b + a + 1 11 + 2 63 + + Setting `sort=True` will produce Pandas-like output, but with some performance penalty: + + .. code-block:: python + + >>> df.groupby('a', sort=True).sum() + b + a + 1 11 + 2 63 + +Grouping by index levels +~~~~~~~~~~~~~~~~~~~~~~~~ + +You can also group by one or more levels of a MultiIndex: + +.. code:: python + + >>> df = cudf.DataFrame( + ... {'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]} + ... ).set_index(['a', 'b']) + ... + >>> df.groupby(level='a') + +The ``Grouper`` object +~~~~~~~~~~~~~~~~~~~~~~ + +A ``Grouper`` can be used to disambiguate between columns and levels +when they have the same name: + +.. code:: python + + >>> df + b c + b + 1 1 1 + 1 1 2 + 1 2 3 + 2 2 4 + 2 3 5 + >>> df.groupby('b', level='b') # ValueError: Cannot specify both by and level + >>> df.groupby([cudf.Grouper(key='b'), cudf.Grouper(level='b')]) # OK + +Aggregation +----------- + +Aggregations on groups is supported via the ``agg`` method: + +.. 
code:: python + + >>> df + a b c + 0 1 1 1 + 1 1 1 2 + 2 1 2 3 + 3 2 2 4 + 4 2 3 5 + >>> df.groupby('a').agg('sum') + b c + a + 1 4 6 + 2 5 9 + >>> df.groupby('a').agg({'b': ['sum', 'min'], 'c': 'mean'}) + b c + sum min mean + a + 1 4 1 2.0 + 2 5 2 4.5 + +The following table summarizes the available aggregations and the types +that support them: + ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| Aggregations / dtypes | Numeric | Datetime | String | Categorical | List | Struct | Interval | Decimal | ++====================================+===========+============+==========+===============+========+==========+============+===========+ +| count | ✅ | ✅ | ✅ | ✅ | | | | ✅ | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| size | ✅ | ✅ | ✅ | ✅ | | | | ✅ | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| sum | ✅ | ✅ | | | | | | ✅ | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| idxmin | ✅ | ✅ | | | | | | ✅ | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| idxmax | ✅ | ✅ | | | | | | ✅ | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| min | ✅ | ✅ | ✅ | | | | | ✅ | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| max | ✅ | ✅ | ✅ | | | | | ✅ | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| mean | ✅ | ✅ | | | | | | | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| var | ✅ | ✅ | | | | | | | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| std | ✅ | ✅ | | | | | | | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| quantile | ✅ | ✅ | | | | | | | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| median | ✅ | ✅ | | | | | | | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| nunique | ✅ | ✅ | ✅ | ✅ | | | | ✅ | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| nth | ✅ | ✅ | ✅ | | | | | ✅ | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| collect | ✅ | ✅ | ✅ | | ✅ | | | ✅ | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +| unique | ✅ | ✅ | ✅ | ✅ | | | | | ++------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + +GroupBy apply +------------- + +To apply function on each group, use the 
``GroupBy.apply()`` method: + +.. code:: python + + >>> df + a b c + 0 1 1 1 + 1 1 1 2 + 2 1 2 3 + 3 2 2 4 + 4 2 3 5 + >>> df.groupby('a').apply(lambda x: x.max() - x.min()) + a b c + a + 0 0 1 2 + 1 0 1 1 + +Limitations +~~~~~~~~~~~ + +- ``apply`` works by applying the provided function to each group + sequentially, and concatenating the results together. **This can be + very slow**, especially for a large number of small groups. For a + small number of large groups, it can give acceptable performance + +- The results may not always match Pandas exactly. For example, cuDF + may return a ``DataFrame`` containing a single column where Pandas + returns a ``Series``. Some post-processing may be required to match + Pandas behavior. + +- cuDF does not support some of the exceptional cases that Pandas + supports with ``apply``, such as calling |describe|_ inside the + callable. + + .. |describe| replace:: ``describe`` + .. _describe: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#flexible-apply + +Rolling window calculations +--------------------------- + +Use the ``GroupBy.rolling()`` method to perform rolling window +calculations on each group: + +.. code:: python + + >>> df + a b c + 0 1 1 1 + 1 1 1 2 + 2 1 2 3 + 3 2 2 4 + 4 2 3 5 + +Rolling window sum on each group with a window size of 2: + +.. code:: python + + >>> df.groupby('a').rolling(2).sum() + a b c + a + 1 0 + 1 2 2 3 + 2 2 3 5 + 2 3 + 4 4 5 9 diff --git a/docs/cudf/source/index.rst b/docs/cudf/source/index.rst index bba0ed824b1..ee720cc9739 100644 --- a/docs/cudf/source/index.rst +++ b/docs/cudf/source/index.rst @@ -9,11 +9,11 @@ Welcome to cuDF's documentation! 10min.ipynb basics.rst io.rst - groupby.md - dask-cudf.md + groupby.rst + dask-cudf.rst 10min-cudf-cupy.ipynb guide-to-udfs.ipynb - internals.md + internals.rst Working-with-missing-data.ipynb Indices and tables diff --git a/docs/cudf/source/internals.md b/docs/cudf/source/internals.md deleted file mode 100644 index d0a2a324c17..00000000000 --- a/docs/cudf/source/internals.md +++ /dev/null @@ -1,194 +0,0 @@ -cuDF internals -============== - -The cuDF API closely matches that of the [Pandas](https://pandas.pydata.org/) library. -Thus, we have the types `cudf.Series`, `cudf.DataFrame` and `cudf.Index` which look and -feel very much like their Pandas counterparts. - -Under the hood, however, cuDF uses data structures very different from Pandas. In this document, -we describe these internal data structures. - -## Column - -Columns are cuDF's core data structure and they are modeled after -the [Apache Arrow Columnar Format](https://arrow.apache.org/docs/format/Columnar.html). - -A column represents a sequence of values, any number of which may be "null". Columns are -specialized based on the type of data they contain. Thus we have `NumericalColumn`, `StringColumn`, -`DatetimeColumn`, etc., - -A column is composed of the following: - -* A **data type**, specifying the type of each element. -* A **data buffer** that may store the data for the column elements. - Some column types do not have a data buffer, instead storing data in the children columns. -* A **mask buffer** whose bits represent the validity (null or not null) of each element. - Columns whose elements are all "valid" may not have a mask buffer. Mask buffers are padded - to 64 bytes. -* A tuple of **children** columns, which enable the representation complex types such as - columns with non-fixed width elements such as strings or lists. 
-* A **size** indicating the number of elements in the column. -* An integer **offset**: a column may represent a "slice" of another column, - in which case this offset represents the first element of the slice. The size of - the column then gives the extent of the slice. A column that is not a slice - has an offset of 0. - -For example, the `NumericalColumn` backing a Series with 1000 elements of type 'int32' -and containing nulls is composed of: - -1. A data buffer of size 4000 bytes (sizeof(int32) * 1000) -2. A mask buffer of size 128 bytes (1000/8 padded to a multiple of 64 bytes) -3. No children columns - -As another example, the `StringColumn` backing the Series -`['do', 'you', 'have', 'any', 'cheese?']` is composed of: - -1. No data buffer -2. No mask buffer as there are no nulls in the Series -3. Two children columns: - - A column of 8-bit characters `['d', 'o', 'y', 'o', 'u', h' ... '?']` - - A column of "offsets" to the characters column (in this case, `[0, 2, 5, 9, 12, 19]`) - -## Buffer - -The data and mask buffers of a column represent data in GPU memory (a.k.a *device memory*), -and are object of type `cudf.core.buffer.Buffer`. - -Buffers can be constructed from array-like objects that live either on the host (e.g., numpy arrays) -or the device (e.g., cupy arrays). Arrays must be of `uint8` dtype or viewed as such. - -When constructing a Buffer from a host object such as a numpy array, new device memory is allocated: - -```python ->>> from cudf.core.buffer import Buffer ->>> buf = Buffer(np.array([1, 2, 3], dtype='int64').view("uint8")) ->>> print(buf.ptr) # address of new device memory allocation -140050901762560 ->>> print(buf.size) -24 ->>> print(buf._owner) - -``` - -cuDF uses the [RMM](https://github.com/rapidsai/rmm) library for allocating device memory. -You can read more about device memory allocation with RMM -[here](https://github.com/rapidsai/rmm#devicebuffers). - -When constructing a Buffer from a device object such as a CuPy array, no new device memory is -allocated. Instead, the Buffer points to the existing allocation, keeping a reference to the device -array: - -```python ->>> import cupy as cp ->>> c_ary = cp.asarray([1, 2, 3], dtype='int64') ->>> buf = Buffer(c_ary.view("uint8")) ->>> print(c_ary.data.mem.ptr) -140050901762560 ->>> print(buf.ptr) -140050901762560 ->>> print(buf.size) -24 ->>> print(buf._owner is c_ary) -True -``` - -An uninitialized block of device memory can be allocated with `Buffer.empty`: - -```python ->>> buf = Buffer.empty(10) ->>> print(buf.size) -10 ->>> print(buf._owner) - -``` - -## ColumnAccessor - -cuDF `Series`, `DataFrame` and `Index` are all subclasses of an internal `Frame` class. -The underlying data structure of `Frame` is an ordered, dictionary-like object -known as `ColumnAccessor`, which can be accessed via the `._data` attribute: - -```python ->>> a = cudf.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']}) ->>> a._data -ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) -``` - -ColumnAccessor is an ordered mapping of column labels to columns. In addition to behaving -like an OrderedDict, it supports things like selecting multiple columns (both by index and label), as well as hierarchical indexing. 
- -```python ->>> from cudf.core.column_accessor import ColumnAccessor -``` - -The values of a ColumnAccessor are coerced to Columns during construction: - -```python ->>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c']}) ->>> ca['x'] - ->>> ca['y'] - ->>> ca.pop('x') - ->>> ca -ColumnAccessor(OrderedColumnDict([('y', )]), multiindex=False, level_names=(None,)) -``` - -Columns can be inserted at a specified location: - -```python ->>> ca.insert('z', [3, 4, 5], loc=1) ->>> ca -ColumnAccessor(OrderedColumnDict([('x', ), ('z', ), ('y', )]), multiindex=False, level_names=(None,)) -``` - -Selecting columns by index: - -```python ->>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c'], 'z': [4, 5, 6]}) ->>> ca.select_by_index(1) -ColumnAccessor(OrderedColumnDict([('y', )]), multiindex=False, level_names=(None,)) ->>> ca.select_by_index([0, 1]) -ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) ->>> ca.select_by_index(slice(1, 3)) -ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,)) -``` - -Selecting columns by label: - -```python ->>> ca.select_by_label(['y', 'z']) -ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,)) ->>> ca.select_by_label(slice('x', 'y')) -ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) -``` - -A ColumnAccessor with tuple keys (and constructed with `multiindex=True`) -can be hierarchically indexed: - -```python ->>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], ('a', 'c'): [2, 3, 4], 'b': [4, 5, 6]}, multiindex=True) ->>> ca.select_by_label('a') -ColumnAccessor(OrderedColumnDict([('b', ), ('c', )]), multiindex=False, level_names=(None,)) ->>> ca.select_by_label(('a', 'b')) -ColumnAccessor(OrderedColumnDict([(('a', 'b'), )]), multiindex=False, level_names=(None,)) -``` - -"Wildcard" indexing is also allowed: - -```python ->>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], ('a', 'c'): [2, 3, 4], ('d', 'b'): [4, 5, 6]}, multiindex=True) ->>> ca.select_by_label((slice(None), 'b')) -ColumnAccessor(OrderedColumnDict([(('a', 'b'), ), (('d', 'b'), )]), multiindex=True, level_names=(None, None)) -``` - -Finally, ColumnAccessors can convert to Pandas `Index` or `MultiIndex` objects: - -```python ->>> ca.to_pandas_index() -MultiIndex([('a', 'b'), - ('a', 'c'), - ('d', 'b')], - ) -``` diff --git a/docs/cudf/source/internals.rst b/docs/cudf/source/internals.rst new file mode 100644 index 00000000000..60b63c6fab8 --- /dev/null +++ b/docs/cudf/source/internals.rst @@ -0,0 +1,216 @@ +cuDF internals +============== + +The cuDF API closely matches that of the +`Pandas `__ library. Thus, we have the types +``cudf.Series``, ``cudf.DataFrame`` and ``cudf.Index`` which look and +feel very much like their Pandas counterparts. + +Under the hood, however, cuDF uses data structures very different from +Pandas. In this document, we describe these internal data structures. + +Column +------ + +Columns are cuDF's core data structure and they are modeled after the +`Apache Arrow Columnar +Format `__. + +A column represents a sequence of values, any number of which may be +"null". Columns are specialized based on the type of data they contain. +Thus we have ``NumericalColumn``, ``StringColumn``, ``DatetimeColumn``, +etc., + +A column is composed of the following: + +- A **data type**, specifying the type of each element. +- A **data buffer** that may store the data for the column elements. 
+  Some column types do not have a data buffer, instead storing data in
+  the children columns.
+- A **mask buffer** whose bits represent the validity (null or not
+  null) of each element. Columns whose elements are all "valid" may not
+  have a mask buffer. Mask buffers are padded to a multiple of 64 bytes.
+- A tuple of **children** columns, which enable the representation of
+  complex types such as columns with non-fixed width elements such as
+  strings or lists.
+- A **size** indicating the number of elements in the column.
+- An integer **offset**: a column may represent a "slice" of another
+  column, in which case this offset represents the first element of the
+  slice. The size of the column then gives the extent of the slice. A
+  column that is not a slice has an offset of 0.
+
+For example, the ``NumericalColumn`` backing a Series with 1000 elements
+of type 'int32' and containing nulls is composed of:
+
+1. A data buffer of size 4000 bytes (sizeof(int32) \* 1000)
+2. A mask buffer of size 128 bytes (1000/8 padded to a multiple of 64
+   bytes)
+3. No children columns
+
+As another example, the ``StringColumn`` backing the Series
+``['do', 'you', 'have', 'any', 'cheese?']`` is composed of:
+
+1. No data buffer
+2. No mask buffer as there are no nulls in the Series
+3. Two children columns:
+
+   - A column of 8-bit characters
+     ``['d', 'o', 'y', 'o', 'u', 'h' ... '?']``
+   - A column of "offsets" to the characters column (in this case,
+     ``[0, 2, 5, 9, 12, 19]``)
+
+Buffer
+------
+
+The data and mask buffers of a column represent data in GPU memory
+(a.k.a. *device memory*), and are objects of type
+``cudf.core.buffer.Buffer``.
+
+Buffers can be constructed from array-like objects that live either on
+the host (e.g., numpy arrays) or the device (e.g., cupy arrays). Arrays
+must be of ``uint8`` dtype or viewed as such.
+
+When constructing a Buffer from a host object such as a numpy array, new
+device memory is allocated:
+
+.. code:: python
+
+    >>> from cudf.core.buffer import Buffer
+    >>> buf = Buffer(np.array([1, 2, 3], dtype='int64').view("uint8"))
+    >>> print(buf.ptr)  # address of new device memory allocation
+    140050901762560
+    >>> print(buf.size)
+    24
+    >>> print(buf._owner)
+
+cuDF uses the `RMM <https://github.com/rapidsai/rmm>`__ library for
+allocating device memory. You can read more about device memory
+allocation with RMM
+`here <https://github.com/rapidsai/rmm#devicebuffers>`__.
+
+When constructing a Buffer from a device object such as a CuPy array, no
+new device memory is allocated. Instead, the Buffer points to the
+existing allocation, keeping a reference to the device array:
+
+.. code:: python
+
+    >>> import cupy as cp
+    >>> c_ary = cp.asarray([1, 2, 3], dtype='int64')
+    >>> buf = Buffer(c_ary.view("uint8"))
+    >>> print(c_ary.data.mem.ptr)
+    140050901762560
+    >>> print(buf.ptr)
+    140050901762560
+    >>> print(buf.size)
+    24
+    >>> print(buf._owner is c_ary)
+    True
+
+An uninitialized block of device memory can be allocated with
+``Buffer.empty``:
+
+.. code:: python
+
+    >>> buf = Buffer.empty(10)
+    >>> print(buf.size)
+    10
+    >>> print(buf._owner)
+
+ColumnAccessor
+--------------
+
+cuDF ``Series``, ``DataFrame`` and ``Index`` are all subclasses of an
+internal ``Frame`` class. The underlying data structure of ``Frame`` is
+an ordered, dictionary-like object known as ``ColumnAccessor``, which
+can be accessed via the ``._data`` attribute:
+
+.. code:: python
+
+    >>> a = cudf.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
+    >>> a._data
+    ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,))
+
+ColumnAccessor is an ordered mapping of column labels to columns. In
+addition to behaving like an OrderedDict, it supports things like
+selecting multiple columns (both by index and label), as well as
+hierarchical indexing.
+
+.. code:: python
+
+    >>> from cudf.core.column_accessor import ColumnAccessor
+
+The values of a ColumnAccessor are coerced to Columns during
+construction:
+
+.. code:: python
+
+    >>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
+    >>> ca['x']
+
+    >>> ca['y']
+
+    >>> ca.pop('x')
+
+    >>> ca
+    ColumnAccessor(OrderedColumnDict([('y', )]), multiindex=False, level_names=(None,))
+
+Columns can be inserted at a specified location:
+
+.. code:: python
+
+    >>> ca.insert('z', [3, 4, 5], loc=1)
+    >>> ca
+    ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,))
+
+Selecting columns by index:
+
+.. code:: python
+
+    >>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c'], 'z': [4, 5, 6]})
+    >>> ca.select_by_index(1)
+    ColumnAccessor(OrderedColumnDict([('y', )]), multiindex=False, level_names=(None,))
+    >>> ca.select_by_index([0, 1])
+    ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,))
+    >>> ca.select_by_index(slice(1, 3))
+    ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,))
+
+Selecting columns by label:
+
+.. code:: python
+
+    >>> ca.select_by_label(['y', 'z'])
+    ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,))
+    >>> ca.select_by_label(slice('x', 'y'))
+    ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,))
+
+A ColumnAccessor with tuple keys (and constructed with
+``multiindex=True``) can be hierarchically indexed:
+
+.. code:: python
+
+    >>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], ('a', 'c'): [2, 3, 4], 'b': [4, 5, 6]}, multiindex=True)
+    >>> ca.select_by_label('a')
+    ColumnAccessor(OrderedColumnDict([('b', ), ('c', )]), multiindex=False, level_names=(None,))
+    >>> ca.select_by_label(('a', 'b'))
+    ColumnAccessor(OrderedColumnDict([(('a', 'b'), )]), multiindex=False, level_names=(None,))
+
+"Wildcard" indexing is also allowed:
+
+.. code:: python
+
+    >>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], ('a', 'c'): [2, 3, 4], ('d', 'b'): [4, 5, 6]}, multiindex=True)
+    >>> ca.select_by_label((slice(None), 'b'))
+    ColumnAccessor(OrderedColumnDict([(('a', 'b'), ), (('d', 'b'), )]), multiindex=True, level_names=(None, None))
+
+Finally, ColumnAccessors can convert to Pandas ``Index`` or
+``MultiIndex`` objects:
+
+.. code:: python
+
+    >>> ca.to_pandas_index()
+    MultiIndex([('a', 'b'),
+                ('a', 'c'),
+                ('d', 'b')],
+               )

From 05fd176f331c115b14eeba0347da0e5e7e09dfdc Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Mon, 12 Jul 2021 19:49:50 -0400
Subject: [PATCH 48/54] Fix min/max inclusive cudf::scan for strings column
 (#8705)

Closes #8684

A bug in `thrust::inclusive_scan` reported
[here](https://github.com/NVIDIA/thrust/issues/1479) is passing invalid data
to the AssociateOperator parameter provided by `cudf::detail::inclusive_scan`.
The invalid data is likely uninitialized memory used by CUDA blocks/threads
where the result is ignored.
For regular fixed-width and primitive types this is harmless since operations just produce invalid results which are not used. Unfortunately, for `string_view` objects, this invalid data will cause a crash since it normally requires de-referencing a device-memory pointer. This PR works around the issue by creating a custom scan-strings-operator wrapper for inclusive-scan. The operator accepts index values that are checked and used to access the individual rows. The underlying operator is then called to determine which index is returned. The end result is a vector of indices which is passed to a gather call to build the output column. Also, added some additional scan gtests with 512 strings as described in issue #8684 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) - Karthikeyan (https://github.com/karthikeyann) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/8705 --- cpp/include/cudf/strings/string_view.cuh | 3 +- cpp/src/reductions/scan/scan_inclusive.cu | 67 +++++++++++++++++++---- cpp/tests/reductions/scan_tests.cpp | 36 ++++++++++++ 3 files changed, 93 insertions(+), 13 deletions(-) diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index a7559c7fbcb..238d55d580e 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -274,7 +274,8 @@ __device__ inline int string_view::compare(const char* data, size_type bytes) co size_type const len1 = size_bytes(); const unsigned char* ptr1 = reinterpret_cast(this->data()); const unsigned char* ptr2 = reinterpret_cast(data); - size_type idx = 0; + if ((ptr1 == ptr2) && (bytes == len1)) return 0; + size_type idx = 0; for (; (idx < len1) && (idx < bytes); ++idx) { if (*ptr1 != *ptr2) return static_cast(*ptr1) - static_cast(*ptr2); ++ptr1; diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index f729f812b28..1beb9ecb282 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -67,7 +68,46 @@ rmm::device_buffer mask_scan(const column_view& input_view, namespace { /** - * @brief Dispatcher for running Scan operation on input column + * @brief Strings inclusive scan operator + * + * This was specifically created to workaround a thrust issue + * https://github.com/NVIDIA/thrust/issues/1479 + * where invalid values are passed to the operator. + * + * This operator will accept index values, check them and then + * run the `Op` operation on the individual string_view objects. + * The returned result is the appropriate index value. 
+ */ +template +struct string_scan_operator { + column_device_view const col; ///< strings column device view + string_view const null_replacement{}; ///< value used when element is null + bool const has_nulls; ///< true if col has null elements + + string_scan_operator(column_device_view const& col, bool has_nulls = true) + : col{col}, null_replacement{Op::template identity()}, has_nulls{has_nulls} + { + CUDF_EXPECTS(type_id::STRING == col.type().id(), "the data type mismatch"); + // verify validity bitmask is non-null, otherwise, is_null_nocheck() will crash + if (has_nulls) CUDF_EXPECTS(col.nullable(), "column with nulls must have a validity bitmask"); + } + + CUDA_DEVICE_CALLABLE + size_type operator()(size_type lhs, size_type rhs) const + { + // thrust::inclusive_scan may pass us garbage values so we need to protect ourselves; + // in these cases the return value does not matter since the result is not used + if (lhs < 0 || rhs < 0 || lhs >= col.size() || rhs >= col.size()) return 0; + string_view d_lhs = + has_nulls && col.is_null_nocheck(lhs) ? null_replacement : col.element(lhs); + string_view d_rhs = + has_nulls && col.is_null_nocheck(rhs) ? null_replacement : col.element(rhs); + return Op{}(d_lhs, d_rhs) == d_lhs ? lhs : rhs; + } +}; + +/** + * @brief Dispatcher for running a Scan operation on an input column * * @tparam Op device binary operator */ @@ -117,22 +157,25 @@ struct scan_dispatcher { { auto d_input = column_device_view::create(input_view, stream); - rmm::device_uvector result(input_view.size(), stream); - auto begin = - make_null_replacement_iterator(*d_input, Op::template identity(), input_view.has_nulls()); - thrust::inclusive_scan( - rmm::exec_policy(stream), begin, begin + input_view.size(), result.data(), Op{}); - - CHECK_CUDA(stream.value()); - return cudf::make_strings_column(result, Op::template identity(), stream, mr); + // build indices of the scan operation results + rmm::device_uvector result(input_view.size(), stream); + thrust::inclusive_scan(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input_view.size()), + result.begin(), + string_scan_operator{*d_input, input_view.has_nulls()}); + + // call gather using the indices to build the output column + return cudf::strings::detail::gather( + strings_column_view(input_view), result.begin(), result.end(), false, stream, mr); } public: /** - * @brief creates new column from input column by applying scan operation + * @brief Creates a new column from the input column by applying the scan operation * - * @param input input column view - * @param inclusive inclusive or exclusive scan + * @param input Input column view + * @param null_handling How null row entries are to be processed * @param stream CUDA stream used for device memory operations and kernel launches. 
* @param mr Device memory resource used to allocate the returned column's device memory * @return diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index 92ba1f9e60f..ef5a66a2019 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -394,3 +395,38 @@ TYPED_TEST(ScanTest, LeadingNulls) this->scan_test(v, b, cudf::make_min_aggregation(), scan_type::INCLUSIVE, null_policy::INCLUDE); this->scan_test(v, b, cudf::make_min_aggregation(), scan_type::EXCLUSIVE, null_policy::INCLUDE); } + +class ScanStringsTest : public ScanTest { +}; + +TEST_F(ScanStringsTest, MoreStringsMinMax) +{ + int row_count = 512; + + auto data_begin = cudf::detail::make_counting_transform_iterator(0, [](auto idx) { + char const s[] = {static_cast('a' + (idx % 26)), 0}; + return std::string(s); + }); + auto validity = cudf::detail::make_counting_transform_iterator( + 0, [](auto idx) -> bool { return (idx % 23) != 22; }); + cudf::test::strings_column_wrapper col(data_begin, data_begin + row_count, validity); + + thrust::host_vector v(data_begin, data_begin + row_count); + thrust::host_vector b(validity, validity + row_count); + + this->scan_test(v, {}, cudf::make_min_aggregation(), scan_type::INCLUSIVE); + this->scan_test(v, b, cudf::make_min_aggregation(), scan_type::INCLUSIVE); + this->scan_test(v, b, cudf::make_min_aggregation(), scan_type::INCLUSIVE, null_policy::EXCLUDE); + + this->scan_test(v, {}, cudf::make_min_aggregation(), scan_type::EXCLUSIVE); + this->scan_test(v, b, cudf::make_min_aggregation(), scan_type::EXCLUSIVE); + this->scan_test(v, b, cudf::make_min_aggregation(), scan_type::EXCLUSIVE, null_policy::EXCLUDE); + + this->scan_test(v, {}, cudf::make_max_aggregation(), scan_type::INCLUSIVE); + this->scan_test(v, b, cudf::make_max_aggregation(), scan_type::INCLUSIVE); + this->scan_test(v, b, cudf::make_max_aggregation(), scan_type::INCLUSIVE, null_policy::EXCLUDE); + + this->scan_test(v, {}, cudf::make_max_aggregation(), scan_type::EXCLUSIVE); + this->scan_test(v, b, cudf::make_max_aggregation(), scan_type::EXCLUSIVE); + this->scan_test(v, b, cudf::make_max_aggregation(), scan_type::EXCLUSIVE, null_policy::EXCLUDE); +} From 7521c3f2c1acc8161c82a5e4cd1d9f96f58945ec Mon Sep 17 00:00:00 2001 From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com> Date: Mon, 12 Jul 2021 19:13:00 -0500 Subject: [PATCH 49/54] Fix a crash in pack() when being handed tables with no columns. (#8697) Also changes the behavior of pack() such that when returning empty data, the `metadata_` and `gpu_data` unique_ptrs are not null, but instead point to empty `metadata` and `rmm::device_buffer` objects, respectively. Very mild breakage of the interface. 
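For illustration, a minimal sketch of the behavior this change guarantees (the default-constructed empty `table_view` mirrors the new unit test; `pack`/`unpack` are the public libcudf APIs touched here, and the helper function name is hypothetical):

```cpp
#include <cudf/copying.hpp>
#include <cudf/table/table_view.hpp>

#include <cassert>

// Sketch: packing a table with no columns no longer crashes, and the
// returned packed_columns holds non-null (but empty) metadata_ and gpu_data.
void pack_empty_table_sketch()
{
  cudf::table_view empty;  // a table with no columns
  cudf::packed_columns packed = cudf::pack(empty);

  assert(packed.metadata_ != nullptr);  // empty metadata, not a null pointer
  assert(packed.metadata_->size() == 0);
  assert(packed.gpu_data != nullptr);   // empty device buffer, not a null pointer
  assert(packed.gpu_data->size() == 0);

  // unpack() round-trips to an empty table_view.
  cudf::table_view roundtrip = cudf::unpack(packed);
  assert(roundtrip.num_columns() == 0);
}
```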
Authors: - https://github.com/nvdbaranec Approvers: - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu) - Mark Harris (https://github.com/harrism) URL: https://github.com/rapidsai/cudf/pull/8697 --- cpp/include/cudf/copying.hpp | 10 ++++++++++ cpp/src/copying/pack.cpp | 12 ++++++++---- cpp/tests/copying/pack_tests.cpp | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 6be865ea993..6ab115196d6 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -529,6 +529,7 @@ struct packed_columns { * @ingroup copy_split */ struct metadata { + metadata() = default; metadata(std::vector&& v) : data_(std::move(v)) {} uint8_t const* data() const { return data_.data(); } size_t size() const { return data_.size(); } @@ -537,6 +538,15 @@ struct packed_columns { std::vector data_; }; + packed_columns() + : metadata_(std::make_unique()), gpu_data(std::make_unique()) + { + } + packed_columns(std::unique_ptr&& md, std::unique_ptr&& gd) + : metadata_(std::move(md)), gpu_data(std::move(gd)) + { + } + std::unique_ptr metadata_; std::unique_ptr gpu_data; }; diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp index 182e3ff0584..89e5972f448 100644 --- a/cpp/src/copying/pack.cpp +++ b/cpp/src/copying/pack.cpp @@ -145,7 +145,7 @@ packed_columns pack(cudf::table_view const& input, // do a contiguous_split with no splits to get the memory for the table // arranged as we want it auto contig_split_result = cudf::detail::contiguous_split(input, {}, stream, mr); - return std::move(contig_split_result[0].data); + return contig_split_result.empty() ? packed_columns{} : std::move(contig_split_result[0].data); } template @@ -229,7 +229,9 @@ packed_columns::metadata pack_metadata(table_view const& table, size_t buffer_size) { CUDF_FUNC_RANGE(); - return detail::pack_metadata(table.begin(), table.end(), contiguous_buffer, buffer_size); + return table.is_empty() + ? packed_columns::metadata{} + : detail::pack_metadata(table.begin(), table.end(), contiguous_buffer, buffer_size); } /** @@ -238,8 +240,10 @@ packed_columns::metadata pack_metadata(table_view const& table, table_view unpack(packed_columns const& input) { CUDF_FUNC_RANGE(); - return detail::unpack(input.metadata_->data(), - reinterpret_cast(input.gpu_data->data())); + return input.metadata_->size() == 0 + ? table_view{} + : detail::unpack(input.metadata_->data(), + reinterpret_cast(input.gpu_data->data())); } /** diff --git a/cpp/tests/copying/pack_tests.cpp b/cpp/tests/copying/pack_tests.cpp index 2e7c41333d5..11aa505d163 100644 --- a/cpp/tests/copying/pack_tests.cpp +++ b/cpp/tests/copying/pack_tests.cpp @@ -472,6 +472,24 @@ TEST_F(PackUnpackTest, NestedSliced) this->run_test(t); } +TEST_F(PackUnpackTest, EmptyTable) +{ + // no columns + { + cudf::table_view t; + this->run_test(t); + } + + // no rows + { + cudf::test::fixed_width_column_wrapper a; + cudf::test::strings_column_wrapper b; + cudf::test::lists_column_wrapper c; + cudf::table_view t({a, b, c}); + this->run_test(t); + } +} + // clang-format on } // namespace test From b0d86d2bfe8814cc7b717156b8b7fa9d5d2198bd Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Mon, 12 Jul 2021 22:43:07 -0700 Subject: [PATCH 50/54] Add `datetime::is_leap_year` (#8711) Part 1 of #8677 This PR adds `datetime::is_leap_year`. The function returns `true` for datetime column rows with leap year; `false` for rows with non leap years, and `null` for null rows. 
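As a usage sketch of the new API (the wrapper function and the input column `ts` are hypothetical; any TIMESTAMP-typed column works):

```cpp
#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/datetime.hpp>

#include <memory>

// Sketch: returns a BOOL8 column where row i is true iff the year of ts[i]
// is a leap year; null input rows produce null output rows.
std::unique_ptr<cudf::column> leap_year_flags(cudf::column_view const& ts)
{
  return cudf::datetime::is_leap_year(ts);
}
```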
Authors: - Michael Wang (https://github.com/isVoid) Approvers: - Mark Harris (https://github.com/harrism) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/8711 --- cpp/include/cudf/datetime.hpp | 17 ++++++++++++ cpp/include/cudf/detail/datetime.hpp | 11 ++++++++ cpp/src/datetime/datetime_ops.cu | 28 ++++++++++++++++++- cpp/tests/datetime/datetime_ops_test.cpp | 35 ++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index a276769c169..980c824fdf2 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -189,6 +189,23 @@ std::unique_ptr add_calendrical_months( cudf::column_view const& timestamps, cudf::column_view const& months, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Check if the year of the given date is a leap year + * + * `output[i] == true` if year of `column[i]` is a leap year + * `output[i] == false` if year of `column[i]` is not a leap year + * `output[i] is null` if `column[i]` is null + * + * @param[in] cudf::column_view of the input datetime values + * + * @returns cudf::column of datatype BOOL8 truth value of the corresponding date + * @throw cudf::logic_error if input column datatype is not a TIMESTAMP + */ +std::unique_ptr is_leap_year( + cudf::column_view const& column, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace datetime } // namespace cudf diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index 017fe0d96ff..9cc319b5011 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -124,6 +124,17 @@ std::unique_ptr add_calendrical_months( cudf::column_view const& months, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @copydoc cudf::is_leap_year(cudf::column_view const&, rmm::mr::device_memory_resource *) + * + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ */ +std::unique_ptr is_leap_year( + cudf::column_view const& column, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + } // namespace detail } // namespace datetime } // namespace cudf diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 36c3605951e..41f3e7dcfee 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -19,9 +19,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -127,6 +127,17 @@ struct extract_day_num_of_year { } }; +struct is_leap_year_op { + template + CUDA_DEVICE_CALLABLE bool operator()(Timestamp const ts) const + { + using namespace cuda::std::chrono; + auto const days_since_epoch = floor(ts); + auto const date = year_month_day(days_since_epoch); + return date.year().is_leap(); + } +}; + // Apply the functor for every element/row in the input column to create the output column template struct launch_functor { @@ -357,6 +368,14 @@ std::unique_ptr day_of_year(column_view const& column, return detail::apply_datetime_op( column, stream, mr); } + +std::unique_ptr is_leap_year(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return apply_datetime_op(column, stream, mr); +} + } // namespace detail std::unique_ptr extract_year(column_view const& column, rmm::mr::device_memory_resource* mr) @@ -426,5 +445,12 @@ std::unique_ptr add_calendrical_months(cudf::column_view const& ti return detail::add_calendrical_months( timestamp_column, months_column, rmm::cuda_stream_default, mr); } + +std::unique_ptr is_leap_year(column_view const& column, rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::is_leap_year(column, rmm::cuda_stream_default, mr); +} + } // namespace datetime } // namespace cudf diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 8aa83ce6b22..cdfc9de395c 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -26,6 +26,8 @@ #include #include +#define XXX false // stub for null values + template struct NonTimestampTest : public cudf::test::BaseFixture { cudf::data_type type() { return cudf::data_type{cudf::type_to_id()}; } @@ -532,4 +534,37 @@ TEST_F(BasicDatetimeOpsTest, TestAddMonthsWithSecondsAndNullValues) true); } +TEST_F(BasicDatetimeOpsTest, TestIsLeapYear) +{ + using namespace cudf::test; + using namespace cudf::datetime; + using namespace cuda::std::chrono; + + // Time in seconds since epoch + // Dates converted using epochconverter.com + auto timestamps_s = + cudf::test::fixed_width_column_wrapper{ + { + 1594332839L, // 2020-07-09 10:13:59 GMT - leap year + 0L, // null + 915148800L, // 1999-01-01 00:00:00 GMT - non leap year + -11663029161L, // 1600-5-31 05:40:39 GMT - leap year + 707904541L, // 1992-06-07 08:09:01 GMT - leap year + 2181048447L, // 1900-11-20 09:12:33 GMT - non leap year + 0L, // UNIX EPOCH 1970-01-01 00:00:00 GMT - non leap year + -12212553600L, // First full year of Gregorian Calandar 1583-01-01 00:00:00 - non-leap-year + 0L, // null + 13591632822L, // 2400-09-13 13:33:42 GMT - leap year + 4539564243L, // 2113-11-08 06:04:03 GMT - non leap year + 0L // null + }, + {true, false, true, true, true, true, true, true, false, true, true, false}}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *is_leap_year(timestamps_s), + cudf::test::fixed_width_column_wrapper{ + {true, XXX, 
false, true, true, false, false, false, XXX, true, false, XXX},
+      {true, false, true, true, true, true, true, true, false, true, true, false}});
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From 3ed87f3cf3fc4c5039301c57c28bd64b062b862a Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Tue, 13 Jul 2021 10:06:43 -0400
Subject: [PATCH 51/54] Fix instructions for running cuDF/dask-cuDF tests in
 CONTRIBUTING.md (#8724)

Closes #8716

Authors:
  - Ashwin Srinath (https://github.com/shwina)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/8724
---
 CONTRIBUTING.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a57652cb364..841a02f72e6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -221,7 +221,8 @@ $ ./build.sh dask_cudf
 - To run Python tests (Optional):
 ```bash
 $ cd $CUDF_HOME/python
-$ py.test -v                # run python tests on cudf and dask-cudf python bindings
+$ py.test -v cudf           # run cudf test suite
+$ py.test -v dask_cudf      # run dask_cudf test suite
 ```
 - Other `build.sh` options:

From d91d011b74571878f073181d204fec41fda31ab3 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Wed, 14 Jul 2021 02:03:30 +0530
Subject: [PATCH 52/54] Add compiled binary operation (#8192)

Add compiled binary operations (instead of JIT):

_(column_view lhs OP column_view rhs) => output_type_
_(scalar lhs OP column_view rhs) => output_type_
_(column_view lhs OP scalar rhs) => output_type_

This PR adds compiled binary operations for fixed-width types; string
operations are already compiled. Partially addresses #7801

#### Approach:
Each triple-dispatcher compilation takes 60 seconds, so device-side dispatch
is done in two ways:
1. For types with a `common_type`: a single device-side dispatch with a
   nested individual type dispatch.
2. For types without a common type: a dispatch on both sides with a nested
   output type dispatch (this compiles faster than a device-side triple
   dispatch).

Each operation's compile time is close to 30 seconds, adding ~4 minutes of
compilation time overall (ninja -j 12): 1.5 minutes for the .cu files, while
util.cpp alone takes 2.5 minutes.

Identifying the `common_type` is done on the host. The operation functor is
defined only for supported types; for unsupported operations, no work is
done in device code.

- [x] Identify supported operations and types on the host.
- [x] Host-side operation dispatch: each operation lives in its own file to
      parallelize compilation.
- [x] All JIT-supported operations are also supported as compiled.
- [ ] ~~Coalesce~~, redirect user-defined operations to JIT.
- [x] Scalar support added.

#### Unit tests:
- [x] Tests added for numeric and chrono type combinations for all supported
      operators. Testing every type is infeasible because compilation time
      would be huge, so the tests cover all valid combinations of type
      categories in all operations while staying minimal.
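As a usage sketch of the new compiled entry point (the column inputs and the wrapper function are hypothetical; the `cudf::experimental::binary_operation` overloads are the ones added by this PR):

```cpp
#include <cudf/binaryop.hpp>
#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>

#include <memory>

// Sketch: compiled (non-JIT) ADD of two columns producing an INT64 column.
// Supported type combinations are validated on the host; an unsupported
// combination throws instead of failing at JIT time.
std::unique_ptr<cudf::column> compiled_add(cudf::column_view const& lhs,
                                           cudf::column_view const& rhs)
{
  return cudf::experimental::binary_operation(
    lhs, rhs, cudf::binary_operator::ADD, cudf::data_type{cudf::type_id::INT64});
}
```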
#### Benchmark: - [x] jit binops benchmark - [x] compiled binops benchmark Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) - Robert Maynard (https://github.com/robertmaynard) - GALI PREM SAGAR (https://github.com/galipremsagar) - Charles Blackmon-Luca (https://github.com/charlesbluca) - Nghia Truong (https://github.com/ttnghia) - Jake Hemstad (https://github.com/jrhemstad) URL: https://github.com/rapidsai/cudf/pull/8192 --- cpp/CMakeLists.txt | 28 + cpp/benchmarks/CMakeLists.txt | 1 + .../binaryop/compiled_binaryop_benchmark.cpp | 99 +++ .../binaryop/jit_binaryop_benchmark.cpp | 49 +- cpp/include/cudf/binaryop.hpp | 122 ++- cpp/include/cudf/scalar/scalar.hpp | 9 +- cpp/include/cudf/utilities/traits.hpp | 35 +- cpp/include/cudf_test/base_fixture.hpp | 34 +- cpp/src/binaryop/binaryop.cpp | 80 +- cpp/src/binaryop/compiled/ATan2.cu | 26 + cpp/src/binaryop/compiled/Add.cu | 26 + cpp/src/binaryop/compiled/BitwiseAnd.cu | 26 + cpp/src/binaryop/compiled/BitwiseOr.cu | 26 + cpp/src/binaryop/compiled/BitwiseXor.cu | 26 + cpp/src/binaryop/compiled/Div.cu | 26 + cpp/src/binaryop/compiled/FloorDiv.cu | 26 + cpp/src/binaryop/compiled/Greater.cu | 26 + cpp/src/binaryop/compiled/GreaterEqual.cu | 26 + cpp/src/binaryop/compiled/Less.cu | 26 + cpp/src/binaryop/compiled/LessEqual.cu | 26 + cpp/src/binaryop/compiled/LogBase.cu | 26 + cpp/src/binaryop/compiled/LogicalAnd.cu | 26 + cpp/src/binaryop/compiled/LogicalOr.cu | 26 + cpp/src/binaryop/compiled/Mod.cu | 26 + cpp/src/binaryop/compiled/Mul.cu | 26 + cpp/src/binaryop/compiled/NullMax.cu | 26 + cpp/src/binaryop/compiled/NullMin.cu | 26 + cpp/src/binaryop/compiled/PMod.cu | 26 + cpp/src/binaryop/compiled/Pow.cu | 26 + cpp/src/binaryop/compiled/PyMod.cu | 26 + cpp/src/binaryop/compiled/ShiftLeft.cu | 26 + cpp/src/binaryop/compiled/ShiftRight.cu | 26 + .../binaryop/compiled/ShiftRightUnsigned.cu | 26 + cpp/src/binaryop/compiled/Sub.cu | 26 + cpp/src/binaryop/compiled/TrueDiv.cu | 26 + cpp/src/binaryop/compiled/binary_ops.cu | 542 ++++++------- cpp/src/binaryop/compiled/binary_ops.cuh | 272 +++++++ cpp/src/binaryop/compiled/binary_ops.hpp | 115 ++- cpp/src/binaryop/compiled/equality_ops.cu | 46 ++ cpp/src/binaryop/compiled/operation.cuh | 421 ++++++++++ cpp/src/binaryop/compiled/util.cpp | 183 +++++ cpp/src/scalar/scalar.cpp | 6 + cpp/src/table/table_device_view.cu | 4 +- cpp/tests/CMakeLists.txt | 2 + cpp/tests/binaryop/assert-binops.h | 41 +- .../binop-compiled-fixed_point-test.cpp | 721 ++++++++++++++++++ cpp/tests/binaryop/binop-compiled-test.cpp | 610 +++++++++++++++ cpp/tests/binaryop/binop-fixture.hpp | 19 +- cpp/tests/binaryop/util/operation.h | 66 +- .../main/java/ai/rapids/cudf/BinaryOp.java | 39 +- python/cudf/cudf/_lib/binaryop.pyx | 3 - python/cudf/cudf/_lib/cpp/binaryop.pxd | 1 - 52 files changed, 3781 insertions(+), 443 deletions(-) create mode 100644 cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp create mode 100644 cpp/src/binaryop/compiled/ATan2.cu create mode 100644 cpp/src/binaryop/compiled/Add.cu create mode 100644 cpp/src/binaryop/compiled/BitwiseAnd.cu create mode 100644 cpp/src/binaryop/compiled/BitwiseOr.cu create mode 100644 cpp/src/binaryop/compiled/BitwiseXor.cu create mode 100644 cpp/src/binaryop/compiled/Div.cu create mode 100644 cpp/src/binaryop/compiled/FloorDiv.cu create mode 100644 cpp/src/binaryop/compiled/Greater.cu create mode 100644 cpp/src/binaryop/compiled/GreaterEqual.cu create mode 100644 cpp/src/binaryop/compiled/Less.cu 
create mode 100644 cpp/src/binaryop/compiled/LessEqual.cu create mode 100644 cpp/src/binaryop/compiled/LogBase.cu create mode 100644 cpp/src/binaryop/compiled/LogicalAnd.cu create mode 100644 cpp/src/binaryop/compiled/LogicalOr.cu create mode 100644 cpp/src/binaryop/compiled/Mod.cu create mode 100644 cpp/src/binaryop/compiled/Mul.cu create mode 100644 cpp/src/binaryop/compiled/NullMax.cu create mode 100644 cpp/src/binaryop/compiled/NullMin.cu create mode 100644 cpp/src/binaryop/compiled/PMod.cu create mode 100644 cpp/src/binaryop/compiled/Pow.cu create mode 100644 cpp/src/binaryop/compiled/PyMod.cu create mode 100644 cpp/src/binaryop/compiled/ShiftLeft.cu create mode 100644 cpp/src/binaryop/compiled/ShiftRight.cu create mode 100644 cpp/src/binaryop/compiled/ShiftRightUnsigned.cu create mode 100644 cpp/src/binaryop/compiled/Sub.cu create mode 100644 cpp/src/binaryop/compiled/TrueDiv.cu create mode 100644 cpp/src/binaryop/compiled/binary_ops.cuh create mode 100644 cpp/src/binaryop/compiled/equality_ops.cu create mode 100644 cpp/src/binaryop/compiled/operation.cuh create mode 100644 cpp/src/binaryop/compiled/util.cpp create mode 100644 cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp create mode 100644 cpp/tests/binaryop/binop-compiled-test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 605b67e77fc..ab7d8389c88 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -155,6 +155,34 @@ add_library(cudf src/ast/transform.cu src/binaryop/binaryop.cpp src/binaryop/compiled/binary_ops.cu + src/binaryop/compiled/Add.cu + src/binaryop/compiled/ATan2.cu + src/binaryop/compiled/BitwiseAnd.cu + src/binaryop/compiled/BitwiseOr.cu + src/binaryop/compiled/BitwiseXor.cu + src/binaryop/compiled/Less.cu + src/binaryop/compiled/Greater.cu + src/binaryop/compiled/LessEqual.cu + src/binaryop/compiled/GreaterEqual.cu + src/binaryop/compiled/Div.cu + src/binaryop/compiled/equality_ops.cu + src/binaryop/compiled/FloorDiv.cu + src/binaryop/compiled/LogBase.cu + src/binaryop/compiled/LogicalAnd.cu + src/binaryop/compiled/LogicalOr.cu + src/binaryop/compiled/Mod.cu + src/binaryop/compiled/Mul.cu + src/binaryop/compiled/NullMax.cu + src/binaryop/compiled/NullMin.cu + src/binaryop/compiled/PMod.cu + src/binaryop/compiled/Pow.cu + src/binaryop/compiled/PyMod.cu + src/binaryop/compiled/ShiftLeft.cu + src/binaryop/compiled/ShiftRight.cu + src/binaryop/compiled/ShiftRightUnsigned.cu + src/binaryop/compiled/Sub.cu + src/binaryop/compiled/TrueDiv.cu + src/binaryop/compiled/util.cpp src/labeling/label_bins.cu src/bitmask/null_mask.cu src/bitmask/is_element_valid.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index cf681a96cbd..6a2b71ae1d9 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -203,6 +203,7 @@ ConfigureBench(AST_BENCH ast/transform_benchmark.cpp) # - binaryop benchmark ---------------------------------------------------------------------------- ConfigureBench(BINARYOP_BENCH binaryop/binaryop_benchmark.cpp + binaryop/compiled_binaryop_benchmark.cpp binaryop/jit_binaryop_benchmark.cpp) ################################################################################################### diff --git a/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp new file mode 100644 index 00000000000..aa86f3bedf8 --- /dev/null +++ b/cpp/benchmarks/binaryop/compiled_binaryop_benchmark.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include + +#include + +template +class COMPILED_BINARYOP : public cudf::benchmark { +}; + +template +void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop) +{ + const cudf::size_type column_size{(cudf::size_type)state.range(0)}; + + auto data_it = thrust::make_counting_iterator(0); + cudf::test::fixed_width_column_wrapper input1(data_it, data_it + column_size); + cudf::test::fixed_width_column_wrapper input2(data_it, data_it + column_size); + + auto lhs = cudf::column_view(input1); + auto rhs = cudf::column_view(input2); + auto output_dtype = cudf::data_type(cudf::type_to_id()); + + // Call once for hot cache. + cudf::experimental::binary_operation(lhs, rhs, binop, output_dtype); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + cudf::experimental::binary_operation(lhs, rhs, binop, output_dtype); + } +} + +// TODO tparam boolean for null. +#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut) \ + BENCHMARK_TEMPLATE_DEFINE_F( \ + COMPILED_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop) \ + (::benchmark::State & st) \ + { \ + BM_compiled_binaryop(st, cudf::binary_operator::binop); \ + } \ + BENCHMARK_REGISTER_F(COMPILED_BINARYOP, binop) \ + ->Unit(benchmark::kMicrosecond) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +using namespace cudf; +using namespace numeric; + +// clang-format off +BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t); +BINARYOP_BENCHMARK_DEFINE(duration_s, duration_D, SUB, duration_ms); +BINARYOP_BENCHMARK_DEFINE(float, float, MUL, int64_t); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, TRUE_DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, FLOOR_DIV, int64_t); +BINARYOP_BENCHMARK_DEFINE(double, double, MOD, double); +BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double); +BINARYOP_BENCHMARK_DEFINE(int32_t, uint8_t, PYMOD, int64_t); +BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, POW, double); +BINARYOP_BENCHMARK_DEFINE(float, double, LOG_BASE, double); +BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double); +BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int); +BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int); +BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t); +BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, BITWISE_AND, int16_t); +BINARYOP_BENCHMARK_DEFINE(int16_t, int32_t, BITWISE_OR, int64_t); +BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, BITWISE_XOR, int32_t); +BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, bool); +BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, LOGICAL_OR, bool); +BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, EQUAL, bool); +BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool); 
+BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool); +BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool); +BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool); +BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32); +BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s); diff --git a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp index 29ca02a843d..3c02f47eeb7 100644 --- a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp +++ b/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp @@ -23,7 +23,7 @@ #include -template +template class JIT_BINARYOP : public cudf::benchmark { }; @@ -50,22 +50,24 @@ void BM_binaryop(benchmark::State& state, cudf::binary_operator binop) } // TODO tparam boolean for null. -#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut) \ - BENCHMARK_TEMPLATE_DEFINE_F(JIT_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut) \ - (::benchmark::State & st) \ - { \ - BM_binaryop(st, cudf::binary_operator::binop); \ - } \ - BENCHMARK_REGISTER_F(JIT_BINARYOP, binop) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime() \ - ->Arg(10000) /* 10k */ \ - ->Arg(100000) /* 100k */ \ - ->Arg(1000000) /* 1M */ \ - ->Arg(10000000) /* 10M */ \ +#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut) \ + BENCHMARK_TEMPLATE_DEFINE_F( \ + JIT_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop) \ + (::benchmark::State & st) \ + { \ + BM_binaryop(st, cudf::binary_operator::binop); \ + } \ + BENCHMARK_REGISTER_F(JIT_BINARYOP, binop) \ + ->Unit(benchmark::kMicrosecond) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ ->Arg(100000000); /* 100M */ using namespace cudf; +using namespace numeric; // clang-format off BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t); @@ -75,16 +77,23 @@ BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, DIV, int6 BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, TRUE_DIV, int64_t); BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, FLOOR_DIV, int64_t); BINARYOP_BENCHMARK_DEFINE(double, double, MOD, double); +BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double); +BINARYOP_BENCHMARK_DEFINE(int32_t, uint8_t, PYMOD, int64_t); BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, POW, double); +BINARYOP_BENCHMARK_DEFINE(float, double, LOG_BASE, double); +BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double); +BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int); +BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int); +BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t); BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, BITWISE_AND, int16_t); BINARYOP_BENCHMARK_DEFINE(int16_t, int32_t, BITWISE_OR, int64_t); BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, BITWISE_XOR, int32_t); -BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, int16_t); +BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, bool); BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, LOGICAL_OR, bool); +BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, EQUAL, bool); +BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool); BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool); BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool); -BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int); -BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, 
SHIFT_RIGHT_UNSIGNED, int64_t); -BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double); -BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double); +BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool); +BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32); +BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s); diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 7099c29b9df..e6ff6b0eadc 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,37 +42,36 @@ enum class binary_operator : int32_t { FLOOR_DIV, ///< operator / after promoting to 64 bit floating point and then ///< flooring the result MOD, ///< operator % + PMOD, ///< positive modulo operator + ///< If remainder is negative, this returns (remainder + divisor) % divisor + ///< else, it returns (dividend % divisor) PYMOD, ///< operator % but following python's sign rules for negatives POW, ///< lhs ^ rhs + LOG_BASE, ///< logarithm to the base + ATAN2, ///< 2-argument arctangent + SHIFT_LEFT, ///< operator << + SHIFT_RIGHT, ///< operator >> + SHIFT_RIGHT_UNSIGNED, ///< operator >>> (from Java) + ///< Logical right shift. Casts to an unsigned value before shifting. + BITWISE_AND, ///< operator & + BITWISE_OR, ///< operator | + BITWISE_XOR, ///< operator ^ + LOGICAL_AND, ///< operator && + LOGICAL_OR, ///< operator || EQUAL, ///< operator == NOT_EQUAL, ///< operator != LESS, ///< operator < GREATER, ///< operator > LESS_EQUAL, ///< operator <= GREATER_EQUAL, ///< operator >= - BITWISE_AND, ///< operator & - BITWISE_OR, ///< operator | - BITWISE_XOR, ///< operator ^ - LOGICAL_AND, ///< operator && - LOGICAL_OR, ///< operator || - COALESCE, ///< operator x,y x is null ? y : x - GENERIC_BINARY, ///< generic binary operator to be generated with input - ///< ptx code - SHIFT_LEFT, ///< operator << - SHIFT_RIGHT, ///< operator >> - SHIFT_RIGHT_UNSIGNED, ///< operator >>> (from Java) - ///< Logical right shift. Casts to an unsigned value before shifting. 
- LOG_BASE, ///< logarithm to the base - ATAN2, ///< 2-argument arctangent - PMOD, ///< positive modulo operator - ///< If remainder is negative, this returns (remainder + divisor) % divisor - ///< else, it returns (dividend % divisor) NULL_EQUALS, ///< Returns true when both operands are null; false when one is null; the ///< result of equality when both are non-null NULL_MAX, ///< Returns max of operands when both are non-null; returns the non-null ///< operand when one is null; or invalid when both are null NULL_MIN, ///< Returns min of operands when both are non-null; returns the non-null ///< operand when one is null; or invalid when both are null + GENERIC_BINARY, ///< generic binary operator to be generated with input + ///< ptx code INVALID_BINARY ///< invalid operation }; /** @@ -87,6 +86,7 @@ enum class binary_operator : int32_t { * * @param lhs The left operand scalar * @param rhs The right operand column + * @param op The binary operator * @param output_type The desired data type of the output column * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column of `output_type` type containing the result of @@ -112,6 +112,7 @@ std::unique_ptr binary_operation( * * @param lhs The left operand column * @param rhs The right operand scalar + * @param op The binary operator * @param output_type The desired data type of the output column * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column of `output_type` type containing the result of @@ -135,6 +136,7 @@ std::unique_ptr binary_operation( * * @param lhs The left operand column * @param rhs The right operand column + * @param op The binary operator * @param output_type The desired data type of the output column * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column of `output_type` type containing the result of @@ -202,5 +204,89 @@ cudf::data_type binary_operation_fixed_point_output_type(binary_operator op, cudf::data_type const& lhs, cudf::data_type const& rhs); +namespace experimental { +/** + * @brief Performs a binary operation between a scalar and a column. + * + * The output contains the result of `op(lhs, rhs[i])` for all `0 <= i < rhs.size()` + * The scalar is the left operand and the column elements are the right operand. + * This distinction is significant in case of non-commutative binary operations + * + * Regardless of the operator, the validity of the output value is the logical + * AND of the validity of the two operands except NullMin and NullMax (logical OR). + * + * @param lhs The left operand scalar + * @param rhs The right operand column + * @param op The binary operator + * @param output_type The desired data type of the output column + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Output column of `output_type` type containing the result of + * the binary operation + * @throw cudf::logic_error if @p output_type dtype isn't fixed-width + * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical + * operations. + */ +std::unique_ptr binary_operation( + scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Performs a binary operation between a column and a scalar. 
+ * + * The output contains the result of `op(lhs[i], rhs)` for all `0 <= i < lhs.size()` + * The column elements are the left operand and the scalar is the right operand. + * This distinction is significant in case of non-commutative binary operations + * + * Regardless of the operator, the validity of the output value is the logical + * AND of the validity of the two operands except NullMin and NullMax (logical OR). + * + * @param lhs The left operand column + * @param rhs The right operand scalar + * @param op The binary operator + * @param output_type The desired data type of the output column + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Output column of `output_type` type containing the result of + * the binary operation + * @throw cudf::logic_error if @p output_type dtype isn't fixed-width + * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical + * operations. + */ +std::unique_ptr binary_operation( + column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Performs a binary operation between two columns. + * + * The output contains the result of `op(lhs[i], rhs[i])` for all `0 <= i < lhs.size()` + * + * Regardless of the operator, the validity of the output value is the logical + * AND of the validity of the two operands except NullMin and NullMax (logical OR). + * + * @param lhs The left operand column + * @param rhs The right operand column + * @param op The binary operator + * @param output_type The desired data type of the output column + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Output column of `output_type` type containing the result of + * the binary operation + * @throw cudf::logic_error if @p lhs and @p rhs are different sizes + * @throw cudf::logic_error if @p output_type dtype isn't boolean for comparison and logical + * operations. + * @throw cudf::logic_error if @p output_type dtype isn't fixed-width + */ +std::unique_ptr binary_operation( + column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +} // namespace experimental /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 717cf8ea7b0..0e14b0c6bf5 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -154,7 +154,7 @@ class fixed_width_scalar : public scalar { void set_value(T value, rmm::cuda_stream_view stream = rmm::cuda_stream_default); /** - * @brief Implicit conversion operator to get the value of the scalar on the host. + * @brief Explicit conversion operator to get the value of the scalar on the host. */ explicit operator value_type() const; @@ -365,6 +365,11 @@ class fixed_point_scalar : public scalar { */ T fixed_point_value(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const; + /** + * @brief Explicit conversion operator to get the value of the scalar on the host. + */ + explicit operator value_type() const; + /** * @brief Returns a raw pointer to the value in device memory. 
*/ @@ -465,7 +470,7 @@ class string_scalar : public scalar { rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Implicit conversion operator to get the value of the scalar in a host std::string. + * @brief Explicit conversion operator to get the value of the scalar in a host std::string. */ explicit operator std::string() const; diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index e2f5f6db624..2cdc455e05c 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -47,16 +47,20 @@ using void_t = void; */ #define CUDF_ENABLE_IF(...) std::enable_if_t<(__VA_ARGS__)>* = nullptr -template -struct is_relationally_comparable_impl : std::false_type { -}; - template using less_comparable = decltype(std::declval() < std::declval()); template using greater_comparable = decltype(std::declval() > std::declval()); +template +using equality_comparable = decltype(std::declval() == std::declval()); + +namespace detail { +template +struct is_relationally_comparable_impl : std::false_type { +}; + template struct is_relationally_comparable_impl struct is_equality_comparable_impl : std::false_type { }; -template -using equality_comparable = decltype(std::declval() == std::declval()); - template struct is_equality_comparable_impl>> : std::true_type { }; +// has common type +template +struct has_common_type_impl : std::false_type { +}; + +template +struct has_common_type_impl>, Ts...> : std::true_type { +}; +} // namespace detail + +template +using has_common_type = typename detail::has_common_type_impl::type; + +template +constexpr inline bool has_common_type_v = detail::has_common_type_impl::value; + template using is_timestamp_t = cuda::std::disjunction, std::is_same, @@ -104,7 +121,7 @@ using is_duration_t = cuda::std::disjunction, template constexpr inline bool is_relationally_comparable() { - return is_relationally_comparable_impl::value; + return detail::is_relationally_comparable_impl::value; } /** @@ -122,7 +139,7 @@ constexpr inline bool is_relationally_comparable() template constexpr inline bool is_equality_comparable() { - return is_equality_comparable_impl::value; + return detail::is_equality_comparable_impl::value; } /** diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index 8502d5832e6..cd088d81531 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -56,24 +56,24 @@ template struct uniform_distribution_impl { }; template -struct uniform_distribution_impl< - T, - std::enable_if_t::value && not cudf::is_boolean()>> { +struct uniform_distribution_impl::value>> { using type = std::uniform_int_distribution; }; -template -struct uniform_distribution_impl::value>> { - using type = std::uniform_real_distribution; +template <> +struct uniform_distribution_impl { + using type = std::bernoulli_distribution; }; template -struct uniform_distribution_impl()>> { - using type = std::bernoulli_distribution; +struct uniform_distribution_impl::value>> { + using type = std::uniform_real_distribution; }; template -struct uniform_distribution_impl()>> { +struct uniform_distribution_impl< + T, + std::enable_if_t() or cudf::is_fixed_point()>> { using type = std::uniform_int_distribution; }; @@ -131,7 +131,8 @@ class UniformRandomGenerator { * @param lower Lower bound of the range * @param upper Upper bound of the desired range */ - template ()>* = nullptr> + template () && !cudf::is_boolean()>* = nullptr> UniformRandomGenerator(T lower, T upper, uint64_t seed = detail::random_generator_incrementing_seed()) @@ -139,6 +140,14 @@ class UniformRandomGenerator { { } + template ()>* = nullptr> + UniformRandomGenerator(T lower, + T upper, + uint64_t seed = detail::random_generator_incrementing_seed()) + : dist{0.5}, rng{std::mt19937_64{seed}()} + { + } + /** * @brief Construct a new Uniform Random Generator to generate uniformly * random numbers in the range `[upper,lower]` @@ -146,7 +155,8 @@ class UniformRandomGenerator { * @param lower Lower bound of the range * @param upper Upper bound of the desired range */ - template ()>* = nullptr> + template () or cudf::is_fixed_point()>* = nullptr> UniformRandomGenerator(typename TL::rep lower, typename TL::rep upper, uint64_t seed = detail::random_generator_incrementing_seed()) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 11a3383ee87..aaf193ff5cf 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -588,7 +588,7 @@ std::unique_ptr binary_operation(scalar const& lhs, rmm::mr::device_memory_resource* mr) { if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) - return binops::compiled::binary_operation(lhs, rhs, op, output_type, stream, mr); + return experimental::binary_operation(lhs, rhs, op, output_type, mr); if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); @@ -615,7 +615,7 @@ std::unique_ptr binary_operation(column_view const& lhs, rmm::mr::device_memory_resource* mr) { if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) - return binops::compiled::binary_operation(lhs, rhs, op, output_type, stream, mr); + return experimental::binary_operation(lhs, rhs, op, output_type, mr); if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr); @@ -644,7 +644,7 @@ std::unique_ptr binary_operation(column_view const& lhs, CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match"); if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) - return binops::compiled::binary_operation(lhs, rhs, op, output_type, stream, mr); + return experimental::binary_operation(lhs, rhs, op, output_type, mr); if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type())) return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, 
@@ -757,4 +757,78 @@ std::unique_ptr<column> binary_operation(column_view const& lhs,
   return detail::binary_operation(lhs, rhs, ptx, output_type, rmm::cuda_stream_default, mr);
 }
 
+// Experimental Compiled Binary operation
+namespace experimental {
+namespace detail {
+/**
+ * @copydoc cudf::experimental::binary_operation(column_view const&, column_view const&,
+ * binary_operator, data_type, rmm::mr::device_memory_resource*)
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+template <typename LhsType, typename RhsType>
+std::unique_ptr<column> binary_operation(LhsType const& lhs,
+                                         RhsType const& rhs,
+                                         binary_operator op,
+                                         data_type output_type,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr)
+{
+  if constexpr (std::is_same_v<LhsType, column_view> and std::is_same_v<RhsType, column_view>)
+    CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match");
+
+  if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING and
+      output_type.id() == type_id::STRING and
+      (op == binary_operator::NULL_MAX or op == binary_operator::NULL_MIN))
+    return binops::compiled::string_null_min_max(lhs, rhs, op, output_type, stream, mr);
+
+  if (not binops::compiled::is_supported_operation(output_type, lhs.type(), rhs.type(), op))
+    CUDF_FAIL("Unsupported operator for these types");
+
+  // TODO check if scale conversion required?
+  // if (is_fixed_point(lhs.type()) or is_fixed_point(rhs.type()))
+  //   CUDF_FAIL("Not yet supported fixed_point");
+  // return fixed_point_binary_operation(lhs, rhs, op, output_type, stream, mr);
+
+  auto out = make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr);
+
+  if constexpr (std::is_same_v<LhsType, column_view>)
+    if (lhs.is_empty()) return out;
+  if constexpr (std::is_same_v<RhsType, column_view>)
+    if (rhs.is_empty()) return out;
+
+  auto out_view = out->mutable_view();
+  cudf::binops::compiled::binary_operation(out_view, lhs, rhs, op, stream);
+  return out;
+}
+}  // namespace detail
+
+std::unique_ptr<column> binary_operation(scalar const& lhs,
+                                         column_view const& rhs,
+                                         binary_operator op,
+                                         data_type output_type,
+                                         rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr);
+}
+std::unique_ptr<column> binary_operation(column_view const& lhs,
+                                         scalar const& rhs,
+                                         binary_operator op,
+                                         data_type output_type,
+                                         rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr);
+}
+std::unique_ptr<column> binary_operation(column_view const& lhs,
+                                         column_view const& rhs,
+                                         binary_operator op,
+                                         data_type output_type,
+                                         rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr);
+}
+}  // namespace experimental
 }  // namespace cudf
diff --git a/cpp/src/binaryop/compiled/ATan2.cu b/cpp/src/binaryop/compiled/ATan2.cu
new file mode 100644
index 00000000000..8e5cbf57f55
--- /dev/null
+++ b/cpp/src/binaryop/compiled/ATan2.cu
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/Add.cu b/cpp/src/binaryop/compiled/Add.cu new file mode 100644 index 00000000000..4cd2ced66f4 --- /dev/null +++ b/cpp/src/binaryop/compiled/Add.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/BitwiseAnd.cu b/cpp/src/binaryop/compiled/BitwiseAnd.cu new file mode 100644 index 00000000000..6abac2bd197 --- /dev/null +++ b/cpp/src/binaryop/compiled/BitwiseAnd.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/BitwiseOr.cu b/cpp/src/binaryop/compiled/BitwiseOr.cu new file mode 100644 index 00000000000..6d523cbf1d1 --- /dev/null +++ b/cpp/src/binaryop/compiled/BitwiseOr.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/BitwiseXor.cu b/cpp/src/binaryop/compiled/BitwiseXor.cu new file mode 100644 index 00000000000..45175681574 --- /dev/null +++ b/cpp/src/binaryop/compiled/BitwiseXor.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/Div.cu b/cpp/src/binaryop/compiled/Div.cu new file mode 100644 index 00000000000..7cc895ecd06 --- /dev/null +++ b/cpp/src/binaryop/compiled/Div.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/FloorDiv.cu b/cpp/src/binaryop/compiled/FloorDiv.cu new file mode 100644 index 00000000000..99ea2706b86 --- /dev/null +++ b/cpp/src/binaryop/compiled/FloorDiv.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/Greater.cu b/cpp/src/binaryop/compiled/Greater.cu new file mode 100644 index 00000000000..679e029b5fc --- /dev/null +++ b/cpp/src/binaryop/compiled/Greater.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/GreaterEqual.cu b/cpp/src/binaryop/compiled/GreaterEqual.cu new file mode 100644 index 00000000000..23b0c6aaa0d --- /dev/null +++ b/cpp/src/binaryop/compiled/GreaterEqual.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/Less.cu b/cpp/src/binaryop/compiled/Less.cu new file mode 100644 index 00000000000..7ab5dfe3478 --- /dev/null +++ b/cpp/src/binaryop/compiled/Less.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/LessEqual.cu b/cpp/src/binaryop/compiled/LessEqual.cu new file mode 100644 index 00000000000..983c50c9575 --- /dev/null +++ b/cpp/src/binaryop/compiled/LessEqual.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/LogBase.cu b/cpp/src/binaryop/compiled/LogBase.cu new file mode 100644 index 00000000000..bdc709b86bf --- /dev/null +++ b/cpp/src/binaryop/compiled/LogBase.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/LogicalAnd.cu b/cpp/src/binaryop/compiled/LogicalAnd.cu new file mode 100644 index 00000000000..08112fadfff --- /dev/null +++ b/cpp/src/binaryop/compiled/LogicalAnd.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/LogicalOr.cu b/cpp/src/binaryop/compiled/LogicalOr.cu new file mode 100644 index 00000000000..bc400afd4cd --- /dev/null +++ b/cpp/src/binaryop/compiled/LogicalOr.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/Mod.cu b/cpp/src/binaryop/compiled/Mod.cu new file mode 100644 index 00000000000..0b82c09c8a6 --- /dev/null +++ b/cpp/src/binaryop/compiled/Mod.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/Mul.cu b/cpp/src/binaryop/compiled/Mul.cu new file mode 100644 index 00000000000..15394245259 --- /dev/null +++ b/cpp/src/binaryop/compiled/Mul.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/NullMax.cu b/cpp/src/binaryop/compiled/NullMax.cu new file mode 100644 index 00000000000..78a44041cba --- /dev/null +++ b/cpp/src/binaryop/compiled/NullMax.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} // namespace cudf::binops::compiled diff --git a/cpp/src/binaryop/compiled/NullMin.cu b/cpp/src/binaryop/compiled/NullMin.cu new file mode 100644 index 00000000000..629ab600fd7 --- /dev/null +++ b/cpp/src/binaryop/compiled/NullMin.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} // namespace cudf::binops::compiled diff --git a/cpp/src/binaryop/compiled/PMod.cu b/cpp/src/binaryop/compiled/PMod.cu new file mode 100644 index 00000000000..36902c0ed10 --- /dev/null +++ b/cpp/src/binaryop/compiled/PMod.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/Pow.cu b/cpp/src/binaryop/compiled/Pow.cu new file mode 100644 index 00000000000..c6f897ee18d --- /dev/null +++ b/cpp/src/binaryop/compiled/Pow.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/PyMod.cu b/cpp/src/binaryop/compiled/PyMod.cu new file mode 100644 index 00000000000..b05dcd8e7bc --- /dev/null +++ b/cpp/src/binaryop/compiled/PyMod.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/ShiftLeft.cu b/cpp/src/binaryop/compiled/ShiftLeft.cu new file mode 100644 index 00000000000..6cc950b2d50 --- /dev/null +++ b/cpp/src/binaryop/compiled/ShiftLeft.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/ShiftRight.cu b/cpp/src/binaryop/compiled/ShiftRight.cu new file mode 100644 index 00000000000..1ddd7100a73 --- /dev/null +++ b/cpp/src/binaryop/compiled/ShiftRight.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/ShiftRightUnsigned.cu b/cpp/src/binaryop/compiled/ShiftRightUnsigned.cu new file mode 100644 index 00000000000..a87b4b9f9ac --- /dev/null +++ b/cpp/src/binaryop/compiled/ShiftRightUnsigned.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/Sub.cu b/cpp/src/binaryop/compiled/Sub.cu new file mode 100644 index 00000000000..e0cf47c1310 --- /dev/null +++ b/cpp/src/binaryop/compiled/Sub.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
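Each of the new .cu files above and below exists only to pin down one explicit instantiation of apply_binary_op, so the heavy per-operator template expansion compiles in parallel across translation units instead of in a single monolithic object. A sketch of the pattern every one of these files follows, shown with ops::Sub (the operator tags come from operation.cuh, added elsewhere in this series; only the tag varies between files):

#include "binary_ops.cuh"

namespace cudf::binops::compiled {
// Explicit instantiation: forces the Sub kernel to be compiled in this TU only.
template void apply_binary_op<ops::Sub>(mutable_column_device_view&,
                                        column_device_view const&,
                                        column_device_view const&,
                                        bool is_lhs_scalar,
                                        bool is_rhs_scalar,
                                        rmm::cuda_stream_view);
}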
+ */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/TrueDiv.cu b/cpp/src/binaryop/compiled/TrueDiv.cu new file mode 100644 index 00000000000..d8f1d956340 --- /dev/null +++ b/cpp/src/binaryop/compiled/TrueDiv.cu @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "binary_ops.cuh" + +namespace cudf::binops::compiled { +template void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view); +} diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu index 2b24e0cfa3d..1dd00c4b981 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cu +++ b/cpp/src/binaryop/compiled/binary_ops.cu @@ -15,13 +15,12 @@ */ #include "binary_ops.hpp" +#include "operation.cuh" +#include #include -#include -#include #include -#include -#include +#include #include #include @@ -32,204 +31,76 @@ namespace binops { namespace compiled { namespace { - -template -struct apply_binop { - binary_operator op; - apply_binop(binary_operator op) : op(op) {} - CUDA_DEVICE_CALLABLE Out operator()(Lhs const& x, Rhs const& y) const - { - switch (op) { - case binary_operator::EQUAL: return this->equal(x, y); - case binary_operator::NOT_EQUAL: return this->not_equal(x, y); - case binary_operator::LESS: return this->less(x, y); - case binary_operator::GREATER: return this->greater(x, y); - case binary_operator::LESS_EQUAL: return this->less_equal(x, y); - case binary_operator::GREATER_EQUAL: return this->greater_equal(x, y); - default: return Out{}; - } - } - CUDA_DEVICE_CALLABLE Out equal(Lhs const& x, Rhs const& y) const - { - return static_cast(x == y); - } - CUDA_DEVICE_CALLABLE Out not_equal(Lhs const& x, Rhs const& y) const - { - return static_cast(x != y); - } - CUDA_DEVICE_CALLABLE Out less(Lhs const& x, Rhs const& y) const - { - return static_cast(x < y); - } - CUDA_DEVICE_CALLABLE Out greater(Lhs const& x, Rhs const& y) const - { - return static_cast(x > y); - } - CUDA_DEVICE_CALLABLE Out less_equal(Lhs const& x, Rhs const& y) const - { - return static_cast(x <= y); - } - CUDA_DEVICE_CALLABLE Out greater_equal(Lhs const& x, Rhs const& y) const - { - return static_cast(x >= y); - } -}; - -template -struct apply_binop_scalar_lhs_rhs : apply_binop { - cudf::scalar_device_type_t scalar; - apply_binop_scalar_lhs_rhs(binary_operator op, cudf::scalar_device_type_t scalar) - : apply_binop(op), scalar(scalar) - { - } - CUDA_DEVICE_CALLABLE Out operator()(Lhs const& x) const - { - return apply_binop::operator()(x, scalar.value()); - } -}; - -template -struct apply_binop_scalar_rhs_lhs : apply_binop { - cudf::scalar_device_type_t scalar; - 
apply_binop_scalar_rhs_lhs(binary_operator op, cudf::scalar_device_type_t scalar) - : apply_binop(op), scalar(scalar) +/** + * @brief Converts scalar to column_device_view with single element. + * + * @return pair with column_device_view and column containing any auxilary data to create + * column_view from scalar + */ +struct scalar_as_column_device_view { + using return_type = typename std::pair>; + template ())>* = nullptr> + return_type operator()(scalar const& s, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { + auto h_scalar_type_view = static_cast&>(const_cast(s)); + auto col_v = + column_view(s.type(), 1, h_scalar_type_view.data(), (bitmask_type const*)s.validity_data()); + return std::pair{column_device_view::create(col_v, stream), std::unique_ptr(nullptr)}; } - CUDA_DEVICE_CALLABLE Out operator()(Lhs const& x) const + template ())>* = nullptr> + return_type operator()(scalar const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { - return apply_binop::operator()(scalar.value(), x); + CUDF_FAIL("Unsupported type"); } }; +// specialization for cudf::string_view +template <> +scalar_as_column_device_view::return_type +scalar_as_column_device_view::operator()(scalar const& s, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using T = cudf::string_view; + auto h_scalar_type_view = static_cast&>(const_cast(s)); + + // build offsets column from the string size + auto offsets_transformer_itr = + thrust::make_constant_iterator(h_scalar_type_view.size()); + auto offsets_column = strings::detail::make_offsets_child_column( + offsets_transformer_itr, offsets_transformer_itr + 1, stream, mr); + + auto chars_column_v = + column_view(data_type{type_id::INT8}, h_scalar_type_view.size(), h_scalar_type_view.data()); + // Construct string column_view + auto col_v = column_view(s.type(), + 1, + nullptr, + (bitmask_type const*)s.validity_data(), + cudf::UNKNOWN_NULL_COUNT, + 0, + {offsets_column->view(), chars_column_v}); + return std::pair{column_device_view::create(col_v, stream), std::move(offsets_column)}; +} -template -struct binary_op { - std::unique_ptr operator()(column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type out_type, - bool const reversed, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { - auto new_mask = binops::detail::scalar_col_valid_mask_and(lhs, rhs, stream, mr); - auto out = make_fixed_width_column(out_type, - lhs.size(), - std::move(new_mask), - rhs.is_valid(stream) ? cudf::UNKNOWN_NULL_COUNT : lhs.size(), - stream, - mr); - - if (lhs.size() > 0 && rhs.is_valid(stream)) { - auto out_view = out->mutable_view(); - auto out_itr = out_view.begin(); - auto lhs_device_view = column_device_view::create(lhs, stream); - using rhs_type = cudf::scalar_type_t; - auto rhs_scalar = rhs_type(static_cast(rhs), stream); - auto rhs_scalar_view = get_scalar_device_view(rhs_scalar); - if (lhs.has_nulls()) { - auto lhs_itr = cudf::detail::make_null_replacement_iterator(*lhs_device_view, Lhs{}); - reversed - ? 
thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - out_itr, - apply_binop_scalar_rhs_lhs{op, rhs_scalar_view}) - : thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - out_itr, - apply_binop_scalar_lhs_rhs{op, rhs_scalar_view}); - } else { - auto lhs_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_type{0}), - [col = *lhs_device_view] __device__(size_type i) { return col.element(i); }); - reversed - ? thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - out_itr, - apply_binop_scalar_rhs_lhs{op, rhs_scalar_view}) - : thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - out_itr, - apply_binop_scalar_lhs_rhs{op, rhs_scalar_view}); - } - } - - CHECK_CUDA(stream.value()); - - return out; - } - - std::unique_ptr operator()(column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type out_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { - auto new_mask = cudf::detail::bitmask_and(table_view({lhs, rhs}), stream, mr); - auto out = make_fixed_width_column( - out_type, lhs.size(), std::move(new_mask), cudf::UNKNOWN_NULL_COUNT, stream, mr); - - if (lhs.size() > 0) { - auto out_view = out->mutable_view(); - auto out_itr = out_view.begin(); - auto lhs_device_view = column_device_view::create(lhs, stream); - auto rhs_device_view = column_device_view::create(rhs, stream); - if (lhs.has_nulls() && rhs.has_nulls()) { - auto lhs_itr = cudf::detail::make_null_replacement_iterator(*lhs_device_view, Lhs{}); - auto rhs_itr = cudf::detail::make_null_replacement_iterator(*rhs_device_view, Rhs{}); - thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - rhs_itr, - out_itr, - apply_binop{op}); - } else if (lhs.has_nulls()) { - auto lhs_itr = cudf::detail::make_null_replacement_iterator(*lhs_device_view, Lhs{}); - auto rhs_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_type{0}), - [col = *rhs_device_view] __device__(size_type i) { return col.element(i); }); - thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - rhs_itr, - out_itr, - apply_binop{op}); - } else if (rhs.has_nulls()) { - auto lhs_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_type{0}), - [col = *lhs_device_view] __device__(size_type i) { return col.element(i); }); - auto rhs_itr = cudf::detail::make_null_replacement_iterator(*rhs_device_view, Rhs{}); - thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - rhs_itr, - out_itr, - apply_binop{op}); - } else { - auto lhs_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_type{0}), - [col = *lhs_device_view] __device__(size_type i) { return col.element(i); }); - auto rhs_itr = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_type{0}), - [col = *rhs_device_view] __device__(size_type i) { return col.element(i); }); - thrust::transform(rmm::exec_policy(stream), - lhs_itr, - lhs_itr + lhs.size(), - rhs_itr, - out_itr, - apply_binop{op}); - } - } - - CHECK_CUDA(stream.value()); - - return out; - } -}; +/** + * @brief Converts scalar to column_device_view with single element. + * + * @param scal scalar to convert + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @param mr Device memory resource used to allocate the returned column's device memory + * @return pair with column_device_view and column containing any auxilary data to create + * column_view from scalar + */ +auto scalar_to_column_device_view( + scalar const& scal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + return type_dispatcher(scal.type(), scalar_as_column_device_view{}, scal, stream, mr); +} // This functor does the actual comparison between string column value and a scalar string // or between two string column values using a comparator @@ -337,152 +208,181 @@ struct null_considering_binop { // Create device views for inputs auto const lhs_dev_view = get_device_view(lhs); auto const rhs_dev_view = get_device_view(rhs); - - switch (op) { - case binary_operator::NULL_EQUALS: { - // Validate input - CUDF_EXPECTS(output_type.id() == type_id::BOOL8, "Output column type has to be bool"); - - // Make a bool8 numeric output column - out = make_numeric_column( - data_type{type_id::BOOL8}, col_size, mask_state::ALL_VALID, stream, mr); - - // Create a compare function lambda - auto equal_func = [] __device__(bool lhs_valid, - bool rhs_valid, - cudf::string_view lhs_value, - cudf::string_view rhs_value) { - if (!lhs_valid && !rhs_valid) return true; - if (lhs_valid && rhs_valid) return (lhs_value == rhs_value); - return false; - }; - - // Populate output column - populate_out_col(lhs_dev_view, - rhs_dev_view, - col_size, - stream, - equal_func, - mutable_column_view{*out}.begin()); - - break; - } - - case binary_operator::NULL_MAX: - case binary_operator::NULL_MIN: { - // Validate input - CUDF_EXPECTS(output_type.id() == lhs.type().id(), - "Output column type should match input column type"); - - // Shallow copy of the resultant strings - rmm::device_uvector out_col_strings(col_size, stream); - - // Invalid output column strings - null rows - cudf::string_view const invalid_str{nullptr, 0}; - - // Create a compare function lambda - auto minmax_func = [op, invalid_str] __device__(bool lhs_valid, - bool rhs_valid, - cudf::string_view lhs_value, - cudf::string_view rhs_value) { - if (!lhs_valid && !rhs_valid) - return invalid_str; - else if (lhs_valid && rhs_valid) { - return (op == binary_operator::NULL_MAX) - ? thrust::maximum()(lhs_value, rhs_value) - : thrust::minimum()(lhs_value, rhs_value); - } else if (lhs_valid) - return lhs_value; - else - return rhs_value; - }; - - // Populate output column - populate_out_col( - lhs_dev_view, rhs_dev_view, col_size, stream, minmax_func, out_col_strings.data()); - - // Create an output column with the resultant strings - out = cudf::make_strings_column(out_col_strings, invalid_str, stream, mr); - - break; - } - - default: { - CUDF_FAIL("Null aware binop not supported"); - } - } - - return out; + // Validate input + CUDF_EXPECTS(output_type.id() == lhs.type().id(), + "Output column type should match input column type"); + + // Shallow copy of the resultant strings + rmm::device_uvector out_col_strings(col_size, stream); + + // Invalid output column strings - null rows + cudf::string_view const invalid_str{nullptr, 0}; + + // Create a compare function lambda + auto minmax_func = + [op, invalid_str] __device__( + bool lhs_valid, bool rhs_valid, cudf::string_view lhs_value, cudf::string_view rhs_value) { + if (!lhs_valid && !rhs_valid) + return invalid_str; + else if (lhs_valid && rhs_valid) { + return (op == binary_operator::NULL_MAX) + ? 
thrust::maximum()(lhs_value, rhs_value) + : thrust::minimum()(lhs_value, rhs_value); + } else if (lhs_valid) + return lhs_value; + else + return rhs_value; + }; + + // Populate output column + populate_out_col( + lhs_dev_view, rhs_dev_view, col_size, stream, minmax_func, out_col_strings.data()); + + // Create an output column with the resultant strings + return cudf::make_strings_column(out_col_strings, invalid_str, stream, mr); } }; } // namespace -std::unique_ptr binary_operation(scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr string_null_min_max(scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // hard-coded to only work with cudf::string_view so we don't explode compile times CUDF_EXPECTS(lhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported lhs datatype"); CUDF_EXPECTS(rhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported rhs datatype"); - if (is_null_dependent(op)) { - if (rhs.is_empty()) return cudf::make_empty_column(output_type); - auto rhs_device_view = cudf::column_device_view::create(rhs, stream); - return null_considering_binop{}(lhs, *rhs_device_view, op, output_type, rhs.size(), stream, mr); - } else { - CUDF_EXPECTS(is_boolean(output_type), "Invalid/Unsupported output datatype"); - // Should pass the right type of scalar and column_view when specializing binary_op - return binary_op{}( - rhs, lhs, op, output_type, true, stream, mr); - } + CUDF_EXPECTS(op == binary_operator::NULL_MAX or op == binary_operator::NULL_MIN, + "Unsupported binary operation"); + if (rhs.is_empty()) return cudf::make_empty_column(output_type); + auto rhs_device_view = cudf::column_device_view::create(rhs, stream); + return null_considering_binop{}(lhs, *rhs_device_view, op, output_type, rhs.size(), stream, mr); } -std::unique_ptr binary_operation(column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr string_null_min_max(column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // hard-coded to only work with cudf::string_view so we don't explode compile times CUDF_EXPECTS(lhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported lhs datatype"); CUDF_EXPECTS(rhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported rhs datatype"); - if (is_null_dependent(op)) { - if (lhs.is_empty()) return cudf::make_empty_column(output_type); - auto lhs_device_view = cudf::column_device_view::create(lhs, stream); - return null_considering_binop{}(*lhs_device_view, rhs, op, output_type, lhs.size(), stream, mr); - } else { - CUDF_EXPECTS(is_boolean(output_type), "Invalid/Unsupported output datatype"); - return binary_op{}( - lhs, rhs, op, output_type, false, stream, mr); - } + CUDF_EXPECTS(op == binary_operator::NULL_MAX or op == binary_operator::NULL_MIN, + "Unsupported binary operation"); + if (lhs.is_empty()) return cudf::make_empty_column(output_type); + auto lhs_device_view = cudf::column_device_view::create(lhs, stream); + return null_considering_binop{}(*lhs_device_view, rhs, op, output_type, lhs.size(), stream, mr); } -std::unique_ptr binary_operation(column_view const& lhs, - column_view const& rhs, - 
-                                         binary_operator op,
-                                         data_type output_type,
-                                         rmm::cuda_stream_view stream,
-                                         rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> string_null_min_max(column_view const& lhs,
+                                            column_view const& rhs,
+                                            binary_operator op,
+                                            data_type output_type,
+                                            rmm::cuda_stream_view stream,
+                                            rmm::mr::device_memory_resource* mr)
 {
   // hard-coded to only work with cudf::string_view so we don't explode compile times
   CUDF_EXPECTS(lhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported lhs datatype");
   CUDF_EXPECTS(rhs.type().id() == cudf::type_id::STRING, "Invalid/Unsupported rhs datatype");
-  if (is_null_dependent(op)) {
-    CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes do not match");
-    if (lhs.is_empty()) return cudf::make_empty_column(output_type);
-    auto lhs_device_view = cudf::column_device_view::create(lhs, stream);
-    auto rhs_device_view = cudf::column_device_view::create(rhs, stream);
-    return null_considering_binop{}(
-      *lhs_device_view, *rhs_device_view, op, output_type, lhs.size(), stream, mr);
-  } else {
-    CUDF_EXPECTS(is_boolean(output_type), "Invalid/Unsupported output datatype");
-    return binary_op<bool, cudf::string_view, cudf::string_view>{}(
-      lhs, rhs, op, output_type, stream, mr);
-  }
+  CUDF_EXPECTS(op == binary_operator::NULL_MAX or op == binary_operator::NULL_MIN,
+               "Unsupported binary operation");
+  CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes do not match");
+  if (lhs.is_empty()) return cudf::make_empty_column(output_type);
+  auto lhs_device_view = cudf::column_device_view::create(lhs, stream);
+  auto rhs_device_view = cudf::column_device_view::create(rhs, stream);
+  return null_considering_binop{}(
+    *lhs_device_view, *rhs_device_view, op, output_type, lhs.size(), stream, mr);
+}
+
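The row semantics encoded by the minmax_func lambda above: when both sides are null the output row is null, when exactly one side is valid it wins outright, and when both are valid the values compare lexicographically. A host-side analogue for a single row pair (illustrative only, not cudf API):

#include <algorithm>
#include <optional>
#include <string>

// What NULL_MAX produces for one pair of rows; NULL_MIN swaps max for min.
std::optional<std::string> null_max_row(std::optional<std::string> const& l,
                                        std::optional<std::string> const& r)
{
  if (!l && !r) return std::nullopt;    // both null -> null output row
  if (l && r) return std::max(*l, *r);  // both valid -> lexicographic max
  return l ? l : r;                     // exactly one valid -> that side
}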
+void operator_dispatcher(mutable_column_device_view& out,
+                         column_device_view const& lhs,
+                         column_device_view const& rhs,
+                         bool is_lhs_scalar,
+                         bool is_rhs_scalar,
+                         binary_operator op,
+                         rmm::cuda_stream_view stream)
+{
+  // clang-format off
+switch (op) {
+case binary_operator::ADD:                  apply_binary_op<ops::Add>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::SUB:                  apply_binary_op<ops::Sub>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::MUL:                  apply_binary_op<ops::Mul>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::DIV:                  apply_binary_op<ops::Div>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::TRUE_DIV:             apply_binary_op<ops::TrueDiv>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::FLOOR_DIV:            apply_binary_op<ops::FloorDiv>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::MOD:                  apply_binary_op<ops::Mod>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::PYMOD:                apply_binary_op<ops::PyMod>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::POW:                  apply_binary_op<ops::Pow>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::EQUAL:
+case binary_operator::NOT_EQUAL:
+case binary_operator::NULL_EQUALS:
+if(out.type().id() != type_id::BOOL8) CUDF_FAIL("Output type of Comparison operator should be bool type");
+dispatch_equality_op(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, op, stream); break;
+case binary_operator::LESS:                 apply_binary_op<ops::Less>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::GREATER:              apply_binary_op<ops::Greater>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::LESS_EQUAL:           apply_binary_op<ops::LessEqual>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::GREATER_EQUAL:        apply_binary_op<ops::GreaterEqual>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::BITWISE_AND:          apply_binary_op<ops::BitwiseAnd>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::BITWISE_OR:           apply_binary_op<ops::BitwiseOr>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::BITWISE_XOR:          apply_binary_op<ops::BitwiseXor>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::LOGICAL_AND:          apply_binary_op<ops::LogicalAnd>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::LOGICAL_OR:           apply_binary_op<ops::LogicalOr>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+/*
+case binary_operator::GENERIC_BINARY:       // Cannot be compiled, should be called by jit::binary_operation
+*/
+case binary_operator::SHIFT_LEFT:           apply_binary_op<ops::ShiftLeft>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::SHIFT_RIGHT:          apply_binary_op<ops::ShiftRight>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::SHIFT_RIGHT_UNSIGNED: apply_binary_op<ops::ShiftRightUnsigned>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::LOG_BASE:             apply_binary_op<ops::LogBase>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::ATAN2:                apply_binary_op<ops::ATan2>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::PMOD:                 apply_binary_op<ops::PMod>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::NULL_MAX:             apply_binary_op<ops::NullMax>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+case binary_operator::NULL_MIN:             apply_binary_op<ops::NullMin>(out, lhs, rhs, is_lhs_scalar, is_rhs_scalar, stream); break;
+default:;
+}
+  // clang-format on
+}
+
+// vector_vector
+void binary_operation(mutable_column_view& out,
+                      column_view const& lhs,
+                      column_view const& rhs,
+                      binary_operator op,
+                      rmm::cuda_stream_view stream)
+{
+  auto lhsd = column_device_view::create(lhs, stream);
+  auto rhsd = column_device_view::create(rhs, stream);
+  auto outd = mutable_column_device_view::create(out, stream);
+  operator_dispatcher(*outd, *lhsd, *rhsd, false, false, op, stream);
+}
+// scalar_vector
+void binary_operation(mutable_column_view& out,
+                      scalar const& lhs,
+                      column_view const& rhs,
+                      binary_operator op,
+                      rmm::cuda_stream_view stream)
+{
+  auto [lhsd, aux] = scalar_to_column_device_view(lhs, stream);
+  auto rhsd        = column_device_view::create(rhs, stream);
+  auto outd        = mutable_column_device_view::create(out, stream);
+  operator_dispatcher(*outd, *lhsd, *rhsd, true, false, op, stream);
+}
+// vector_scalar
+void binary_operation(mutable_column_view& out,
+                      column_view const& lhs,
+                      scalar const& rhs,
+                      binary_operator op,
+                      rmm::cuda_stream_view stream)
+{
+  auto lhsd        = column_device_view::create(lhs, stream);
+  auto [rhsd, aux] = scalar_to_column_device_view(rhs, stream);
+  auto outd        = mutable_column_device_view::create(out, stream);
+  operator_dispatcher(*outd, *lhsd, *rhsd, false, true, op, stream);
+}
 
 }  // namespace compiled
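All three overloads above reduce to operator_dispatcher; they differ only in which side is materialized from a scalar and in the is_lhs_scalar/is_rhs_scalar flags. A sketch of driving the vector_scalar form directly, assuming `out` was preallocated with the output type and size (as make_fixed_width_column_for_output does for the experimental entry points) and that `col` and `five` are an existing numeric column and scalar:

// Adds the scalar to every row of `col`, writing results into `out` in place.
auto out_view = out->mutable_view();
cudf::binops::compiled::binary_operation(
  out_view, col->view(), five, cudf::binary_operator::ADD, rmm::cuda_stream_default);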
diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh
new file mode 100644
index 00000000000..b17f3eddc5d
--- /dev/null
+++ b/cpp/src/binaryop/compiled/binary_ops.cuh
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "binary_ops.hpp"
+#include "operation.cuh"
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/detail/utilities/integer_utils.hpp>
+
+namespace cudf {
+namespace binops {
+namespace compiled {
+
+template <class BinaryOperator, typename TypeLhs, typename TypeRhs>
+constexpr bool is_bool_result()
+{
+  using ReturnType = std::invoke_result_t<BinaryOperator, TypeLhs, TypeRhs>;
+  return std::is_same_v<bool, ReturnType>;
+}
+
+/**
+ * @brief Type casts each element of the column to `CastType`
+ *
+ */
+template <typename CastType>
+struct type_casted_accessor {
+  template <typename Element>
+  CUDA_DEVICE_CALLABLE CastType operator()(cudf::size_type i,
+                                           column_device_view const& col,
+                                           bool is_scalar) const
+  {
+    if constexpr (column_device_view::has_element_accessor<Element>() and
+                  std::is_convertible_v<Element, CastType>)
+      return static_cast<CastType>(col.element<Element>(is_scalar ? 0 : i));
+    return {};
+  }
+};
+
+/**
+ * @brief Type casts value to column type and stores in `i`th row of the column
+ *
+ */
+template <typename FromType>
+struct typed_casted_writer {
+  template <typename Element>
+  CUDA_DEVICE_CALLABLE void operator()(cudf::size_type i,
+                                       mutable_column_device_view const& col,
+                                       FromType val) const
+  {
+    if constexpr (mutable_column_device_view::has_element_accessor<Element>() and
+                  std::is_constructible_v<Element, FromType>) {
+      col.element<Element>(i) = static_cast<Element>(val);
+    } else if constexpr (is_fixed_point<Element>() and std::is_constructible_v<Element, FromType>) {
+      if constexpr (is_fixed_point<FromType>())
+        col.data<typename Element::rep>()[i] =
+          val.rescaled(numeric::scale_type{col.type().scale()}).value();
+      else
+        col.data<typename Element::rep>()[i] =
+          Element{val, numeric::scale_type{col.type().scale()}}.value();
+    }
+  }
+};
+
+// Functors to launch only defined operations.
+
+/**
+ * @brief Functor to launch only defined operations with common type.
+ *
+ * @tparam BinaryOperator binary operator functor
+ */
+template <typename BinaryOperator>
+struct ops_wrapper {
+  mutable_column_device_view& out;
+  column_device_view const& lhs;
+  column_device_view const& rhs;
+  bool const& is_lhs_scalar;
+  bool const& is_rhs_scalar;
+  template <typename TypeCommon>
+  __device__ void operator()(size_type i)
+  {
+    if constexpr (std::is_invocable_v<BinaryOperator, TypeCommon, TypeCommon>) {
+      TypeCommon x =
+        type_dispatcher(lhs.type(), type_casted_accessor<TypeCommon>{}, i, lhs, is_lhs_scalar);
+      TypeCommon y =
+        type_dispatcher(rhs.type(), type_casted_accessor<TypeCommon>{}, i, rhs, is_rhs_scalar);
+      auto result = [&]() {
+        if constexpr (std::is_same_v<BinaryOperator, ops::NullEquals> or
+                      std::is_same_v<BinaryOperator, ops::NullMax> or
+                      std::is_same_v<BinaryOperator, ops::NullMin>) {
+          bool output_valid = false;
+          auto result       = BinaryOperator{}.template operator()<TypeCommon, TypeCommon>(
+            x,
+            y,
+            lhs.is_valid(is_lhs_scalar ? 0 : i),
+            rhs.is_valid(is_rhs_scalar ? 0 : i),
+            output_valid);
+          if (out.nullable() && !output_valid) out.set_null(i);
+          return result;
+        } else {
+          return BinaryOperator{}.template operator()<TypeCommon, TypeCommon>(x, y);
+        }
+        // To suppress nvcc warning
+        return std::invoke_result_t<BinaryOperator, TypeCommon, TypeCommon>{};
+      }();
+      if constexpr (is_bool_result<BinaryOperator, TypeCommon, TypeCommon>())
+        out.element<decltype(result)>(i) = result;
+      else
+        type_dispatcher(out.type(), typed_casted_writer<decltype(result)>{}, i, out, result);
+    }
+    (void)i;
+  }
+};
+
+/**
+ * @brief Functor to launch only defined operations without common type.
+ * + * @tparam BinaryOperator binary operator functor + */ +template +struct ops2_wrapper { + mutable_column_device_view& out; + column_device_view const& lhs; + column_device_view const& rhs; + bool const& is_lhs_scalar; + bool const& is_rhs_scalar; + template + __device__ void operator()(size_type i) + { + if constexpr (!has_common_type_v and + std::is_invocable_v) { + TypeLhs x = lhs.element(is_lhs_scalar ? 0 : i); + TypeRhs y = rhs.element(is_rhs_scalar ? 0 : i); + auto result = [&]() { + if constexpr (std::is_same_v or + std::is_same_v or + std::is_same_v) { + bool output_valid = false; + auto result = BinaryOperator{}.template operator()( + x, + y, + lhs.is_valid(is_lhs_scalar ? 0 : i), + rhs.is_valid(is_rhs_scalar ? 0 : i), + output_valid); + if (out.nullable() && !output_valid) out.set_null(i); + return result; + } else { + return BinaryOperator{}.template operator()(x, y); + } + // To supress nvcc warning + return std::invoke_result_t{}; + }(); + if constexpr (is_bool_result()) + out.element(i) = result; + else + type_dispatcher(out.type(), typed_casted_writer{}, i, out, result); + } + (void)i; + } +}; + +/** + * @brief Functor which does single, and double type dispatcher in device code + * + * single type dispatcher for lhs and rhs with common types. + * double type dispatcher for lhs and rhs without common types. + * + * @tparam BinaryOperator binary operator functor + */ +template +struct device_type_dispatcher { + mutable_column_device_view out; + column_device_view lhs; + column_device_view rhs; + bool is_lhs_scalar; + bool is_rhs_scalar; + std::optional common_data_type; + + __device__ void operator()(size_type i) + { + if (common_data_type) { + type_dispatcher(*common_data_type, + ops_wrapper{out, lhs, rhs, is_lhs_scalar, is_rhs_scalar}, + i); + } else { + double_type_dispatcher( + lhs.type(), + rhs.type(), + ops2_wrapper{out, lhs, rhs, is_lhs_scalar, is_rhs_scalar}, + i); + } + } +}; + +/** + * @brief Simplified for_each kernel + * + * @param size number of elements to process. + * @param f Functor object to call for each element. + */ +template +__global__ void for_each_kernel(cudf::size_type size, Functor f) +{ + int tid = threadIdx.x; + int blkid = blockIdx.x; + int blksz = blockDim.x; + int gridsz = gridDim.x; + + int start = tid + blkid * blksz; + int step = blksz * gridsz; + +#pragma unroll + for (cudf::size_type i = start; i < size; i += step) { + f(i); + } +} + +/** + * @brief Launches Simplified for_each kernel with maximum occupancy grid dimensions. + * + * @tparam Functor + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param size number of elements to process. + * @param f Functor object to call for each element. + */ +template +void for_each(rmm::cuda_stream_view stream, cudf::size_type size, Functor f) +{ + int block_size; + int min_grid_size; + CUDA_TRY( + cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, for_each_kernel)); + // 2 elements per thread. 
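+  // (cudaOccupancyMaxPotentialBlockSize chooses the block size that maximizes
+  // occupancy for this kernel; sizing the grid for roughly two elements per thread
+  // lets the grid-stride loop amortize per-thread setup cost.)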
+ const int grid_size = util::div_rounding_up_safe(size, 2 * block_size); + for_each_kernel<<>>(size, std::forward(f)); +} + +template +void apply_binary_op(mutable_column_device_view& outd, + column_device_view const& lhsd, + column_device_view const& rhsd, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view stream) +{ + auto common_dtype = get_common_type(outd.type(), lhsd.type(), rhsd.type()); + + // Create binop functor instance + auto binop_func = device_type_dispatcher{ + outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar, common_dtype}; + // Execute it on every element + for_each(stream, outd.size(), binop_func); +} + +} // namespace compiled +} // namespace binops +} // namespace cudf diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp index a3f62f5018e..2a814c16d57 100644 --- a/cpp/src/binaryop/compiled/binary_ops.hpp +++ b/cpp/src/binaryop/compiled/binary_ops.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, NVIDIA CORPORATION. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,13 @@ #include +#include + namespace cudf { +// Forward declarations +class column_device_view; +class mutable_column_device_view; + namespace binops { namespace detail { /** @@ -45,6 +51,30 @@ inline bool is_null_dependent(binary_operator op) namespace compiled { +std::unique_ptr string_null_min_max( + scalar const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +std::unique_ptr string_null_min_max( + column_view const& lhs, + scalar const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +std::unique_ptr string_null_min_max( + column_view const& lhs, + column_view const& rhs, + binary_operator op, + data_type output_type, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Performs a binary operation between a string scalar and a string * column. @@ -123,6 +153,89 @@ std::unique_ptr binary_operation( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +void binary_operation(mutable_column_view& out, + scalar const& lhs, + column_view const& rhs, + binary_operator op, + rmm::cuda_stream_view stream); +void binary_operation(mutable_column_view& out, + column_view const& lhs, + scalar const& rhs, + binary_operator op, + rmm::cuda_stream_view stream); +void binary_operation(mutable_column_view& out, + column_view const& lhs, + column_view const& rhs, + binary_operator op, + rmm::cuda_stream_view stream); + +// Defined in util.cpp +/** + * @brief Get the common type among all input types. + * + * @param out type 1 + * @param lhs type 2 + * @param rhs type 3 + * @return common type among @p out, @p lhs, @p rhs. + */ +std::optional get_common_type(data_type out, data_type lhs, data_type rhs); +/** + * @brief Check if input binary operation is supported for the given input and output types. 
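+ *        The answer is derived from compile-time is_invocable / is_constructible
+ *        checks instantiated for the runtime-dispatched element types.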
+ * + * @param out output type of the binary operation + * @param lhs first operand type of the binary operation + * @param rhs second operand type of the binary operation + * @param op binary operator enum. + * @return true if given binary operator supports given input and output types. + */ +bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op); + +// Defined in individual .cu files. +/** + * @brief Deploys single type or double type dispatcher that runs binary operation on each element + * of @p lhsd and @p rhsd columns. + * + * This template is instantiated for each binary operator. + * + * @tparam BinaryOperator Binary operator functor + * @param outd mutable device view of output column + * @param lhsd device view of left operand column + * @param rhsd device view of right operand column + * @param is_lhs_scalar true if @p lhsd is a single element column representing a scalar + * @param is_rhs_scalar true if @p rhsd is a single element column representing a scalar + * @param stream CUDA stream used for device memory operations + */ +template +void apply_binary_op(mutable_column_device_view&, + column_device_view const&, + column_device_view const&, + bool is_lhs_scalar, + bool is_rhs_scalar, + rmm::cuda_stream_view stream); +/** + * @brief Deploys single type or double type dispatcher that runs equality operation on each element + * of @p lhsd and @p rhsd columns. + * + * Comparison operators are EQUAL, NOT_EQUAL, NULL_EQUALS. + * @p outd type is boolean. + * + * This template is instantiated for each binary operator. + * + * @param outd mutable device view of output column + * @param lhsd device view of left operand column + * @param rhsd device view of right operand column + * @param is_lhs_scalar true if @p lhsd is a single element column representing a scalar + * @param is_rhs_scalar true if @p rhsd is a single element column representing a scalar + * @param op comparison binary operator + * @param stream CUDA stream used for device memory operations + */ +void dispatch_equality_op(mutable_column_device_view& outd, + column_device_view const& lhsd, + column_device_view const& rhsd, + bool is_lhs_scalar, + bool is_rhs_scalar, + binary_operator op, + rmm::cuda_stream_view stream); } // namespace compiled } // namespace binops } // namespace cudf diff --git a/cpp/src/binaryop/compiled/equality_ops.cu b/cpp/src/binaryop/compiled/equality_ops.cu new file mode 100644 index 00000000000..feee310716a --- /dev/null +++ b/cpp/src/binaryop/compiled/equality_ops.cu @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "binary_ops.cuh"
+
+namespace cudf::binops::compiled {
+void dispatch_equality_op(mutable_column_device_view& outd,
+                          column_device_view const& lhsd,
+                          column_device_view const& rhsd,
+                          bool is_lhs_scalar,
+                          bool is_rhs_scalar,
+                          binary_operator op,
+                          rmm::cuda_stream_view stream)
+{
+  auto common_dtype = get_common_type(outd.type(), lhsd.type(), rhsd.type());
+
+  // Execute it on every element
+  for_each(
+    stream,
+    outd.size(),
+    [op, outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar, common_dtype] __device__(size_type i) {
+      // clang-format off
+      // Similar enabled template types should go together (better performance)
+      switch (op) {
+        case binary_operator::EQUAL:       device_type_dispatcher<ops::Equal>{outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar, common_dtype}(i); break;
+        case binary_operator::NOT_EQUAL:   device_type_dispatcher<ops::NotEqual>{outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar, common_dtype}(i); break;
+        case binary_operator::NULL_EQUALS: device_type_dispatcher<ops::NullEquals>{outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar, common_dtype}(i); break;
+        default:;
+      }
+      // clang-format on
+    });
+}
+} // namespace cudf::binops::compiled
diff --git a/cpp/src/binaryop/compiled/operation.cuh b/cpp/src/binaryop/compiled/operation.cuh
new file mode 100644
index 00000000000..86645e2cb8a
--- /dev/null
+++ b/cpp/src/binaryop/compiled/operation.cuh
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#pragma once + +#include + +#include + +namespace cudf { +namespace binops { +namespace compiled { + +// All binary operations +namespace ops { + +struct Add { + template + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> decltype(lhs + rhs) + { + return lhs + rhs; + } +}; + +struct Sub { + template + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> decltype(lhs - rhs) + { + return lhs - rhs; + } +}; + +struct Mul { + template + static constexpr inline bool is_supported() + { + return has_common_type_v or + // FIXME: without the following line, compilation error + // _deps/libcudacxx-src/include/cuda/std/detail/libcxx/include/chrono(917): error: + // identifier "cuda::std::__3::ratio<(long)86400000000l, (long)1l> ::num" is undefined in + // device code + (is_duration() and std::is_integral()) or + (std::is_integral() and is_duration()) or + (is_fixed_point() and is_numeric()) or + (is_numeric() and is_fixed_point()); + } + template ()>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> decltype(lhs * rhs) + { + return lhs * rhs; + } +}; + +struct Div { + template + static constexpr inline bool is_supported() + { + return has_common_type_v or + // FIXME: without this, compilation error on chrono:917 + (is_duration() and (std::is_integral() or is_duration())) or + (is_fixed_point() and is_numeric()) or + (is_numeric() and is_fixed_point()); + } + template ()>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> decltype(lhs / rhs) + { + return lhs / rhs; + } +}; + +struct TrueDiv { + template + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) + -> decltype((static_cast(lhs) / static_cast(rhs))) + { + return (static_cast(lhs) / static_cast(rhs)); + } +}; + +struct FloorDiv { + template + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) + -> decltype(floor(static_cast(lhs) / static_cast(rhs))) + { + return floor(static_cast(lhs) / static_cast(rhs)); + } +}; + +struct Mod { + template + static constexpr inline bool is_supported() + { + return has_common_type_v or + // FIXME: without this, compilation error + //_deps/libcudacxx-src/include/cuda/std/detail/libcxx/include/chrono(1337): + // error : expression must have integral or unscoped enum type + (is_duration() and (std::is_integral() or is_duration())); + } + template ()>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> decltype(lhs % rhs) + { + return lhs % rhs; + } + template >)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> float + { + return fmodf(static_cast(lhs), static_cast(rhs)); + } + template >)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(T1 const& lhs, T2 const& rhs) -> double + { + return fmod(static_cast(lhs), static_cast(rhs)); + } +}; + +struct PMod { + // Ideally, these two specializations - one for integral types and one for non integral + // types shouldn't be required, as std::fmod should promote integral types automatically + // to double and call the std::fmod overload for doubles. 
Sadly, doing this in jitified + // code does not work - it is having trouble deciding between float/double overloads + template >)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) + { + using common_t = std::common_type_t; + common_t xconv = static_cast(x); + common_t yconv = static_cast(y); + auto rem = xconv % yconv; + if constexpr (std::is_signed_v) + if (rem < 0) rem = (rem + yconv) % yconv; + return rem; + } + + template < + typename TypeLhs, + typename TypeRhs, + std::enable_if_t<(std::is_floating_point_v>)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) + { + using common_t = std::common_type_t; + common_t xconv = static_cast(x); + common_t yconv = static_cast(y); + auto rem = std::fmod(xconv, yconv); + if (rem < 0) rem = std::fmod(rem + yconv, yconv); + return rem; + } +}; + +struct PyMod { + template >)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(((x % y) + y) % y) + { + return ((x % y) + y) % y; + } + + template < + typename TypeLhs, + typename TypeRhs, + std::enable_if_t<(std::is_floating_point_v>)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> double + { + double x1 = static_cast(x); + double y1 = static_cast(y); + return fmod(fmod(x1, y1) + y1, y1); + } + + template ())>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(((x % y) + y) % y) + { + return ((x % y) + y) % y; + } +}; + +struct Pow { + template and + std::is_convertible_v)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> double + { + return pow(static_cast(x), static_cast(y)); + } +}; + +struct LogBase { + template and + std::is_convertible_v)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> double + { + return (std::log(static_cast(x)) / std::log(static_cast(y))); + } +}; + +struct ATan2 { + template and + std::is_convertible_v)>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> double + { + return std::atan2(static_cast(x), static_cast(y)); + } +}; + +struct ShiftLeft { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x << y) + { + return (x << y); + } +}; + +struct ShiftRight { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x >> y) + { + return (x >> y); + } +}; + +struct ShiftRightUnsigned { + template < + typename TypeLhs, + typename TypeRhs, + std::enable_if_t<(std::is_integral_v and not is_boolean())>* = nullptr> + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) + -> decltype(static_cast>(x) >> y) + { + return (static_cast>(x) >> y); + } +}; + +struct BitwiseAnd { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x & y) + { + return (x & y); + } +}; + +struct BitwiseOr { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x | y) + { + return (x | y); + } +}; + +struct BitwiseXor { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x ^ y) + { + return (x ^ y); + } +}; + +struct LogicalAnd { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x && y) + { + return (x && y); + } +}; + +struct LogicalOr { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x || y) + { + return (x || y); + } +}; + +struct Equal { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x == y) + { + return (x == y); + } +}; + 
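+// Note: the comparison functors below deliberately constrain themselves through
+// their trailing decltype, so callers can probe support with std::is_invocable_v
+// instead of bespoke traits. A minimal host-side sketch (illustrative only, not
+// part of this header):
+//   static_assert(std::is_invocable_v<Equal, int, double>);
+//   bool eq = Equal{}(1, 1.0);  // true, after the usual arithmetic promotion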
+struct NotEqual { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x != y) + { + return (x != y); + } +}; + +struct Less { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x < y) + { + return (x < y); + } +}; + +struct Greater { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x > y) + { + return (x > y); + } +}; + +struct LessEqual { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x <= y) + { + return (x <= y); + } +}; + +struct GreaterEqual { + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x >= y) + { + return (x >= y); + } +}; + +struct NullEquals { + template + CUDA_DEVICE_CALLABLE auto operator()( + TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) -> decltype(x == y) + { + output_valid = true; + if (!lhs_valid && !rhs_valid) return true; + if (lhs_valid && rhs_valid) return x == y; + return false; + } + // To allow std::is_invocable_v = true + template + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) -> decltype(x == y); +}; + +struct NullMax { + template > + CUDA_DEVICE_CALLABLE auto operator()( + TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) + -> decltype(static_cast(static_cast(x) > static_cast(y) ? x : y)) + { + output_valid = true; + auto const x_conv = static_cast(x); + auto const y_conv = static_cast(y); + if (!lhs_valid && !rhs_valid) { + output_valid = false; + return common_t{}; + } else if (lhs_valid && rhs_valid) { + return (x_conv > y_conv) ? x_conv : y_conv; + } else if (lhs_valid) + return x_conv; + else + return y_conv; + } + // To allow std::is_invocable_v = true + template > + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) + -> decltype(static_cast(static_cast(x) > static_cast(y) ? x : y)); +}; + +struct NullMin { + template > + CUDA_DEVICE_CALLABLE auto operator()( + TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) + -> decltype(static_cast(static_cast(x) < static_cast(y) ? x : y)) + { + output_valid = true; + auto const x_conv = static_cast(x); + auto const y_conv = static_cast(y); + if (!lhs_valid && !rhs_valid) { + output_valid = false; + return common_t{}; + } else if (lhs_valid && rhs_valid) { + return (x_conv < y_conv) ? x_conv : y_conv; + } else if (lhs_valid) + return x_conv; + else + return y_conv; + } + // To allow std::is_invocable_v = true + template > + CUDA_DEVICE_CALLABLE auto operator()(TypeLhs x, TypeRhs y) + -> decltype(static_cast(static_cast(x) < static_cast(y) ? x : y)); +}; + +} // namespace ops +} // namespace compiled +} // namespace binops +} // namespace cudf diff --git a/cpp/src/binaryop/compiled/util.cpp b/cpp/src/binaryop/compiled/util.cpp new file mode 100644 index 00000000000..89320256aec --- /dev/null +++ b/cpp/src/binaryop/compiled/util.cpp @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "operation.cuh" + +#include +#include +#include +#include + +namespace cudf::binops::compiled { + +namespace { +/** + * @brief Functor that returns optional common type of 2 or 3 given types. + * + */ +struct common_type_functor { + template + struct nested_common_type_functor { + template + std::optional operator()() + { + // If common_type exists + if constexpr (cudf::has_common_type_v) { + using TypeCommon = typename std::common_type::type; + return data_type{type_to_id()}; + } else if constexpr (cudf::has_common_type_v) { + using TypeCommon = typename std::common_type::type; + // Eg. d=t-t + return data_type{type_to_id()}; + } + return {}; + } + }; + template + std::optional operator()(data_type out) + { + return type_dispatcher(out, nested_common_type_functor{}); + } +}; + +/** + * @brief Functor that return true if BinaryOperator supports given input and output types. + * + * @tparam BinaryOperator binary operator functor + */ +template +struct is_binary_operation_supported { + // For types where Out type is fixed. (eg. comparison types) + template + inline constexpr bool operator()(void) + { + if constexpr (column_device_view::has_element_accessor() and + column_device_view::has_element_accessor()) { + if constexpr (has_common_type_v) { + using common_t = std::common_type_t; + return std::is_invocable_v; + } else + return std::is_invocable_v; + } else { + return false; + } + } + + template + inline constexpr bool operator()(void) + { + if constexpr (column_device_view::has_element_accessor() and + column_device_view::has_element_accessor() and + (mutable_column_device_view::has_element_accessor() or + is_fixed_point())) { + if constexpr (has_common_type_v) { + using common_t = std::common_type_t; + if constexpr (std::is_invocable_v) { + using ReturnType = std::invoke_result_t; + return std::is_constructible_v; + } + } else { + if constexpr (std::is_invocable_v) { + using ReturnType = std::invoke_result_t; + return std::is_constructible_v; + } + } + } + return false; + } +}; + +struct is_supported_operation_functor { + template + struct nested_support_functor { + template + inline constexpr bool call() + { + return is_binary_operation_supported{} + .template operator()(); + } + template + inline constexpr bool operator()(binary_operator op) + { + switch (op) { + // clang-format off + case binary_operator::ADD: return call(); + case binary_operator::SUB: return call(); + case binary_operator::MUL: return call(); + case binary_operator::DIV: return call(); + case binary_operator::TRUE_DIV: return call(); + case binary_operator::FLOOR_DIV: return call(); + case binary_operator::MOD: return call(); + case binary_operator::PYMOD: return call(); + case binary_operator::POW: return call(); + case binary_operator::BITWISE_AND: return call(); + case binary_operator::BITWISE_OR: return call(); + case binary_operator::BITWISE_XOR: return call(); + case binary_operator::SHIFT_LEFT: return call(); + case binary_operator::SHIFT_RIGHT: return call(); + case binary_operator::SHIFT_RIGHT_UNSIGNED: return call(); + case binary_operator::LOG_BASE: return call(); + case binary_operator::ATAN2: return call(); + case binary_operator::PMOD: return call(); + case binary_operator::NULL_MAX: return call(); + case binary_operator::NULL_MIN: return call(); + /* + case binary_operator::GENERIC_BINARY: // defined in jit only. 
+ */ + default: return false; + // clang-format on + } + } + }; + + template + inline constexpr bool bool_op(data_type out) + { + return out.id() == type_id::BOOL8 and + is_binary_operation_supported{}.template operator()(); + } + template + inline constexpr bool operator()(data_type out, binary_operator op) + { + switch (op) { + // output type should be bool type. + case binary_operator::LOGICAL_AND: return bool_op(out); + case binary_operator::LOGICAL_OR: return bool_op(out); + case binary_operator::EQUAL: return bool_op(out); + case binary_operator::NOT_EQUAL: return bool_op(out); + case binary_operator::LESS: return bool_op(out); + case binary_operator::GREATER: return bool_op(out); + case binary_operator::LESS_EQUAL: return bool_op(out); + case binary_operator::GREATER_EQUAL: return bool_op(out); + case binary_operator::NULL_EQUALS: return bool_op(out); + default: return type_dispatcher(out, nested_support_functor{}, op); + } + return false; + } +}; + +} // namespace + +std::optional get_common_type(data_type out, data_type lhs, data_type rhs) +{ + return double_type_dispatcher(lhs, rhs, common_type_functor{}, out); +} + +bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_operator op) +{ + return double_type_dispatcher(lhs, rhs, is_supported_operation_functor{}, out, op); +} +} // namespace cudf::binops::compiled diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 546eb050a60..045bfbe0327 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -181,6 +181,12 @@ T fixed_point_scalar::fixed_point_value(rmm::cuda_stream_view stream) const numeric::scaled_integer{_data.value(stream), numeric::scale_type{type().scale()}}}; } +template +fixed_point_scalar::operator value_type() const +{ + return this->fixed_point_value(rmm::cuda_stream_default); +} + template typename fixed_point_scalar::rep_type* fixed_point_scalar::data() { diff --git a/cpp/src/table/table_device_view.cu b/cpp/src/table/table_device_view.cu index 62daeed6d79..859a6be3bb0 100644 --- a/cpp/src/table/table_device_view.cu +++ b/cpp/src/table/table_device_view.cu @@ -55,7 +55,7 @@ template class table_device_view_base; template class table_device_view_base; namespace { -struct is_relationally_comparable_impl { +struct is_relationally_comparable_functor { template constexpr bool operator()() { @@ -74,7 +74,7 @@ bool is_relationally_comparable(TableView const& lhs, TableView const& rhs) // TODO: possible to implement without double type dispatcher. return lhs.column(i).type() == rhs.column(i).type() and type_dispatcher(lhs.column(i).type(), - is_relationally_comparable_impl{}); + is_relationally_comparable_functor{}); }); } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 48c96316795..34fceb9015e 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -163,6 +163,8 @@ ConfigureTest(BINARY_TEST binaryop/binop-verify-input-test.cpp binaryop/binop-null-test.cpp binaryop/binop-integration-test.cpp + binaryop/binop-compiled-test.cpp + binaryop/binop-compiled-fixed_point-test.cpp binaryop/binop-generic-ptx-test.cpp ) diff --git a/cpp/tests/binaryop/assert-binops.h b/cpp/tests/binaryop/assert-binops.h index 9e762a1c987..65859251e42 100644 --- a/cpp/tests/binaryop/assert-binops.h +++ b/cpp/tests/binaryop/assert-binops.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Copyright 2018-2019 BlazingDB, Inc. 
* Copyright 2018 Christian Noboa Mardini @@ -36,28 +36,21 @@ namespace binop { // result returned by the binop operation into string, which is then used for display purposes // when the values do not match. struct stringify_out_values { - template ()>* = nullptr> - std::string operator()(TypeOut lhs, TypeOut rhs) const + template + std::string operator()(size_type i, TypeOut lhs, TypeOut rhs) const { std::stringstream out_str; - out_str << "lhs: " << lhs << "\nrhs: " << rhs; - return out_str.str(); - } - - template ()>* = nullptr> - std::string operator()(TypeOut lhs, TypeOut rhs) const - { - std::stringstream out_str; - out_str << "lhs: " << lhs.time_since_epoch().count() - << "\nrhs: " << rhs.time_since_epoch().count(); - return out_str.str(); - } - - template ()>* = nullptr> - std::string operator()(TypeOut lhs, TypeOut rhs) const - { - std::stringstream out_str; - out_str << "lhs: " << lhs.count() << "\nrhs: " << rhs.count(); + out_str << "[" << i << "]:\n"; + if constexpr (is_fixed_point()) { + out_str << "lhs: " << std::string(lhs) << "\nrhs: " << std::string(rhs); + } else if constexpr (is_timestamp()) { + out_str << "lhs: " << lhs.time_since_epoch().count() + << "\nrhs: " << rhs.time_since_epoch().count(); + } else if constexpr (is_duration()) { + out_str << "lhs: " << lhs.count() << "\nrhs: " << rhs.count(); + } else { + out_str << "lhs: " << lhs << "\nrhs: " << rhs; + } return out_str.str(); } }; @@ -101,7 +94,7 @@ void ASSERT_BINOP(column_view const& out, for (size_t i = 0; i < out_data.size(); ++i) { auto lhs = out_data[i]; auto rhs = (TypeOut)(op(lhs_h, rhs_data[i])); - ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(lhs, rhs); + ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(i, lhs, rhs); } if (rhs.nullable()) { @@ -148,7 +141,7 @@ void ASSERT_BINOP(column_view const& out, for (size_t i = 0; i < out_data.size(); ++i) { auto lhs = out_data[i]; auto rhs = (TypeOut)(op(lhs_data[i], rhs_h)); - ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(lhs, rhs); + ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(i, lhs, rhs); } if (lhs.nullable()) { @@ -196,7 +189,7 @@ void ASSERT_BINOP(column_view const& out, for (size_t i = 0; i < out_data.size(); ++i) { auto lhs = out_data[i]; auto rhs = (TypeOut)(op(lhs_data[i], rhs_data[i])); - ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(lhs, rhs); + ASSERT_TRUE(value_comparator(lhs, rhs)) << stringify_out_values{}(i, lhs, rhs); } if (lhs.nullable() and rhs.nullable()) { diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp new file mode 100644 index 00000000000..feb75cc3f09 --- /dev/null +++ b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp @@ -0,0 +1,721 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include "cudf/utilities/error.hpp" + +namespace cudf::test::binop { + +template +struct FixedPointCompiledTestBothReps : public cudf::test::BaseFixture { +}; + +template +using wrapper = cudf::test::fixed_width_column_wrapper; +TYPED_TEST_CASE(FixedPointCompiledTestBothReps, cudf::test::FixedPointTypes); + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd) +{ + using namespace numeric; + using decimalXX = TypeParam; + + auto const sz = std::size_t{1000}; + + auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { + return decimalXX{i, scale_type{0}}; + }); + auto const vec1 = std::vector(begin, begin + sz); + auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); + auto expected = std::vector(sz); + + std::transform(std::cbegin(vec1), + std::cend(vec1), + std::cbegin(vec2), + std::begin(expected), + std::plus()); + + auto const lhs = wrapper(vec1.begin(), vec1.end()); + auto const rhs = wrapper(vec2.begin(), vec2.end()); + auto const expected_col = wrapper(expected.begin(), expected.end()); + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpMultiply) +{ + using namespace numeric; + using decimalXX = TypeParam; + + auto const sz = std::size_t{1000}; + + auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { + return decimalXX{i, scale_type{0}}; + }); + auto const vec1 = std::vector(begin, begin + sz); + auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); + auto expected = std::vector(sz); + + std::transform(std::cbegin(vec1), + std::cend(vec1), + std::cbegin(vec2), + std::begin(expected), + std::multiplies()); + + auto const lhs = wrapper(vec1.begin(), vec1.end()); + auto const rhs = wrapper(vec2.begin(), vec2.end()); + auto const expected_col = wrapper(expected.begin(), expected.end()); + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); +} + +template +using fp_wrapper = cudf::test::fixed_point_column_wrapper; + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpMultiply2) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; + auto const rhs = fp_wrapper{{10, 10, 10, 10, 10}, scale_type{0}}; + auto const expected = fp_wrapper{{110, 220, 330, 440, 550}, scale_type{-1}}; + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpDiv) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = 
device_storage_type_t; + + auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; + auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{0}}; + auto const expected = fp_wrapper{{2, 7, 12, 17}, scale_type{-1}}; + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpDiv2) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; + auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{-2}}; + auto const expected = fp_wrapper{{2, 7, 12, 17}, scale_type{1}}; + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpDiv3) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; + auto const rhs = make_fixed_point_scalar(12, scale_type{-1}); + auto const expected = fp_wrapper{{0, 2, 4, 5}, scale_type{0}}; + + auto const type = cudf::binary_operation_fixed_point_output_type( + cudf::binary_operator::DIV, static_cast(lhs).type(), rhs->type()); + auto const result = + cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpDiv4) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto begin = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 11; }); + auto result_begin = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i * 11) / 12; }); + auto const lhs = fp_wrapper(begin, begin + 1000, scale_type{-1}); + auto const rhs = make_fixed_point_scalar(12, scale_type{-1}); + auto const expected = fp_wrapper(result_begin, result_begin + 1000, scale_type{0}); + + auto const type = cudf::binary_operation_fixed_point_output_type( + cudf::binary_operator::DIV, static_cast(lhs).type(), rhs->type()); + auto const result = + cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd2) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; + auto const rhs = fp_wrapper{{100, 200, 300, 400, 500}, scale_type{-2}}; + auto const expected = fp_wrapper{{210, 420, 630, 840, 1050}, scale_type{-2}}; + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + 
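+// Note on scales: for ADD, binary_operation_fixed_point_output_type picks the
+// smaller (more precise) of the two input scales, e.g. 1.50 (scale -2) + 2.250
+// (scale -3) yields 3.750 at scale -3, as the expected columns in these tests assume.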
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd3) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{1100, 2200, 3300, 4400, 5500}, scale_type{-3}}; + auto const rhs = fp_wrapper{{100, 200, 300, 400, 500}, scale_type{-2}}; + auto const expected = fp_wrapper{{2100, 4200, 6300, 8400, 10500}, scale_type{-3}}; + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd4) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; + auto const rhs = make_fixed_point_scalar(100, scale_type{-2}); + auto const expected = fp_wrapper{{210, 320, 430, 540, 650}, scale_type{-2}}; + + auto const type = cudf::binary_operation_fixed_point_output_type( + cudf::binary_operator::ADD, static_cast(lhs).type(), rhs->type()); + auto const result = + cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::ADD, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd5) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = make_fixed_point_scalar(100, scale_type{-2}); + auto const rhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; + auto const expected = fp_wrapper{{210, 320, 430, 540, 650}, scale_type{-2}}; + + auto const type = cudf::binary_operation_fixed_point_output_type( + cudf::binary_operator::ADD, lhs->type(), static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(*lhs, rhs, cudf::binary_operator::ADD, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpAdd6) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const col = fp_wrapper{{30, 4, 5, 6, 7, 8}, scale_type{0}}; + + auto const expected1 = fp_wrapper{{60, 8, 10, 12, 14, 16}, scale_type{0}}; + auto const expected2 = fp_wrapper{{6, 0, 1, 1, 1, 1}, scale_type{1}}; + auto const type1 = cudf::data_type{cudf::type_to_id(), 0}; + auto const type2 = cudf::data_type{cudf::type_to_id(), 1}; + auto const result1 = + cudf::experimental::binary_operation(col, col, cudf::binary_operator::ADD, type1); + auto const result2 = + cudf::experimental::binary_operation(col, col, cudf::binary_operator::ADD, type2); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointCast) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const col = fp_wrapper{{6, 8, 10, 12, 14, 16}, scale_type{0}}; + auto const expected = fp_wrapper{{0, 0, 1, 1, 1, 1}, scale_type{1}}; + auto const type = cudf::data_type{cudf::type_to_id(), 1}; + auto const result = cudf::cast(col, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpMultiplyScalar) +{ + 
using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; + auto const rhs = make_fixed_point_scalar(100, scale_type{-1}); + auto const expected = fp_wrapper{{1100, 2200, 3300, 4400, 5500}, scale_type{-2}}; + + auto const type = cudf::binary_operation_fixed_point_output_type( + cudf::binary_operator::MUL, static_cast(lhs).type(), rhs->type()); + auto const result = + cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::MUL, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpSimplePlus) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{150, 200}, scale_type{-2}}; + auto const rhs = fp_wrapper{{2250, 1005}, scale_type{-3}}; + auto const expected = fp_wrapper{{3750, 3005}, scale_type{-3}}; + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, + static_cast(lhs).type(), + static_cast(rhs).type()); + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpEqualSimple) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const trues = std::vector(4, true); + auto const col1 = fp_wrapper{{1, 2, 3, 4}, scale_type{0}}; + auto const col2 = fp_wrapper{{100, 200, 300, 400}, scale_type{-2}}; + auto const expected = wrapper(trues.begin(), trues.end()); + + auto const result = cudf::experimental::binary_operation( + col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpEqualSimpleScale0) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const trues = std::vector(4, true); + auto const col = fp_wrapper{{1, 2, 3, 4}, scale_type{0}}; + auto const expected = wrapper(trues.begin(), trues.end()); + + auto const result = cudf::experimental::binary_operation( + col, col, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpEqualSimpleScale0Null) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const col1 = fp_wrapper{{1, 2, 3, 4}, {1, 1, 1, 1}, scale_type{0}}; + auto const col2 = fp_wrapper{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}}; + auto const expected = wrapper{{0, 1, 0, 1}, {0, 0, 0, 0}}; + + auto const result = cudf::experimental::binary_operation( + col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpEqualSimpleScale2Null) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const col1 = fp_wrapper{{1, 2, 3, 4}, {1, 1, 1, 1}, scale_type{-2}}; + auto const col2 = fp_wrapper{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}}; + auto const expected = wrapper{{0, 1, 0, 1}, {0, 0, 0, 0}}; + + auto const result = cudf::experimental::binary_operation( + 
col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpEqualLessGreater) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const sz = std::size_t{1000}; + + // TESTING binary op ADD + + auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto e) { return e * 1000; }); + auto const vec1 = std::vector(begin, begin + sz); + auto const vec2 = std::vector(sz, 0); + + auto const iota_3 = fp_wrapper(vec1.begin(), vec1.end(), scale_type{-3}); + auto const zeros_3 = fp_wrapper(vec2.begin(), vec2.end(), scale_type{-1}); + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, + static_cast(iota_3).type(), + static_cast(zeros_3).type()); + auto const iota_3_after_add = + cudf::experimental::binary_operation(zeros_3, iota_3, binary_operator::ADD, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(iota_3, iota_3_after_add->view()); + + // TESTING binary op EQUAL, LESS, GREATER + + auto const trues = std::vector(sz, true); + auto const true_col = wrapper(trues.begin(), trues.end()); + + auto const btype = cudf::data_type{type_id::BOOL8}; + auto const equal_result = cudf::experimental::binary_operation( + iota_3, iota_3_after_add->view(), binary_operator::EQUAL, btype); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, equal_result->view()); + + auto const less_result = cudf::experimental::binary_operation( + zeros_3, iota_3_after_add->view(), binary_operator::LESS, btype); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, less_result->view()); + + auto const greater_result = cudf::experimental::binary_operation( + iota_3_after_add->view(), zeros_3, binary_operator::GREATER, btype); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, greater_result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpNullMaxSimple) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const trues = std::vector(4, true); + auto const col1 = fp_wrapper{{40, 30, 20, 10, 0}, {1, 0, 1, 1, 0}, scale_type{-2}}; + auto const col2 = fp_wrapper{{10, 20, 30, 40, 0}, {1, 1, 1, 0, 0}, scale_type{-2}}; + auto const expected = fp_wrapper{{40, 20, 30, 10, 0}, {1, 1, 1, 1, 0}, scale_type{-2}}; + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MAX, + static_cast(col1).type(), + static_cast(col2).type()); + auto const result = + cudf::experimental::binary_operation(col1, col2, binary_operator::NULL_MAX, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpNullMinSimple) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const trues = std::vector(4, true); + auto const col1 = fp_wrapper{{40, 30, 20, 10, 0}, {1, 1, 1, 0, 0}, scale_type{-1}}; + auto const col2 = fp_wrapper{{10, 20, 30, 40, 0}, {1, 0, 1, 1, 0}, scale_type{-1}}; + auto const expected = fp_wrapper{{10, 30, 20, 40, 0}, {1, 1, 1, 1, 0}, scale_type{-1}}; + + auto const type = + cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MIN, + static_cast(col1).type(), + static_cast(col2).type()); + auto const result = + cudf::experimental::binary_operation(col1, col2, binary_operator::NULL_MIN, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + 
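+// NULL_MAX and NULL_MIN treat a lone null as the identity element: when exactly one
+// side is valid its value is propagated, and the result is null only when both
+// inputs are null (see the validity masks in the two tests above).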
+TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpNullEqualsSimple) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const trues = std::vector(4, true); + auto const col1 = fp_wrapper{{400, 300, 300, 100}, {1, 1, 1, 0}, scale_type{-2}}; + auto const col2 = fp_wrapper{{40, 200, 20, 400}, {1, 0, 1, 0}, scale_type{-1}}; + auto const expected = wrapper{{1, 0, 0, 1}, {1, 1, 1, 1}}; + + auto const result = cudf::experimental::binary_operation( + col1, col2, binary_operator::NULL_EQUALS, cudf::data_type{type_id::BOOL8}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{100, 300, 500, 700}, scale_type{-2}}; + auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{0}}; + auto const expected = fp_wrapper{{25, 75, 125, 175}, scale_type{-2}}; + + auto const type = data_type{type_to_id(), -2}; + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div2) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{100000, 300000, 500000, 700000}, scale_type{-3}}; + auto const rhs = fp_wrapper{{20, 20, 20, 20}, scale_type{-1}}; + auto const expected = fp_wrapper{{5000, 15000, 25000, 35000}, scale_type{-2}}; + + auto const type = data_type{type_to_id(), -2}; + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div3) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{10000, 30000, 50000, 70000}, scale_type{-2}}; + auto const rhs = fp_wrapper{{3, 9, 3, 3}, scale_type{0}}; + auto const expected = fp_wrapper{{3333, 3333, 16666, 23333}, scale_type{-2}}; + + auto const type = data_type{type_to_id(), -2}; + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div4) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{1}}; + auto const rhs = make_fixed_point_scalar(3, scale_type{0}); + auto const expected = fp_wrapper{{3, 10, 16, 23}, scale_type{1}}; + + auto const type = data_type{type_to_id(), 1}; + auto const result = + cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div6) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = make_fixed_point_scalar(3000, scale_type{-3}); + auto const rhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; + + auto const expected = fp_wrapper{{300, 100, 60, 42}, scale_type{-2}}; + + auto const type = data_type{type_to_id(), -2}; + auto const result = + 
cudf::experimental::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div7) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = make_fixed_point_scalar(1200, scale_type{0}); + auto const rhs = fp_wrapper{{100, 200, 300, 500, 600, 800, 1200, 1300}, scale_type{-2}}; + + auto const expected = fp_wrapper{{12, 6, 4, 2, 2, 1, 1, 0}, scale_type{2}}; + + auto const type = data_type{type_to_id(), 2}; + auto const result = + cudf::experimental::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div8) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{4000, 6000, 80000}, scale_type{-1}}; + auto const rhs = make_fixed_point_scalar(5000, scale_type{-3}); + auto const expected = fp_wrapper{{0, 1, 16}, scale_type{2}}; + + auto const type = data_type{type_to_id(), 2}; + auto const result = + cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div9) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{10, 20, 30}, scale_type{2}}; + auto const rhs = make_fixed_point_scalar(7, scale_type{1}); + auto const expected = fp_wrapper{{1, 2, 4}, scale_type{1}}; + + auto const type = data_type{type_to_id(), 1}; + auto const result = + cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div10) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{100, 200, 300}, scale_type{1}}; + auto const rhs = make_fixed_point_scalar(7, scale_type{0}); + auto const expected = fp_wrapper{{14, 28, 42}, scale_type{1}}; + + auto const type = data_type{type_to_id(), 1}; + auto const result = + cudf::experimental::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOp_Div11) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const lhs = fp_wrapper{{1000, 2000, 3000}, scale_type{1}}; + auto const rhs = fp_wrapper{{7, 7, 7}, scale_type{0}}; + auto const expected = fp_wrapper{{142, 285, 428}, scale_type{1}}; + + auto const type = data_type{type_to_id(), 1}; + auto const result = + cudf::experimental::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); +} + +TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpThrows) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + auto const col = fp_wrapper{{100, 300, 500, 700}, scale_type{-2}}; + auto const non_bool_type = data_type{type_to_id(), -2}; + auto const float_type = data_type{type_id::FLOAT32}; + EXPECT_THROW( + cudf::experimental::binary_operation(col, col, 
cudf::binary_operator::LESS, non_bool_type), + cudf::logic_error); + // Allowed now, but not allowed in jit. + // EXPECT_THROW(cudf::experimental::binary_operation(col, col, cudf::binary_operator::MUL, + // float_type), + // cudf::logic_error); +} + +} // namespace cudf::test::binop diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp new file mode 100644 index 00000000000..081ae41fef1 --- /dev/null +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -0,0 +1,610 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include "cudf/utilities/error.hpp" + +#include + +namespace cudf::test::binop { + +template +auto lhs_random_column(size_type size) +{ + return BinaryOperationTest::make_random_wrapped_column(size); +} + +template <> +auto lhs_random_column(size_type size) +{ + return cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, + {1, 1, 0, 1, 1, 1, 1}); +} +template +auto rhs_random_column(size_type size) +{ + return BinaryOperationTest::make_random_wrapped_column(size); +} +template <> +auto rhs_random_column(size_type size) +{ + return cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}, + {1, 1, 1, 1, 0, 1, 1}); +} + +// combinations to test +// n t d +// n n.n n.t n.d +// t t.n t.t t.d +// d d.n d.t d.d + +constexpr size_type col_size = 10000; +template +struct BinaryOperationCompiledTest : public BinaryOperationTest { + using TypeOut = cudf::test::GetType; + using TypeLhs = cudf::test::GetType; + using TypeRhs = cudf::test::GetType; + + template