From 035e62cb817b91b6b5a525b426c0fb165fb3c5cb Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Mon, 26 Aug 2024 14:40:50 -0500 Subject: [PATCH 1/2] Remove java ColumnView.copyWithBooleanColumnAsValidity Signed-off-by: Robert (Bobby) Evans --- .../main/java/ai/rapids/cudf/ColumnView.java | 38 ------------------ java/src/main/native/src/ColumnViewJni.cpp | 15 ------- java/src/main/native/src/ColumnViewJni.cu | 31 -------------- java/src/main/native/src/ColumnViewJni.hpp | 16 -------- .../java/ai/rapids/cudf/ColumnVectorTest.java | 40 ------------------- 5 files changed, 140 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 8ff2f0f0a73..6bd4e06c47e 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -913,25 +913,6 @@ public final ColumnVector mergeAndSetValidity(BinaryOp mergeOp, ColumnView... co return new ColumnVector(bitwiseMergeAndSetValidity(getNativeView(), columnViews, mergeOp.nativeId)); } - /** - * Creates a deep copy of a column while replacing the validity mask. The validity mask is the - * device_vector equivalent of the boolean column given as argument. - * - * The boolColumn must have the same number of rows as the current column. - * The result column will have the same number of rows as the current column. - * For all indices `i` where the boolColumn is `true`, the result column will have a valid value at index i. - * For all other values (i.e. `false` or `null`), the result column will have nulls. - * - * If the current column has a null at a given index `i`, and the new validity mask is `true` at index `i`, - * then the row value is undefined. - * - * @param boolColumn bool column whose value is to be used as the validity mask. - * @return Deep copy of the column with replaced validity mask. - */ - public final ColumnVector copyWithBooleanColumnAsValidity(ColumnView boolColumn) { - return new ColumnVector(copyWithBooleanColumnAsValidity(getNativeView(), boolColumn.getNativeView())); - } - ///////////////////////////////////////////////////////////////////////////// // DATE/TIME ///////////////////////////////////////////////////////////////////////////// @@ -4767,25 +4748,6 @@ private static native long clamper(long nativeView, long loScalarHandle, long lo private static native long bitwiseMergeAndSetValidity(long baseHandle, long[] viewHandles, int nullConfig) throws CudfException; - /** - * Native method to deep copy a column while replacing the null mask. The null mask is the - * device_vector equivalent of the boolean column given as argument. - * - * The boolColumn must have the same number of rows as the exemplar column. - * The result column will have the same number of rows as the exemplar. - * For all indices `i` where the boolean column is `true`, the result column will have a valid value at index i. - * For all other values (i.e. `false` or `null`), the result column will have nulls. - * - * If the exemplar column has a null at a given index `i`, and the new validity mask is `true` at index `i`, - * then the resultant row value is undefined. - * - * @param exemplarViewHandle column view of the column that is deep copied. - * @param boolColumnViewHandle bool column whose value is to be used as the null mask. - * @return Deep copy of the column with replaced null mask. - */ - private static native long copyWithBooleanColumnAsValidity(long exemplarViewHandle, - long boolColumnViewHandle) throws CudfException; - //////// // Native cudf::column_view life cycle and metadata access methods. Life cycle methods // should typically only be called from the OffHeap inner class. diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 4551325ebb1..72f0ad19912 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -2090,21 +2090,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit CATCH_STD(env, 0); } -JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_copyWithBooleanColumnAsValidity( - JNIEnv* env, jobject j_object, jlong exemplar_handle, jlong validity_column_handle) -{ - JNI_NULL_CHECK(env, exemplar_handle, "ColumnView handle is null", 0); - JNI_NULL_CHECK(env, validity_column_handle, "Validity column handle is null", 0); - try { - cudf::jni::auto_set_device(env); - auto const exemplar = *reinterpret_cast(exemplar_handle); - auto const validity = *reinterpret_cast(validity_column_handle); - return release_as_jlong( - cudf::jni::new_column_with_boolean_column_as_validity(exemplar, validity)); - } - CATCH_STD(env, 0); -} - //////// // Native cudf::column_view life cycle and metadata access methods. Life cycle methods // should typically only be called from the CudfColumn inner class. diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu index 2dbff923544..46261b087ae 100644 --- a/java/src/main/native/src/ColumnViewJni.cu +++ b/java/src/main/native/src/ColumnViewJni.cu @@ -43,37 +43,6 @@ namespace cudf::jni { -std::unique_ptr new_column_with_boolean_column_as_validity( - cudf::column_view const& exemplar, cudf::column_view const& validity_column) -{ - CUDF_EXPECTS(validity_column.type().id() == type_id::BOOL8, - "Validity column must be of type bool"); - CUDF_EXPECTS(validity_column.size() == exemplar.size(), - "Exemplar and validity columns must have the same size"); - - auto validity_device_view = cudf::column_device_view::create(validity_column); - auto validity_begin = cudf::detail::make_optional_iterator( - *validity_device_view, cudf::nullate::DYNAMIC{validity_column.has_nulls()}); - auto validity_end = validity_begin + validity_device_view->size(); - auto [null_mask, null_count] = cudf::detail::valid_if( - validity_begin, - validity_end, - [] __device__(auto optional_bool) { return optional_bool.value_or(false); }, - cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); - auto const exemplar_without_null_mask = - cudf::column_view{exemplar.type(), - exemplar.size(), - exemplar.head(), - nullptr, - 0, - exemplar.offset(), - std::vector{exemplar.child_begin(), exemplar.child_end()}}; - auto deep_copy = std::make_unique(exemplar_without_null_mask); - deep_copy->set_null_mask(std::move(null_mask), null_count); - return deep_copy; -} - std::unique_ptr generate_list_offsets(cudf::column_view const& list_length, rmm::cuda_stream_view stream) { diff --git a/java/src/main/native/src/ColumnViewJni.hpp b/java/src/main/native/src/ColumnViewJni.hpp index c9eef0139ea..c8c441e8fae 100644 --- a/java/src/main/native/src/ColumnViewJni.hpp +++ b/java/src/main/native/src/ColumnViewJni.hpp @@ -22,22 +22,6 @@ namespace cudf::jni { -/** - * @brief Creates a deep copy of the exemplar column, with its validity set to the equivalent - * of the boolean `validity` column's value. - * - * The bool_column must have the same number of rows as the exemplar column. - * The result column will have the same number of rows as the exemplar. - * For all indices `i` where the boolean column is `true`, the result column will have a valid value - * at index i. For all other values (i.e. `false` or `null`), the result column will have nulls. - * - * @param exemplar The column to be deep copied. - * @param bool_column bool column whose value is to be used as the validity. - * @return Deep copy of the exemplar, with the replaced validity. - */ -std::unique_ptr new_column_with_boolean_column_as_validity( - cudf::column_view const& exemplar, cudf::column_view const& bool_column); - /** * @brief Generates list offsets with lengths of each list. * diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 7136b162c13..36f8cad982c 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -6395,46 +6395,6 @@ void testReplaceSameIndexColumnInStruct() { assertTrue(e.getMessage().contains("Duplicate mapping found for replacing child index")); } - @Test - void testCopyWithBooleanColumnAsValidity() { - final Boolean T = true; - final Boolean F = false; - final Integer X = null; - - // Straight-line: Invalidate every other row. - try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - ColumnVector validity = ColumnVector.fromBoxedBooleans(F, T, F, T, F, T, F, T, F, T); - ColumnVector expected = ColumnVector.fromBoxedInts(X, 2, X, 4, X, 6, X, 8, X, 10); - ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { - assertColumnsAreEqual(expected, result); - } - - // Straight-line: Invalidate all Rows. - try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - ColumnVector validity = ColumnVector.fromBoxedBooleans(F, F, F, F, F, F, F, F, F, F); - ColumnVector expected = ColumnVector.fromBoxedInts(X, X, X, X, X, X, X, X, X, X); - ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { - assertColumnsAreEqual(expected, result); - } - - // Nulls in the validity column are treated as invalid. - try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - ColumnVector validity = ColumnVector.fromBoxedBooleans(F, T, F, T, F, T, F, null, F, null); - ColumnVector expected = ColumnVector.fromBoxedInts(X, 2, X, 4, X, 6, X, X, X, X); - ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { - assertColumnsAreEqual(expected, result); - } - - // Negative case: Mismatch in row count. - Exception x = assertThrows(CudfException.class, () -> { - try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - ColumnVector validity = ColumnVector.fromBoxedBooleans(F, T, F, T); - ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { - } - }); - assertTrue(x.getMessage().contains("Exemplar and validity columns must have the same size")); - } - @Test void testSegmentedGather() { HostColumnVector.DataType dt = new ListType(true, new BasicType(true, DType.STRING)); From 233b329c1ed976ef0f928d0cc577684f30e4289e Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Mon, 26 Aug 2024 14:52:53 -0500 Subject: [PATCH 2/2] Update Copyright --- java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 36f8cad982c..708744569df 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.