diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 80be987c3b9..0972dba22cf 100755 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -146,7 +146,6 @@ add_library( src/ScalarJni.cpp src/TableJni.cpp src/aggregation128_utils.cu - src/map_lookup.cu src/maps_column_view.cu src/row_conversion.cu src/check_nvcomp_output_sizes.cu diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 0764bbb0cc7..1ad80ebe009 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -79,7 +79,6 @@ #include "cudf_jni_apis.hpp" #include "dtype_utils.hpp" #include "jni_utils.hpp" -#include "map_lookup.hpp" #include "maps_column_view.hpp" using cudf::jni::ptr_as_jlong; diff --git a/java/src/main/native/src/map_lookup.cu b/java/src/main/native/src/map_lookup.cu deleted file mode 100644 index 13d1a5a94a9..00000000000 --- a/java/src/main/native/src/map_lookup.cu +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace cudf { -namespace { - -/** - * @brief Device function that searches for the specified lookup_key - * in the list at index `row_index`, and writes out the index of the - * first match to the output. - * - * This function is called once per row of the `input` column - * If the lookup_key is not found, (-1) is returned for that list row. - */ -template -void __device__ search_each_list(size_type row_index, column_device_view input, - mutable_column_device_view output, - string_scalar_device_view lookup_key) { - if (has_nulls && input.is_null(row_index)) { // List row is null. - output.element(row_index) = -1; // Not found. - return; - } - - auto offsets{input.child(0)}; - auto start_index{offsets.element(row_index)}; - auto end_index{offsets.element(row_index + 1)}; - - auto key_column{input.child(1).child(0)}; - - for (size_type list_element_index{start_index}; list_element_index < end_index; - ++list_element_index) { - if (has_nulls && key_column.is_null(list_element_index)) { - continue; // Skip the list-element with null-key. - } - - // List element's key is not null. Check if it matches the lookup_key. - if (key_column.element(list_element_index) == lookup_key.value()) { - output.element(row_index) = list_element_index; - return; - } - } - - output.element(row_index) = -1; // Not found. -} - -/** - * @brief The map-lookup CUDA kernel, which searches for the specified `lookup_key` - * string in each list row of the `input` column. - * - * The kernel writes the index (into the `input` list-column's child) where the `lookup_key` - * is found, to the `output` column. If the `lookup_key` is not found, (-1) is written instead. - * - * The produces one output row per input, with no nulls. The output may then be used - * with `cudf::gather()`, to find the values corresponding to the `lookup_key`. - */ -template -__launch_bounds__(block_size) __global__ - void gpu_find_first(column_device_view input, mutable_column_device_view output, - string_scalar_device_view lookup_key) { - size_type tid = blockIdx.x * block_size + threadIdx.x; - size_type stride = block_size * gridDim.x; - - // Each CUDA thread processes one row of `input`. Each row is a list. - // So each thread searches for `lookup_key` in one row of the input column, - // and writes its index out to output. - while (tid < input.size()) { - search_each_list(tid, input, output, lookup_key); - tid += stride; - } -} - -/** - * @brief Function to generate a gather-map, based on the location of the `lookup_key` - * string in each row of the input. - * - * The gather map may then be used to gather the values corresponding to the `lookup_key` - * for each row. - */ -template -std::unique_ptr -get_gather_map_for_map_values(column_view const &input, string_scalar &lookup_key, - rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - constexpr size_type block_size{256}; - cudf::detail::grid_1d grid{input.size(), block_size}; - - auto input_device_view = cudf::column_device_view::create(input, stream); - auto lookup_key_device_view{get_scalar_device_view(lookup_key)}; - auto gather_map = make_numeric_column(data_type{cudf::type_to_id()}, input.size(), - mask_state::ALL_VALID, stream, mr); - auto output_view = mutable_column_device_view::create(gather_map->mutable_view(), stream); - - gpu_find_first<<>>( - *input_device_view, *output_view, lookup_key_device_view); - - CUDF_CHECK_CUDA(stream.value()); - - return gather_map; -} - -/** - * @brief a defensive check for the map column that is going to be processed - */ -void map_input_check(column_view const &map_column, rmm::cuda_stream_view stream) { - CUDF_EXPECTS(map_column.type().id() == type_id::LIST, "Expected LIST>."); - - lists_column_view lcv{map_column}; - column_view structs_column = lcv.get_sliced_child(stream); - - CUDF_EXPECTS(structs_column.type().id() == type_id::STRUCT, "Expected LIST>."); - - CUDF_EXPECTS(structs_column.num_children() == 2, "Expected LIST>."); - CUDF_EXPECTS(structs_column.child(0).type().id() == type_id::STRING, - "Expected LIST>."); - CUDF_EXPECTS(structs_column.child(1).type().id() == type_id::STRING, - "Expected LIST>."); -} - -} // namespace - -namespace jni { - -std::unique_ptr map_contains(column_view const &map_column, string_scalar lookup_key, - bool has_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) { - // Defensive checks. - map_input_check(map_column, stream); - - lists_column_view lcv(map_column); - structs_column_view scv(lcv.child()); - - std::vector children; - children.push_back(lcv.offsets()); - children.push_back(scv.child(0)); - - column_view list_of_keys(map_column.type(), map_column.size(), nullptr, map_column.null_mask(), - map_column.null_count(), 0, children); - auto contains_column = lists::contains(list_of_keys, lookup_key); - // null will be skipped in all-aggregation when checking if all rows contain the key, - // so replace all nulls with 0. - std::unique_ptr replacement = - cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::BOOL8)); - replacement->set_valid_async(true); - using ScalarType = cudf::scalar_type_t; - static_cast(replacement.get())->set_value(0); - auto result = cudf::replace_nulls(contains_column->view(), *replacement); - return result; -} - -std::unique_ptr map_lookup(column_view const &map_column, string_scalar lookup_key, - bool has_nulls, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) { - // Defensive checks. - map_input_check(map_column, stream); - - if (map_column.size() == 0) { - return make_empty_column(cudf::data_type{cudf::type_id::STRING}); - } - - lists_column_view lcv{map_column}; - column_view structs_column = lcv.get_sliced_child(stream); - // Two-pass plan: construct gather map, and then gather() on structs_column.child(1). Plan A. - // (Can do in one pass perhaps, but that's Plan B.) - - auto gather_map = has_nulls ? - get_gather_map_for_map_values(map_column, lookup_key, stream, mr) : - get_gather_map_for_map_values(map_column, lookup_key, stream, mr); - - // Gather map is now available. - - auto values_column = structs_column.child(1); - auto table_for_gather = table_view{std::vector{values_column}}; - - auto gathered_table = - cudf::detail::gather(table_for_gather, gather_map->view(), out_of_bounds_policy::NULLIFY, - detail::negative_index_policy::NOT_ALLOWED, stream, mr); - - return std::make_unique(std::move(gathered_table->get_column(0))); -} -} // namespace jni -} // namespace cudf diff --git a/java/src/main/native/src/map_lookup.hpp b/java/src/main/native/src/map_lookup.hpp deleted file mode 100644 index c564dba1b1a..00000000000 --- a/java/src/main/native/src/map_lookup.hpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace cudf { - -namespace jni { - -/** - * @brief Looks up a "map" column by specified key, and returns a column of string values. - * - * The map-column is represented as follows: - * - * list_view >. - * <---KEY---> <--VALUE--> - * - * The string_view struct members are the key and value, respectively. - * For each row in the input list column, the value corresponding to the first match - * of the specified lookup_key is returned. If the key is not found, a null is returned. - * - * @param map_column The input "map" column to be searched. Must be of - * type list_view>. - * @param lookup_key The search key, whose value is to be returned for each list row - * @param has_nulls Whether the input column might contain null list-rows, or null keys. - * @param stream The CUDA stream - * @param mr The device memory resource to be used for allocations - * @return A string_view column with the value from the first match in each list. - * A null row is returned for any row where the lookup_key is not found. - * @throw cudf::logic_error If the input column is not of type - * list_view> - */ -std::unique_ptr -map_lookup(column_view const &map_column, string_scalar lookup_key, bool has_nulls = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Looks up a "map" column by specified key to see if the key exists or not, - * and returns a cudf column of bool value. - * - * The map-column is represented as follows: - * - * list_view >. - * <---KEY---> <--VALUE--> - * - * The string_view struct members are the key and value, respectively. - * For each row in the input list column, if the key is not found, false will be returned for that - * row. - * Note: when search for the scalar key of "null", a column full of "false" will be returned because - * map_contains is leveraging cudf::list:contains. - * - * @param map_column The input "map" column to be searched. Must be of - * type list_view>. - * @param lookup_key The search key, whose index(offset) is to be returned for each list row - * @param has_nulls Whether the input column might contain null list-rows, or null keys. - * @param stream The CUDA stream - * @param mr The device memory resource to be used for allocations - * @return An boolean column reflecting the existence of the key in each row in the map - * column. True means the lookup_key is found in that row. - * @throw cudf::logic_error If the input column is not of type - * list_view> - */ -std::unique_ptr -map_contains(column_view const &map_column, string_scalar lookup_key, bool has_nulls = true, - rmm::cuda_stream_view stream = cudf::default_stream_value, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); - -} // namespace jni - -} // namespace cudf