Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-22.06' into rwlee/struc…
Browse files Browse the repository at this point in the history
…t_col_compare
  • Loading branch information
bdice committed May 11, 2022
2 parents 548dcf1 + 0cc29a0 commit 1dd1159
Show file tree
Hide file tree
Showing 45 changed files with 1,314 additions and 843 deletions.
4 changes: 4 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ Here are some guidelines to help the review process go smoothly.
If conflicts occur against the target branch they should be resolved by
merging the target branch into the branch used for making the pull request.
8. Pull requests that modify cpp source that are marked ready for review
will automatically be assigned two cudf-cpp-codeowners reviewers.
Ensure at least two approvals from cudf-cpp-codeowners before merging.
Many thanks in advance for your cooperation!
-->
53 changes: 53 additions & 0 deletions .github/workflows/jni-docker-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: JNI Docker Build

on:
workflow_dispatch: # manual trigger only

concurrency:
group: jni-docker-build-${{ github.ref }}
cancel-in-progress: true

jobs:
docker-build:
if: github.repository == 'rapidsai/cudf'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2

- name: Set up QEMU
uses: docker/setup-qemu-action@v2

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

- name: Login to DockerHub
uses: docker/login-action@v2
with:
username: ${{ secrets.GPUCIBOT_DOCKERHUB_USER }}
password: ${{ secrets.GPUCIBOT_DOCKERHUB_TOKEN }}

- name: Set ENVs
run: |
echo "IMAGE_NAME=rapidsai/cudf-jni-build" >> $GITHUB_ENV
echo "IMAGE_REF=${GITHUB_REF_NAME}" >> $GITHUB_ENV
- name: Build and Push
uses: docker/build-push-action@v3
with:
push: true
file: java/ci/Dockerfile.centos7
tags: "${{ env.IMAGE_NAME }}:${{ env.IMAGE_REF }}"
5 changes: 5 additions & 0 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ dependencies:
- pydata-sphinx-theme
- librdkafka=1.7.0
- python-confluent-kafka=1.7.0
- moto>=3.1.6
- boto3>=1.21.21
- botocore>=1.24.21
- aiobotocore>=2.2.0
- s3fs>=2022.3.0
- pip:
- git+https://github.com/python-streamz/streamz.git@master
- pyorc
Expand Down
26 changes: 11 additions & 15 deletions cpp/include/cudf/detail/search.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -33,11 +33,11 @@ namespace detail {
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> lower_bound(
table_view const& t,
table_view const& values,
table_view const& haystack,
table_view const& needles,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -46,33 +46,29 @@ std::unique_ptr<column> lower_bound(
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> upper_bound(
table_view const& t,
table_view const& values,
table_view const& haystack,
table_view const& needles,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::contains(column_view const&, scalar const&,
* rmm::mr::device_memory_resource*)
* @copydoc cudf::contains(column_view const&, scalar const&, rmm::mr::device_memory_resource*)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
bool contains(column_view const& col,
scalar const& value,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
bool contains(column_view const& haystack, scalar const& needle, rmm::cuda_stream_view stream);

/**
* @copydoc cudf::contains(column_view const&, column_view const&,
* rmm::mr::device_memory_resource*)
* @copydoc cudf::contains(column_view const&, column_view const&, rmm::mr::device_memory_resource*)
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> contains(
column_view const& haystack,
column_view const& needles,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
Expand Down
141 changes: 65 additions & 76 deletions cpp/include/cudf/search.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,134 +32,123 @@ namespace cudf {
*/

/**
* @brief Find smallest indices in a sorted table where values should be
* inserted to maintain order
* @brief Find smallest indices in a sorted table where values should be inserted to maintain order.
*
* For each row v in @p values, find the first index in @p t where
* inserting the row will maintain the sort order of @p t
* For each row in `needles`, find the first index in `haystack` where inserting the row still
* maintains its sort order.
*
* @code{.pseudo}
* Example:
*
* Single column:
* idx 0 1 2 3 4
* column = { 10, 20, 20, 30, 50 }
* values = { 20 }
* result = { 1 }
* idx 0 1 2 3 4
* haystack = { 10, 20, 20, 30, 50 }
* needles = { 20 }
* result = { 1 }
*
* Multi Column:
* idx 0 1 2 3 4
* t = {{ 10, 20, 20, 20, 20 },
* { 5.0, .5, .5, .7, .7 },
* { 90, 77, 78, 61, 61 }}
* values = {{ 20 },
* { .7 },
* { 61 }}
* result = { 3 }
* idx 0 1 2 3 4
* haystack = {{ 10, 20, 20, 20, 20 },
* { 5.0, .5, .5, .7, .7 },
* { 90, 77, 78, 61, 61 }}
* needles = {{ 20 },
* { .7 },
* { 61 }}
* result = { 3 }
* @endcode
*
* @param t Table to search
* @param values Find insert locations for these values
* @param column_order Vector of column sort order
* @param null_precedence Vector of null_precedence enums values
* @param mr Device memory resource used to allocate the returned column's device
* memory
* @param haystack The table containing search space.
* @param needles Values for which to find the insert locations in the search space.
* @param column_order Vector of column sort order.
* @param null_precedence Vector of null_precedence enum values.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return A non-nullable column of cudf::size_type elements containing the insertion points.
*/
std::unique_ptr<column> lower_bound(
table_view const& t,
table_view const& values,
table_view const& haystack,
table_view const& needles,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Find largest indices in a sorted table where values should be
* inserted to maintain order
* @brief Find largest indices in a sorted table where values should be inserted to maintain order.
*
* For each row v in @p values, find the last index in @p t where
* inserting the row will maintain the sort order of @p t
* For each row in `needles`, find the last index in `haystack` where inserting the row still
* maintains its sort order.
*
* @code{.pseudo}
* Example:
*
* Single Column:
* idx 0 1 2 3 4
* column = { 10, 20, 20, 30, 50 }
* values = { 20 }
* result = { 3 }
* idx 0 1 2 3 4
* haystack = { 10, 20, 20, 30, 50 }
* needles = { 20 }
* result = { 3 }
*
* Multi Column:
* idx 0 1 2 3 4
* t = {{ 10, 20, 20, 20, 20 },
* { 5.0, .5, .5, .7, .7 },
* { 90, 77, 78, 61, 61 }}
* values = {{ 20 },
* { .7 },
* { 61 }}
* result = { 5 }
* idx 0 1 2 3 4
* haystack = {{ 10, 20, 20, 20, 20 },
* { 5.0, .5, .5, .7, .7 },
* { 90, 77, 78, 61, 61 }}
* needles = {{ 20 },
* { .7 },
* { 61 }}
* result = { 5 }
* @endcode
*
* @param search_table Table to search
* @param values Find insert locations for these values
* @param column_order Vector of column sort order
* @param null_precedence Vector of null_precedence enums values
* @param mr Device memory resource used to allocate the returned column's device
* memory
* @param haystack The table containing search space.
* @param needles Values for which to find the insert locations in the search space.
* @param column_order Vector of column sort order.
* @param null_precedence Vector of null_precedence enum values.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return A non-nullable column of cudf::size_type elements containing the insertion points.
*/
std::unique_ptr<column> upper_bound(
table_view const& search_table,
table_view const& values,
table_view const& haystack,
table_view const& needles,
std::vector<order> const& column_order,
std::vector<null_order> const& null_precedence,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Find if the `value` is present in the `col`
* @brief Check if the given `needle` value exists in the `haystack` column.
*
* @throws cudf::logic_error
* If `col.type() != values.type()`
* @throws cudf::logic_error If `haystack.type() != needle.type()`.
*
* @code{.pseudo}
* Single Column:
* idx 0 1 2 3 4
* col = { 10, 20, 20, 30, 50 }
* Scalar:
* value = { 20 }
* result = true
* idx 0 1 2 3 4
* haystack = { 10, 20, 20, 30, 50 }
* needle = { 20 }
* result = true
* @endcode
*
* @param col A column object
* @param value A scalar value to search for in `col`
*
* @return bool If `value` is found in `column` true, else false.
* @param haystack The column containing search space.
* @param needle A scalar value to check for existence in the search space.
* @return true if the given `needle` value exists in the `haystack` column.
*/
bool contains(column_view const& col, scalar const& value);
bool contains(column_view const& haystack, scalar const& needle);

/**
* @brief Returns a new column of type bool identifying for each element of @p haystack column,
* if that element is contained in @p needles column.
* @brief Check if the given `needles` values exist in the `haystack` column.
*
* The new column will have the same dimension and null status as the @p haystack column. That is,
* any element that is invalid in the @p haystack column will be invalid in the returned column.
* The new column will have type BOOL and have the same size and null mask as the input `needles`
* column. That is, any null row in the `needles` column will result in a null row in the output
* column.
*
* @throws cudf::logic_error
* If `haystack.type() != needles.type()`
* @throws cudf::logic_error If `haystack.type() != needles.type()`
*
* @code{.pseudo}
* haystack = { 10, 20, 30, 40, 50 }
* needles = { 20, 40, 60, 80 }
*
* result = { false, true, false, true, false }
* result = { true, true, false, false }
* @endcode
*
* @param haystack A column object
* @param needles A column of values to search for in `col`
* @param mr Device memory resource used to allocate the returned column's device memory
*
* @return A column of bool elements containing true if the corresponding entry in haystack
* appears in needles and false if it does not.
* @param haystack The column containing search space.
* @param needles A column of values to check for existence in the search space.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return A BOOL column indicating if each element in `needles` exists in the search space.
*/
std::unique_ptr<column> contains(
column_view const& haystack,
Expand Down
Loading

0 comments on commit 1dd1159

Please sign in to comment.