Skip to content

Commit

Permalink
fix merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Jun 15, 2021
2 parents df7df44 + 7c8d847 commit 76f6fdb
Show file tree
Hide file tree
Showing 62 changed files with 1,612 additions and 410 deletions.
8 changes: 4 additions & 4 deletions ci/benchmark/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ conda install "rmm=$MINOR_VERSION.*" "cudatoolkit=$CUDA_REL" \
# conda install "your-pkg=1.0.0"

# Install the master version of dask, distributed, and streamz
logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps"
pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps"
pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
logger "pip install git+https://github.com/dask/distributed.git@2021.06.0 --upgrade --no-deps"
pip install "git+https://github.com/dask/distributed.git@2021.06.0" --upgrade --no-deps
logger "pip install git+https://github.com/dask/dask.git@2021.06.0 --upgrade --no-deps"
pip install "git+https://github.com/dask/dask.git@2021.06.0" --upgrade --no-deps
logger "pip install git+https://github.com/python-streamz/streamz.git --upgrade --no-deps"
pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps

Expand Down
4 changes: 2 additions & 2 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ function install_dask {
# Install the main version of dask, distributed, and streamz
gpuci_logger "Install the main version of dask, distributed, and streamz"
set -x
pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
pip install "git+https://github.com/dask/distributed.git@2021.06.0" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git@2021.06.0" --upgrade --no-deps
pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps
set +x
}
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ dependencies:
- cachetools
- transformers
- pip:
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask/distributed.git@main
- git+https://github.com/dask/dask.git@2021.06.0
- git+https://github.com/dask/distributed.git@2021.06.0
- git+https://github.com/python-streamz/streamz.git
- pyorc
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ dependencies:
- cachetools
- transformers
- pip:
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask/distributed.git@main
- git+https://github.com/dask/dask.git@2021.06.0
- git+https://github.com/dask/distributed.git@2021.06.0
- git+https://github.com/python-streamz/streamz.git
- pyorc
8 changes: 4 additions & 4 deletions conda/recipes/dask-cudf/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ function logger() {
}

# Install the latest version of dask and distributed
logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps"
pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
logger "pip install git+https://github.com/dask/distributed.git@2021.06.0 --upgrade --no-deps"
pip install "git+https://github.com/dask/distributed.git@2021.06.0" --upgrade --no-deps

logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps"
pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
logger "pip install git+https://github.com/dask/dask.git@2021.06.0 --upgrade --no-deps"
pip install "git+https://github.com/dask/dask.git@2021.06.0" --upgrade --no-deps

logger "python -c 'import dask_cudf'"
python -c "import dask_cudf"
1 change: 1 addition & 0 deletions cpp/cmake/thirdparty/CUDF_GetArrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC)
"ARROW_WITH_BACKTRACE ON"
"ARROW_CXXFLAGS -w"
"ARROW_JEMALLOC OFF"
"ARROW_S3 ON"
# Arrow modifies CMake's GLOBAL RULE_LAUNCH_COMPILE unless this is off
"ARROW_USE_CCACHE OFF"
"ARROW_ARMV8_ARCH ${ARROW_ARMV8_ARCH}"
Expand Down
7 changes: 7 additions & 0 deletions cpp/docs/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ namespace detail{
} // namespace detail

void external_function(...){
CUDF_FUNC_RANGE(); // Auto generates NVTX range for lifetime of this function
detail::external_function(...);
}
```
Expand All @@ -355,6 +356,12 @@ asynchrony if and when we add an asynchronous API to libcudf.
**Note:** `cudaDeviceSynchronize()` should *never* be used.
This limits the ability to do any multi-stream/multi-threaded work with libcudf APIs.
### NVTX Ranges
In order to aid in performance optimization and debugging, all compute intensive libcudf functions should have a corresponding NVTX range.
In libcudf, we have a convenience macro `CUDF_FUNC_RANGE()` that will automatically annotate the lifetime of the enclosing function and use the function's name as the name of the NVTX range.
For more information about NVTX, see [here](https://github.com/NVIDIA/NVTX/tree/dev/cpp).
### Stream Creation
There may be times in implementing libcudf features where it would be advantageous to use streams
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ struct column_scatterer_impl<struct_view> {
[](auto const& col) { return col.nullable(); });
if (child_nullable) {
auto const gather_map =
scatter_to_gather(scatter_map_begin, scatter_map_end, source.size(), stream);
scatter_to_gather(scatter_map_begin, scatter_map_end, target.size(), stream);
gather_bitmask(cudf::table_view{std::vector<cudf::column_view>{structs_src.child_begin(),
structs_src.child_end()}},
gather_map.begin(),
Expand Down
33 changes: 33 additions & 0 deletions cpp/include/cudf/io/datasource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,13 @@
#include <rmm/cuda_stream_view.hpp>

#include <arrow/buffer.h>
#include <arrow/filesystem/filesystem.h>
#include <arrow/filesystem/s3fs.h>
#include <arrow/io/file.h>
#include <arrow/io/interfaces.h>
#include <arrow/io/memory.h>
#include <arrow/result.h>
#include <arrow/status.h>

#include <memory>

Expand Down Expand Up @@ -302,6 +306,34 @@ class arrow_io_source : public datasource {
};

public:
/**
 * @brief Constructs an object from an Apache Arrow Filesystem URI
 *
 * @param arrow_uri Apache Arrow Filesystem URI (e.g. "s3://bucket/path/file.parquet")
 */
explicit arrow_io_source(std::string_view arrow_uri)
{
  const std::string uri_start_delimiter = "//";
  const std::string uri_end_delimiter = "?";

  // Let Arrow resolve the filesystem implementation (S3, local, ...) from the URI scheme
  arrow::Result<std::shared_ptr<arrow::fs::FileSystem>> result =
    arrow::fs::FileSystemFromUri(static_cast<std::string>(arrow_uri));
  CUDF_EXPECTS(result.ok(), "Failed to generate Arrow Filesystem instance from URI.");
  filesystem = result.ValueOrDie();

  // Parse the path from the URI: everything after the "//" (if present) up to an
  // optional "?" query suffix. If no "?" exists, find() yields npos and the
  // resulting oversized count is clamped by substr() to the end of the string.
  size_t start = arrow_uri.find(uri_start_delimiter) == std::string::npos
                   ? 0
                   : arrow_uri.find(uri_start_delimiter) + uri_start_delimiter.size();
  size_t end = arrow_uri.find(uri_end_delimiter) - start;
  std::string_view path = arrow_uri.substr(start, end);

  // Open a random-access handle to the file through the resolved filesystem
  arrow::Result<std::shared_ptr<arrow::io::RandomAccessFile>> in_stream =
    filesystem->OpenInputFile(static_cast<std::string>(path).c_str());
  CUDF_EXPECTS(in_stream.ok(), "Failed to open Arrow RandomAccessFile");
  arrow_file = in_stream.ValueOrDie();
}

/**
* @brief Constructs an object from an `arrow` source object.
*
Expand Down Expand Up @@ -340,6 +372,7 @@ class arrow_io_source : public datasource {
}

private:
std::shared_ptr<arrow::fs::FileSystem> filesystem;
std::shared_ptr<arrow::io::RandomAccessFile> arrow_file;
};

Expand Down
89 changes: 74 additions & 15 deletions cpp/include/cudf/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <optional>
#include <vector>

namespace cudf {
Expand Down Expand Up @@ -522,13 +523,15 @@ class hash_join {

/**
* Returns the row indices that can be used to construct the result of performing
* an inner join between two tables. @see cudf::inner_join().
* an inner join between two tables. @see cudf::inner_join(). Behavior is undefined if the
* provided `output_size` is smaller than the actual output size.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
* @param output_size Optional value which allows users to specify the exact output size.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table and columns' device
* memory.
* @param stream CUDA stream used for device memory operations and kernel launches
*
* @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
* the result of performing an inner join between two tables with `build` and `probe`
Expand All @@ -537,19 +540,22 @@ class hash_join {
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
std::unique_ptr<rmm::device_uvector<size_type>>>
inner_join(cudf::table_view const& probe,
null_equality compare_nulls = null_equality::EQUAL,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
null_equality compare_nulls = null_equality::EQUAL,
std::optional<std::size_t> output_size = {},
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;

/**
* Returns the row indices that can be used to construct the result of performing
* a left join between two tables. @see cudf::left_join().
* a left join between two tables. @see cudf::left_join(). Behavior is undefined if the
* provided `output_size` is smaller than the actual output size.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
* @param output_size Optional value which allows users to specify the exact output size.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table and columns' device
* memory.
* @param stream CUDA stream used for device memory operations and kernel launches
*
* @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
* the result of performing a left join between two tables with `build` and `probe`
Expand All @@ -558,19 +564,22 @@ class hash_join {
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
std::unique_ptr<rmm::device_uvector<size_type>>>
left_join(cudf::table_view const& probe,
null_equality compare_nulls = null_equality::EQUAL,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
null_equality compare_nulls = null_equality::EQUAL,
std::optional<std::size_t> output_size = {},
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;

/**
* Returns the row indices that can be used to construct the result of performing
* a full join between two tables. @see cudf::full_join().
* a full join between two tables. @see cudf::full_join(). Behavior is undefined if the
* provided `output_size` is smaller than the actual output size.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
* @param output_size Optional value which allows users to specify the exact output size.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table and columns' device
* memory.
* @param stream CUDA stream used for device memory operations and kernel launches
*
* @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
* the result of performing a full join between two tables with `build` and `probe`
Expand All @@ -579,9 +588,59 @@ class hash_join {
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
std::unique_ptr<rmm::device_uvector<size_type>>>
full_join(cudf::table_view const& probe,
null_equality compare_nulls = null_equality::EQUAL,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
null_equality compare_nulls = null_equality::EQUAL,
std::optional<std::size_t> output_size = {},
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;

/**
* Returns the exact number of matches (rows) when performing an inner join with the specified
* probe table.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
* @param stream CUDA stream used for device memory operations and kernel launches
*
* @return The exact number of output rows when performing an inner join between two tables with
* `build` and `probe` as the join keys.
*/
std::size_t inner_join_size(cudf::table_view const& probe,
null_equality compare_nulls = null_equality::EQUAL,
rmm::cuda_stream_view stream = rmm::cuda_stream_default) const;

/**
* Returns the exact number of matches (rows) when performing a left join with the specified probe
* table.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
* @param stream CUDA stream used for device memory operations and kernel launches
*
* @return The exact number of output rows when performing a left join between two tables with
* `build` and `probe` as the join keys.
*/
std::size_t left_join_size(cudf::table_view const& probe,
null_equality compare_nulls = null_equality::EQUAL,
rmm::cuda_stream_view stream = rmm::cuda_stream_default) const;

/**
* Returns the exact number of matches (rows) when performing a full join with the specified probe
* table.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the intermediate table and columns' device
* memory.
*
* @return The exact number of output rows when performing a full join between two tables with
* `build` and `probe` as the join keys.
*/
std::size_t full_join_size(
cudf::table_view const& probe,
null_equality compare_nulls = null_equality::EQUAL,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;

private:
struct hash_join_impl;
Expand Down
33 changes: 32 additions & 1 deletion cpp/include/cudf/strings/convert/convert_integers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ std::unique_ptr<column> hex_to_integers(
* @code{.pseudo}
* Example:
* s = ['123', '-456', '', 'AGE', '+17EA', '0x9EF' '123ABC']
* b = s.is_hex(s)
* b = is_hex(s)
* b is [true, false, false, false, false, true, true]
* @endcode
*
Expand All @@ -185,6 +185,37 @@ std::unique_ptr<column> is_hex(
strings_column_view const& strings,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a new strings column converting integer columns to hexadecimal
* characters.
*
* Any null entries will result in corresponding null entries in the output column.
*
* The output character set is '0'-'9' and 'A'-'F'. The output string width will
* be a multiple of 2 depending on the size of the integer type. A single leading
* zero is applied to the first non-zero output byte if it is less than 0x10.
*
* @code{.pseudo}
* Example:
* input = [123, -1, 0, 27, 342718233] // int32 type input column
* s = integers_to_hex(input)
* s is [ '04D2', 'FFFFFFFF', '00', '1B', '146D7719']
* @endcode
*
* The example above shows an `INT32` type column where each integer is 4 bytes.
* Leading zeros are suppressed unless filling out a complete byte as in
* `123 -> '04D2'` instead of `000004D2` or `4D2`.
*
* @throw cudf::logic_error if the input column is not integral type.
*
* @param input Integer column to convert to hex.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New strings column with hexadecimal characters.
*/
std::unique_ptr<column> integers_to_hex(
column_view const& input,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
} // namespace strings
} // namespace cudf
16 changes: 2 additions & 14 deletions cpp/include/cudf/wrappers/timestamps.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,19 +32,7 @@ template <class Duration>
using time_point = cuda::std::chrono::sys_time<Duration>;

template <class Duration>
struct timestamp : time_point<Duration> {
// Bring over base class constructors and make them visible here
using time_point<Duration>::time_point;

// This is needed as __shared__ objects of this type can't be assigned in device code
// when the initializer list constructs subobjects with values, which is what std::time_point
// does.
constexpr timestamp() : time_point<Duration>(Duration()){};

// The inherited copy constructor will hide the auto generated copy constructor;
// hence, explicitly define and delegate
constexpr timestamp(const time_point<Duration>& other) : time_point<Duration>(other) {}
};
using timestamp = time_point<Duration>;
} // namespace detail

/**
Expand Down
Loading

0 comments on commit 76f6fdb

Please sign in to comment.