Move HostConcatResultUtil out of unshimmed classes #5614

Merged · 3 commits · May 25, 2022
Changes from 2 commits
8 changes: 8 additions & 0 deletions jenkins/spark-premerge-build.sh
@@ -73,6 +73,14 @@ mvn_verify() {
 
     # Triggering here until we change the jenkins file
     rapids_shuffle_smoke_test
+
+    # non-caller classloader smoke test in pseudo-distributed
+    # standalone cluster
+    echo "Running test_cartesian_join_special_case_count with spark.rapids.force.caller.classloader=false"
+    PYSP_TEST_spark_rapids_force_caller_classloader=false \
+      NUM_LOCAL_EXECS=1 \
+      TEST_PARALLEL=0 \
+      ./integration_tests/run_pyspark_from_build.sh -k 'test_cartesian_join_special_case_count[100]'
 }
 
 rapids_shuffle_smoke_test() {
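The PYSP_TEST_ environment variables above are how run_pyspark_from_build.sh receives Spark configs for the test session: the prefix is stripped and underscores become dots, so PYSP_TEST_spark_rapids_force_caller_classloader=false arrives as spark.rapids.force.caller.classloader=false. Below is a minimal Scala sketch of that naming convention, assuming the plain underscore-to-dot rule (which holds for this key); the PyspTestEnvSketch object is hypothetical, not part of the PR.

// Sketch only: derive a Spark conf key from a PYSP_TEST_ environment
// variable name. Conf keys that themselves contain underscores would
// need more care than this simple rule.
object PyspTestEnvSketch {
  def toConfKey(envName: String): Option[String] = {
    val prefix = "PYSP_TEST_"
    if (envName.startsWith(prefix)) {
      Some(envName.stripPrefix(prefix).replace('_', '.'))
    } else {
      None
    }
  }

  def main(args: Array[String]): Unit = {
    // Prints Some(spark.rapids.force.caller.classloader)
    println(toConfKey("PYSP_TEST_spark_rapids_force_caller_classloader"))
  }
}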
@@ -18,7 +18,7 @@ package com.nvidia.spark.rapids
 
 import java.util
 
-import ai.rapids.cudf.{HostConcatResultUtil, HostMemoryBuffer, JCudfSerialization, NvtxColor, NvtxRange}
+import ai.rapids.cudf.{HostMemoryBuffer, JCudfSerialization, NvtxColor, NvtxRange}
 import ai.rapids.cudf.JCudfSerialization.{HostConcatResult, SerializedTableHeader}
 import com.nvidia.spark.rapids.shims.ShimUnaryExecNode
 
@@ -103,7 +103,7 @@ class HostShuffleCoalesceIterator(
     val firstHeader = serializedTables.peekFirst().header
     if (firstHeader.getNumColumns == 0) {
       (0 until numTablesInBatch).foreach(_ => serializedTables.removeFirst())
-      HostConcatResultUtil.rowsOnlyHostConcatResult(numRowsInBatch)
+      cudf_utils.HostConcatResultUtil.rowsOnlyHostConcatResult(numRowsInBatch)
     } else {
       val headers = new Array[SerializedTableHeader](numTablesInBatch)
       withResource(new Array[HostMemoryBuffer](numTablesInBatch)) { buffers =>
@@ -211,7 +211,7 @@ class GpuShuffleCoalesceIterator(iter: Iterator[HostConcatResult],
       // generate GPU data from batches that are empty.
       GpuSemaphore.acquireIfNecessary(TaskContext.get(), semWaitTime)
       withResource(new MetricRange(opTimeMetric)) { _ =>
-        val batch = HostConcatResultUtil.getColumnarBatch(hostConcatResult, dataTypes)
+        val batch = cudf_utils.HostConcatResultUtil.getColumnarBatch(hostConcatResult, dataTypes)
         outputBatchesMetric += 1
         outputRowsMetric += batch.numRows()
         batch
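Note that these call sites can write cudf_utils.HostConcatResultUtil without adding an import: the iterators are declared in package com.nvidia.spark.rapids, so Scala resolves cudf_utils relative to the enclosing package, and the qualified name makes it clear the helper no longer lives in ai.rapids.cudf. A minimal usage sketch of the relocated helper follows; the RelativeRefDemo object and its method are hypothetical, and the real iterators also acquire the GPU semaphore before materializing a batch, as the diff above shows.

package com.nvidia.spark.rapids

import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.vectorized.ColumnarBatch

// Hypothetical demo, not part of the PR.
object RelativeRefDemo extends Arm {
  // Build a rows-only (zero-column) batch, as HostShuffleCoalesceIterator
  // does for count-style results. cudf_utils resolves relative to this
  // package, and withResource scopes the host memory held by the result.
  def rowsOnlyBatch(numRows: Int): ColumnarBatch =
    withResource(cudf_utils.HostConcatResultUtil.rowsOnlyHostConcatResult(numRows)) { result =>
      cudf_utils.HostConcatResultUtil.getColumnarBatch(result, Array.empty[DataType])
    }
}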
@@ -16,7 +16,7 @@
 
 package com.nvidia.spark.rapids
 
-import ai.rapids.cudf.{HostConcatResultUtil, NvtxColor, NvtxRange}
+import ai.rapids.cudf.{NvtxColor, NvtxRange}
 import ai.rapids.cudf.JCudfSerialization.HostConcatResult
 import com.nvidia.spark.rapids.shims.{GpuHashPartitioning, ShimBinaryExecNode}
 
@@ -345,7 +345,7 @@ object GpuShuffledHashJoinExec extends Arm {
           // we can bring the build batch to the GPU now
           withResource(hostConcatResult) { _ =>
             buildTime.ns {
-              HostConcatResultUtil.getColumnarBatch(hostConcatResult, dataTypes)
+              cudf_utils.HostConcatResultUtil.getColumnarBatch(hostConcatResult, dataTypes)
             }
           }
         }
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-package ai.rapids.cudf
+package com.nvidia.spark.rapids.cudf_utils
 
-import ai.rapids.cudf.JCudfSerialization.HostConcatResult
+import ai.rapids.cudf
 import com.nvidia.spark.rapids.{Arm, GpuColumnVectorFromBuffer}
 
 import org.apache.spark.sql.types.DataType
@@ -26,11 +26,10 @@ object HostConcatResultUtil extends Arm {
   /**
    * Create a rows-only `HostConcatResult`.
    */
-  def rowsOnlyHostConcatResult(numRows: Int): HostConcatResult = {
-    new HostConcatResult(
-      new JCudfSerialization.SerializedTableHeader(
-        Array.empty, numRows, 0L),
-      HostMemoryBuffer.allocate(0, false))
+  def rowsOnlyHostConcatResult(numRows: Int): cudf.JCudfSerialization.HostConcatResult = {
+    new cudf.JCudfSerialization.HostConcatResult(
+      new cudf.JCudfSerialization.SerializedTableHeader(numRows),
+      cudf.HostMemoryBuffer.allocate(0, false))
   }
 
   /**
@@ -41,7 +40,7 @@
    * callers are responsible for closing the resulting `ColumnarBatch`
    */
   def getColumnarBatch(
-      hostConcatResult: HostConcatResult,
+      hostConcatResult: cudf.JCudfSerialization.HostConcatResult,
       sparkSchema: Array[DataType]): ColumnarBatch = {
     if (hostConcatResult.getTableHeader.getNumColumns == 0) {
       // We expect the caller to have acquired the GPU unconditionally before calling
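The change to rowsOnlyHostConcatResult above is what makes the move possible: the old code built the SerializedTableHeader through a constructor apparently only reachable from inside the ai.rapids.cudf package (which is why the file lived there), while the new code uses the single-argument SerializedTableHeader(numRows) constructor, so the utility needs nothing but public cudf API and can sit in the plugin's own cudf_utils package. A minimal sketch of that construction, assuming the single-argument constructor is public in the cudf version this PR builds against; the PublicApiSketch object is hypothetical.

package com.nvidia.spark.rapids.cudf_utils

import ai.rapids.cudf

// Hypothetical sketch, not part of the PR: a rows-only HostConcatResult
// built purely from public cudf API, mirroring rowsOnlyHostConcatResult.
object PublicApiSketch {
  def rowsOnly(numRows: Int): cudf.JCudfSerialization.HostConcatResult =
    new cudf.JCudfSerialization.HostConcatResult(
      new cudf.JCudfSerialization.SerializedTableHeader(numRows), // rows-only header
      cudf.HostMemoryBuffer.allocate(0, false)) // empty, non-pinned host buffer
}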