NVIDIA · jlowe · Apr 12, 2021 · Apr 8, 2021
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuSortExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuSortExec.scala
@@ -288,14 +288,12 @@ case class GpuOutOfCoreSortIterator(
       // The entire thing is sorted
       withResource(sortedTbl.contiguousSplit()) { splits =>
         assert(splits.length == 1)
-        memUsed += splits.head.getBuffer.getLength
-        closeOnExcept(
-          GpuColumnVectorFromBuffer.from(splits.head, sorter.projectedBatchTypes)) { cb =>
-          val sp = SpillableColumnarBatch(cb, SpillPriorities.ACTIVE_ON_DECK_PRIORITY,
-            spillCallback)
-          sortedSize += sp.sizeInBytes
-          sorted.add(sp)
-        }
+        val ct = splits.head
+        memUsed += ct.getBuffer.getLength
+        val sp = SpillableColumnarBatch(ct, sorter.projectedBatchTypes,
+          SpillPriorities.ACTIVE_ON_DECK_PRIORITY, spillCallback)
+        sortedSize += sp.sizeInBytes
+        sorted.add(sp)
       }
     } else {
       val splitIndexes = if (sortedOffset >= 0) {
@@ -326,13 +324,10 @@ case class GpuOutOfCoreSortIterator(
       withResource(sortedTbl.contiguousSplit(splitIndexes: _*)) { splits =>
         memUsed += splits.map(_.getBuffer.getLength).sum
         val stillPending = if (sortedOffset >= 0) {
-          closeOnExcept(
-            GpuColumnVectorFromBuffer.from(splits.head, sorter.projectedBatchTypes)) { cb =>
-            val sp = SpillableColumnarBatch(cb, SpillPriorities.ACTIVE_ON_DECK_PRIORITY,
-              spillCallback)
-            sortedSize += sp.sizeInBytes
-            sorted.add(sp)
-          }
+          val sp = SpillableColumnarBatch(splits.head, sorter.projectedBatchTypes,
+            SpillPriorities.ACTIVE_ON_DECK_PRIORITY, spillCallback)
+          sortedSize += sp.sizeInBytes
+          sorted.add(sp)
           splits.slice(1, splits.length)
         } else {
           splits
@@ -342,11 +337,9 @@ case class GpuOutOfCoreSortIterator(
         stillPending.zip(boundaries).foreach {
           case (ct: ContiguousTable, lower: UnsafeRow) =>
             if (ct.getRowCount > 0) {
-              closeOnExcept(
-                GpuColumnVectorFromBuffer.from(ct, sorter.projectedBatchTypes)) { cb =>
-                pending.add(SpillableColumnarBatch(cb, SpillPriorities.ACTIVE_BATCHING_PRIORITY,
-                  spillCallback), lower)
-              }
+              val sp = SpillableColumnarBatch(ct, sorter.projectedBatchTypes,
+                SpillPriorities.ACTIVE_BATCHING_PRIORITY, spillCallback)
+              pending.add(sp, lower)
             } else {
               ct.close()
             }

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SpillableColumnarBatch.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SpillableColumnarBatch.scala
@@ -16,6 +16,8 @@
 
 package com.nvidia.spark.rapids
 
+import ai.rapids.cudf.ContiguousTable
+
 import org.apache.spark.TaskContext
 import org.apache.spark.sql.rapids.TempSpillBufferId
 import org.apache.spark.sql.types.DataType
@@ -150,6 +152,25 @@ object SpillableColumnarBatch extends Arm {
     }
   }
 
+  /**
+   * Build a SpillableColumnarBatch from a contiguous table by registering it with the catalog.
+   * @note Closing the contiguous table parameter remains the caller's responsibility.
+   * @param ct contiguous table holding the GPU data of the batch
+   * @param sparkTypes Spark data types that describe the schema of the data
+   * @param priority spill priority the batch starts out with
+   * @param spillCallback invoked when the buffer is spilled. Keep it very light weight: it should
+   *                      never allocate GPU memory and should only be used for metrics.
+   */
+  def apply(
+      ct: ContiguousTable,
+      sparkTypes: Array[DataType],
+      priority: Long,
+      spillCallback: RapidsBuffer.SpillCallback): SpillableColumnarBatch = {
+    val bufferId = TempSpillBufferId()
+    RapidsBufferCatalog.addContiguousTable(bufferId, ct, priority, spillCallback)
+    new SpillableColumnarBatchImpl(bufferId, ct.getRowCount.toInt, sparkTypes)
+  }
+
   private[this] def addBatch(
       id: RapidsBufferId,
       batch: ColumnarBatch,

diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/RapidsDeviceMemoryStoreSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/RapidsDeviceMemoryStoreSuite.scala
@@ -202,13 +202,17 @@ class RapidsDeviceMemoryStoreSuite extends FunSuite with Arm with MockitoSugar {
   }
 
   class MockSpillStore(catalog: RapidsBufferCatalog)
-      extends RapidsBufferStore(StorageTier.HOST, catalog) {
+      extends RapidsBufferStore(StorageTier.HOST, catalog) with Arm {
     val spilledBuffers = new ArrayBuffer[RapidsBufferId]
 
-    override protected def createBuffer(b: RapidsBuffer, m: MemoryBuffer, s: Cuda.Stream)
-    : RapidsBufferBase = {
-      spilledBuffers += b.id
-      new MockRapidsBuffer(b.id, b.size, b.meta, b.getSpillPriority)
+    override protected def createBuffer(
+        b: RapidsBuffer,
+        m: MemoryBuffer,
+        s: Cuda.Stream): RapidsBufferBase = {
+      withResource(m) { _ =>
+        spilledBuffers += b.id
+        new MockRapidsBuffer(b.id, b.size, b.meta, b.getSpillPriority)
+      }
     }
 
     class MockRapidsBuffer(id: RapidsBufferId, size: Long, meta: TableMeta, spillPriority: Long)

diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/RapidsHostMemoryStoreSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/RapidsHostMemoryStoreSuite.scala
@@ -171,10 +171,15 @@ class RapidsHostMemoryStoreSuite extends FunSuite with Arm with MockitoSugar {
 
               devStore.addContiguousTable(smallBufferId, smallTable, spillPriority)
               devStore.synchronousSpill(0)
-              val ac: ArgumentCaptor[RapidsBuffer] = ArgumentCaptor.forClass(classOf[RapidsBuffer])
-              verify(mockStore).copyBuffer(ac.capture(), ArgumentMatchers.any[MemoryBuffer],
-                ArgumentMatchers.any[Cuda.Stream])
-              assertResult(bigBufferId)(ac.getValue.id)
+              val rapidsBufferCaptor: ArgumentCaptor[RapidsBuffer] =
+                ArgumentCaptor.forClass(classOf[RapidsBuffer])
+              val memoryBufferCaptor: ArgumentCaptor[MemoryBuffer] =
+                ArgumentCaptor.forClass(classOf[MemoryBuffer])
+              verify(mockStore).copyBuffer(rapidsBufferCaptor.capture(),
+                memoryBufferCaptor.capture(), ArgumentMatchers.any[Cuda.Stream])
+              withResource(memoryBufferCaptor.getValue) { _ =>
+                assertResult(bigBufferId)(rapidsBufferCaptor.getValue.id)
+              }
             }
           }
         }