Fix issue when out of core sorting nested data types #2251

Merged 2 commits · Apr 23, 2021
23 changes: 23 additions & 0 deletions integration_tests/src/main/python/data_gen.py
@@ -252,6 +252,29 @@ def __init__(self, nullable=True, min_val =LONG_MIN, max_val = LONG_MAX,
    def start(self, rand):
        self._start(rand, lambda : rand.randint(self._min_val, self._max_val))

class LongRangeGen(DataGen):
    """Generate Longs in incrementing or decrementing order."""
    def __init__(self, nullable=False, start_val=0, direction="inc"):
        super().__init__(LongType(), nullable=nullable)
        self._start_val = start_val
        self._current_val = start_val
        if direction == "dec":
            def dec_it():
                tmp = self._current_val
                self._current_val -= 1
                return tmp
            self._do_it = dec_it
        else:
            def inc_it():
                tmp = self._current_val
                self._current_val += 1
                return tmp
            self._do_it = inc_it

    def start(self, rand):
        self._current_val = self._start_val
        self._start(rand, self._do_it)

class RepeatSeqGen(DataGen):
"""Generate Repeated seq of `length` random items"""
def __init__(self, child, length):
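The generator's direction handling is just a closure over mutable state. Here is a minimal standalone sketch of the same inc/dec pattern in plain Python; `make_counter` is an illustrative name, not part of the plugin's harness:

def make_counter(start_val=0, direction="inc"):
    # Hypothetical helper mirroring LongRangeGen's closure pattern.
    state = {"current": start_val}
    step = -1 if direction == "dec" else 1
    def next_val():
        # Return the current value, then move one step in the chosen direction.
        tmp = state["current"]
        state["current"] += step
        return tmp
    return next_val

nxt = make_counter(start_val=0, direction="dec")
assert [nxt(), nxt(), nxt()] == [0, -1, -2]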
18 changes: 18 additions & 0 deletions integration_tests/src/main/python/sort_test.py
@@ -171,3 +171,21 @@ def test_large_orderby():
            lambda spark : unary_op_df(spark, long_gen, length=1024*128)\
                    .orderBy(f.col('a')),
            conf = {'spark.rapids.sql.batchSizeBytes': '16384'})

# This is similar to test_large_orderby, but here we test types that are not
# sorted on themselves, but ride along with the sort key.
@pytest.mark.parametrize('data_gen', [byte_gen,
                                      string_gen,
                                      float_gen,
                                      date_gen,
                                      timestamp_gen,
                                      decimal_gen_default,
                                      StructGen([('child1', byte_gen)]),
                                      ArrayGen(byte_gen, max_length=5)], ids=idfn)
def test_large_orderby_nested_ridealong(data_gen):
    # We use a LongRangeGen to avoid duplicate keys that can cause ambiguity in the
    # sort results, especially on distributed clusters.
    assert_gpu_and_cpu_are_equal_collect(
            lambda spark : two_col_df(spark, LongRangeGen(), data_gen, length=1024*127)\
                    .orderBy(f.col('a').desc()),
            conf = {'spark.rapids.sql.batchSizeBytes': '16384'})
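For reference, the ride-along pattern the test exercises can be reproduced with stock PySpark. This sketch assumes a local SparkSession and uses illustrative data rather than the plugin's generators or configs:

from pyspark.sql import SparkSession
import pyspark.sql.functions as f

spark = SparkSession.builder.master("local[1]").getOrCreate()
# Unique long keys in 'a'; a nested struct column 'b' rides along unsorted.
rows = [(i, {"child1": i % 128}) for i in range(1000)]
df = spark.createDataFrame(rows, "a long, b struct<child1: int>")
result = df.orderBy(f.col("a").desc()).collect()
# Unique keys make the expected order unambiguous, even across partitions.
assert [r["a"] for r in result] == list(range(999, -1, -1))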
@@ -201,7 +201,23 @@ class GpuSorter(
      batches.foreach { cb =>
        tabs += GpuColumnVector.from(cb)
      }
      Table.merge(tabs.toArray, cudfOrdering: _*)
      // In the current version of cudf, merge does not work for structs or lists
      // (nested types). This should be fixed by
      // https://github.com/rapidsai/cudf/issues/8050
      val hasNested = {
        val tab = tabs.head
        (0 until tab.getNumberOfColumns).exists { i =>
          tab.getColumn(i).getType.isNestedType
        }
      }
      if (hasNested) {
        // As a workaround we concatenate all of the data together and then sort it.
        // This is slower, but it works.
        withResource(Table.concatenate(tabs: _*)) { concatenated =>
          concatenated.orderBy(cudfOrdering: _*)
        }
      } else {
        Table.merge(tabs.toArray, cudfOrdering: _*)
      }
    }
    withResource(merged) { merged =>
      GpuColumnVector.from(merged, projectedBatchTypes)
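The fallback trades the linear N-way merge for an O(n log n) re-sort of everything. A plain-Python sketch of the two strategies, using lists in place of cudf tables (illustrative only, not the plugin's API):

import heapq

# Each batch is already sorted, as are the batches handed to GpuSorter.
batches = [[1, 4, 7], [2, 5, 8], [3, 6, 9]]

# Fast path: N-way merge of the pre-sorted batches (the role Table.merge
# plays for flat types).
merged = list(heapq.merge(*batches))

# Workaround path for nested types: concatenate everything, then sort from
# scratch. Slower, but it does not rely on cudf's merge.
concatenated = sorted(x for batch in batches for x in batch)

assert merged == concatenated == list(range(1, 10))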