Add more decimal128 tests (#4523)

Signed-off-by: Kuhu Shukla <kuhus@nvidia.com>
Co-authored-by: Kuhu Shukla <kuhus@nvidia.com>
NVIDIA · Jan 13, 2022 · eab40b0
1 parent f1be1a5
commit eab40b0
Show file tree

Hide file tree

Showing 5 changed files with 12 additions and 12 deletions.
diff --git a/integration_tests/src/main/python/collection_ops_test.py b/integration_tests/src/main/python/collection_ops_test.py
@@ -32,7 +32,7 @@
                                 for nullable in [True, False]
                                 for sub_gen in all_gen + [null_gen] + decimal_128_gens_no_neg]
 
-@pytest.mark.parametrize('data_gen', non_nested_array_gens, ids=idfn)
+@pytest.mark.parametrize('data_gen', non_nested_array_gens_dec128, ids=idfn)
 def test_concat_list(data_gen):
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark: binary_op_df(spark, data_gen).selectExpr('concat(a)'))
@@ -48,7 +48,7 @@ def test_empty_concat_list():
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark: binary_op_df(spark, ArrayGen(LongGen())).selectExpr('concat()'))
 
-@pytest.mark.parametrize('data_gen', non_nested_array_gens, ids=idfn)
+@pytest.mark.parametrize('data_gen', non_nested_array_gens_dec128, ids=idfn)
 def test_concat_list_with_lit(data_gen):
     array_lit = gen_scalar(data_gen)
     array_lit2 = gen_scalar(data_gen)

diff --git a/integration_tests/src/main/python/conditionals_test.py b/integration_tests/src/main/python/conditionals_test.py
@@ -69,7 +69,7 @@ def test_if_else_map(data_gen):
             conf = allow_negative_scale_of_decimal_conf)
 
 @pytest.mark.order(1) # at the head of xdist worker queue if pytest-order is installed
-@pytest.mark.parametrize('data_gen', all_gens + all_nested_gens + decimal_128_gens, ids=idfn)
+@pytest.mark.parametrize('data_gen', all_gens + all_nested_gens + single_array_gens_sample_with_decimal128 + decimal_128_gens, ids=idfn)
 def test_case_when(data_gen):
     num_cmps = 20
     s1 = gen_scalar(data_gen, force_no_nulls=not isinstance(data_gen, NullGen))
@@ -128,7 +128,7 @@ def test_nvl(data_gen):
 # in both cpu and gpu runs.
 #      E: java.lang.AssertionError: assertion failed: each serializer expression should contain\
 #         at least one `BoundReference`
-@pytest.mark.parametrize('data_gen', all_gens + all_nested_gens_nonempty_struct + decimal_128_gens, ids=idfn)
+@pytest.mark.parametrize('data_gen', all_gens + all_nested_gens_nonempty_struct + decimal_128_gens + single_array_gens_sample_with_decimal128, ids=idfn)
 def test_coalesce(data_gen):
     num_cols = 20
     s1 = gen_scalar(data_gen, force_no_nulls=not isinstance(data_gen, NullGen))

diff --git a/integration_tests/src/main/python/join_test.py b/integration_tests/src/main/python/join_test.py
@@ -224,7 +224,7 @@ def do_join(spark):
     assert_gpu_and_cpu_are_equal_collect(do_join, conf=allow_negative_scale_of_decimal_conf)
 
 @ignore_order(local=True)
-@pytest.mark.parametrize('data_gen', basic_nested_gens + decimal_128_gens, ids=idfn)
+@pytest.mark.parametrize('data_gen', basic_nested_gens + decimal_128_gens + single_array_gens_sample_with_decimal128, ids=idfn)
 # Not all join types can be translated to a broadcast join, but this tests them to be sure we
 # can handle what spark is doing
 @pytest.mark.parametrize('join_type', all_join_types, ids=idfn)
@@ -252,7 +252,7 @@ def do_join(spark):
 # After 3.1.0 is the min spark version we can drop this
 @ignore_order(local=True)
 @pytest.mark.order(1) # at the head of xdist worker queue if pytest-order is installed
-@pytest.mark.parametrize('data_gen', all_gen + basic_nested_gens + decimal_128_gens, ids=idfn)
+@pytest.mark.parametrize('data_gen', all_gen + basic_nested_gens + decimal_128_gens + single_array_gens_sample_with_decimal128, ids=idfn)
 @pytest.mark.parametrize('batch_size', ['100', '1g'], ids=idfn) # set the batch size so we can test multiple stream batches
 def test_cartesian_join(data_gen, batch_size):
     def do_join(spark):

diff --git a/integration_tests/src/main/python/repart_test.py b/integration_tests/src/main/python/repart_test.py
@@ -84,7 +84,7 @@ def test_union_struct_missing_children(data_gen):
         lambda spark : binary_op_df(spark, left_gen).unionByName(binary_op_df(
             spark, right_gen), True))
 
-@pytest.mark.parametrize('data_gen', all_gen + decimal_128_gens + map_gens + array_gens_sample +
+@pytest.mark.parametrize('data_gen', all_gen + decimal_128_gens + map_gens + array_gens_sample_with_decimal128 +
                                      [all_basic_struct_gen,
                                       StructGen([['child0', DecimalGen(7, 2)]]),
                                       nested_struct,
@@ -95,7 +95,7 @@ def test_union(data_gen):
             lambda spark : binary_op_df(spark, data_gen).union(binary_op_df(spark, data_gen)),
             conf=allow_negative_scale_of_decimal_conf)
 
-@pytest.mark.parametrize('data_gen', all_gen + decimal_128_gens + map_gens + array_gens_sample +
+@pytest.mark.parametrize('data_gen', all_gen + decimal_128_gens + map_gens + array_gens_sample_with_decimal128 +
                                      [all_basic_struct_gen,
                                       StructGen([['child0', DecimalGen(7, 2)]]),
                                       nested_struct,
@@ -106,7 +106,7 @@ def test_unionAll(data_gen):
             lambda spark : binary_op_df(spark, data_gen).unionAll(binary_op_df(spark, data_gen)),
             conf=allow_negative_scale_of_decimal_conf)
 
-@pytest.mark.parametrize('data_gen', all_gen + decimal_128_gens + map_gens + array_gens_sample +
+@pytest.mark.parametrize('data_gen', all_gen + decimal_128_gens + map_gens + array_gens_sample_with_decimal128 +
                                      [all_basic_struct_gen,
                                       pytest.param(all_basic_struct_gen),
                                       pytest.param(StructGen([[ 'child0', DecimalGen(7, 2)]])),
@@ -154,7 +154,7 @@ def assert_union_equal(gen1, gen2):
 
 
 
-@pytest.mark.parametrize('data_gen', all_gen + decimal_128_gens + map_gens + array_gens_sample +
+@pytest.mark.parametrize('data_gen', all_gen + decimal_128_gens + map_gens + array_gens_sample_with_decimal128 +
                                      [all_basic_struct_gen,
                                       StructGen([['child0', DecimalGen(7, 2)]]),
                                       nested_struct,

diff --git a/integration_tests/src/main/python/sort_test.py b/integration_tests/src/main/python/sort_test.py
@@ -299,11 +299,11 @@ def test_large_orderby_nested_ridealong(data_gen):
     decimal_gen_38_10,
     StructGen([('child1', byte_gen)]),
     simple_string_to_string_map_gen,
-    ArrayGen(byte_gen, max_length=5)] + decimal_128_gens_no_neg, ids=idfn)
+    ArrayGen(byte_gen, max_length=5)] + decimal_128_gens_no_neg + single_array_gens_sample_with_decimal128, ids=idfn)
 @pytest.mark.order(2)
 def test_orderby_nested_ridealong_limit(data_gen):
     # We use a LongRangeGen to avoid duplicate keys that can cause ambiguity in the sort
     #  results, especially on distributed clusters.
     assert_gpu_and_cpu_are_equal_collect(
             lambda spark : two_col_df(spark, LongRangeGen(), data_gen)\
-                    .orderBy(f.col('a').desc()).limit(100))
+                    .orderBy(f.col('a').desc()).limit(100), conf=allow_negative_scale_of_decimal_conf)