diff --git a/integration_tests/src/main/python/conditionals_test.py b/integration_tests/src/main/python/conditionals_test.py index 88a79dc17000..e983d33e1bef 100644 --- a/integration_tests/src/main/python/conditionals_test.py +++ b/integration_tests/src/main/python/conditionals_test.py @@ -137,7 +137,7 @@ def test_coalesce(data_gen): s1 = with_cpu_session( lambda spark: gen_scalar(data_gen, force_no_nulls=not isinstance(data_gen, NullGen))) # we want lots of nulls - gen = StructGen([('_c' + str(x), data_gen.copy_special_case(None, weight=1000.0)) + gen = StructGen([('_c' + str(x), data_gen.copy_special_case(None, weight=1000.0)) for x in range(0, num_cols)], nullable=False) command_args = [f.col('_c' + str(x)) for x in range(0, num_cols)] command_args.append(s1) @@ -208,13 +208,12 @@ def test_conditional_with_side_effects_col_scalar(data_gen): @pytest.mark.parametrize('data_gen', [mk_str_gen('[0-9]{1,20}')], ids=idfn) @pytest.mark.skipif(not is_jvm_charset_utf8(), reason="regular expressions require UTF-8") -@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9992') def test_conditional_with_side_effects_cast(data_gen): test_conf=copy_and_update( ansi_enabled_conf, {'spark.rapids.sql.regexp.enabled': True}) assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr( - 'IF(a RLIKE "^[0-9]{1,5}\\z", CAST(a AS INT), 0)'), + r'IF(a RLIKE "^\\d{5,}", CAST(SUBSTR(a, 0, 5) AS INT), 0)'), conf = test_conf) @pytest.mark.parametrize('data_gen', [mk_str_gen('[0-9]{1,9}')], ids=idfn) @@ -223,11 +222,10 @@ def test_conditional_with_side_effects_case_when(data_gen): test_conf=copy_and_update( ansi_enabled_conf, {'spark.rapids.sql.regexp.enabled': True}) assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr( - 'CASE \ - WHEN a RLIKE "^[0-9]{1,3}\\z" THEN CAST(a AS INT) \ - WHEN a RLIKE "^[0-9]{4,6}\\z" THEN CAST(a AS INT) + 123 \ - ELSE -1 END'), + lambda spark : unary_op_df(spark, data_gen).select( + f.when(f.col('a').rlike(r"^[0-9]{1,3}"), f.col('a').substr(0, 1).cast('INT'))\ + .when(f.col('a').rlike(r"^[0-9]{4,6}"), f.col('a').substr(0, 4).cast('INT') + f.lit(123))\ + .otherwise(f.lit(-1))), conf = test_conf) @pytest.mark.parametrize('data_gen', [mk_str_gen('[a-z]{0,3}')], ids=idfn)