Don't use \\z
Signed-off-by: Gera Shegalov <gera@apache.org>
gerashegalov committed Dec 21, 2023
1 parent 79ed020 commit 3353e66
Showing 1 changed file with 6 additions and 8 deletions.
14 changes: 6 additions & 8 deletions integration_tests/src/main/python/conditionals_test.py
@@ -137,7 +137,7 @@ def test_coalesce(data_gen):
     s1 = with_cpu_session(
         lambda spark: gen_scalar(data_gen, force_no_nulls=not isinstance(data_gen, NullGen)))
     # we want lots of nulls
-    gen = StructGen([('_c' + str(x), data_gen.copy_special_case(None, weight=1000.0))
+    gen = StructGen([('_c' + str(x), data_gen.copy_special_case(None, weight=1000.0))
         for x in range(0, num_cols)], nullable=False)
     command_args = [f.col('_c' + str(x)) for x in range(0, num_cols)]
     command_args.append(s1)
@@ -208,13 +208,12 @@ def test_conditional_with_side_effects_col_scalar(data_gen):
 
 @pytest.mark.parametrize('data_gen', [mk_str_gen('[0-9]{1,20}')], ids=idfn)
 @pytest.mark.skipif(not is_jvm_charset_utf8(), reason="regular expressions require UTF-8")
-@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9992')
 def test_conditional_with_side_effects_cast(data_gen):
     test_conf=copy_and_update(
         ansi_enabled_conf, {'spark.rapids.sql.regexp.enabled': True})
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark : unary_op_df(spark, data_gen).selectExpr(
-            'IF(a RLIKE "^[0-9]{1,5}\\z", CAST(a AS INT), 0)'),
+            r'IF(a RLIKE "^\\d{5,}", CAST(SUBSTR(a, 0, 5) AS INT), 0)'),
         conf = test_conf)
 
 @pytest.mark.parametrize('data_gen', [mk_str_gen('[0-9]{1,9}')], ids=idfn)
@@ -223,11 +222,10 @@ def test_conditional_with_side_effects_case_when(data_gen):
     test_conf=copy_and_update(
         ansi_enabled_conf, {'spark.rapids.sql.regexp.enabled': True})
     assert_gpu_and_cpu_are_equal_collect(
-        lambda spark : unary_op_df(spark, data_gen).selectExpr(
-            'CASE \
-            WHEN a RLIKE "^[0-9]{1,3}\\z" THEN CAST(a AS INT) \
-            WHEN a RLIKE "^[0-9]{4,6}\\z" THEN CAST(a AS INT) + 123 \
-            ELSE -1 END'),
+        lambda spark : unary_op_df(spark, data_gen).select(
+            f.when(f.col('a').rlike(r"^[0-9]{1,3}"), f.col('a').substr(0, 1).cast('INT'))\
+            .when(f.col('a').rlike(r"^[0-9]{4,6}"), f.col('a').substr(0, 4).cast('INT') + f.lit(123))\
+            .otherwise(f.lit(-1))),
         conf = test_conf)
 
 @pytest.mark.parametrize('data_gen', [mk_str_gen('[a-z]{0,3}')], ids=idfn)
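Note (not part of the commit): a minimal standalone sketch, using only Python's built-in re module, of the property the rewritten expressions appear to rely on. Each new branch bounds how many digits reach the ANSI-mode CAST (via SUBSTR or a short prefix), so the cast stays within INT range even without the end-of-string anchor \z. The sample strings below are hypothetical stand-ins for what mk_str_gen('[0-9]{1,20}') might produce.

import re

# Hypothetical sample inputs; not taken from the test's data generator.
samples = ["7", "123", "99999", "12345678901234567890"]

INT_MAX = 2**31 - 1  # upper bound of a Spark SQL 32-bit INT

for s in samples:
    # Rough Python analogue of IF(a RLIKE "^\d{5,}", CAST(SUBSTR(a, 0, 5) AS INT), 0):
    # the taken branch casts at most the first five characters, so it fits in INT.
    value = int(s[:5]) if re.match(r"\d{5,}", s) else 0
    assert 0 <= value <= INT_MAX

    # Rough analogue of the when()/otherwise() chain: a match on 1-3 leading digits
    # casts a one-character prefix, again safely inside INT range.
    if re.match(r"[0-9]{1,3}", s):
        assert int(s[:1]) <= INT_MAX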
