diff --git a/integration_tests/src/main/python/string_test.py b/integration_tests/src/main/python/string_test.py
index 48032f14b20..0581250ed74 100644
--- a/integration_tests/src/main/python/string_test.py
+++ b/integration_tests/src/main/python/string_test.py
@@ -20,6 +20,7 @@
 from marks import *
 from pyspark.sql.types import *
 import pyspark.sql.functions as f
+from spark_session import is_before_spark_320
 
 def mk_str_gen(pattern):
     return StringGen(pattern).with_special_case('').with_special_pattern('.{0,10}')
@@ -483,6 +484,28 @@ def test_regexp_replace():
                 'regexp_replace(a, "a|b|c", "A")'),
             conf={'spark.rapids.sql.expression.RegExpReplace': 'true'})
 
+@pytest.mark.skipif(is_before_spark_320(), reason='regexp is synonym for RLike starting in Spark 3.2.0')
+def test_regexp():
+    gen = mk_str_gen('[abcd]{1,3}')
+    assert_gpu_and_cpu_are_equal_collect(
+            lambda spark: unary_op_df(spark, gen).selectExpr(
+                'regexp(a, "a{2}")',
+                'regexp(a, "a{1,3}")',
+                'regexp(a, "a{1,}")',
+                'regexp(a, "a[bc]d")'),
+            conf={'spark.rapids.sql.expression.RLike': 'true'})
+
+@pytest.mark.skipif(is_before_spark_320(), reason='regexp_like is synonym for RLike starting in Spark 3.2.0')
+def test_regexp_like():
+    gen = mk_str_gen('[abcd]{1,3}')
+    assert_gpu_and_cpu_are_equal_collect(
+            lambda spark: unary_op_df(spark, gen).selectExpr(
+                'regexp_like(a, "a{2}")',
+                'regexp_like(a, "a{1,3}")',
+                'regexp_like(a, "a{1,}")',
+                'regexp_like(a, "a[bc]d")'),
+            conf={'spark.rapids.sql.expression.RLike': 'true'})
+
 @pytest.mark.skipif(is_databricks_runtime(), reason='Databricks optimizes out regexp_replace call in this case')
 @allow_non_gpu('ProjectExec', 'RegExpReplace')