Skip to content

Commit

Permalink
Stop using invalid escape sequences (#9073)
Browse files: browse the repository at this point in the history
Fixes #8980 

Replace string literals that contain invalid escape sequences (such as `'\$\.'` and `'\[a-z\]\+'`) with the equivalent raw string literals (`r'...'`).

## Testing

```bash
$ ./integration_tests/run_pyspark_from_build.sh -k 'regexp_test or get_json_test' |& tee build.log
$ grep -c 'invalid escape' build.log 
0
```

Signed-off-by: Gera Shegalov <gera@apache.org>
  • Loading branch information
gerashegalov authored Aug 18, 2023
1 parent 66b1174 commit a889054
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 9 deletions.
2 changes: 1 addition & 1 deletion integration_tests/src/main/python/get_json_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_get_json_object(json_str_pattern):
def test_unsupported_fallback_get_json_object(json_str_pattern):
gen = mk_json_str_gen(json_str_pattern)
scalar_json = '{"store": {"fruit": "test"}}'
pattern = StringGen(pattern='\$\.[a-z]{1,9}')
pattern = StringGen(pattern=r'\$\.[a-z]{1,9}')
def assert_gpu_did_fallback(sql_text):
assert_gpu_fallback_collect(lambda spark:
gen_df(spark, [('a', gen), ('b', pattern)], length=10).selectExpr(sql_text),
Expand Down
15 changes: 7 additions & 8 deletions integration_tests/src/main/python/regexp_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import locale
import pytest

from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect, \
assert_cpu_and_gpu_are_equal_collect_with_capture, assert_gpu_and_cpu_error, \
assert_gpu_and_cpu_error, \
assert_gpu_sql_fallback_collect
from data_gen import *
from marks import *
Expand Down Expand Up @@ -520,7 +519,7 @@ def test_word_boundaries():
'regexp_replace(a, "\\\\B", "#")',
),
conf=_regexp_conf)

def test_character_classes():
gen = mk_str_gen('[abcd]{1,3}[0-9]{1,3}[abcd]{1,3}[ \n\t\r]{0,2}')
assert_gpu_and_cpu_are_equal_collect(
Expand Down Expand Up @@ -864,7 +863,7 @@ def test_regexp_replace_fallback_configured_off():
@allow_non_gpu('ProjectExec')
def test_unsupported_fallback_regexp_extract():
gen = mk_str_gen('[abcdef]{0,2}')
regex_gen = StringGen('\[a-z\]\+')
regex_gen = StringGen(r'\[a-z\]\+')
num_gen = IntegerGen(min_val=0, max_val=0, special_cases=[])

def assert_gpu_did_fallback(sql_text):
Expand All @@ -886,7 +885,7 @@ def assert_gpu_did_fallback(sql_text):
@allow_non_gpu('ProjectExec')
def test_unsupported_fallback_regexp_extract_all():
gen = mk_str_gen('[abcdef]{0,2}')
regex_gen = StringGen('\[a-z\]\+')
regex_gen = StringGen(r'\[a-z\]\+')
num_gen = IntegerGen(min_val=0, max_val=0, special_cases=[])
def assert_gpu_did_fallback(sql_text):
assert_gpu_fallback_collect(lambda spark:
Expand All @@ -907,7 +906,7 @@ def assert_gpu_did_fallback(sql_text):
@allow_non_gpu('ProjectExec', 'RegExpReplace')
def test_unsupported_fallback_regexp_replace():
gen = mk_str_gen('[abcdef]{0,2}')
regex_gen = StringGen('\[a-z\]\+')
regex_gen = StringGen(r'\[a-z\]\+')
def assert_gpu_did_fallback(sql_text):
assert_gpu_fallback_collect(lambda spark:
gen_df(spark, [
Expand Down Expand Up @@ -992,7 +991,7 @@ def test_regexp_memory_fallback():
'a rlike "1|2|3|4|5|6"'
),
cpu_fallback_class_name='RLike',
conf={
conf={
'spark.rapids.sql.regexp.enabled': True,
'spark.rapids.sql.regexp.maxStateMemoryBytes': '10',
'spark.rapids.sql.batchSizeBytes': '20' # 1 row in the batch
Expand All @@ -1014,7 +1013,7 @@ def test_regexp_memory_ok():
'a rlike "(1)(2)(3)"',
'a rlike "1|2|3|4|5|6"'
),
conf={
conf={
'spark.rapids.sql.regexp.enabled': True,
'spark.rapids.sql.regexp.maxStateMemoryBytes': '12',
'spark.rapids.sql.batchSizeBytes': '20' # 1 row in the batch
Expand Down

0 comments on commit a889054

Please sign in to comment.