Skip to content

Commit

Permalink
Update passing JSON tests after list support added in CUDF (#11319)
Browse files Browse the repository at this point in the history
Signed-off-by: Robert (Bobby) Evans <bobby@apache.org>
  • Loading branch information
revans2 authored Aug 13, 2024
1 parent 1101427 commit 36b9e2c
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 9 deletions.
10 changes: 5 additions & 5 deletions integration_tests/src/main/python/json_matrix_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,9 +709,9 @@ def test_from_json_decs(std_input_path, input_file, dt):
pytest.param("single_quoted_strings.json", marks=pytest.mark.xfail(condition=is_before_spark_330(), reason='https://github.com/NVIDIA/spark-rapids/issues/10495')),
pytest.param("boolean_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10479')),
pytest.param("invalid_ridealong_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10534')),
pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15278')),
pytest.param("int_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15278')),
pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11154'))])
pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15318')),
"int_struct_formatted.json",
pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(condition=is_spark_400_or_later(), reason='https://github.com/NVIDIA/spark-rapids/issues/11154'))])
@pytest.mark.parametrize('read_func', [read_json_df])
def test_scan_json_strings(std_input_path, read_func, spark_tmp_table_factory, input_file):
assert_gpu_and_cpu_are_equal_collect(
Expand All @@ -731,9 +731,9 @@ def test_scan_json_strings(std_input_path, read_func, spark_tmp_table_factory, i
"single_quoted_strings.json",
pytest.param("boolean_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10479')),
pytest.param("invalid_ridealong_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10534')),
pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15278')),
pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15318')),
"int_struct_formatted.json",
pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15278'))])
"int_mixed_array_struct_formatted.json"])
@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453
def test_from_json_strings(std_input_path, input_file):
schema = StructType([StructField("data", StringType())])
Expand Down
7 changes: 3 additions & 4 deletions integration_tests/src/main/python/json_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def do_read(spark):
@approximate_float
@pytest.mark.parametrize('filename', [
'boolean.json',
pytest.param('boolean_invalid.json', marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/4779')),
'boolean_invalid.json',
'ints.json',
pytest.param('ints_invalid.json', marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/4940')), # This fails for dates, as not all are invalid
'nan_and_inf.json',
Expand Down Expand Up @@ -670,9 +670,9 @@ def test_from_json_struct_fallback_dupe_keys(schema):

@pytest.mark.parametrize('pattern', [
r'{ "bool": (true|false|True|False|TRUE|FALSE) }',
pytest.param(r'{ "bool": "(true|false)" }', marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/4779')),
r'{ "bool": "(true|false)" }',
r'{ "bool": "(True|False|TRUE|FALSE)" }',
pytest.param(r'{ "bool": [0-9]{0,2}(\.[0-9]{1,2})? }', marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/4779')),
r'{ "bool": [0-9]{0,2}(\.[0-9]{1,2})? }',
r'{ "bool": "[0-9]{0,2}(\.[0-9]{1,2})?" }',
r'{ "bool": [0-9]{4}-[0-9]{2}-[0-9]{2} }',
r'{ "bool": "[0-9]{4}-[0-9]{2}-[0-9]{2}" }'
Expand Down Expand Up @@ -895,7 +895,6 @@ def test_from_json_struct_of_list(schema):
'struct<a:string>'
])
@allow_non_gpu(*non_utc_allow)
@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10351')
def test_from_json_mixed_types_list_struct(schema):
json_string_gen = StringGen(r'{"a": (\[1,2,3\]|{"b":"[a-z]{2}"}) }')
assert_gpu_and_cpu_are_equal_collect(
Expand Down
3 changes: 3 additions & 0 deletions integration_tests/src/main/python/spark_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,9 @@ def is_spark_350_or_later():
def is_spark_351_or_later():
    """Return True when the running Spark version is 3.5.1 or newer.

    The leading dotted-numeric part of ``spark_version()`` is compared
    component by component as integers. A plain string comparison orders
    versions lexicographically, so a release such as "3.10.0" would wrongly
    sort before "3.5.1".
    """
    import re

    version = spark_version()
    # Only the leading "N.N.N" prefix is considered; any trailing qualifier
    # (for example a "-SNAPSHOT" style suffix) is ignored.
    numeric_prefix = re.match(r'\d+(?:\.\d+)*', version)
    if numeric_prefix is None:
        # Unrecognized format: keep the historical string comparison.
        return version >= "3.5.1"
    components = tuple(int(part) for part in numeric_prefix.group().split('.'))
    return components >= (3, 5, 1)

def is_spark_400_or_later():
    """Return True when the running Spark version is 4.0.0 or newer.

    The leading dotted-numeric part of ``spark_version()`` is compared
    component by component as integers. A plain string comparison orders
    versions lexicographically, so a release such as "10.0.0" would wrongly
    sort before "4.0.0".
    """
    import re

    version = spark_version()
    # Only the leading "N.N.N" prefix is considered; any trailing qualifier
    # (for example a "-SNAPSHOT" style suffix) is ignored.
    numeric_prefix = re.match(r'\d+(?:\.\d+)*', version)
    if numeric_prefix is None:
        # Unrecognized format: keep the historical string comparison.
        return version >= "4.0.0"
    components = tuple(int(part) for part in numeric_prefix.group().split('.'))
    return components >= (4, 0, 0)

def is_spark_330():
    """Return True only when ``spark_version()`` is exactly "3.3.0"."""
    current_version = spark_version()
    return current_version == "3.3.0"

Expand Down

0 comments on commit 36b9e2c

Please sign in to comment.