From 36b9e2c5e4f01824bee5247ef4de37eac6835a0b Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Tue, 13 Aug 2024 13:59:24 -0500 Subject: [PATCH] Update passing JSON tests after list support added in CUDF (#11319) Signed-off-by: Robert (Bobby) Evans --- integration_tests/src/main/python/json_matrix_test.py | 10 +++++----- integration_tests/src/main/python/json_test.py | 7 +++---- integration_tests/src/main/python/spark_session.py | 3 +++ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/integration_tests/src/main/python/json_matrix_test.py b/integration_tests/src/main/python/json_matrix_test.py index 548851d11c1..4e2eb071789 100644 --- a/integration_tests/src/main/python/json_matrix_test.py +++ b/integration_tests/src/main/python/json_matrix_test.py @@ -709,9 +709,9 @@ def test_from_json_decs(std_input_path, input_file, dt): pytest.param("single_quoted_strings.json", marks=pytest.mark.xfail(condition=is_before_spark_330(), reason='https://github.com/NVIDIA/spark-rapids/issues/10495')), pytest.param("boolean_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10479')), pytest.param("invalid_ridealong_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10534')), - pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15278')), - pytest.param("int_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15278')), - pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11154'))]) + pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15318')), + "int_struct_formatted.json", + pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(condition=is_spark_400_or_later(), reason='https://github.com/NVIDIA/spark-rapids/issues/11154'))]) @pytest.mark.parametrize('read_func', [read_json_df]) def test_scan_json_strings(std_input_path, read_func, spark_tmp_table_factory, input_file): assert_gpu_and_cpu_are_equal_collect( @@ -731,9 +731,9 @@ def test_scan_json_strings(std_input_path, read_func, spark_tmp_table_factory, i "single_quoted_strings.json", pytest.param("boolean_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10479')), pytest.param("invalid_ridealong_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10534')), - pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15278')), + pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15318')), "int_struct_formatted.json", - pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15278'))]) + "int_mixed_array_struct_formatted.json"]) @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_strings(std_input_path, input_file): schema = StructType([StructField("data", StringType())]) diff --git a/integration_tests/src/main/python/json_test.py b/integration_tests/src/main/python/json_test.py index d20f947737a..ca5eb135715 100644 --- a/integration_tests/src/main/python/json_test.py +++ b/integration_tests/src/main/python/json_test.py @@ -305,7 +305,7 @@ def do_read(spark): @approximate_float @pytest.mark.parametrize('filename', [ 'boolean.json', - pytest.param('boolean_invalid.json', marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/4779')), + 'boolean_invalid.json', 'ints.json', pytest.param('ints_invalid.json', marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/4940')), # This fails for dates, as not all are invalid 'nan_and_inf.json', @@ -670,9 +670,9 @@ def test_from_json_struct_fallback_dupe_keys(schema): @pytest.mark.parametrize('pattern', [ r'{ "bool": (true|false|True|False|TRUE|FALSE) }', - pytest.param(r'{ "bool": "(true|false)" }', marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/4779')), + r'{ "bool": "(true|false)" }', r'{ "bool": "(True|False|TRUE|FALSE)" }', - pytest.param(r'{ "bool": [0-9]{0,2}(\.[0-9]{1,2})? }', marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/4779')), + r'{ "bool": [0-9]{0,2}(\.[0-9]{1,2})? }', r'{ "bool": "[0-9]{0,2}(\.[0-9]{1,2})?" }', r'{ "bool": [0-9]{4}-[0-9]{2}-[0-9]{2} }', r'{ "bool": "[0-9]{4}-[0-9]{2}-[0-9]{2}" }' @@ -895,7 +895,6 @@ def test_from_json_struct_of_list(schema): 'struct' ]) @allow_non_gpu(*non_utc_allow) -@pytest.mark.xfail(reason = 'https://github.com/NVIDIA/spark-rapids/issues/10351') def test_from_json_mixed_types_list_struct(schema): json_string_gen = StringGen(r'{"a": (\[1,2,3\]|{"b":"[a-z]{2}"}) }') assert_gpu_and_cpu_are_equal_collect( diff --git a/integration_tests/src/main/python/spark_session.py b/integration_tests/src/main/python/spark_session.py index 26388617fff..831680e4feb 100644 --- a/integration_tests/src/main/python/spark_session.py +++ b/integration_tests/src/main/python/spark_session.py @@ -228,6 +228,9 @@ def is_spark_350_or_later(): def is_spark_351_or_later(): return spark_version() >= "3.5.1" +def is_spark_400_or_later(): + return spark_version() >= "4.0.0" + def is_spark_330(): return spark_version() == "3.3.0"