diff --git a/integration_tests/src/main/python/orc_test.py b/integration_tests/src/main/python/orc_test.py
index 7a232b5d9a3..b6401ea2357 100644
--- a/integration_tests/src/main/python/orc_test.py
+++ b/integration_tests/src/main/python/orc_test.py
@@ -34,6 +34,7 @@ def read_orc_sql(data_path):
 coalescing_orc_file_reader_conf = {'spark.rapids.sql.format.orc.reader.type': 'COALESCING'}
 reader_opt_confs = [original_orc_file_reader_conf, multithreaded_orc_file_reader_conf,
                     coalescing_orc_file_reader_conf]
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('name', ['timestamp-date-test.orc'])
 @pytest.mark.parametrize('read_func', [read_orc_df, read_orc_sql])
 @pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
@@ -120,6 +121,7 @@ def test_orc_fallback(spark_tmp_path, read_func, disable_conf):
             conf={disable_conf: 'false',
                   "spark.sql.sources.useV1SourceList": "orc"})
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.order(2)
 @pytest.mark.parametrize('orc_gens', orc_gens_list, ids=idfn)
 @pytest.mark.parametrize('read_func', [read_orc_df, read_orc_sql])
@@ -145,6 +147,7 @@ def test_read_round_trip(spark_tmp_path, orc_gens, read_func, reader_confs, v1_e
     # timestamp_gen
     TimestampGen(start=datetime(1970, 1, 1, tzinfo=timezone.utc))]
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.order(2)
 @pytest.mark.parametrize('orc_gen', orc_pred_push_gens, ids=idfn)
 @pytest.mark.parametrize('read_func', [read_orc_df, read_orc_sql])
@@ -200,6 +203,7 @@ def test_compress_read_round_trip(spark_tmp_path, compress, v1_enabled_list, rea
             lambda spark : spark.read.orc(data_path),
             conf=all_confs)
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
 @pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
 def test_simple_partitioned_read(spark_tmp_path, v1_enabled_list, reader_confs):
@@ -225,6 +229,7 @@ def test_simple_partitioned_read(spark_tmp_path, v1_enabled_list, reader_confs):
             conf=all_confs)
 
 # In this we are reading the data, but only reading the key the data was partitioned by
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
 @pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
 def test_partitioned_read_just_partitions(spark_tmp_path, v1_enabled_list, reader_confs):
@@ -378,6 +383,7 @@ def test_missing_column_names_filter(spark_tmp_table_factory, reader_confs):
         reader_confs)
 
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('data_gen,read_schema', _nested_pruning_schemas, ids=idfn)
 @pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
 @pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
@@ -410,6 +416,7 @@ def test_read_struct_without_stream(spark_tmp_path):
         lambda spark : spark.read.orc(data_path))
 
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('orc_gen', flattened_orc_gens, ids=idfn)
 @pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
 @pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
diff --git a/integration_tests/src/main/python/parquet_test.py b/integration_tests/src/main/python/parquet_test.py
index b13c786a702..8966a5c00e6 100644
--- a/integration_tests/src/main/python/parquet_test.py
+++ b/integration_tests/src/main/python/parquet_test.py
@@ -70,6 +70,7 @@ def read_parquet_sql(data_path):
 reader_opt_confs = [original_parquet_file_reader_conf, multithreaded_parquet_file_reader_conf,
                     coalesce_parquet_file_reader_conf]
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('parquet_gens', parquet_gens_list, ids=idfn)
 @pytest.mark.parametrize('read_func', [read_parquet_df, read_parquet_sql])
 @pytest.mark.parametrize('reader_confs', reader_opt_confs)
@@ -133,6 +134,7 @@ def test_compress_read_round_trip(spark_tmp_path, compress, v1_enabled_list, rea
     # timestamp_gen
     TimestampGen(start=datetime(1900, 1, 1, tzinfo=timezone.utc))] + decimal_gens
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('parquet_gen', parquet_pred_push_gens, ids=idfn)
 @pytest.mark.parametrize('read_func', [read_parquet_df, read_parquet_sql])
 @pytest.mark.parametrize('reader_confs', reader_opt_confs)
@@ -174,6 +176,7 @@ def test_ts_read_round_trip_nested(gen, spark_tmp_path, ts_write, ts_rebase, v1_
             lambda spark : spark.read.parquet(data_path),
             conf=all_confs)
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 # Once https://github.com/NVIDIA/spark-rapids/issues/132 is fixed replace this with
 # timestamp_gen
 @pytest.mark.parametrize('gen', [TimestampGen(start=datetime(1900, 1, 1, tzinfo=timezone.utc))], ids=idfn)
@@ -198,6 +201,7 @@ def readParquetCatchException(spark, data_path):
         df = spark.read.parquet(data_path).collect()
     assert e_info.match(r".*SparkUpgradeException.*")
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 # Once https://github.com/NVIDIA/spark-rapids/issues/1126 is fixed nested timestamps and dates should be added in
 # Once https://github.com/NVIDIA/spark-rapids/issues/132 is fixed replace this with
 # timestamp_gen
@@ -240,6 +244,7 @@ def test_decimal_read_legacy(spark_tmp_path, parquet_gens, read_func, reader_con
     pytest.param([timestamp_gen], marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/133')),
     pytest.param([date_gen], marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/133'))]
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('parquet_gens', parquet_gens_legacy_list, ids=idfn)
 @pytest.mark.parametrize('reader_confs', reader_opt_confs)
 @pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
@@ -254,6 +259,7 @@ def test_read_round_trip_legacy(spark_tmp_path, parquet_gens, v1_enabled_list, r
             lambda spark : spark.read.parquet(data_path),
             conf=all_confs)
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('reader_confs', reader_opt_confs)
 @pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
 def test_simple_partitioned_read(spark_tmp_path, v1_enabled_list, reader_confs):
@@ -326,6 +332,7 @@ def test_read_schema_missing_cols(spark_tmp_path, v1_enabled_list, reader_confs)
             lambda spark : spark.read.parquet(data_path),
             conf=all_confs)
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('reader_confs', reader_opt_confs)
 @pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
 def test_read_merge_schema(spark_tmp_path, v1_enabled_list, reader_confs):
@@ -350,6 +357,7 @@ def test_read_merge_schema(spark_tmp_path, v1_enabled_list, reader_confs):
             lambda spark : spark.read.option('mergeSchema', 'true').parquet(data_path),
             conf=all_confs)
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('reader_confs', reader_opt_confs)
 @pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
 def test_read_merge_schema_from_conf(spark_tmp_path, v1_enabled_list, reader_confs):
@@ -480,6 +488,7 @@ def test_small_file_memory(spark_tmp_path, v1_enabled_list):
          [["ar", ArrayGen(StructGen([["str_2", StringGen()]]))]])
         ]
 
+@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
 @pytest.mark.parametrize('data_gen,read_schema', _nested_pruning_schemas, ids=idfn)
 @pytest.mark.parametrize('reader_confs', reader_opt_confs)
 @pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])