Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Restore disabled ORC and Parquet tests #3773

Merged
merged 1 commit into from
Oct 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions integration_tests/src/main/python/orc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def read_orc_sql(data_path):
coalescing_orc_file_reader_conf = {'spark.rapids.sql.format.orc.reader.type': 'COALESCING'}
reader_opt_confs = [original_orc_file_reader_conf, multithreaded_orc_file_reader_conf, coalescing_orc_file_reader_conf]

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('name', ['timestamp-date-test.orc'])
@pytest.mark.parametrize('read_func', [read_orc_df, read_orc_sql])
@pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
Expand Down Expand Up @@ -121,7 +120,6 @@ def test_orc_fallback(spark_tmp_path, read_func, disable_conf):
conf={disable_conf: 'false',
"spark.sql.sources.useV1SourceList": "orc"})

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.order(2)
@pytest.mark.parametrize('orc_gens', orc_gens_list, ids=idfn)
@pytest.mark.parametrize('read_func', [read_orc_df, read_orc_sql])
Expand All @@ -147,7 +145,6 @@ def test_read_round_trip(spark_tmp_path, orc_gens, read_func, reader_confs, v1_e
# timestamp_gen
TimestampGen(start=datetime(1970, 1, 1, tzinfo=timezone.utc))]

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.order(2)
@pytest.mark.parametrize('orc_gen', orc_pred_push_gens, ids=idfn)
@pytest.mark.parametrize('read_func', [read_orc_df, read_orc_sql])
Expand Down Expand Up @@ -203,7 +200,6 @@ def test_compress_read_round_trip(spark_tmp_path, compress, v1_enabled_list, rea
lambda spark : spark.read.orc(data_path),
conf=all_confs)

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
@pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
def test_simple_partitioned_read(spark_tmp_path, v1_enabled_list, reader_confs):
Expand All @@ -229,7 +225,6 @@ def test_simple_partitioned_read(spark_tmp_path, v1_enabled_list, reader_confs):
conf=all_confs)

# In this we are reading the data, but only reading the key the data was partitioned by
@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
@pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
def test_partitioned_read_just_partitions(spark_tmp_path, v1_enabled_list, reader_confs):
Expand Down Expand Up @@ -383,7 +378,6 @@ def test_missing_column_names_filter(spark_tmp_table_factory, reader_confs):
reader_confs)


@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('data_gen,read_schema', _nested_pruning_schemas, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
@pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
Expand Down Expand Up @@ -416,7 +410,6 @@ def test_read_struct_without_stream(spark_tmp_path):
lambda spark : spark.read.orc(data_path))


@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('orc_gen', flattened_orc_gens, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs, ids=idfn)
@pytest.mark.parametrize('v1_enabled_list', ["", "orc"])
Expand Down
9 changes: 0 additions & 9 deletions integration_tests/src/main/python/parquet_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def read_parquet_sql(data_path):
reader_opt_confs = [original_parquet_file_reader_conf, multithreaded_parquet_file_reader_conf,
coalesce_parquet_file_reader_conf]

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('parquet_gens', parquet_gens_list, ids=idfn)
@pytest.mark.parametrize('read_func', [read_parquet_df, read_parquet_sql])
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
Expand Down Expand Up @@ -134,7 +133,6 @@ def test_compress_read_round_trip(spark_tmp_path, compress, v1_enabled_list, rea
# timestamp_gen
TimestampGen(start=datetime(1900, 1, 1, tzinfo=timezone.utc))] + decimal_gens

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('parquet_gen', parquet_pred_push_gens, ids=idfn)
@pytest.mark.parametrize('read_func', [read_parquet_df, read_parquet_sql])
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
Expand Down Expand Up @@ -176,7 +174,6 @@ def test_ts_read_round_trip_nested(gen, spark_tmp_path, ts_write, ts_rebase, v1_
lambda spark : spark.read.parquet(data_path),
conf=all_confs)

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
# Once https://github.com/NVIDIA/spark-rapids/issues/132 is fixed replace this with
# timestamp_gen
@pytest.mark.parametrize('gen', [TimestampGen(start=datetime(1900, 1, 1, tzinfo=timezone.utc))], ids=idfn)
Expand All @@ -201,7 +198,6 @@ def readParquetCatchException(spark, data_path):
df = spark.read.parquet(data_path).collect()
assert e_info.match(r".*SparkUpgradeException.*")

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
# Once https://github.com/NVIDIA/spark-rapids/issues/1126 is fixed nested timestamps and dates should be added in
# Once https://github.com/NVIDIA/spark-rapids/issues/132 is fixed replace this with
# timestamp_gen
Expand Down Expand Up @@ -244,7 +240,6 @@ def test_decimal_read_legacy(spark_tmp_path, parquet_gens, read_func, reader_con
pytest.param([timestamp_gen], marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/133')),
pytest.param([date_gen], marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/133'))]

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('parquet_gens', parquet_gens_legacy_list, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
Expand All @@ -259,7 +254,6 @@ def test_read_round_trip_legacy(spark_tmp_path, parquet_gens, v1_enabled_list, r
lambda spark : spark.read.parquet(data_path),
conf=all_confs)

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
def test_simple_partitioned_read(spark_tmp_path, v1_enabled_list, reader_confs):
Expand Down Expand Up @@ -332,7 +326,6 @@ def test_read_schema_missing_cols(spark_tmp_path, v1_enabled_list, reader_confs)
lambda spark : spark.read.parquet(data_path),
conf=all_confs)

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
def test_read_merge_schema(spark_tmp_path, v1_enabled_list, reader_confs):
Expand All @@ -357,7 +350,6 @@ def test_read_merge_schema(spark_tmp_path, v1_enabled_list, reader_confs):
lambda spark : spark.read.option('mergeSchema', 'true').parquet(data_path),
conf=all_confs)

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
def test_read_merge_schema_from_conf(spark_tmp_path, v1_enabled_list, reader_confs):
Expand Down Expand Up @@ -488,7 +480,6 @@ def test_small_file_memory(spark_tmp_path, v1_enabled_list):
[["ar", ArrayGen(StructGen([["str_2", StringGen()]]))]])
]

@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/3742')
@pytest.mark.parametrize('data_gen,read_schema', _nested_pruning_schemas, ids=idfn)
@pytest.mark.parametrize('reader_confs', reader_opt_confs)
@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"])
Expand Down