From ac0e29252bf2316207edaf872e14ba151c059644 Mon Sep 17 00:00:00 2001
From: Chong Gao
Date: Mon, 11 Dec 2023 16:17:13 +0800
Subject: [PATCH 1/2] Using fix seed to unblock 23.12 release; Move the blocked issues to 24.02 (#10009)

Signed-off-by: Chong Gao
Co-authored-by: Chong Gao
---
 integration_tests/src/main/python/conditionals_test.py      | 1 +
 integration_tests/src/main/python/delta_lake_delete_test.py | 2 ++
 integration_tests/src/main/python/delta_lake_update_test.py | 1 +
 integration_tests/src/main/python/hash_aggregate_test.py    | 1 +
 integration_tests/src/main/python/schema_evolution_test.py  | 3 ++-
 5 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/integration_tests/src/main/python/conditionals_test.py b/integration_tests/src/main/python/conditionals_test.py
index 48d5a05c099..0370d24da11 100644
--- a/integration_tests/src/main/python/conditionals_test.py
+++ b/integration_tests/src/main/python/conditionals_test.py
@@ -219,6 +219,7 @@ def test_conditional_with_side_effects_col_scalar(data_gen):
 
 @pytest.mark.parametrize('data_gen', [mk_str_gen('[0-9]{1,20}')], ids=idfn)
 @pytest.mark.skipif(not is_jvm_charset_utf8(), reason="regular expressions require UTF-8")
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9992')
 def test_conditional_with_side_effects_cast(data_gen):
     test_conf=copy_and_update(
         ansi_enabled_conf, {'spark.rapids.sql.regexp.enabled': True})
diff --git a/integration_tests/src/main/python/delta_lake_delete_test.py b/integration_tests/src/main/python/delta_lake_delete_test.py
index 413479b3a12..fe2659bf8b7 100644
--- a/integration_tests/src/main/python/delta_lake_delete_test.py
+++ b/integration_tests/src/main/python/delta_lake_delete_test.py
@@ -153,6 +153,7 @@ def generate_dest_data(spark):
 @pytest.mark.parametrize("use_cdf", [True, False], ids=idfn)
 @pytest.mark.parametrize("partition_columns", [None, ["a"]], ids=idfn)
 @pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x")
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9884')
 def test_delta_delete_rows(spark_tmp_path, use_cdf, partition_columns):
     # Databricks changes the number of files being written, so we cannot compare logs unless there's only one slice
     num_slices_to_test = 1 if is_databricks_runtime() else 10
@@ -171,6 +172,7 @@ def generate_dest_data(spark):
 @pytest.mark.parametrize("use_cdf", [True, False], ids=idfn)
 @pytest.mark.parametrize("partition_columns", [None, ["a"]], ids=idfn)
 @pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x")
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9884')
 def test_delta_delete_dataframe_api(spark_tmp_path, use_cdf, partition_columns):
     from delta.tables import DeltaTable
     data_path = spark_tmp_path + "/DELTA_DATA"
diff --git a/integration_tests/src/main/python/delta_lake_update_test.py b/integration_tests/src/main/python/delta_lake_update_test.py
index 0fc65658332..b1348fdfe17 100644
--- a/integration_tests/src/main/python/delta_lake_update_test.py
+++ b/integration_tests/src/main/python/delta_lake_update_test.py
@@ -122,6 +122,7 @@ def generate_dest_data(spark):
 @pytest.mark.parametrize("use_cdf", [True, False], ids=idfn)
 @pytest.mark.parametrize("partition_columns", [None, ["a"]], ids=idfn)
 @pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x")
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9884')
 def test_delta_update_rows(spark_tmp_path, use_cdf, partition_columns):
     # Databricks changes the number of files being written, so we cannot compare logs unless there's only one slice
     num_slices_to_test = 1 if is_databricks_runtime() else 10
diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
index 0c99fc4516a..6fada82dd37 100644
--- a/integration_tests/src/main/python/hash_aggregate_test.py
+++ b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -396,6 +396,7 @@ def test_hash_reduction_sum(data_gen, conf):
 @pytest.mark.parametrize('data_gen', numeric_gens + decimal_gens + [
     DecimalGen(precision=38, scale=0), DecimalGen(precision=38, scale=-10)], ids=idfn)
 @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn)
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9779')
 def test_hash_reduction_sum_full_decimal(data_gen, conf):
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark: unary_op_df(spark, data_gen, length=100).selectExpr("SUM(a)"),
diff --git a/integration_tests/src/main/python/schema_evolution_test.py b/integration_tests/src/main/python/schema_evolution_test.py
index d9f4c0f0899..248d915523e 100644
--- a/integration_tests/src/main/python/schema_evolution_test.py
+++ b/integration_tests/src/main/python/schema_evolution_test.py
@@ -16,7 +16,7 @@
 from conftest import is_not_utc
 from data_gen import *
 from datetime import date, datetime, timezone
-from marks import ignore_order
+from marks import ignore_order, datagen_overrides
 import pytest
 from spark_session import is_databricks_runtime, is_databricks113_or_later
 
@@ -63,6 +63,7 @@ def get_ddl(col_gen_pairs):
 @ignore_order(local=True)
 @pytest.mark.parametrize("format", _formats)
 @pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9807')
 def test_column_add_after_partition(spark_tmp_table_factory, format):
     # Databricks 10.4 appears to be missing https://issues.apache.org/jira/browse/SPARK-39417
     # so avoid generating nulls for numeric partitions

From e33f4008ee00e12f667d3c0bf28370bd3edea89c Mon Sep 17 00:00:00 2001
From: Peixin
Date: Tue, 12 Dec 2023 08:49:04 +0800
Subject: [PATCH 2/2] Update private and JNI version to released 23.12.0 (#9954)

Signed-off-by: Peixin Li
---
 pom.xml           | 4 ++--
 scala2.13/pom.xml | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pom.xml b/pom.xml
index e8086a35d06..765c51ff3e4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -660,8 +660,8 @@
         spark${buildver}
         cuda11
         ${cuda.version}
-        23.12.0-SNAPSHOT
-        23.12.0-SNAPSHOT
+        23.12.0
+        23.12.0
         2.12
         2.8.0
         incremental
diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml
index e065c698522..6ca4cdf7e65 100644
--- a/scala2.13/pom.xml
+++ b/scala2.13/pom.xml
@@ -660,8 +660,8 @@
         spark${buildver}
         cuda11
         ${cuda.version}
-        23.12.0-SNAPSHOT
-        23.12.0-SNAPSHOT
+        23.12.0
+        23.12.0
         2.13
         2.8.0
         incremental
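
For context on the datagen_overrides mark that the first commit applies: below is a minimal sketch of how such a pytest mark can be declared and consumed to pin the integration tests' data-generation seed. It is an illustration only, not the actual spark-rapids implementation; the fixture name pin_datagen_seed and the module-level _datagen_seed holder are invented for the example.

    import pytest

    # marks.py-style alias: tests apply it as @datagen_overrides(seed=..., reason=...)
    datagen_overrides = pytest.mark.datagen_overrides

    _datagen_seed = None  # hypothetical seed slot read by the data generators

    @pytest.fixture(autouse=True)
    def pin_datagen_seed(request):
        # If the test carries the mark, force its seed; otherwise leave seeding random.
        global _datagen_seed
        marker = request.node.get_closest_marker('datagen_overrides')
        if marker is not None:
            _datagen_seed = marker.kwargs.get('seed', 0)
        yield
        _datagen_seed = None  # reset so later tests are unaffected

With something like this in place, @datagen_overrides(seed=0, reason='...') makes a seed-sensitive test deterministic while the linked issue remains open, which is the stop-gap these patches use across the five test files to unblock the 23.12 release.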