diff --git a/integration_tests/README.md b/integration_tests/README.md
index fa06c75f0f9..9e4ba5a378e 100644
--- a/integration_tests/README.md
+++ b/integration_tests/README.md
@@ -343,6 +343,27 @@ integration tests. For example:
 $ DATAGEN_SEED=1702166057 SPARK_HOME=~/spark-3.4.0-bin-hadoop3 integration_tests/run_pyspark_from_build.sh
 ```
 
+### Running with a non-UTC time zone
+Newly added cases should be checked against non-UTC time zones, or the non-UTC nightly CIs will fail.
+The non-UTC nightly CIs verify all cases with a non-UTC time zone,
+but only a small number of cases are verified with a non-UTC time zone in the pre-merge CI, due to limited GPU resources.
+So when adding cases, check that they pass with a non-UTC time zone as well as with the default UTC time zone.
+Please test the following time zones:
+```shell
+$ TZ=Iran ./integration_tests/run_pyspark_from_build.sh
+$ TZ=America/Los_Angeles ./integration_tests/run_pyspark_from_build.sh
+```
+`Iran` is a non-DST (Daylight Saving Time) time zone and `America/Los_Angeles` is a DST time zone.
+
+If a newly added case fails with a non-UTC time zone, allow the operator that does not support non-UTC to fall back to the CPU.
+For example, add the following annotation to the case:
+```python
+non_utc_allow_for_sequence = ['ProjectExec'] # Update after non-utc time zone is supported for sequence
+@allow_non_gpu(*non_utc_allow_for_sequence)
+def test_my_new_added_case_for_sequence_operator():
+    ...
+```
+
 ### Reviewing integration tests in Spark History Server
 
 If the integration tests are run using [run_pyspark_from_build.sh](run_pyspark_from_build.sh) we have
diff --git a/integration_tests/src/main/python/array_test.py b/integration_tests/src/main/python/array_test.py
index c9749865438..e2d7d1b5c81 100644
--- a/integration_tests/src/main/python/array_test.py
+++ b/integration_tests/src/main/python/array_test.py
@@ -17,7 +17,7 @@
 from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_and_cpu_are_equal_sql, assert_gpu_and_cpu_error, assert_gpu_fallback_collect
 from data_gen import *
 from conftest import is_databricks_runtime
-from marks import incompat
+from marks import incompat, allow_non_gpu
 from spark_session import is_before_spark_313, is_before_spark_330, is_databricks113_or_later, is_spark_330_or_later, is_databricks104_or_later, is_spark_33X, is_spark_340_or_later, is_spark_330, is_spark_330cdh
 from pyspark.sql.types import *
 from pyspark.sql.types import IntegralType
@@ -332,11 +332,14 @@ def do_it(spark):
     assert_gpu_and_cpu_are_equal_collect(do_it)
 
+non_utc_allow_for_sequence = ['ProjectExec'] # Update after non-utc time zone is supported for sequence
+@allow_non_gpu(*non_utc_allow_for_sequence)
 def test_array_transform_non_deterministic():
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark : spark.range(1).selectExpr("transform(sequence(0, cast(rand(5)*10 as int) + 1), x -> x * 22) as t"),
         conf={'spark.rapids.sql.castFloatToIntegralTypes.enabled': True})
 
+@allow_non_gpu(*non_utc_allow_for_sequence)
 def test_array_transform_non_deterministic_second_param():
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark : debug_df(spark.range(1).selectExpr("transform(sequence(0, cast(rand(5)*10 as int) + 1), (x, i) -> x + i) as t")),
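
For reference, here is a self-contained sketch of the fallback pattern the README snippet above describes. The test name and the `sequence(0, 5)` expression are illustrative only (not part of this diff); `allow_non_gpu` and `assert_gpu_and_cpu_are_equal_collect` are the existing marks and asserts imported at the top of `array_test.py`:

```python
from asserts import assert_gpu_and_cpu_are_equal_collect
from marks import allow_non_gpu

# Update after non-utc time zone is supported for sequence
non_utc_allow_for_sequence = ['ProjectExec']

@allow_non_gpu(*non_utc_allow_for_sequence)
def test_my_new_added_case_for_sequence_operator():
    # Illustrative expression only; any query exercising `sequence` would do.
    # @allow_non_gpu permits, but does not require, ProjectExec to fall back
    # to the CPU, so the test passes under both UTC and non-UTC time zones.
    assert_gpu_and_cpu_are_equal_collect(
        lambda spark: spark.range(1).selectExpr("sequence(0, 5) as s"))
```

`assert_gpu_fallback_collect` (also imported in `array_test.py`) is the stricter variant: it asserts that the named exec actually fell back to the CPU, so it only fits cases where the fallback is guaranteed to happen.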