-
Notifications
You must be signed in to change notification settings - Fork 232
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support unix_timestamp and to_unix_timestamp with non-UTC timezones (non-DST) #9816
Changes from 17 commits
3faacd8
9ed56ea
c7722f9
a37d1ce
e26e299
3696466
ce90809
3566f1c
749cfbf
f239624
465f0f3
877c9eb
83fee5b
fa439da
8ed6ff0
3b72923
003cf3a
0752ffc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -244,11 +244,16 @@ def test_dayofyear(data_gen): | |
assert_gpu_and_cpu_are_equal_collect( | ||
lambda spark : unary_op_df(spark, data_gen).select(f.dayofyear(f.col('a')))) | ||
|
||
|
||
non_utc_unix_time_allow = ['ProjectExec'] if not is_supported_time_zone() else [] | ||
|
||
|
||
@pytest.mark.parametrize('data_gen', date_n_time_gens, ids=idfn) | ||
@allow_non_gpu(*non_utc_allow) | ||
@allow_non_gpu(*non_utc_unix_time_allow) | ||
def test_unix_timestamp(data_gen): | ||
assert_gpu_and_cpu_are_equal_collect( | ||
lambda spark : unary_op_df(spark, data_gen).select(f.unix_timestamp(f.col('a')))) | ||
lambda spark : unary_op_df(spark, data_gen).select(f.unix_timestamp(f.col('a'))), | ||
{"spark.rapids.sql.nonUTC.enabled": "true"}) | ||
|
||
|
||
@allow_non_gpu('ProjectExec') | ||
|
@@ -381,30 +386,30 @@ def fun(spark): | |
|
||
@pytest.mark.parametrize('ansi_enabled', [True, False], ids=['ANSI_ON', 'ANSI_OFF']) | ||
@pytest.mark.parametrize('data_gen', date_n_time_gens, ids=idfn) | ||
@allow_non_gpu(*non_utc_allow) | ||
@allow_non_gpu(*non_utc_unix_time_allow) | ||
def test_unix_timestamp_improved(data_gen, ansi_enabled): | ||
conf = {"spark.rapids.sql.improvedTimeOps.enabled": "true", | ||
"spark.sql.legacy.timeParserPolicy": "CORRECTED"} | ||
assert_gpu_and_cpu_are_equal_collect( | ||
lambda spark : unary_op_df(spark, data_gen).select(f.unix_timestamp(f.col('a'))), | ||
copy_and_update({'spark.sql.ansi.enabled': ansi_enabled}, conf)) | ||
copy_and_update({'spark.sql.ansi.enabled': ansi_enabled, "spark.rapids.sql.nonUTC.enabled": "true"}, conf)) | ||
|
||
@pytest.mark.parametrize('ansi_enabled', [True, False], ids=['ANSI_ON', 'ANSI_OFF']) | ||
@pytest.mark.parametrize('data_gen', date_n_time_gens, ids=idfn) | ||
@allow_non_gpu(*non_utc_allow) | ||
@allow_non_gpu(*non_utc_unix_time_allow) | ||
def test_unix_timestamp(data_gen, ansi_enabled): | ||
assert_gpu_and_cpu_are_equal_collect( | ||
lambda spark : unary_op_df(spark, data_gen).select(f.unix_timestamp(f.col("a"))), | ||
{'spark.sql.ansi.enabled': ansi_enabled}) | ||
{'spark.sql.ansi.enabled': ansi_enabled, "spark.rapids.sql.nonUTC.enabled": "true"}) | ||
|
||
@pytest.mark.parametrize('ansi_enabled', [True, False], ids=['ANSI_ON', 'ANSI_OFF']) | ||
@pytest.mark.parametrize('data_gen', date_n_time_gens, ids=idfn) | ||
@allow_non_gpu(*non_utc_allow) | ||
@allow_non_gpu(*non_utc_unix_time_allow) | ||
def test_to_unix_timestamp_improved(data_gen, ansi_enabled): | ||
conf = {"spark.rapids.sql.improvedTimeOps.enabled": "true"} | ||
assert_gpu_and_cpu_are_equal_collect( | ||
lambda spark : unary_op_df(spark, data_gen).selectExpr("to_unix_timestamp(a)"), | ||
copy_and_update({'spark.sql.ansi.enabled': ansi_enabled}, conf)) | ||
copy_and_update({'spark.sql.ansi.enabled': ansi_enabled, "spark.rapids.sql.nonUTC.enabled": "true"}, conf)) | ||
|
||
str_date_and_format_gen = [pytest.param(StringGen('[0-9]{4}/[01][0-9]'),'yyyy/MM', marks=pytest.mark.xfail(reason="cudf does no checks")), | ||
(StringGen('[0-9]{4}/[01][12]/[0-2][1-8]'),'yyyy/MM/dd'), | ||
|
@@ -417,12 +422,13 @@ def invalid_date_string_df(spark): | |
return spark.createDataFrame([['invalid_date_string']], "a string") | ||
|
||
@pytest.mark.parametrize('ansi_enabled', [True, False], ids=['ANSI_ON', 'ANSI_OFF']) | ||
@pytest.mark.parametrize('non_utc_timezone_enabled', [True, False], ids=['NOT_UTC_ON', 'NOT_UTC_OFF']) | ||
@pytest.mark.parametrize('data_gen,date_form', str_date_and_format_gen, ids=idfn) | ||
@allow_non_gpu(*non_utc_allow) | ||
def test_string_to_unix_timestamp(data_gen, date_form, ansi_enabled): | ||
@allow_non_gpu('ProjectExec') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't want to have a hard-coded ProjectExec that does not know if we should fall back to the CPU or not! |
||
def test_string_to_unix_timestamp(data_gen, date_form, ansi_enabled, non_utc_timezone_enabled): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This parameter is not used and nonUTC.enabled is going away. |
||
assert_gpu_and_cpu_are_equal_collect( | ||
lambda spark : unary_op_df(spark, data_gen, seed=1).selectExpr("to_unix_timestamp(a, '{}')".format(date_form)), | ||
{'spark.sql.ansi.enabled': ansi_enabled}) | ||
{'spark.sql.ansi.enabled': ansi_enabled, "spark.rapids.sql.nonUTC.enabled": "true"}) | ||
|
||
def test_string_to_unix_timestamp_ansi_exception(): | ||
assert_gpu_and_cpu_error( | ||
|
@@ -431,12 +437,13 @@ def test_string_to_unix_timestamp_ansi_exception(): | |
conf=ansi_enabled_conf) | ||
|
||
@pytest.mark.parametrize('ansi_enabled', [True, False], ids=['ANSI_ON', 'ANSI_OFF']) | ||
@pytest.mark.parametrize('non_utc_timezone_enabled', [True, False], ids=['NOT_UTC_ON', 'NOT_UTC_OFF']) | ||
@pytest.mark.parametrize('data_gen,date_form', str_date_and_format_gen, ids=idfn) | ||
@allow_non_gpu(*non_utc_allow) | ||
def test_string_unix_timestamp(data_gen, date_form, ansi_enabled): | ||
@allow_non_gpu('ProjectExec') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here too. |
||
def test_string_unix_timestamp(data_gen, date_form, ansi_enabled, non_utc_timezone_enabled): | ||
assert_gpu_and_cpu_are_equal_collect( | ||
lambda spark : unary_op_df(spark, data_gen, seed=1).select(f.unix_timestamp(f.col('a'), date_form)), | ||
{'spark.sql.ansi.enabled': ansi_enabled}) | ||
{'spark.sql.ansi.enabled': ansi_enabled, "spark.rapids.sql.nonUTC.enabled": non_utc_timezone_enabled}) | ||
|
||
def test_string_unix_timestamp_ansi_exception(): | ||
assert_gpu_and_cpu_error( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -824,25 +824,41 @@ abstract class GpuToTimestamp | |
val failOnError: Boolean = SQLConf.get.ansiEnabled | ||
|
||
override def doColumnar(lhs: GpuColumnVector, rhs: GpuScalar): ColumnVector = { | ||
val tmp = if (lhs.dataType == StringType) { | ||
// rhs is ignored we already parsed the format | ||
if (getTimeParserPolicy == LegacyTimeParserPolicy) { | ||
parseStringAsTimestampWithLegacyParserPolicy( | ||
lhs, | ||
sparkFormat, | ||
strfFormat, | ||
DType.TIMESTAMP_MICROSECONDS, | ||
(col, strfFormat) => col.asTimestampMicroseconds(strfFormat)) | ||
} else { | ||
parseStringAsTimestamp( | ||
lhs, | ||
sparkFormat, | ||
strfFormat, | ||
DType.TIMESTAMP_MICROSECONDS, | ||
failOnError) | ||
} | ||
} else { // Timestamp or DateType | ||
lhs.getBase.asTimestampMicroseconds() | ||
val tmp = lhs.dataType match { | ||
case _: StringType => | ||
// rhs is ignored we already parsed the format | ||
if (getTimeParserPolicy == LegacyTimeParserPolicy) { | ||
parseStringAsTimestampWithLegacyParserPolicy( | ||
lhs, | ||
sparkFormat, | ||
strfFormat, | ||
DType.TIMESTAMP_MICROSECONDS, | ||
(col, strfFormat) => col.asTimestampMicroseconds(strfFormat)) | ||
} else { | ||
parseStringAsTimestamp( | ||
lhs, | ||
sparkFormat, | ||
strfFormat, | ||
DType.TIMESTAMP_MICROSECONDS, | ||
failOnError) | ||
} | ||
case _: DateType => | ||
timeZoneId match { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still |
||
case Some(_) => | ||
if (GpuOverrides.isUTCTimezone(zoneId)) { | ||
lhs.getBase.asTimestampMicroseconds() | ||
} else { | ||
assert(GpuTimeZoneDB.isSupportedTimeZone(zoneId)) | ||
withResource(lhs.getBase.asTimestampMicroseconds) { tsInMs => | ||
GpuTimeZoneDB.fromTimestampToUtcTimestamp(tsInMs, zoneId) | ||
} | ||
} | ||
case None => lhs.getBase.asTimestampMicroseconds() | ||
} | ||
case _ => | ||
// Consistent with Spark's behavior which ignores timeZone for other types like timestamp | ||
// and timestampNtz. | ||
lhs.getBase.asTimestampMicroseconds() | ||
} | ||
// Return Timestamp value if dataType it is expecting is of TimestampType | ||
if (dataType.equals(TimestampType)) { | ||
|
@@ -892,8 +908,19 @@ abstract class GpuToTimestampImproved extends GpuToTimestamp { | |
failOnError) | ||
} | ||
} else if (lhs.dataType() == DateType){ | ||
lhs.getBase.asTimestampSeconds() | ||
} else { // Timestamp | ||
timeZoneId match { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
case Some(_) => | ||
if (GpuOverrides.isUTCTimezone(zoneId)) { | ||
lhs.getBase.asTimestampSeconds() | ||
} else { | ||
assert(GpuTimeZoneDB.isSupportedTimeZone(zoneId)) | ||
withResource(lhs.getBase.asTimestampSeconds) { tsInMs => | ||
GpuTimeZoneDB.fromTimestampToUtcTimestamp(tsInMs, zoneId) | ||
} | ||
} | ||
case None => lhs.getBase.asTimestampSeconds() | ||
} | ||
} else { // Timestamp. Note: no need to consider timezone which is consistent with Spark | ||
// https://github.com/rapidsai/cudf/issues/5166 | ||
// The time is off by 1 second if the result is < 0 | ||
val longSecs = withResource(lhs.getBase.asTimestampSeconds()) { secs => | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add cases for not supported Time Zone.
e.g.:
And test TZs locally:
export TZ=Iran
export TZ=America/Los_Angeles
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated