Add spark350emr shim layer [EMR] #10202

Closed · wants to merge 2 commits
17 changes: 17 additions & 0 deletions aggregator/pom.xml
@@ -762,5 +762,22 @@
</dependency>
</dependencies>
</profile>
<profile>
<id>release350emr</id>
<activation>
<property>
<name>buildver</name>
<value>350emr</value>
</property>
</activation>
<dependencies>
<dependency>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-delta-stub_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<classifier>${spark.version.classifier}</classifier>
</dependency>
</dependencies>
</profile>
</profiles>
</project>
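
All of the new `release350emr` profiles activate on the same `buildver` property, so the EMR shim builds the same way as the other shims. A minimal sketch (standard Maven invocation; `-DskipTests` is just illustrative):

```bash
# The release350emr profiles activate when buildver=350emr.
mvn clean install -Dbuildver=350emr -DskipTests
```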
@@ -36,6 +36,7 @@
{"spark": "341db"}
{"spark": "342"}
{"spark": "350"}
{"spark": "350emr"}
{"spark": "351"}
spark-rapids-shim-json-lines ***/
package org.apache.spark.sql.tests.datagen
4 changes: 4 additions & 0 deletions integration_tests/src/main/python/arithmetic_ops_test.py
@@ -160,6 +160,8 @@ def test_subtraction_ansi_no_overflow(data_gen):
_decimal_gen_38_10,
_decimal_gen_38_neg10
], ids=idfn)
@pytest.mark.xfail(condition=is_spark_350emr(),
                   reason='EMR back-ported https://issues.apache.org/jira/browse/SPARK-45786 in spark350emr')
def test_multiplication(data_gen):
    data_type = data_gen.data_type
    assert_gpu_and_cpu_are_equal_collect(

Collaborator: We actually should have the fix for this in our spark351 shim (for the Spark 3.5.1 SNAPSHOT), so you should be able to incorporate the multiplication fix into the EMR shim layer. See #9859 and #9962.

Contributor (author): Thanks! I'll try to incorporate the mentioned fix.
@@ -203,6 +205,8 @@ def test_multiplication_ansi_overflow():
@pytest.mark.parametrize('rhs', [byte_gen, short_gen, int_gen, long_gen, DecimalGen(6, 3),
DecimalGen(10, -2), DecimalGen(15, 3), DecimalGen(30, 12), DecimalGen(3, -3),
DecimalGen(27, 7), DecimalGen(20, -3)], ids=idfn)
@pytest.mark.xfail(condition=is_spark_350emr(),
                   reason='EMR back-ported https://issues.apache.org/jira/browse/SPARK-45786 in spark350emr')
def test_multiplication_mixed(lhs, rhs):
    assert_gpu_and_cpu_are_equal_collect(
        lambda spark : two_col_df(spark, lhs, rhs).select(
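
To exercise the newly xfail-marked tests against an EMR runtime, something like the sketch below should work; `run_pyspark_from_build.sh` is the repository's integration-test wrapper, and forwarding pytest's `-k` filter this way is an assumption about the local setup, not something this PR specifies:

```bash
# Illustrative: run the two multiplication tests this PR marks as xfail
# on spark350emr; on EMR they should report XFAIL rather than FAIL.
cd integration_tests
./run_pyspark_from_build.sh -k 'test_multiplication or test_multiplication_mixed'
```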
6 changes: 6 additions & 0 deletions integration_tests/src/main/python/spark_session.py
@@ -241,6 +241,12 @@ def is_spark_332cdh():
def is_spark_cdh():
    return is_spark_321cdh() or is_spark_330cdh() or is_spark_332cdh()

def is_spark_emr():
    return is_spark_350emr()

def is_spark_350emr():
    return "3.5.0-amzn" in spark_version()

def is_databricks_version_or_later(major, minor):
    spark = get_spark_i_know_what_i_am_doing()
    version = spark.conf.get("spark.databricks.clusterUsageTags.sparkVersion", "0.0")
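
As a quick sanity check of the version string the new helper matches: per the `spark350emr.version` property added in `pom.xml` below, an EMR cluster running Spark 3.5.0 should report `3.5.0-amzn-0`, which contains the `3.5.0-amzn` substring that `is_spark_350emr()` looks for:

```bash
# spark-submit prints its version banner to stderr; the EMR build of
# Spark 3.5.0 should show "3.5.0-amzn-0" (cluster setup is illustrative).
spark-submit --version 2>&1 | grep amzn
```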
54 changes: 53 additions & 1 deletion pom.xml
@@ -595,6 +595,27 @@
<module>delta-lake/delta-stub</module>
</modules>
</profile>
<profile>
<id>release350emr</id>
<activation>
<property>
<name>buildver</name>
<value>350emr</value>
</property>
</activation>
<properties>
<buildver>350emr</buildver>
<spark.version>${spark350emr.version}</spark.version>
<spark.test.version>${spark350emr.version}</spark.test.version>
<parquet.hadoop.version>1.13.1</parquet.hadoop.version>
<iceberg.version>${spark330.iceberg.version}</iceberg.version>
<slf4j.version>2.0.7</slf4j.version>
</properties>
<modules>
<module>shim-deps/emr</module>
<module>delta-lake/delta-stub</module>
</modules>
</profile>
<profile>
<id>release351</id>
<activation>
@@ -782,6 +803,7 @@
<spark332db.version>3.3.2-databricks</spark332db.version>
<spark341db.version>3.4.1-databricks</spark341db.version>
<spark350.version>3.5.0</spark350.version>
<spark350emr.version>3.5.0-amzn-0</spark350emr.version>
<spark351.version>3.5.1-SNAPSHOT</spark351.version>
<mockito.version>3.12.4</mockito.version>
<scala.plugin.version>4.3.0</scala.plugin.version>
@@ -791,6 +813,7 @@
<guava.cdh.version>30.0-jre</guava.cdh.version>
<arrow.cdh.version>2.0.0</arrow.cdh.version>
<slf4j.version>1.7.30</slf4j.version>
<log4j.version>2.20.0</log4j.version>
<flatbuffers.java.version>1.11.0</flatbuffers.java.version>
<hadoop.client.version>3.3.1</hadoop.client.version>
<iceberg.version>0.13.2</iceberg.version>
@@ -831,7 +854,8 @@
340,
341,
342,
350
350,
350emr
</noSnapshot.buildvers>
<snapshot.buildvers>
351
@@ -933,6 +957,34 @@
<artifactId>jucx</artifactId>
<version>${ucx.version}</version>
</dependency>
<!-- -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>${slf4j.version}</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j2-impl</artifactId>
<version>${log4j.version}</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>${log4j.version}</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>${log4j.version}</version>
</dependency>
<dependency>
<!-- API bridge between log4j 1 and 2 -->
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
<version>${log4j.version}</version>
</dependency>
<!-- -->
Member (commenting on lines +960 to +987): Curious why all of these were added? I'm especially wondering about the log4j dependencies, since we normally want to use the slf4j APIs when logging instead of hitting log4j directly.

Normally we leave this sort of dependency out because we explicitly want to pull in whatever version Spark is using and compile against that. The risk of pulling these in explicitly is that they might conflict with the version used by the particular Spark version we're compiling against, and instead of a compile-time problem we get a runtime problem.

Contributor (author): I added these because I encountered log4j and slf4j errors such as NoSuchMethodError when running unit tests; after adding them, the errors were gone. I agree this approach might introduce risk. Are there better approaches we can try?

Member: That seems to imply there's an issue with the test classpath, where APIs that were available at compile time aren't present at runtime. I'd have Maven dump the compile and test classpaths and look for jars that are missing in test vs. compile that would explain it. We should be picking these up as transitive dependencies from the Spark jars. Maybe there's an issue with the EMR Spark dependencies where that's somehow not the case (but only at test runtime?!).
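
For reference, a minimal sketch of the classpath comparison suggested above, using standard `maven-dependency-plugin` goals (the scope values and output file names are illustrative):

```bash
# Dump the compile and test classpaths for the EMR build, then diff them
# to spot jars available at compile time but missing at test runtime.
mvn -Dbuildver=350emr dependency:build-classpath -DincludeScope=compile -Dmdep.outputFile=cp-compile.txt
mvn -Dbuildver=350emr dependency:build-classpath -DincludeScope=test -Dmdep.outputFile=cp-test.txt
tr ':' '\n' < cp-compile.txt | sort > compile.jars
tr ':' '\n' < cp-test.txt | sort > test.jars
diff compile.jars test.jars
```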

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jul-to-slf4j</artifactId>
17 changes: 17 additions & 0 deletions scala2.13/aggregator/pom.xml
@@ -762,5 +762,22 @@
</dependency>
</dependencies>
</profile>
<profile>
<id>release350emr</id>
<activation>
<property>
<name>buildver</name>
<value>350emr</value>
</property>
</activation>
<dependencies>
<dependency>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-delta-stub_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<classifier>${spark.version.classifier}</classifier>
</dependency>
</dependencies>
</profile>
</profiles>
</project>
22 changes: 22 additions & 0 deletions scala2.13/pom.xml
@@ -595,6 +595,27 @@
<module>delta-lake/delta-stub</module>
</modules>
</profile>
<profile>
<id>release350emr</id>
<activation>
<property>
<name>buildver</name>
<value>350emr</value>
</property>
</activation>
<properties>
<buildver>350emr</buildver>
<spark.version>${spark350emr.version}</spark.version>
<spark.test.version>${spark350emr.version}</spark.test.version>
<parquet.hadoop.version>1.13.1</parquet.hadoop.version>
<iceberg.version>${spark330.iceberg.version}</iceberg.version>
<slf4j.version>2.0.7</slf4j.version>
</properties>
<modules>
<module>shim-deps/emr</module>
<module>delta-lake/delta-stub</module>
</modules>
</profile>
<profile>
<id>release351</id>
<activation>
@@ -782,6 +803,7 @@
<spark332db.version>3.3.2-databricks</spark332db.version>
<spark341db.version>3.4.1-databricks</spark341db.version>
<spark350.version>3.5.0</spark350.version>
<spark350emr.version>3.5.0-amzn-0</spark350emr.version>
<spark351.version>3.5.1-SNAPSHOT</spark351.version>
<mockito.version>3.12.4</mockito.version>
<scala.plugin.version>4.3.0</scala.plugin.version>
59 changes: 59 additions & 0 deletions scala2.13/shim-deps/emr/pom.xml
@@ -0,0 +1,59 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2023, NVIDIA CORPORATION.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-parent_2.13</artifactId>
<version>24.02.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>rapids-4-spark-emr-bom</artifactId>
<packaging>pom</packaging>
<description>EMR Shim Dependencies</description>
<version>24.02.0-SNAPSHOT</version>

<properties>
<rapids.module>../shim-deps/emr</rapids.module>
</properties>

<!--
This module is going to be used as a provided dependency.
The dependencies below are compile-scope so that they are propagated to dependents as provided.
-->
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.apache.arrow</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>
18 changes: 18 additions & 0 deletions scala2.13/shim-deps/pom.xml
@@ -176,6 +176,24 @@
</dependency>
</dependencies>
</profile>
<profile>
<id>release350emr</id>
<activation>
<property>
<name>buildver</name>
<value>350emr</value>
</property>
</activation>
<dependencies>
<dependency>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-emr-bom</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>provided</scope>
</dependency>
</dependencies>
</profile>

<profile>
<id>with-classifier</id>
59 changes: 59 additions & 0 deletions shim-deps/emr/pom.xml
@@ -0,0 +1,59 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2023, NVIDIA CORPORATION.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-parent_2.12</artifactId>
<version>24.02.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>rapids-4-spark-emr-bom</artifactId>
<packaging>pom</packaging>
<description>EMR Shim Dependencies</description>
<version>24.02.0-SNAPSHOT</version>

<properties>
<rapids.module>../shim-deps/emr</rapids.module>
</properties>

<!--
This module is going to be used as a provided dependency.
The dependencies below are compile-scope so that they are propagated to dependents as provided.
-->
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<scope>compile</scope>
<exclusions>
<exclusion>
<groupId>org.apache.arrow</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>
18 changes: 18 additions & 0 deletions shim-deps/pom.xml
@@ -176,6 +176,24 @@
</dependency>
</dependencies>
</profile>
<profile>
<id>release350emr</id>
<activation>
<property>
<name>buildver</name>
<value>350emr</value>
</property>
</activation>
<dependencies>
<dependency>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-emr-bom</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>provided</scope>
</dependency>
</dependencies>
</profile>

<profile>
<id>with-classifier</id>
@@ -35,6 +35,7 @@
{"spark": "341"}
{"spark": "342"}
{"spark": "350"}
{"spark": "350emr"}
{"spark": "351"}
spark-rapids-shim-json-lines ***/
package com.nvidia.spark.rapids.shims
@@ -35,6 +35,7 @@
{"spark": "341"}
{"spark": "342"}
{"spark": "350"}
{"spark": "350emr"}
{"spark": "351"}
spark-rapids-shim-json-lines ***/
package com.nvidia.spark.rapids.shims