Skip to content

Commit

Permalink
Implement a Spark 3.2.1-SNAPSHOT Shim
Browse files Browse the repository at this point in the history
Signed-off-by: Gera Shegalov <gera@apache.org>
  • Loading branch information
gerashegalov committed Oct 30, 2021
1 parent 17500a5 commit 5dc32e8
Show file tree
Hide file tree
Showing 29 changed files with 342 additions and 3 deletions.
22 changes: 20 additions & 2 deletions dist/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,15 @@
<overWrite>false</overWrite>
<outputDirectory>${project.build.directory}/deps</outputDirectory>
</artifactItem>
<artifactItem>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-aggregator_${scala.binary.version}</artifactId>
<classifier>spark321</classifier>
<version>${project.version}</version>
<type>jar</type>
<overWrite>false</overWrite>
<outputDirectory>${project.build.directory}/deps</outputDirectory>
</artifactItem>
</artifactItems>
<useBaseVersion>true</useBaseVersion>
<outputDirectory>${project.build.directory}/deps</outputDirectory>
Expand All @@ -542,7 +551,7 @@
<property name="project.version" value="${project.version}"/>
<property name="scala.binary.version" value="${scala.binary.version}"/>
<property name="included_buildvers"
value="301,302,303,311,312,313,320,311cdh"/>
value="301,302,303,311,312,313,320,321,311cdh"/>
<ant
antfile="${project.basedir}/maven-antrun/build-parallel-worlds.xml"
target="build-parallel-worlds"
Expand Down Expand Up @@ -661,6 +670,15 @@
<overWrite>false</overWrite>
<outputDirectory>${project.build.directory}/deps</outputDirectory>
</artifactItem>
<artifactItem>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-aggregator_${scala.binary.version}</artifactId>
<classifier>spark321</classifier>
<version>${project.version}</version>
<type>jar</type>
<overWrite>false</overWrite>
<outputDirectory>${project.build.directory}/deps</outputDirectory>
</artifactItem>
<artifactItem>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-aggregator_${scala.binary.version}</artifactId>
Expand Down Expand Up @@ -702,7 +720,7 @@
<property name="project.version" value="${project.version}"/>
<property name="scala.binary.version" value="${scala.binary.version}"/>
<property name="included_buildvers"
value="301,302,303,304,311,312,313,320,311cdh,301db,311db"/>
value="301,302,303,304,311,312,313,320,321,311cdh,301db,311db"/>
<ant
antfile="${project.basedir}/maven-antrun/build-parallel-worlds.xml"
target="build-parallel-worlds"
Expand Down
1 change: 1 addition & 0 deletions docs/additional-functionality/rapids-shuffle.md
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ In this section, we are using a docker container built using the sample dockerfi
| 3.1.2 | com.nvidia.spark.rapids.spark312.RapidsShuffleManager |
| 3.1.3 | com.nvidia.spark.rapids.spark313.RapidsShuffleManager |
| 3.2.0 | com.nvidia.spark.rapids.spark320.RapidsShuffleManager |
| 3.2.1 | com.nvidia.spark.rapids.spark321.RapidsShuffleManager |

2. Settings for UCX 1.11.2+:

Expand Down
2 changes: 2 additions & 0 deletions jenkins/spark-nightly-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ mvn -U -B -Dbuildver=311cdh clean install $MVN_URM_MIRROR -Dmaven.repo.local=$M2
mvn -B -Dbuildver=311cdh deploy -pl -dist $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR -Dcuda.version=$CUDA_CLASSIFIER -DskipTests
mvn -U -B -Dbuildver=320 clean install $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR -Dcuda.version=$CUDA_CLASSIFIER
mvn -B -Dbuildver=320 deploy -pl -dist $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR -Dcuda.version=$CUDA_CLASSIFIER -DskipTests
mvn -U -B -Dbuildver=321 clean install $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR -Dcuda.version=$CUDA_CLASSIFIER
mvn -B -Dbuildver=321 deploy -pl -dist $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR -Dcuda.version=$CUDA_CLASSIFIER -DskipTests

mvn -B -Dbuildver=301 -PsnapshotsWithDatabricks clean deploy $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR -Dcuda.version=$CUDA_CLASSIFIER

Expand Down
1 change: 1 addition & 0 deletions jenkins/spark-premerge-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ mvn_verify() {
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Dbuildver=313 clean install -Drat.skip=true -DskipTests -Dmaven.javadoc.skip=true -Dskip -Dmaven.scalastyle.skip=true -Dcuda.version=$CUDA_CLASSIFIER -pl aggregator -am
# don't skip tests
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Dbuildver=320 clean install -Drat.skip=true -Dmaven.javadoc.skip=true -Dskip -Dmaven.scalastyle.skip=true -Dcuda.version=$CUDA_CLASSIFIER -Dpytest.TEST_TAGS=''
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Dbuildver=321 clean install -Drat.skip=true -DskipTests -Dmaven.javadoc.skip=true -Dskip -Dmaven.scalastyle.skip=true -Dcuda.version=$CUDA_CLASSIFIER -pl aggregator -am

# Here run Python integration tests tagged with 'premerge_ci_1' only, that would help balance test duration and memory
# consumption from two k8s pods running in parallel, which executes 'mvn_verify()' and 'ci_2()' respectively.
Expand Down
43 changes: 42 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,47 @@
<source>${project.basedir}/src/main/301+-nondb/scala</source>
<source>${project.basedir}/src/main/311+-all/scala</source>
<source>${project.basedir}/src/main/311+-nondb/scala</source>
<source>${project.basedir}/src/main/320/scala</source>
<source>${project.basedir}/src/main/320+/scala</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<modules>
<module>aggregator</module>
<module>tests-spark310+</module>
</modules>
</profile>
<profile>
<id>release321</id>
<activation>
<property>
<name>buildver</name>
<value>321</value>
</property>
</activation>
<properties>
<spark.version>${spark321.version}</spark.version>
<spark.test.version>${spark321.version}</spark.test.version>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>add-profile-src-31+</id>
<goals><goal>add-source</goal></goals>
<phase>generate-sources</phase>
<configuration>
<sources>
<source>${project.basedir}/src/main/301+-nondb/scala</source>
<source>${project.basedir}/src/main/311+-all/scala</source>
<source>${project.basedir}/src/main/311+-nondb/scala</source>
<source>${project.basedir}/src/main/320+/scala</source>
</sources>
</configuration>
</execution>
Expand Down Expand Up @@ -665,6 +705,7 @@
<spark312.version>3.1.2</spark312.version>
<spark313.version>3.1.3-SNAPSHOT</spark313.version>
<spark320.version>3.2.0</spark320.version>
<spark321.version>3.2.1-SNAPSHOT</spark321.version>
<mockito.version>3.6.0</mockito.version>
<scala.plugin.version>4.3.0</scala.plugin.version>
<maven.jar.plugin.version>3.2.0</maven.jar.plugin.version>
Expand Down
4 changes: 4 additions & 0 deletions shims/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@
<id>release320</id>
<modules><module>spark320</module></modules>
</profile>
<profile>
<id>release321</id>
<modules><module>spark321</module></modules>
</profile>
</profiles>

<dependencies>
Expand Down
86 changes: 86 additions & 0 deletions shims/spark321/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2021, NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-shims_2.12</artifactId>
<version>21.12.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>rapids-4-spark-shims-spark321_2.12</artifactId>
<name>RAPIDS Accelerator for Apache Spark SQL Plugin Spark 3.2.1 Shim</name>
<description>The RAPIDS SQL plugin for Apache Spark 3.2.1 Shim</description>
<version>21.12.0-SNAPSHOT</version>
<properties>
<!-- don't move to the parent module shims due to conflicting tmp files -->
<target.classifier/>
</properties>

<!-- Set 'spark.version' for the shims layer -->
<!-- Create a separate file 'SPARK_VER.properties' in the jar to save cudf & spark version info -->
<build>
<plugins>
<!-- Generates the per-Spark-version build-info properties file during the
generate-resources phase. The file is written under extra-resources, which the
<resources> section of this POM includes. -->
<plugin>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<id>dependency</id>
<phase>generate-resources</phase>
<configuration>
<target>
<mkdir dir="${project.build.directory}/extra-resources"/>
<!-- Runs build/dependency-info.sh with the cudf, CUDA and Spark 3.2.1 versions as
arguments; the script's output is written to the properties file named by 'output'.
failonerror="true" makes a non-zero exit status of the script fail the build. -->
<exec executable="bash" failonerror="true"
output="${project.build.directory}/extra-resources/spark-${spark321.version}-info.properties">
<arg value="${user.dir}/build/dependency-info.sh"/>
<arg value="${cudf.version}"/>
<arg value="${cuda.version}"/>
<arg value="${spark321.version}"/>
</exec>
</target>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>

<resources>
<resource>
<!-- Include the properties file to provide the build information. -->
<directory>${project.build.directory}/extra-resources</directory>
</resource>
<resource>
<directory>src/main/resources</directory>
</resource>
</resources>
</build>

<!-- Spark SQL at the version selected by 'spark321.version'. The 'provided' scope
means it is available on the compile classpath but is not packaged with this module. -->
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark321.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.shims.spark321

import com.nvidia.spark.rapids._
import com.nvidia.spark.rapids.shims.v2._

/**
 * Shim implementation for Spark 3.2.1.
 *
 * Everything is inherited from `Spark32XShims`; the only member defined here is the
 * version accessor.
 */
class Spark321Shims extends Spark32XShims {

  /** Returns the version constant `SparkShimServiceProvider.VERSION`. */
  override def getSparkShimVersion: ShimVersion = {
    SparkShimServiceProvider.VERSION
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.shims.spark321

import com.nvidia.spark.rapids.{SparkShims, SparkShimVersion}

/** Version constants for the Spark 3.2.1 shim. */
object SparkShimServiceProvider {

  /** The version this shim is built for: 3.2.1. */
  val VERSION = SparkShimVersion(3, 2, 1)

  /**
   * Version strings derived from `VERSION`: its string form as-is, followed by the same
   * string with `-SNAPSHOT` appended.
   */
  val VERSIONNAMES = Seq("", "-SNAPSHOT").map(suffix => s"$VERSION$suffix")
}

/**
 * Service provider for the Spark 3.2.1 shim: reports whether a version string belongs to
 * this shim and constructs the shim instance.
 */
class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider {

  /** True when `version` equals one of the entries in `SparkShimServiceProvider.VERSIONNAMES`. */
  def matchesVersion(version: String): Boolean =
    SparkShimServiceProvider.VERSIONNAMES.exists(_ == version)

  /** Creates a new `Spark321Shims` instance. */
  def buildShim: SparkShims = new Spark321Shims()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.spark321

import org.apache.spark.SparkConf
import org.apache.spark.sql.rapids.shims.spark321.ProxyRapidsShuffleInternalManager

/**
 * A shuffle manager optimized for the RAPIDS Plugin for Apache Spark.
 *
 * This Spark 3.2.1 variant passes both constructor arguments straight through to
 * `ProxyRapidsShuffleInternalManager` and declares no members of its own.
 */
sealed class RapidsShuffleManager(conf: SparkConf, isDriver: Boolean)
  extends ProxyRapidsShuffleInternalManager(conf, isDriver)
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.rapids.shims.spark321

import org.apache.spark.{SparkConf, TaskContext}
import org.apache.spark.shuffle._
import org.apache.spark.sql.rapids.{ProxyRapidsShuffleInternalManagerBase, RapidsShuffleInternalManagerBase}

/**
 * A shuffle manager optimized for the RAPIDS Plugin For Apache Spark.
 *
 * @note This is an internal class; it lives in this package to obtain access to the
 *       private `ShuffleManager` and `SortShuffleManager` classes.
 */
class RapidsShuffleInternalManager(conf: SparkConf, isDriver: Boolean)
  extends RapidsShuffleInternalManagerBase(conf, isDriver) {

  /**
   * Returns the reader produced by `getReaderInternal`, which receives every argument
   * unchanged and in the same order.
   */
  def getReader[K, C](
      handle: ShuffleHandle,
      startMapIndex: Int,
      endMapIndex: Int,
      startPartition: Int,
      endPartition: Int,
      context: TaskContext,
      metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] = {
    // The explicit type keeps K and C inferred from this method's own type parameters.
    val reader: ShuffleReader[K, C] = getReaderInternal(
      handle,
      startMapIndex,
      endMapIndex,
      startPartition,
      endPartition,
      context,
      metrics)
    reader
  }
}


/**
 * Proxy shuffle manager for Spark 3.2.1 whose `getReader` forwards to `self`.
 *
 * NOTE(review): `self` is not defined in this file; presumably it is provided by
 * `ProxyRapidsShuffleInternalManagerBase` -- confirm.
 */
class ProxyRapidsShuffleInternalManager(conf: SparkConf, isDriver: Boolean)
  extends ProxyRapidsShuffleInternalManagerBase(conf, isDriver)
  with ShuffleManager {

  /** Forwards all arguments, unchanged and in the same order, to `self.getReader`. */
  def getReader[K, C](
      handle: ShuffleHandle,
      startMapIndex: Int,
      endMapIndex: Int,
      startPartition: Int,
      endPartition: Int,
      context: TaskContext,
      metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] =
    self.getReader(
      handle,
      startMapIndex,
      endMapIndex,
      startPartition,
      endPartition,
      context,
      metrics)
}
Loading

0 comments on commit 5dc32e8

Please sign in to comment.