Skip to content

Commit

Permalink
Add Spark 3.0.2 to Shim layer (NVIDIA#596)
Browse files Browse the repository at this point in the history
* Add Spark 3.0.2 to Shim layer

Signed-off-by: Thomas Graves <tgraves@nvidia.com>

* Update docs for Spark 3.0.2

Signed-off-by: Thomas Graves <tgraves@nvidia.com>

* add missing pom

Signed-off-by: Thomas Graves <tgraves@nvidia.com>

* Add spark 3.0.2 unit tests to the nightly

Signed-off-by: Thomas Graves <tgraves@nvidia.com>

* Add 3.0.2 to the integration tests

* Add jenkins file for 3.0.2 integration tests

* Change docs to use bullet list

Co-authored-by: Thomas Graves <tgraves@nvidia.com>
  • Loading branch information
tgravescs and tgravescs authored Aug 21, 2020
1 parent ac148f6 commit 4636f37
Show file tree
Hide file tree
Showing 14 changed files with 263 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/get-started/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ simplify these settings in the near future). Choose the version of the shuffle m
that matches your Spark version. Currently we support
- Spark 3.0.0 (com.nvidia.spark.rapids.spark300.RapidsShuffleManager)
- Spark 3.0.1 (com.nvidia.spark.rapids.spark301.RapidsShuffleManager)
- Spark 3.0.2 (com.nvidia.spark.rapids.spark302.RapidsShuffleManager)
- Spark 3.1.0 (com.nvidia.spark.rapids.spark310.RapidsShuffleManager)

```shell
Expand Down
6 changes: 4 additions & 2 deletions docs/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ They generally follow TPCH but are not guaranteed to be the same.
Unit tests exist in the tests directory. This is unconventional and is done so we can run the tests
on the final shaded version of the plugin. It also helps with how we collect code coverage.
You can run the unit tests against different versions of Spark using the different profiles. The
default version runs against Spark 3.0.0, `-Pspark301tests` runs against Spark 3.0.1, and `-Pspark310tests`
runs unit tests against Spark 3.1.0.
default version runs against Spark 3.0.0; to run against other versions, use one of the following profiles:
- `-Pspark301tests` (Spark 3.0.1)
- `-Pspark302tests` (Spark 3.0.2)
- `-Pspark310tests` (Spark 3.1.0)

## Integration tests

Expand Down
6 changes: 6 additions & 0 deletions integration_tests/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@
<spark.test.version>3.0.1-SNAPSHOT</spark.test.version>
</properties>
</profile>
<profile>
<id>spark302tests</id>
<properties>
<spark.test.version>3.0.2-SNAPSHOT</spark.test.version>
</properties>
</profile>
<profile>
<id>spark310tests</id>
<properties>
Expand Down
99 changes: 99 additions & 0 deletions jenkins/Jenkinsfile.302.integration
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env groovy
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
*
* Jenkins file for running spark3.0.2 integration tests
*
*/

@Library(['shared-libs', 'spark-jenkins-shared-lib']) _

// Base URL of the Maven repository on the Artifactory host; exported below as URM_URL.
def urmUrl="https://${ArtifactoryConstants.ARTIFACTORY_NAME}/artifactory/sw-spark-maven"

// Declarative pipeline: one GPU stage that runs the integration test script against
// Spark 3.0.2-SNAPSHOT inside a docker image, then reports the result to Slack.
pipeline {
    // No global agent; the stage below selects its own.
    agent none

    options {
        ansiColor('xterm')
        timestamps()
        timeout(time: 240, unit: 'MINUTES')
        buildDiscarder(logRotator(numToKeepStr: '10'))
    }

    parameters {
        string(name: 'OVERWRITE_PARAMS', defaultValue: '',
            description: 'parameters format XXX_VER=xxx;YYY_VER=yyy;')
        string(name: 'REF', defaultValue: 'branch-0.2', description: 'Commit to build')
    }

    environment {
        JENKINS_ROOT = 'jenkins'
        // Single-quoted, so Groovy does not interpolate $JENKINS_ROOT here; presumably it is
        // left for the shell to expand when the script is invoked -- TODO confirm.
        TEST_SCRIPT = '$JENKINS_ROOT/spark-tests.sh'
        LIBCUDF_KERNEL_CACHE_PATH='/tmp/.cudf'
        ARTIFACTORY_NAME = "${ArtifactoryConstants.ARTIFACTORY_NAME}"
        URM_URL = "${urmUrl}"
        MVN_URM_MIRROR='-s jenkins/settings.xml -P mirror-apache-to-urm'
    }

    stages {
        stage('IT on 3.0.2-SNAPSHOT') {
            agent { label 'docker-gpu' }
            environment {SPARK_VER='3.0.2-SNAPSHOT'}
            steps {
                script {
                    // Source version-def.sh (its own output goes to stderr so that only the
                    // echo is captured) and turn '-' into '.' in CUDA_CLASSIFIER.
                    def CUDA_NAME=sh(returnStdout: true,
                        script: '. jenkins/version-def.sh>&2 && echo -n $CUDA_CLASSIFIER | sed "s/-/./g"')
                    // The image tag is derived from the CUDA classifier computed above.
                    def IMAGE_NAME="$ARTIFACTORY_NAME/sw-spark-docker/plugin:it-centos7-$CUDA_NAME"
                    sh "docker pull $IMAGE_NAME"
                    // Run the tests inside the container, with the host's ~/.zinc mounted read-write.
                    docker.image(IMAGE_NAME).inside("--runtime=nvidia -v ${HOME}/.zinc:${HOME}/.zinc:rw") {
                        sh "bash $TEST_SCRIPT"
                    }
                }
            }
        }
    } // end of stages
    post {
        always {
            script {
                // Report the final result to the CI/CD Slack channel.
                if (currentBuild.currentResult == "SUCCESS") {
                    slack("#rapidsai-spark-cicd", "Success", color: "#33CC33")
                }
                else {
                    slack("#rapidsai-spark-cicd", "Failed", color: "#FF0000")
                }
            }
            echo 'Pipeline finished!'
        }
    }
} // end of pipeline

/**
 * Posts a build notification to Slack through the `slackSend` step.
 *
 * Named arguments given at the call site (for example `color: "#33CC33"`) arrive in
 * `params` and take precedence over the defaults defined below. `channel` and
 * `message` always come from the positional arguments, and the message is prefixed
 * with the build URL on its own line.
 *
 * @param params  optional `slackSend` overrides; this method does not modify the map
 * @param channel Slack channel to post to
 * @param message text placed after the build URL
 */
void slack(Map params = [:], String channel, String message) {
    Map sendArgs = [
        color: "#000000",
        baseUrl: "${SparkConstants.SLACK_API_ENDPOINT}",
        tokenCredentialId: "slack_token"
    ]

    // Layer everything onto a fresh local map so the caller's map is left untouched:
    // first the caller's overrides, then the positional channel/message on top.
    sendArgs << params
    sendArgs << [channel: channel, message: "${BUILD_URL}\n" + message]

    slackSend(sendArgs)
}
1 change: 1 addition & 0 deletions jenkins/spark-nightly-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ set -ex
mvn -U -B -Pinclude-databricks clean deploy $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2
# Run unit tests against other spark versions
mvn -U -B -Pspark301tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2
mvn -U -B -Pspark302tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2
mvn -U -B -Pspark310tests test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2

# Parse cudf and spark files from local mvn repo
Expand Down
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@
<slf4j.version>1.7.30</slf4j.version>
<spark300.version>3.0.0</spark300.version>
<spark301.version>3.0.1-SNAPSHOT</spark301.version>
<spark302.version>3.0.2-SNAPSHOT</spark302.version>
<spark310.version>3.1.0-SNAPSHOT</spark310.version>
</properties>

Expand Down
6 changes: 6 additions & 0 deletions shims/aggregator/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-shims-spark302_${scala.binary.version}</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-shims-spark301_${scala.binary.version}</artifactId>
Expand Down
1 change: 1 addition & 0 deletions shims/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
<modules>
<module>spark300</module>
<module>spark301</module>
<module>spark302</module>
<module>spark310</module>
<module>aggregator</module>
</modules>
Expand Down
47 changes: 47 additions & 0 deletions shims/spark302/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2020, NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-shims_2.12</artifactId>
<version>0.2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-shims-spark302_2.12</artifactId>
<name>RAPIDS Accelerator for Apache Spark SQL Plugin Spark 3.0.2 Shim</name>
<description>The RAPIDS SQL plugin for Apache Spark 3.0.2 Shim</description>
<version>0.2.0-SNAPSHOT</version>

<dependencies>
<!-- The 3.0.2 shim builds on the 3.0.1 shim module (Spark302Shims extends Spark301Shims). -->
<dependency>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-shims-spark301_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Spark SQL at the version given by the spark302.version property; 'provided' scope,
     so it is on the compile classpath but is not packaged with this module. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark302.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
com.nvidia.spark.rapids.shims.spark302.SparkShimServiceProvider
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.shims.spark302

import com.nvidia.spark.rapids.ShimVersion
import com.nvidia.spark.rapids.shims.spark301.Spark301Shims
import com.nvidia.spark.rapids.spark302.RapidsShuffleManager

/**
 * Shim implementation for Spark 3.0.2.
 *
 * Inherits from [[Spark301Shims]] and overrides only the reported shim version and
 * the name of the shuffle manager class.
 */
class Spark302Shims extends Spark301Shims {

  /** Returns the `VERSION` constant of this package's `SparkShimServiceProvider` object. */
  override def getSparkShimVersion: ShimVersion =
    SparkShimServiceProvider.VERSION

  /** Returns the canonical name of the `spark302` `RapidsShuffleManager` class. */
  override def getRapidsShuffleManagerClass: String = {
    val shuffleManagerClass = classOf[RapidsShuffleManager]
    shuffleManagerClass.getCanonicalName
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.shims.spark302

import com.nvidia.spark.rapids.{SparkShims, SparkShimVersion}

/** Version constants shared by the Spark 3.0.2 shim classes. */
object SparkShimServiceProvider {
  /** The shim version this package reports: 3.0.2. */
  val VERSION = SparkShimVersion(3, 0, 2)

  /** Accepted version strings: the string form of `VERSION`, and the same with `-SNAPSHOT` appended. */
  val VERSIONNAMES = Seq(VERSION.toString, VERSION.toString + "-SNAPSHOT")
}

/** Service provider that recognises the version strings above and builds a [[Spark302Shims]]. */
class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider {

  /** Returns true when `version` is exactly one of the strings in `VERSIONNAMES`. */
  def matchesVersion(version: String): Boolean = {
    val accepted = SparkShimServiceProvider.VERSIONNAMES
    accepted.contains(version)
  }

  /** Creates a new shim instance for Spark 3.0.2. */
  def buildShim: SparkShims = new Spark302Shims()
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.nvidia.spark.rapids.spark302

import org.apache.spark.SparkConf
import org.apache.spark.sql.rapids.shims.spark300.RapidsShuffleInternalManager

/**
 * A shuffle manager optimized for the RAPIDS Plugin for Apache Spark.
 *
 * This class adds no members of its own; it forwards `conf` and `isDriver` to
 * [[RapidsShuffleInternalManager]], which is imported from the `spark300` shim package.
 */
sealed class RapidsShuffleManager(conf: SparkConf, isDriver: Boolean)
  extends RapidsShuffleInternalManager(conf, isDriver)
6 changes: 6 additions & 0 deletions tests/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@
<spark.test.version>3.0.1-SNAPSHOT</spark.test.version>
</properties>
</profile>
<profile>
<id>spark302tests</id>
<properties>
<spark.test.version>3.0.2-SNAPSHOT</spark.test.version>
</properties>
</profile>
<profile>
<id>spark310tests</id>
<properties>
Expand Down

0 comments on commit 4636f37

Please sign in to comment.