Commit

Test parallel pre-merge build

Signed-off-by: Alex Zhang <alex4zhang@gmail.com>
zhanga5 committed Jul 20, 2021
1 parent fc9940b commit 05cd5a7
Showing 4 changed files with 260 additions and 28 deletions.
106 changes: 106 additions & 0 deletions jenkins/Jenkinsfile
@@ -0,0 +1,106 @@
#!/usr/bin/env groovy
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
*
* Jenkinsfile to run the pre-merge integration test and unit test stages in parallel on Blossom.
*/
@Library(['shared-libs', 'blossom-lib']) _

def IMAGE = "${ArtifactoryConstants.ARTIFACTORY_NAME}/sw-spark-docker/plugin:dev-ubuntu18-cuda11.0-blossom-dev"

pipeline {
agent {
kubernetes {
label "test-parallel-${BUILD_NUMBER}"
cloud 'sc-ipp-blossom-prod'
}
}

options {
ansiColor('xterm')
timeout(time: 2, unit: 'HOURS')
buildDiscarder(logRotator(numToKeepStr: '20'))
}

parameters {
string(name: 'GPU_POOL', defaultValue: 'RESERVED_POOL', description: 'GPU pool name')
}

environment {
MVN_URM_MIRROR = '-s jenkins/settings.xml -P mirror-apache-to-urm'
LIBCUDF_KERNEL_CACHE_PATH = '/tmp/.cudf'
URM_URL = "https://${ArtifactoryConstants.ARTIFACTORY_NAME}/artifactory/sw-spark-maven"
CUDA_CLASSIFIER = 'cuda11'
}

stages {
stage('Test Parallel for Pre-merge') {
parallel {
stage('Integration Test') {
options {
lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
}
agent {
kubernetes {
label "test-parallel-it-${BUILD_TAG}"
cloud 'sc-ipp-blossom-prod'
yaml pod.getGPUYAML("${IMAGE}", "${env.GPU_RESOURCE}", '8', '32Gi')
}
}

steps {
container('gpu') {
script {
sh "cat /proc/cpuinfo; cat /proc/meminfo"
sh "jenkins/spark-premerge-build.sh"
step([$class : 'JacocoPublisher',
execPattern : '**/target/jacoco.exec',
classPattern : 'target/jacoco_classes/',
sourcePattern : 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,sql-plugin/src/main/java/,sql-plugin/src/main/scala/,shims/spark311/src/main/scala/,shims/spark301db/src/main/scala/,shims/spark301/src/main/scala/,shims/spark302/src/main/scala/,shims/spark303/src/main/scala/,shims/spark304/src/main/scala/,shims/spark312/src/main/scala/,shims/spark313/src/main/scala/',
sourceInclusionPattern: '**/*.java,**/*.scala'
])
}
}
}
}

stage('Unit Test') {
options {
lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
}
agent {
kubernetes {
label "test-parallel-ut-${BUILD_TAG}"
cloud 'sc-ipp-blossom-prod'
yaml pod.getGPUYAML("${IMAGE}", "${env.GPU_RESOURCE}", '8', '32Gi')
}
}

steps {
container('gpu') {
script {
sh "cat /proc/cpuinfo; cat /proc/meminfo"
sh "jenkins/spark-premerge-build-ut.sh"
}
}
}
}
}
}
}
}
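
Both Jenkinsfiles in this commit follow the same shape: each parallel stage takes a GPU lock in its options block (the Lockable Resources plugin runs this before the stage's Kubernetes agent is scheduled, so the acquired resource name can be fed into the pod YAML), then runs one of the pre-merge scripts inside the 'gpu' container. A condensed sketch of that shape; gpuPodYaml is a hypothetical stand-in for blossom-lib's pod.getGPUYAML helper, and the rest mirrors the file above:

pipeline {
    agent none
    parameters {
        string(name: 'GPU_POOL', defaultValue: 'RESERVED_POOL', description: 'GPU pool name')
    }
    stages {
        stage('Pre-merge tests') {
            failFast true
            parallel {
                stage('Integration Test') {
                    options {
                        // Acquire one GPU before the agent below is provisioned; the
                        // resource name is exposed to this stage as env.GPU_RESOURCE.
                        lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
                    }
                    agent {
                        kubernetes {
                            label "test-parallel-it-${BUILD_TAG}"
                            cloud 'sc-ipp-blossom-prod'
                            yaml gpuPodYaml(env.GPU_RESOURCE) // hypothetical stand-in for pod.getGPUYAML(IMAGE, env.GPU_RESOURCE, '8', '32Gi')
                        }
                    }
                    steps {
                        container('gpu') { sh 'jenkins/spark-premerge-build.sh' }
                    }
                }
                stage('Unit Test') {
                    options {
                        lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
                    }
                    agent {
                        kubernetes {
                            label "test-parallel-ut-${BUILD_TAG}"
                            cloud 'sc-ipp-blossom-prod'
                            yaml gpuPodYaml(env.GPU_RESOURCE)
                        }
                    }
                    steps {
                        container('gpu') { sh 'jenkins/spark-premerge-build-ut.sh' }
                    }
                }
            }
        }
    }
}
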
108 changes: 81 additions & 27 deletions jenkins/Jenkinsfile-blossom.premerge
@@ -22,7 +22,8 @@
*/

@Library(['shared-libs', 'blossom-lib']) _
@Library('blossom-github-lib@master')
// @Library('blossom-github-lib@master')
@Library('blossom-github-lib-alex@nvbug-3339178')
import ipp.blossom.*

def githubHelper // blossom github helper
@@ -151,33 +152,81 @@ pipeline {
!skipped
}
}
options {
// We have to use params to pass the resource label in the options block;
// this is a limitation of declarative pipelines. And we need to lock the resource before the agent starts
lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
}
agent {
kubernetes {
label "premerge-test-${BUILD_TAG}"
cloud 'sc-ipp-blossom-prod'
yaml pod.getGPUYAML("${IMAGE_PREMERGE}", "${env.GPU_RESOURCE}", '8', '32Gi') // cpu: 8, memory: 32Gi
workspaceVolume persistentVolumeClaimWorkspaceVolume(claimName: "${PVC}", readOnly: false)
customWorkspace "${CUSTOM_WORKSPACE}"

failFast true
parallel {
stage('Integration Test') {
options {
// We have to use params to pass the resource label in the options block;
// this is a limitation of declarative pipelines. And we need to lock the resource before the agent starts
lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
}
agent {
kubernetes {
label "premerge-test-it-${BUILD_TAG}"
cloud 'sc-ipp-blossom-prod'
yaml pod.getGPUYAML("${IMAGE_PREMERGE}", "${env.GPU_RESOURCE}", '8', '32Gi') // cpu: 8, memory: 32Gi
workspaceVolume persistentVolumeClaimWorkspaceVolume(claimName: "${PVC}", readOnly: false)
customWorkspace "${CUSTOM_WORKSPACE}"
}
}

steps {
script {
container('gpu') {
// TODO: improve resource management
timeout(time: 4, unit: 'HOURS') { // step only timeout for test run
sh "$PREMERGE_SCRIPT"
step([$class : 'JacocoPublisher',
execPattern : '**/target/jacoco.exec',
classPattern : 'target/jacoco_classes/',
sourcePattern : 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,sql-plugin/src/main/java/,sql-plugin/src/main/scala/,shims/spark311/src/main/scala/,shims/spark301db/src/main/scala/,shims/spark301/src/main/scala/,shims/spark302/src/main/scala/,shims/spark303/src/main/scala/,shims/spark304/src/main/scala/,shims/spark312/src/main/scala/,shims/spark313/src/main/scala/',
sourceInclusionPattern: '**/*.java,**/*.scala'
])
}
}
}
}
}
}

steps {
script {
container('gpu') {
// TODO: improve resource management
timeout(time: 4, unit: 'HOURS') { // step only timeout for test run
sh "$PREMERGE_SCRIPT"
step([$class : 'JacocoPublisher',
execPattern : '**/target/jacoco.exec',
classPattern : 'target/jacoco_classes/',
sourcePattern : 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,sql-plugin/src/main/java/,sql-plugin/src/main/scala/,shims/spark311/src/main/scala/,shims/spark301db/src/main/scala/,shims/spark301/src/main/scala/,shims/spark302/src/main/scala/,shims/spark303/src/main/scala/,shims/spark304/src/main/scala/,shims/spark312/src/main/scala/,shims/spark313/src/main/scala/',
sourceInclusionPattern: '**/*.java,**/*.scala'
])
stage('Unit Test') {
options {
// We have to use params to pass the resource label in the options block;
// this is a limitation of declarative pipelines. And we need to lock the resource before the agent starts
lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
}
agent {
kubernetes {
label "premerge-test-ut-${BUILD_TAG}"
cloud 'sc-ipp-blossom-prod'
yaml pod.getGPUYAML("${IMAGE_PREMERGE}", "${env.GPU_RESOURCE}", '8', '32Gi') // cpu: 8, memory: 32Gi
workspaceVolume persistentVolumeClaimWorkspaceVolume(claimName: "${PVC}", readOnly: false)
customWorkspace "${CUSTOM_WORKSPACE}-ut"
}
}

steps {
script {
checkout(
changelog: false,
poll: true,
scm: [
$class: 'GitSCM', branches: [[name: githubHelper.getMergedSHA()]],
doGenerateSubmoduleConfigurations: false,
submoduleCfg: [],
userRemoteConfigs: [[
credentialsId: 'github-token',
url: githubHelper.getCloneUrl(),
refspec: '+refs/pull/*/merge:refs/remotes/origin/pr/*']]
]
)

container('gpu') {
// TODO: improve resource management
timeout(time: 2, unit: 'HOURS') { // step only timeout for test run
sh "$JENKINS_ROOT/spark-premerge-build-ut.sh"
}
}
}
}
}
@@ -197,9 +246,14 @@
} else {
// upload log only in case of build failure
def guardWords = ["gitlab.*?\\.com", "urm.*?\\.com"]

// hide GPU info
guardWords.add("nvidia-smi(?s)(.*?)(?=jenkins/version-def.sh)")

def logPattern = "### BEGIN OF TEST LOG ###.*### END OF TEST LOG ###"

githubHelper.uploadPartialLogs(this, env.JOB_NAME, env.BUILD_NUMBER, null, guardWords, logPattern)
// githubHelper.uploadPartialLogs(this, env.JOB_NAME, env.BUILD_NUMBER, null, guardWords, logPattern)
githubHelper.uploadParallelLogs(this, env.JOB_NAME, env.BUILD_NUMBER, null, guardWords)

githubHelper.updateCommitStatus("$BUILD_URL", "Fail", GitHubCommitState.FAILURE)
}
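
The guardWords list above holds regular expressions used to scrub internal host names and the nvidia-smi section from the log excerpt before it is posted back to the pull request; uploadParallelLogs comes from the private blossom-github-lib, so the snippet below is only a rough sketch of that masking step (the redactLog helper and the [REDACTED] token are illustrative, not the library's actual API):

// Rough sketch: keep only the section between the BEGIN/END markers,
// then mask every guard-word match before the text leaves the build.
String redactLog(String fullLog, List<String> guardWords, String logPattern) {
    def matcher = fullLog =~ ('(?s)' + logPattern)   // (?s) lets '.' span newlines
    String section = matcher.find() ? matcher.group() : fullLog
    guardWords.each { pattern ->
        section = section.replaceAll(pattern, '[REDACTED]')
    }
    return section
}
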
72 changes: 72 additions & 0 deletions jenkins/spark-premerge-build-ut.sh
@@ -0,0 +1,72 @@
#!/bin/bash
#
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -ex

nvidia-smi

function on_exit {
echo '### END OF TEST LOG ###'
}
trap on_exit EXIT

echo '### BEGIN OF TEST LOG ###'

. jenkins/version-def.sh

# get merge BASE from merged pull request. Log message e.g. "Merge HEAD into BASE"
# BASE_REF=$(git --no-pager log --oneline -1 | awk '{ print $NF }')
# file size check for pull request. The size of a committed file should be less than 1.5MiB
# pre-commit run check-added-large-files --from-ref $BASE_REF --to-ref HEAD

ARTF_ROOT="$WORKSPACE/.download"
MVN_GET_CMD="mvn org.apache.maven.plugins:maven-dependency-plugin:2.8:get -B \
$MVN_URM_MIRROR -DremoteRepositories=$URM_URL \
-Ddest=$ARTF_ROOT"

rm -rf $ARTF_ROOT && mkdir -p $ARTF_ROOT

# Download a full version of spark
$MVN_GET_CMD \
-DgroupId=org.apache -DartifactId=spark -Dversion=$SPARK_VER -Dclassifier=bin-hadoop3.2 -Dpackaging=tgz

# export SPARK_HOME="$ARTF_ROOT/spark-$SPARK_VER-bin-hadoop3.2"
# export PATH="$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH"
# tar zxf $SPARK_HOME.tgz -C $ARTF_ROOT && \
# rm -f $SPARK_HOME.tgz

# mvn -U -B $MVN_URM_MIRROR '-P!snapshot-shims,pre-merge' clean verify -Dpytest.TEST_TAGS='' \
# -Dpytest.TEST_TYPE="pre-commit" -Dpytest.TEST_PARALLEL=4 -Dcuda.version=$CUDA_CLASSIFIER
# Run the unit tests for other Spark versions but don't run full python integration tests
# NOT ALL TESTS NEEDED FOR PREMERGE
# Test the latest stable and snapshot shims for each Spark minor version. All other shims should be covered in nightly pipelines
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark303tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark304tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark312tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark313tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
# Disabled until Spark 3.2 source incompatibility fixed, see https://github.com/NVIDIA/spark-rapids/issues/2052
#env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark320tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER

# The jacoco coverage should have been collected, but because of how the shade plugin
# and jacoco work, we need to clean some things up so jacoco will only report on the
# things we care about
# mkdir -p target/jacoco_classes/
# FILE=$(ls dist/target/rapids-4-spark_2.12-*.jar | grep -v test | xargs readlink -f)
# pushd target/jacoco_classes/
# jar xf $FILE
# rm -rf com/nvidia/shaded/ org/openucx/
# popd
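
The four env -u SPARK_HOME mvn ... test invocations above differ only in the shim test profile, so the same step could also be written as a loop; a small sketch, assuming the profile list is kept in sync with the shims this script is meant to cover:

# Sketch: run the unit tests once per shim profile, with SPARK_HOME removed
# from the environment exactly as in the individual commands above.
for shim_profile in spark303tests spark304tests spark312tests spark313tests; do
    env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -P${shim_profile},snapshot-shims test \
        -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
done
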
2 changes: 1 addition & 1 deletion jenkins/spark-premerge-build.sh
@@ -55,7 +55,7 @@ mvn -U -B $MVN_URM_MIRROR '-P!snapshot-shims,pre-merge' clean verify -Dpytest.TE
# NOT ALL TESTS NEEDED FOR PREMERGE
# Just test one 3.0.X version (base version covers this) and one 3.1.X version.
# All other shims should be covered in nightly pipelines
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark313tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
# env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark313tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
# Disabled until Spark 3.2 source incompatibility fixed, see https://github.com/NVIDIA/spark-rapids/issues/2052
#env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark320tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER

