From d6a8ad98f681e49f3c97e4dba82882d36800cd42 Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Wed, 13 Apr 2022 11:39:51 -0700 Subject: [PATCH 1/9] Revert "Fix tools depending on the common jar (#5239)" This reverts commit cec1b7d47f2ace962b7301db953634ac2004a9b7. --- tools/pom.xml | 6 +++ .../rapids/tool/ThreadFactoryBuilder.scala | 52 ------------------- .../rapids/tool/profiling/Profiler.scala | 2 +- .../tool/qualification/Qualification.scala | 2 +- .../spark/sql/rapids/tool/AppFilterImpl.scala | 2 +- 5 files changed, 9 insertions(+), 55 deletions(-) delete mode 100644 tools/src/main/scala/com/nvidia/spark/rapids/tool/ThreadFactoryBuilder.scala diff --git a/tools/pom.xml b/tools/pom.xml index c7a767d122e..183d20f7be8 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -40,6 +40,11 @@ + + com.nvidia + rapids-4-spark-common_${scala.binary.version} + ${project.version} + org.scala-lang scala-library @@ -100,6 +105,7 @@ org.rogach:scallop_${scala.binary.version} + com.nvidia:rapids-4-spark-common_${scala.binary.version} diff --git a/tools/src/main/scala/com/nvidia/spark/rapids/tool/ThreadFactoryBuilder.scala b/tools/src/main/scala/com/nvidia/spark/rapids/tool/ThreadFactoryBuilder.scala deleted file mode 100644 index 582c7adcd6f..00000000000 --- a/tools/src/main/scala/com/nvidia/spark/rapids/tool/ThreadFactoryBuilder.scala +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package com.nvidia.spark.rapids.tool - -import java.util.concurrent.{Executors, ThreadFactory} -import java.util.concurrent.atomic.AtomicLong - -// This is similar to Guava ThreadFactoryBuilder -// Avoid to use Guava as it is a messy dependency in practice. -// This is copied from the common module -class ThreadFactoryBuilder { - private var nameFormat = Option.empty[String] - private var daemon = Option.empty[Boolean] - - def setNameFormat(nameFormat: String): ThreadFactoryBuilder = { - nameFormat.format(0) - this.nameFormat = Some(nameFormat) - this - } - - def setDaemon(daemon: Boolean): ThreadFactoryBuilder = { - this.daemon = Some(daemon) - this - } - - def build(): ThreadFactory = { - val count = nameFormat.map(_ => new AtomicLong(0)) - new ThreadFactory() { - private val defaultThreadFactory = Executors.defaultThreadFactory - - override def newThread(r: Runnable): Thread = { - val thread = defaultThreadFactory.newThread(r) - nameFormat.foreach(f => thread.setName(f.format(count.get.getAndIncrement()))) - daemon.foreach(b => thread.setDaemon(b)) - thread - } - } - } -} diff --git a/tools/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala b/tools/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala index d5e9715bab8..6c176842ed3 100644 --- a/tools/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala +++ b/tools/src/main/scala/com/nvidia/spark/rapids/tool/profiling/Profiler.scala @@ -22,8 +22,8 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.util.control.NonFatal +import com.nvidia.spark.rapids.ThreadFactoryBuilder import com.nvidia.spark.rapids.tool.{EventLogInfo, EventLogPathProcessor} -import com.nvidia.spark.rapids.tool.ThreadFactoryBuilder import org.apache.hadoop.conf.Configuration import org.apache.spark.internal.Logging diff --git a/tools/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala 
b/tools/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala index bb08c9312fb..a895d28fe69 100644 --- a/tools/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala +++ b/tools/src/main/scala/com/nvidia/spark/rapids/tool/qualification/Qualification.scala @@ -20,8 +20,8 @@ import java.util.concurrent.{ConcurrentLinkedQueue, Executors, ThreadPoolExecuto import scala.collection.JavaConverters._ +import com.nvidia.spark.rapids.ThreadFactoryBuilder import com.nvidia.spark.rapids.tool.EventLogInfo -import com.nvidia.spark.rapids.tool.ThreadFactoryBuilder import org.apache.hadoop.conf.Configuration import org.apache.spark.internal.Logging diff --git a/tools/src/main/scala/org/apache/spark/sql/rapids/tool/AppFilterImpl.scala b/tools/src/main/scala/org/apache/spark/sql/rapids/tool/AppFilterImpl.scala index 9b7373afedb..5ebae2a075b 100644 --- a/tools/src/main/scala/org/apache/spark/sql/rapids/tool/AppFilterImpl.scala +++ b/tools/src/main/scala/org/apache/spark/sql/rapids/tool/AppFilterImpl.scala @@ -22,8 +22,8 @@ import java.util.regex.PatternSyntaxException import scala.collection.JavaConverters._ +import com.nvidia.spark.rapids.ThreadFactoryBuilder import com.nvidia.spark.rapids.tool.EventLogInfo -import com.nvidia.spark.rapids.tool.ThreadFactoryBuilder import com.nvidia.spark.rapids.tool.qualification.QualificationArgs import org.apache.hadoop.conf.Configuration From 2b0034aa9ffa87d1f2c55d3dd9e47f1164581132 Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Thu, 14 Apr 2022 12:01:37 -0700 Subject: [PATCH 2/9] generate reduced pom Signed-off-by: Niranjan Artal --- tools/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/pom.xml b/tools/pom.xml index 183d20f7be8..553bdb6bbae 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -124,7 +124,7 @@ com.nvidia.spark.rapids.tool.profiling.ProfileMain - false + true From 44591cfa442a4503cea27c50aacc0a8e9511c83c Mon Sep 17 00:00:00 2001 From: Niranjan Artal 
Date: Fri, 15 Apr 2022 16:58:55 -0700 Subject: [PATCH 3/9] include only required classes Signed-off-by: Niranjan Artal --- jenkins/deploy.sh | 2 +- tools/pom.xml | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/jenkins/deploy.sh b/jenkins/deploy.sh index 0101a5d56e8..29e4bbd0a45 100755 --- a/jenkins/deploy.sh +++ b/jenkins/deploy.sh @@ -97,7 +97,7 @@ TOOL_FPATH="deployjars/$TOOL_ART_ID-$TOOL_ART_VER" TOOL_DOC_JARS="-Dsources=${TOOL_FPATH}-sources.jar -Djavadoc=${TOOL_FPATH}-javadoc.jar" $DEPLOY_CMD -Durl=$SERVER_URL -DrepositoryId=$SERVER_ID \ $TOOL_DOC_JARS \ - -Dfile=$TOOL_FPATH.jar -DpomFile=${TOOL_PL}/pom.xml + -Dfile=$TOOL_FPATH.jar -DpomFile=${TOOL_PL}/dependency-reduced-pom.xml ###### Deploy Spark 2.x explain meta jar ###### SPARK2_PL=${SPARK2_PL:-"spark2-sql-plugin"} diff --git a/tools/pom.xml b/tools/pom.xml index 553bdb6bbae..0aecd00d1ea 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -109,6 +109,12 @@ + + com.nvidia:rapids-4-spark-common_${scala.binary.version} + + com/nvidia/spark/rapids/ThreadFactoryBuilder* + + > *:* From ad283062607ea2159550b493e82b73d18c0048e3 Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Mon, 18 Apr 2022 11:17:44 -0700 Subject: [PATCH 4/9] include ThreadFactoryBuilder.class Signed-off-by: Niranjan Artal --- tools/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/pom.xml b/tools/pom.xml index 0aecd00d1ea..06ab0e6d32c 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -112,7 +112,7 @@ com.nvidia:rapids-4-spark-common_${scala.binary.version} - com/nvidia/spark/rapids/ThreadFactoryBuilder* + com/nvidia/spark/rapids/ThreadFactoryBuilder.class > From 60d7ee36c7287f8aaa874b332d3a2a739646bfc5 Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Mon, 18 Apr 2022 16:59:14 -0700 Subject: [PATCH 5/9] addressed review comments Signed-off-by: Niranjan Artal --- tools/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/pom.xml b/tools/pom.xml index 
06ab0e6d32c..0aecd00d1ea 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -112,7 +112,7 @@ com.nvidia:rapids-4-spark-common_${scala.binary.version} - com/nvidia/spark/rapids/ThreadFactoryBuilder.class + com/nvidia/spark/rapids/ThreadFactoryBuilder* > From ec1a54a43e4a15ab446beeebe57622a603b16fd6 Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Tue, 19 Apr 2022 10:03:42 -0700 Subject: [PATCH 6/9] add minimizeJar to include classes Signed-off-by: Niranjan Artal --- tools/pom.xml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/pom.xml b/tools/pom.xml index 0aecd00d1ea..92401534ea4 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -102,6 +102,7 @@ shade + true org.rogach:scallop_${scala.binary.version} @@ -109,12 +110,6 @@ - - com.nvidia:rapids-4-spark-common_${scala.binary.version} - - com/nvidia/spark/rapids/ThreadFactoryBuilder* - - > *:* From fa313f1c7f751dc3d031f28e208d3f43c155f4e7 Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Tue, 19 Apr 2022 18:25:12 -0700 Subject: [PATCH 7/9] update spark-nightly-build.sh to deploy dependency reduced pom Signed-off-by: Niranjan Artal --- jenkins/spark-nightly-build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jenkins/spark-nightly-build.sh b/jenkins/spark-nightly-build.sh index 3c2d98f5f57..22c3e288631 100755 --- a/jenkins/spark-nightly-build.sh +++ b/jenkins/spark-nightly-build.sh @@ -119,7 +119,8 @@ if [[ $SKIP_DEPLOY != 'true' ]]; then mvn -B deploy -pl '!dist' \ -Dbuildver=$SPARK_BASE_SHIM_VERSION \ $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR \ - -Dcuda.version=$CUDA_CLASSIFIER + -Dcuda.version=$CUDA_CLASSIFIER \ + -DpomFile=${TOOL_PL}/dependency-reduced-pom.xml fi # Parse Spark files from local mvn repo From f8c570d01514e33fd4b6b4bae7ac8d9129b7b0ae Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Tue, 19 Apr 2022 21:50:58 -0700 Subject: [PATCH 8/9] update nightly script Signed-off-by: Niranjan Artal --- jenkins/spark-nightly-build.sh | 1 + 1 file 
changed, 1 insertion(+) diff --git a/jenkins/spark-nightly-build.sh b/jenkins/spark-nightly-build.sh index 22c3e288631..d01f1971636 100755 --- a/jenkins/spark-nightly-build.sh +++ b/jenkins/spark-nightly-build.sh @@ -115,6 +115,7 @@ distWithReducedPom "install" if [[ $SKIP_DEPLOY != 'true' ]]; then distWithReducedPom "deploy" +TOOL_PL=${TOOL_PL:-"tools"} # this deploy includes 'tools' that is unconditionally built with Spark 3.1.1 mvn -B deploy -pl '!dist' \ -Dbuildver=$SPARK_BASE_SHIM_VERSION \ From 990897cc4573a3258719a8840937579d7bb222e9 Mon Sep 17 00:00:00 2001 From: Niranjan Artal Date: Wed, 20 Apr 2022 12:01:14 -0700 Subject: [PATCH 9/9] addressed review comments Signed-off-by: Niranjan Artal --- jenkins/spark-nightly-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jenkins/spark-nightly-build.sh b/jenkins/spark-nightly-build.sh index 4a940a8e6c4..9ef54157103 100755 --- a/jenkins/spark-nightly-build.sh +++ b/jenkins/spark-nightly-build.sh @@ -22,6 +22,7 @@ set -ex ## export 'M2DIR' so that shims can get the correct Spark dependency info export M2DIR="$WORKSPACE/.m2" +TOOL_PL=${TOOL_PL:-"tools"} DIST_PL="dist" function mvnEval { mvn help:evaluate -q -pl $DIST_PL $MVN_URM_MIRROR -Prelease311 -Dmaven.repo.local=$M2DIR -Dcuda.version=$CUDA_CLASSIFIER -DforceStdout -Dexpression=$1 @@ -116,7 +117,6 @@ if [[ $SKIP_DEPLOY != 'true' ]]; then DIST_FPATH="$DIST_FPATH-$CUDA_CLASSIFIER" distWithReducedPom "deploy" -TOOL_PL=${TOOL_PL:-"tools"} # this deploy includes 'tools' that is unconditionally built with Spark 3.1.1 mvn -B deploy -pl '!dist' \ -Dbuildver=$SPARK_BASE_SHIM_VERSION \