Enable build and test with JDK11 (#5528)
This PR closes #4649, #3682.

- add documentation for building with JDK9+, including potential cross-compilation to lower Java major versions
- add `allowConventionalDistJar` for conventional jar creation when only one shim is included
- automatically detect a conventional jar and suppress warnings about classloader issues
- remove manual configuration of javac args for scala-maven-plugin. This plugin processes `maven.compiler.*` properties and adds the right set of javac options automatically. The 3.4.x version we use for db doesn't handle `maven.compiler.release`; however, we don't need it at the moment and it's currently not used.
- disambiguate calls to `buf.position()` that produce ambiguous-overload errors on JDK11 (see the sketch after this list)
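
For illustration only (not part of the diff): on JDK9+, `ByteBuffer` overrides `position(int)` with a covariant `ByteBuffer` return type, so a paren-less `buf.position` no longer resolves cleanly in Scala 2.12, while explicit empty parentheses select the zero-argument getter. A minimal sketch (the object name is made up):

```scala
import java.nio.ByteBuffer

object PositionDisambiguation {
  def main(args: Array[String]): Unit = {
    val buf: ByteBuffer = ByteBuffer.allocate(16)
    // val p = buf.position   // on JDK11 this fails to compile with an
    //                        // "ambiguous reference to overloaded definition" error:
    //                        // Buffer.position(): Int vs ByteBuffer.position(Int): ByteBuffer
    val p: Int = buf.position()   // explicit () selects the zero-argument getter
    buf.position(p)               // the single-argument setter remains unambiguous
    println(s"position = $p")
  }
}
```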

Note this PR only verifies the build, skipping tests with `-DskipTests`, because we still need to address #3851 (comment)
  
Signed-off-by: Gera Shegalov <gera@apache.org>
gerashegalov authored May 20, 2022
1 parent 3b57770 commit c200c31
Showing 6 changed files with 109 additions and 38 deletions.
28 changes: 26 additions & 2 deletions CONTRIBUTING.md
@@ -101,6 +101,30 @@ specifying the environment variable `BUILD_PARALLEL=<n>`.
You can build against different versions of the CUDA Toolkit by using one of the following profiles:
* `-Pcuda11` (CUDA 11.0/11.1/11.2, default)

### Building and Testing with JDK9+
We support JDK8 as our main JDK version. However, it is possible to build and run with more modern
JDK versions as well. To do so, set `JAVA_HOME` in the environment to your JDK root directory.
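
For example, a minimal sketch of pointing the build at a JDK11 installation (the path below is an assumption, adjust it for your system):

```bash
# Illustrative JDK location only
export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
export PATH="$JAVA_HOME/bin:$PATH"
java -version   # should report an 11.x runtime
mvn -version    # Maven picks up the JDK from JAVA_HOME
```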

With JDK9+, you need to disable the default classloader manipulation by setting
`spark.rapids.force.caller.classloader=false` in your Spark application configuration. There are, however,
known issues with this mode, e.g. see #5513.
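
As a sketch, the setting can be passed like any other Spark configuration; the artifact name below is a placeholder:

```bash
# Placeholder jar name; disables the default classloader manipulation on JDK9+
spark-shell \
  --jars rapids-4-spark_2.12-<version>.jar \
  --conf spark.plugins=com.nvidia.spark.SQLPlugin \
  --conf spark.rapids.force.caller.classloader=false
```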

At the time of this writing, the most robust way to run the RAPIDS Accelerator is from a jar dedicated to
a single Spark version. To this end, please build with a single shim and specify `-DallowConventionalDistJar=true`.

Also make sure to use scala-maven-plugin (`scala.plugin.version`) 4.6.0 or later so that the
[maven.compiler.release](https://github.com/davidB/scala-maven-plugin/blob/4.6.1/src/main/java/scala_maven/ScalaMojoSupport.java#L161)
flag is processed correctly when cross-compilation is required.

```bash
mvn clean verify -Dbuildver=321 \
-Dmaven.compiler.release=11 \
-Dmaven.compiler.source=11 \
-Dmaven.compiler.target=11 \
-Dscala.plugin.version=4.6.1 \
-DallowConventionalDistJar=true
```

## Code contributions

### Source code layout
@@ -160,8 +184,8 @@ Spark version, open [Maven tool window](https://www.jetbrains.com/help/idea/2021
select one of the `release3xx` profiles (e.g., `release320`) for Apache Spark 3.2.0, and click "Reload"
if not triggered automatically.

There is a known issue where, even after selecting a different Maven profile in the Maven submenu, the source folders from
a previously selected profile may remain active. To get around this you have to manually reload the Maven project from
the Maven side menu.

If you see Scala symbols unresolved (highlighted red) in IDEA, please try the following steps to resolve it:
35 changes: 33 additions & 2 deletions dist/maven-antrun/build-parallel-worlds.xml
@@ -47,7 +47,7 @@
-->
<taskdef resource="net/sf/antcontrib/antcontrib.properties"/>

<target name="build-parallel-worlds">
<target name="init-properties">
<property environment="env"/>
<echo level="info">Preparing parallel worlds with params:
included_buildvers=${included_buildvers}
@@ -63,6 +63,18 @@
<property name="aggregatorPrefix" value="${project.build.directory}/deps/${aggregatorArtifact}-${project.version}"/>
<truncate file="${shimServiceFile}" create="true" mkdirs="true"/>
<property name="aggregatorBuildDir" value="${project.basedir}/../aggregator/target"/>

<condition property="should.build.conventional.jar">
<and>
<not><contains string="${included_buildvers}" substring=","/></not>
<istrue value="${allowConventionalDistJar}"/>
</and>
</condition>
<echo level="info">Determined should.build.conventional.jar: ${should.build.conventional.jar}</echo>
</target>

<target name="copy-dependencies"
depends="init-properties">
<ac:for list="${included_buildvers}" param="bv" trim="true">
<sequential>
<!-- precedence order tolerating mvn clean
@@ -104,6 +116,24 @@
</exec>
</ac:else>
</ac:if>
</sequential>
</ac:for>
</target>
<target name="build-conventional-jar"
depends="copy-dependencies"
if="should.build.conventional.jar">
<echo level="info">Single Shim build and allowConventionalDistJar is set to true:
promoting aggregator jar as the final conventional jar.
</echo>
<unzip src="${aggregatorPrefix}-spark${included_buildvers}.jar"
dest="${project.build.directory}/parallel-world"
/>
</target>
<target name="build-parallel-worlds"
depends="build-conventional-jar"
unless="should.build.conventional.jar">
<ac:for list="${included_buildvers}" param="bv" trim="true">
<sequential>
<unzip overwrite="false" src="${aggregatorPrefix}-spark@{bv}.jar"
dest="${project.build.directory}/parallel-world">
<patternset id="shared-world-includes">
@@ -112,7 +142,8 @@
</patternset>
</unzip>
<unzip src="${aggregatorPrefix}-spark@{bv}.jar"
dest="${project.build.directory}/parallel-world/spark@{bv}">
dest="${project.build.directory}/parallel-world/spark@{bv}"
>
</unzip>
</sequential>
</ac:for>
13 changes: 5 additions & 8 deletions pom.xml
@@ -808,6 +808,7 @@
</profiles>

<properties>
<allowConventionalDistJar>false</allowConventionalDistJar>
<buildver>311</buildver>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
@@ -818,7 +819,9 @@
<cuda.version>cuda11</cuda.version>
<spark-rapids-jni.version>22.06.0-SNAPSHOT</spark-rapids-jni.version>
<scala.binary.version>2.12</scala.binary.version>
<scala.recompileMode>incremental</scala.recompileMode>
<scala.version>2.12.15</scala.version>
<scala.javac.args>-Xlint:all,-serial,-path,-try</scala.javac.args>
<!--
If the shade package changes we need to also update jenkins/spark-premerge-build.sh
so code coverage does not include the shaded classes.
@@ -1059,7 +1062,7 @@
<scalaVersion>${scala.version}</scalaVersion>
<checkMultipleScalaVersions>true</checkMultipleScalaVersions>
<failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
<recompileMode>incremental</recompileMode>
<recompileMode>${scala.recompileMode}</recompileMode>
<args>
<arg>-unchecked</arg>
<arg>-deprecation</arg>
@@ -1074,13 +1077,7 @@
<jvmArg>-Xms1024m</jvmArg>
<jvmArg>-Xmx1024m</jvmArg>
</jvmArgs>
<javacArgs>
<javacArg>-source</javacArg>
<javacArg>${maven.compiler.source}</javacArg>
<javacArg>-target</javacArg>
<javacArg>${maven.compiler.target}</javacArg>
<javacArg>-Xlint:all,-serial,-path,-try</javacArg>
</javacArgs>
<addJavacArgs>${scala.javac.args}</addJavacArgs>
</configuration>
</plugin>
<plugin>
10 changes: 2 additions & 8 deletions spark2-sql-plugin/pom.xml
@@ -147,7 +147,7 @@
<scalaVersion>${scala.version}</scalaVersion>
<checkMultipleScalaVersions>true</checkMultipleScalaVersions>
<failOnMultipleScalaVersions>true</failOnMultipleScalaVersions>
<recompileMode>incremental</recompileMode>
<recompileMode>${scala.recompileMode}</recompileMode>
<args>
<arg>-unchecked</arg>
<arg>-deprecation</arg>
@@ -161,13 +161,7 @@
<jvmArg>-Xms1024m</jvmArg>
<jvmArg>-Xmx1024m</jvmArg>
</jvmArgs>
<javacArgs>
<javacArg>-source</javacArg>
<javacArg>${maven.compiler.source}</javacArg>
<javacArg>-target</javacArg>
<javacArg>${maven.compiler.target}</javacArg>
<javacArg>-Xlint:all,-serial,-path,-try</javacArg>
</javacArgs>
<addJavacArgs>${scala.javac.args}</addJavacArgs>
</configuration>
</plugin>
<plugin>
@@ -392,17 +392,17 @@ class HMBSeekableInputStream(
}

private def readHeapBuffer(buf: ByteBuffer) = {
val bytesRead = read(buf.array, buf.arrayOffset + buf.position, buf.remaining)
val bytesRead = read(buf.array, buf.arrayOffset + buf.position(), buf.remaining)
if (bytesRead < 0) {
bytesRead
} else {
buf.position(buf.position + bytesRead)
buf.position(buf.position() + bytesRead)
bytesRead
}
}

private def readFullyHeapBuffer(buf: ByteBuffer): Unit = {
readFully(buf.array, buf.arrayOffset + buf.position, buf.remaining)
readFully(buf.array, buf.arrayOffset + buf.position(), buf.remaining)
buf.position(buf.limit)
}

55 changes: 40 additions & 15 deletions sql-plugin/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala
@@ -21,6 +21,7 @@ import java.net.URL
import org.apache.commons.lang3.reflect.MethodUtils
import scala.annotation.tailrec
import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}

import org.apache.spark.{SPARK_BRANCH, SPARK_BUILD_DATE, SPARK_BUILD_USER, SPARK_REPO_URL, SPARK_REVISION, SPARK_VERSION, SparkConf, SparkEnv}
import org.apache.spark.api.plugin.{DriverPlugin, ExecutorPlugin}
@@ -79,6 +80,7 @@ object ShimLoader extends Logging {
@volatile private var sparkShims: SparkShims = _
@volatile private var shimURL: URL = _
@volatile private var pluginClassLoader: ClassLoader = _
@volatile private var conventionalSingleShimJarDetected: Boolean = _

// REPL-only logic
@volatile private var tmpClassLoader: MutableURLClassLoader = _
@@ -138,6 +140,7 @@
.flatMap {
case env if !env.conf.get("spark.rapids.force.caller.classloader",
true.toString).toBoolean => Option(env.serializer)
case _ if (conventionalSingleShimJarDetected) => None
case _ =>
logInfo("Forcing shim caller classloader update (default behavior). " +
"If it causes issues with userClassPathFirst, set " +
@@ -162,7 +165,9 @@
findURLClassLoader(serdeClassLoader)
}.orElse {
val shimLoaderCallerCl = getClass.getClassLoader
logInfo("Falling back on ShimLoader caller's classloader " + shimLoaderCallerCl)
if (!conventionalSingleShimJarDetected) {
logInfo("Falling back on ShimLoader caller's classloader " + shimLoaderCallerCl)
}
Option(shimLoaderCallerCl)
}
}
@@ -210,21 +215,32 @@
// TODO propose a proper addClassPathURL API to Spark similar to addJar but
// accepting non-file-based URI
serializerClassloader().foreach { urlAddable =>
logInfo(s"Updating spark classloader $urlAddable with the URLs: " +
urlsForSparkClassLoader.mkString(", "))
urlsForSparkClassLoader.foreach { url =>
MethodUtils.invokeMethod(urlAddable, true, "addURL", url)
}
logInfo(s"Spark classLoader $urlAddable updated successfully")
urlAddable match {
case urlCl: java.net.URLClassLoader =>
if (!urlCl.getURLs.contains(shimCommonURL)) {
// infeasible, defensive diagnostics
logWarning(s"Didn't find expected URL $shimCommonURL in the spark " +
s"classloader $urlCl although addURL succeeded, maybe pushed up to the " +
s"parent classloader ${urlCl.getParent}")
if (!conventionalSingleShimJarDetected) {
logInfo(s"Updating spark classloader $urlAddable with the URLs: " +
urlsForSparkClassLoader.mkString(", "))
Try(MethodUtils.invokeMethod(urlAddable, true, "addURL", url))
.recoverWith {
case nsm: NoSuchMethodException =>
logWarning("JDK8+ detected, consider setting " +
"spark.rapids.force.caller.classloader to false as a workaround")
logDebug(s"JDK8+ detected by catching ${nsm}", nsm)
Success(Unit)
case t => Failure(t)
}.get

logInfo(s"Spark classLoader $urlAddable updated successfully")
urlAddable match {
case urlCl: java.net.URLClassLoader =>
if (!urlCl.getURLs.contains(shimCommonURL)) {
// infeasible, defensive diagnostics
logWarning(s"Didn't find expected URL $shimCommonURL in the spark " +
s"classloader $urlCl although addURL succeeded, maybe pushed up to the " +
s"parent classloader ${urlCl.getParent}")
}
case _ => ()
}
case _ => ()
}
}
pluginClassLoader = urlAddable
}
@@ -285,18 +301,27 @@
thisClassLoader.getResources(serviceProviderListPath)
.asScala.map(scala.io.Source.fromURL)
.flatMap(_.getLines())
.toSeq
}

assert(serviceProviderList.nonEmpty, "Classpath should contain the resource for " +
serviceProviderListPath)

val numShimServiceProviders = serviceProviderList.size
val shimServiceProviderOpt = serviceProviderList.flatMap { shimServiceProviderStr =>
val mask = shimIdFromPackageName(shimServiceProviderStr)
try {
val shimURL = new java.net.URL(s"${shimRootURL.toString}$mask/")
val shimClassLoader = new MutableURLClassLoader(Array(shimURL, shimCommonURL),
thisClassLoader)
val shimClass = shimClassLoader.loadClass(shimServiceProviderStr)
val shimClass = Try[java.lang.Class[_]] {
val ret = thisClassLoader.loadClass(shimServiceProviderStr)
if (numShimServiceProviders == 1) {
conventionalSingleShimJarDetected = true
logInfo("Conventional shim jar layout for a single Spark verision detected")
}
ret
}.getOrElse(shimClassLoader.loadClass(shimServiceProviderStr))
Option(
(instantiateClass(shimClass).asInstanceOf[SparkShimServiceProvider], shimURL)
)