Remove support for Spark 3.0.x [databricks] #4988

Merged (5 commits) on Mar 21, 2022
8 changes: 4 additions & 4 deletions api_validation/README.md
@@ -2,7 +2,7 @@

API validation script checks the compatibility of community Spark Execs and GPU Execs in the Rapids Plugin for Spark.
For example: HashAggregateExec with GpuHashAggregateExec.
-Script can be used to audit different versions of Spark(3.0.1 and 3.1.1)
+Script can be used to audit different versions of Spark.
The script prints Execs where validation fails.
Validation fails when:
1) The number of parameters differ between community Spark Execs and Gpu Execs.
@@ -17,11 +17,11 @@ It requires cudf, rapids-4-spark and spark jars.

```
cd api_validation
-// To run validation script on all version of Spark(3.0.1 and 3.1.1)
+// To run validation script on all version of Spark
sh auditAllVersions.sh

-// To run script on particular version we can use profile(spark301 and spark311)
-mvn scala:run -P spark301
+// To run script on particular version we can use profile
+mvn scala:run -P spark311
```

# Output
3 changes: 1 addition & 2 deletions api_validation/auditAllVersions.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,5 +14,4 @@
# limitations under the License.
set -ex

-mvn scala:run -P spark301
mvn scala:run -P spark311
13 changes: 0 additions & 13 deletions api_validation/pom.xml
@@ -41,19 +41,6 @@
</dependency>
</dependencies>
</profile>
-<profile>
-<id>spark301</id>
-<properties>
-<spark.version>${spark301.version}</spark.version>
-</properties>
-<dependencies>
-<dependency>
-<groupId>org.apache.spark</groupId>
-<artifactId>spark-sql_${scala.binary.version}</artifactId>
-<version>${spark.version}</version>
-</dependency>
-</dependencies>
-</profile>
<profile>
<id>spark311</id>
<properties>
ApiValidation.scala
@@ -70,7 +70,7 @@ object ApiValidation extends Logging {
val gpuKeys = gpuExecs.keys
var printNewline = false

-val sparkToShimMap = Map("3.0.1" -> "spark301", "3.1.1" -> "spark311")
+val sparkToShimMap = Map("3.1.1" -> "spark311")
val sparkVersion = SparkShimImpl.getSparkShimVersion.toString
val shimVersion = sparkToShimMap(sparkVersion)

14 changes: 3 additions & 11 deletions build/buildall
@@ -138,10 +138,6 @@

snapshots?(WithDatabricks))
SPARK_SHIM_VERSIONS=(
-301
-302
-303
-304
311
311cdh
312
@@ -156,9 +152,6 @@

noSnapshots?(WithDatabricks))
SPARK_SHIM_VERSIONS=(
-301
-302
-303
311
311cdh
312
@@ -170,7 +163,6 @@

minimumFeatureVersionMix)
SPARK_SHIM_VERSIONS=(
-302
311cdh
312
320
@@ -251,8 +243,8 @@ export -f build_single_shim
# Install all the versions for DIST_PROFILE

# First build the aggregator module for all SPARK_SHIM_VERSIONS in parallel skipping expensive plugins that
-# - either deferred to 301 because the check is identical in all shim profiles such as scalastyle
-# - or deferred to 301 because we currently don't require it per shim such as javadoc generation
+# - either deferred to 311 because the check is identical in all shim profiles such as scalastyle
+# - or deferred to 311 because we currently don't require it per shim such as javadoc generation
# - or there is a dedicated step to run against a particular shim jar such as unit tests, in
# the near future we will run unit tests against a combined multi-shim jar to catch classloading
# regressions even before pytest-based integration_tests
@@ -265,7 +257,7 @@ time (
bash -c 'build_single_shim "$@"' _ %
# This used to resume from dist. However, without including aggregator in the build
# the build does not properly initialize spark.version property via buildver profiles
-# in the root pom, and we get a missing spark301 dependency even for --profile=312,321
+# in the root pom, and we get a missing spark311 dependency even for --profile=312,321
# where the build does not require it. Moving it to aggregator resolves this issue with
# a negligible increase of the build time by ~2 seconds.
joinShimBuildFrom="aggregator"
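For context, the shim lists above drive which Spark builds `build/buildall` compiles in parallel. A minimal usage sketch, assuming the `--profile` values follow the case patterns shown in the script (illustrative only, not part of this change):

```
# Build every remaining non-snapshot shim (311, 311cdh, 312, ...)
./build/buildall --profile=noSnapshots

# Build only a couple of specific shims, as in the --profile=312,321 comment above
./build/buildall --profile=312,321
```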
4 changes: 2 additions & 2 deletions build/coverage-report
@@ -1,7 +1,7 @@
#!/bin/bash

#
-# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -23,7 +23,7 @@ TMP_CLASS=${TEMP_CLASS_LOC:-"./target/jacoco_classes/"}
HTML_LOC=${HTML_LOCATION:="./target/jacoco-report/"}
XML_LOC=${XML_LOCATION:="${HTML_LOC}"}
DIST_JAR=${RAPIDS_DIST_JAR:-$(ls ./dist/target/rapids-4-spark_2.12-*.jar | grep -v test | xargs readlink -f)}
-SPK_VER=${JACOCO_SPARK_VER:-"301"}
+SPK_VER=${JACOCO_SPARK_VER:-"311"}
UDF_JAR=${RAPIDS_UDF_JAR:-$(ls ./udf-compiler/target/spark${SPK_VER}/rapids-4-spark-udf_2.12-*-SNAPSHOT-spark${SPK_VER}.jar | grep -v test | xargs readlink -f)}
SOURCE_DIRS=${SOURCE_DIRS:-"./sql-plugin/src/main/scala/:./sql-plugin/src/main/java/:./shuffle-plugin/src/main/scala/:./udf-compiler/src/main/scala/"}

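The variables above are environment overrides, so a coverage run can still target any built shim explicitly. A minimal sketch, assuming the corresponding shim jars have already been built (illustrative, not part of this change):

```
# Generate a JaCoCo report against the Spark 3.1.2 shim instead of the 311 default
JACOCO_SPARK_VER=312 \
HTML_LOCATION=./target/jacoco-report/ \
./build/coverage-report
```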
10 changes: 5 additions & 5 deletions dist/README.md
@@ -17,22 +17,22 @@ Files are: `com.nvidia.spark.rapids.SparkShimServiceProvider.sparkNonSnapshot`,

The new uber jar is structured like:

-1. Base common classes are user visible classes. For these we use Spark 3.0.1 versions because they are assumed to be
+1. Base common classes are user visible classes. For these we use Spark 3.1.1 versions because they are assumed to be
bitwise-identical to the other shims, this assumption is subject to the future automatic validation.
2. META-INF/services. This is a file that has to list all the shim versions supported by this jar.
The files talked about above for each profile are put into place here for uber jars. Although we currently do not use
[ServiceLoader API](https://docs.oracle.com/javase/8/docs/api/java/util/ServiceLoader.html) we use the same service
provider discovery mechanism
-3. META-INF base files are from 3.0.1 - maven, LICENSE, NOTICE, etc
+3. META-INF base files are from 3.1.1 - maven, LICENSE, NOTICE, etc
4. Spark specific directory (aka Parallel World in the jargon of
[ParallelWorldClassloader](https://github.com/openjdk/jdk/blob/jdk8-b120/jaxws/src/share/jaxws_classes/com/sun/istack/internal/tools/ParallelWorldClassLoader.java))
-for each version of Spark supported in the jar, i.e., spark301/, spark302/, spark311/, etc.
+for each version of Spark supported in the jar, i.e., spark311/, spark312/, spark320/, etc.
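
An illustrative sketch of the layout this describes, with names abbreviated and the directory set assumed rather than exact:

```
rapids-4-spark_2.12-<version>.jar
  com/nvidia/spark/SQLPlugin.class   <- unshimmed, user-visible base classes (Spark 3.1.1 build)
  META-INF/LICENSE, NOTICE, maven/   <- base files taken from the 3.1.1 build
  META-INF/services/                 <- lists the shim versions bundled in this jar
  spark311/                          <- parallel world for Spark 3.1.1
  spark312/
  spark320/
```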

If you have to change the contents of the uber jar the following files control what goes into the base jar as classes that are not shaded.

-1. `unshimmed-common-from-spark301.txt` - this has classes and files that should go into the base jar with their normal
+1. `unshimmed-common-from-spark311.txt` - this has classes and files that should go into the base jar with their normal
package name (not shaded). This includes user visible classes (i.e., com/nvidia/spark/SQLPlugin), python files,
-and other files that aren't version specific. Uses Spark 3.0.1 built jar for these base classes as explained above.
+and other files that aren't version specific. Uses Spark 3.1.1 built jar for these base classes as explained above.
2. `unshimmed-from-each-spark3xx.txt` - This is applied to all the individual Spark specific version jars to pull
any files that need to go into the base of the jar and not into the Spark specific directory.
3. `unshimmed-spark311.txt` - This is applied to all the Spark 3.1.1 specific version jars to pull any files that need to go
4 changes: 2 additions & 2 deletions dist/maven-antrun/build-parallel-worlds.xml
@@ -1,6 +1,6 @@
<?xml version="1.0"?>
<!--
-Copyright (c) 2021, NVIDIA CORPORATION.
+Copyright (c) 2021-2022, NVIDIA CORPORATION.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -107,7 +107,7 @@
<unzip overwrite="false" src="${aggregatorPrefix}-spark@{bv}.jar"
dest="${project.build.directory}/parallel-world">
<patternset id="shared-world-includes">
-<includesfile name="${project.basedir}/unshimmed-common-from-spark301.txt"/>
+<includesfile name="${project.basedir}/unshimmed-common-from-spark311.txt"/>
<includesfile name="${project.basedir}/unshimmed-from-each-spark3xx.txt"/>
<includesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
8 changes: 1 addition & 7 deletions dist/pom.xml
@@ -52,9 +52,6 @@
<properties>
<target.classifier/>
<noSnapshot.buildvers>
-301,
-302,
-303,
311,
311cdh,
312,
@@ -63,13 +60,11 @@
321
</noSnapshot.buildvers>
<snapshot.buildvers>
-304,
314,
322,
330
</snapshot.buildvers>
<databricks.buildvers>
-301db,
312db
</databricks.buildvers>
</properties>
@@ -111,7 +106,6 @@
<id>minimumFeatureVersionMix</id>
<properties>
<included_buildvers>
-302,
312,
320,
311cdh
@@ -324,7 +318,7 @@
<target>
<taskdef resource="net/sf/antcontrib/antcontrib.properties"/>
<ac:if xmlns:ac="antlib:net.sf.antcontrib">
-<equals arg1="spark301" arg2="${spark.version.classifier}"/>
+<equals arg1="spark311" arg2="${spark.version.classifier}"/>
<ac:then>
<java classname="com.nvidia.spark.rapids.RapidsConf" failonerror="true">
<arg value="${project.basedir}/../docs/configs.md"/>
6 changes: 2 additions & 4 deletions docs/FAQ.md
@@ -10,7 +10,7 @@ nav_order: 12

### What versions of Apache Spark does the RAPIDS Accelerator for Apache Spark support?

-The RAPIDS Accelerator for Apache Spark requires version 3.0.1, 3.0.2, 3.0.3, 3.1.1, 3.1.2 or 3.2.0 of
+The RAPIDS Accelerator for Apache Spark requires version 3.1.1, 3.1.2, 3.1.3, 3.2.0, or 3.2.1 of
Apache Spark. Because the plugin replaces parts of the physical plan that Apache Spark considers to
be internal the code for those plans can change even between bug fix releases. As a part of our
process, we try to stay on top of these changes and release updates as quickly as possible.
@@ -265,9 +265,7 @@ Queries on Databricks will not fail but it can not benefit from DPP.

### Is Adaptive Query Execution (AQE) Supported?

-In the 0.2 release, AQE is supported but all exchanges will default to the CPU. As of the 0.3
-release, running on Spark 3.0.1 and higher any operation that is supported on GPU will now stay on
-the GPU when AQE is enabled.
+Any operation that is supported on GPU will stay on the GPU when AQE is enabled.

AQE is not supported on Databricks with the plugin.
If AQE is enabled on Databricks, queries may fail with `StackOverflowError` error.
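For reference outside of Databricks, AQE is controlled by the standard Spark setting, so a minimal sketch of enabling it with the plugin loaded looks like the following (spark-defaults style assumed, not part of this change):

```
spark.plugins                com.nvidia.spark.SQLPlugin
spark.sql.adaptive.enabled   true
```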
13 changes: 0 additions & 13 deletions docs/additional-functionality/rapids-shuffle.md
@@ -287,16 +287,12 @@ In this section, we are using a docker container built using the sample dockerfi

| Spark Shim | spark.shuffle.manager value |
| --------------- | -------------------------------------------------------- |
-| 3.0.1 | com.nvidia.spark.rapids.spark301.RapidsShuffleManager |
-| 3.0.2 | com.nvidia.spark.rapids.spark302.RapidsShuffleManager |
-| 3.0.3 | com.nvidia.spark.rapids.spark303.RapidsShuffleManager |
| 3.1.1 | com.nvidia.spark.rapids.spark311.RapidsShuffleManager |
| 3.1.1 CDH | com.nvidia.spark.rapids.spark311cdh.RapidsShuffleManager |
| 3.1.2 | com.nvidia.spark.rapids.spark312.RapidsShuffleManager |
| 3.1.3 | com.nvidia.spark.rapids.spark313.RapidsShuffleManager |
| 3.2.0 | com.nvidia.spark.rapids.spark320.RapidsShuffleManager |
| 3.2.1 | com.nvidia.spark.rapids.spark321.RapidsShuffleManager |
-| Databricks 7.3 | com.nvidia.spark.rapids.spark301db.RapidsShuffleManager |
| Databricks 9.1 | com.nvidia.spark.rapids.spark312db.RapidsShuffleManager |
| Databricks 10.4 | com.nvidia.spark.rapids.spark321db.RapidsShuffleManager |
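
As a usage sketch (value taken from the table above), a cluster on Apache Spark 3.1.1 would select the matching shim's shuffle manager in its Spark configuration:

```
spark.shuffle.manager com.nvidia.spark.rapids.spark311.RapidsShuffleManager
```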

@@ -369,15 +365,6 @@ spark.executorEnv.UCX_ERROR_SIGNALS ""
spark.shuffle.manager com.nvidia.spark.rapids.spark312db.RapidsShuffleManager
```

-Databricks 7.3:
-
-```
-spark.shuffle.service.enabled false
-spark.executorEnv.UCX_MEMTYPE_CACHE n
-spark.executorEnv.UCX_ERROR_SIGNALS ""
-spark.shuffle.manager com.nvidia.spark.rapids.spark301db.RapidsShuffleManager
-```

Example of configuration panel with the new settings:

![Configurations with UCX](../img/Databricks/sparkconfig_ucx.png)
4 changes: 2 additions & 2 deletions docs/configs.md
@@ -189,7 +189,7 @@ Name | SQL Function(s) | Description | Default Value | Notes
<a name="sql.expression.DayOfYear"></a>spark.rapids.sql.expression.DayOfYear|`dayofyear`|Returns the day of the year from a date or timestamp|true|None|
<a name="sql.expression.DenseRank"></a>spark.rapids.sql.expression.DenseRank|`dense_rank`|Window function that returns the dense rank value within the aggregation window|true|None|
<a name="sql.expression.Divide"></a>spark.rapids.sql.expression.Divide|`/`|Division|true|None|
-<a name="sql.expression.ElementAt"></a>spark.rapids.sql.expression.ElementAt|`element_at`|Returns element of array at given(1-based) index in value if column is array. Returns value for the given key in value if column is map|true|None|
+<a name="sql.expression.ElementAt"></a>spark.rapids.sql.expression.ElementAt|`element_at`|Returns element of array at given(1-based) index in value if column is array. Returns value for the given key in value if column is map.|true|None|
<a name="sql.expression.EndsWith"></a>spark.rapids.sql.expression.EndsWith| |Ends with|true|None|
<a name="sql.expression.EqualNullSafe"></a>spark.rapids.sql.expression.EqualNullSafe|`<=>`|Check if the values are equal including nulls <=>|true|None|
<a name="sql.expression.EqualTo"></a>spark.rapids.sql.expression.EqualTo|`=`, `==`|Check if the values are equal|true|None|
@@ -303,7 +303,6 @@ Name | SQL Function(s) | Description | Default Value | Notes
<a name="sql.expression.Tan"></a>spark.rapids.sql.expression.Tan|`tan`|Tangent|true|None|
<a name="sql.expression.Tanh"></a>spark.rapids.sql.expression.Tanh|`tanh`|Hyperbolic tangent|true|None|
<a name="sql.expression.TimeAdd"></a>spark.rapids.sql.expression.TimeAdd| |Adds interval to timestamp|true|None|
-<a name="sql.expression.TimeSub"></a>spark.rapids.sql.expression.TimeSub| |Subtracts interval from timestamp|true|None|
<a name="sql.expression.ToDegrees"></a>spark.rapids.sql.expression.ToDegrees|`degrees`|Converts radians to degrees|true|None|
<a name="sql.expression.ToRadians"></a>spark.rapids.sql.expression.ToRadians|`radians`|Converts degrees to radians|true|None|
<a name="sql.expression.ToUnixTimestamp"></a>spark.rapids.sql.expression.ToUnixTimestamp|`to_unix_timestamp`|Returns the UNIX timestamp of the given time|true|None|
@@ -364,6 +363,7 @@ Name | Description | Default Value | Notes
<a name="sql.exec.HashAggregateExec"></a>spark.rapids.sql.exec.HashAggregateExec|The backend for hash based aggregations|true|None|
<a name="sql.exec.ObjectHashAggregateExec"></a>spark.rapids.sql.exec.ObjectHashAggregateExec|The backend for hash based aggregations supporting TypedImperativeAggregate functions|true|None|
<a name="sql.exec.SortAggregateExec"></a>spark.rapids.sql.exec.SortAggregateExec|The backend for sort based aggregations|true|None|
+<a name="sql.exec.InMemoryTableScanExec"></a>spark.rapids.sql.exec.InMemoryTableScanExec|Implementation of InMemoryTableScanExec to use GPU accelerated Caching|true|None|
<a name="sql.exec.DataWritingCommandExec"></a>spark.rapids.sql.exec.DataWritingCommandExec|Writing data|true|None|
<a name="sql.exec.BatchScanExec"></a>spark.rapids.sql.exec.BatchScanExec|The backend for most file input|true|None|
<a name="sql.exec.BroadcastExchangeExec"></a>spark.rapids.sql.exec.BroadcastExchangeExec|The backend for broadcast exchange of data|true|None|
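Each entry above is a runtime configuration key with a boolean default, so individual expressions or execs can be toggled at submit time. A sketch using two of the keys listed above (values illustrative, not part of this change):

```
--conf spark.rapids.sql.expression.ElementAt=false
--conf spark.rapids.sql.exec.InMemoryTableScanExec=false
```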
6 changes: 3 additions & 3 deletions docs/get-started/getting-started-databricks.md
@@ -11,9 +11,9 @@ At the end of this guide, the reader will be able to run a sample Apache Spark a
on NVIDIA GPUs on Databricks.

## Prerequisites
-* Apache Spark 3.x running in Databricks Runtime 7.3 ML or 9.1 ML with GPU
-* AWS: 7.3 LTS ML (GPU, Scala 2.12, Spark 3.0.1) or 9.1 LTS ML (GPU, Scala 2.12, Spark 3.1.2)
-* Azure: 7.3 LTS ML (GPU, Scala 2.12, Spark 3.0.1) or 9.1 LTS ML (GPU, Scala 2.12, Spark 3.1.2)
+* Apache Spark 3.x running in Databricks Runtime 9.1 ML or 10.4 ML with GPU
+* AWS: 9.1 LTS ML (GPU, Scala 2.12, Spark 3.1.2) or 10.4 LTS ML (GPU, Scala 2.12, Spark 3.2.1)
+* Azure: 9.1 LTS ML (GPU, Scala 2.12, Spark 3.1.2) or 10.4 LTS ML (GPU, Scala 2.12, Spark 3.2.1)

Databricks may do [maintenance
releases](https://docs.databricks.com/release-notes/runtime/maintenance-updates.html) for their