From c6cb915249ed104c9fd518581d4b7e576fcfb998 Mon Sep 17 00:00:00 2001
From: Sameer Raheja
Date: Thu, 18 Feb 2021 22:24:01 -0800
Subject: [PATCH 1/3] Documentation updates for 0.4 release

Signed-off-by: Sameer Raheja
---
 docs/additional-functionality/rapids-udfs.md | 2 +-
 docs/benchmarks.md | 2 +-
 docs/configs.md | 2 +-
 .../Databricks/generate-init-script.ipynb | 2 +-
 docs/dev/testing.md | 4 +-
 docs/download.md | 46 +++++++++++++++++++
 docs/get-started/Dockerfile.cuda | 4 +-
 docs/get-started/getting-started-gcp.md | 6 +--
 docs/get-started/getting-started-on-prem.md | 10 ++--
 .../com/nvidia/spark/rapids/RapidsConf.scala | 2 +-
 10 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/docs/additional-functionality/rapids-udfs.md b/docs/additional-functionality/rapids-udfs.md
index f4d5d3f1a1b..4eaf88e902e 100644
--- a/docs/additional-functionality/rapids-udfs.md
+++ b/docs/additional-functionality/rapids-udfs.md
@@ -12,7 +12,7 @@ RAPIDS Accelerator to perform the user-defined operation on the GPU.
 
 Note that there are other potential solutions to performing user-defined
 operations on the GPU. See the
-[Frequently Asked Questions entry](FAQ.md#how-can-i-run-custom-expressionsudfs-on-the-gpu)
+[Frequently Asked Questions entry](../FAQ.md#how-can-i-run-custom-expressionsudfs-on-the-gpu)
 on UDFs for more details.
 
 ## UDF Obstacles To Query Acceleration
diff --git a/docs/benchmarks.md b/docs/benchmarks.md
index 2d4dad9c96b..6e52e722f24 100644
--- a/docs/benchmarks.md
+++ b/docs/benchmarks.md
@@ -38,7 +38,7 @@ benchmark.
 
 The integration test jar needs to be added to the `--jars` configuration option when launching the
 Spark shell. This jar can be found in the `integration_tests/target` directory after running
-`mvn package`, with a filename matching `rapids-4-spark-integration-tests_2.12-*-SNAPSHOT.jar`.
+`mvn package`, with a filename matching `rapids-4-spark-integration-tests_2.12-0.4.0.jar`.
 
 To run benchmarks on the GPU, the RAPIDS Accelerator for Apache Spark must also be installed, following
 the instructions provided in the [Getting Started](get-started/getting-started.md) guide.
diff --git a/docs/configs.md b/docs/configs.md
index af91974e42d..5b208acb03c 100644
--- a/docs/configs.md
+++ b/docs/configs.md
@@ -10,7 +10,7 @@ The following is the list of options that `rapids-plugin-4-spark` supports.
 
 On startup use: `--conf [conf key]=[conf value]`. For example:
 
 ```
-${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar,cudf-0.18-SNAPSHOT-cuda10-1.jar' \
+${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar' \
 --conf spark.plugins=com.nvidia.spark.SQLPlugin \
 --conf spark.rapids.sql.incompatibleOps.enabled=true
 ```
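Most `spark.rapids.*` options can also be changed after startup through the standard `spark.conf` interface. A minimal sketch of that form, assuming a `spark-shell` session launched with the plugin and cuDF jars shown above, where `spark` is the session the shell provides:

```scala
// Sketch only: assumes the RAPIDS Accelerator is already on the classpath and
// was activated via spark.plugins=com.nvidia.spark.SQLPlugin at startup.
spark.conf.set("spark.rapids.sql.enabled", "true")
spark.conf.set("spark.rapids.sql.incompatibleOps.enabled", "true")

// Settings can be read back the same way.
println(spark.conf.get("spark.rapids.sql.enabled"))
```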
diff --git a/docs/demo/Databricks/generate-init-script.ipynb b/docs/demo/Databricks/generate-init-script.ipynb
index 829ea411b4a..ea79022702f 100644
--- a/docs/demo/Databricks/generate-init-script.ipynb
+++ b/docs/demo/Databricks/generate-init-script.ipynb
@@ -1 +1 @@
-{"cells":[{"cell_type":"code","source":["dbutils.fs.mkdirs(\"dbfs:/databricks/init_scripts/\")\n \ndbutils.fs.put(\"/databricks/init_scripts/init.sh\",\"\"\"\n#!/bin/bash\nsudo wget -O /databricks/jars/rapids-4-spark_2.12-0.3.0.jar https://oss.sonatype.org/content/repositories/staging/com/nvidia/rapids-4-spark_2.12/0.3.0/rapids-4-spark_2.12-0.3.0.jar\nsudo wget -O /databricks/jars/cudf-0.17-cuda10-1.jar https://oss.sonatype.org/content/repositories/staging/ai/rapids/cudf/0.17/cudf-0.17-cuda10-1.jar\"\"\", True)"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["%sh\ncd ../../dbfs/databricks/init_scripts\npwd\nls -ltr\ncat init.sh"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":3}],"metadata":{"name":"generate-init-script","notebookId":2645746662301564},"nbformat":4,"nbformat_minor":0}
+{"cells":[{"cell_type":"code","source":["dbutils.fs.mkdirs(\"dbfs:/databricks/init_scripts/\")\n \ndbutils.fs.put(\"/databricks/init_scripts/init.sh\",\"\"\"\n#!/bin/bash\nsudo wget -O /databricks/jars/rapids-4-spark_2.12-0.4.0.jar https://oss.sonatype.org/content/repositories/staging/com/nvidia/rapids-4-spark_2.12/0.4.0/rapids-4-spark_2.12-0.4.0.jar\nsudo wget -O /databricks/jars/cudf-0.18-cuda10-1.jar https://oss.sonatype.org/content/repositories/staging/ai/rapids/cudf/0.18/cudf-0.18-cuda10-1.jar\"\"\", True)"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["%sh\ncd ../../dbfs/databricks/init_scripts\npwd\nls -ltr\ncat init.sh"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":3}],"metadata":{"name":"generate-init-script","notebookId":2645746662301564},"nbformat":4,"nbformat_minor":0}
diff --git a/docs/dev/testing.md b/docs/dev/testing.md
index 4b8456a9d2b..061d342ee35 100644
--- a/docs/dev/testing.md
+++ b/docs/dev/testing.md
@@ -5,5 +5,5 @@ nav_order: 1
 parent: Developer Overview
 ---
 An overview of testing can be found within the repository at:
-* [Unit tests](https://github.com/NVIDIA/spark-rapids/tree/branch-0.3/tests)
-* [Integration testing](https://github.com/NVIDIA/spark-rapids/tree/branch-0.3/integration_tests)
+* [Unit tests](https://github.com/NVIDIA/spark-rapids/tree/branch-0.4/tests)
+* [Integration testing](https://github.com/NVIDIA/spark-rapids/tree/main/integration_tests)
diff --git a/docs/download.md b/docs/download.md
index 20e082b4316..7f215af7a43 100644
--- a/docs/download.md
+++ b/docs/download.md
@@ -4,6 +4,52 @@ title: Download
 nav_order: 3
 ---
 
+## Release v0.4.0
+
+New functionality for the release includes
+* Decimal support up to 64 bit, including reading and writing decimal from Parquet
+* Ability to call native CUDA or cudf functions from Scala, Java or Hive UDFs
+* Shuffle and sort support for `struct` data types
+* `array_contains` for list operations
+* `collect_list` and `average` for windowing operations
+* Murmur3 `hash` operation
+* Reading a v2 datasource when the data being read is an `ArrowColumnVector`
+
+This release includes additional performance improvements, including
+* RAPIDS Shuffle with UCX performance improvements
+* Instructions on how to use [Alluxio caching](get-started/getting-started-alluxio.md) with Spark to
+  leverage caching.
+
+The release is supported on Apache Spark 3.0.0, 3.0.1, 3.1.1, Databricks 7.3 ML LTS and Google Cloud
+Platform Dataproc 2.0.
+
+The list of all supported operations is provided [here](supported_ops.md).
+
+For a detailed list of changes, please refer to the
+[CHANGELOG](https://github.com/NVIDIA/spark-rapids/blob/main/CHANGELOG.md).
+
+Hardware Requirements:
+
+    GPU Architecture: NVIDIA Pascal™ or better (Tested on V100, T4 and A100 GPU)
+
+Software Requirements:
+
+    OS: Ubuntu 16.04, Ubuntu 18.04 or CentOS 7
+
+    CUDA & Nvidia Drivers: 10.1.2 & v418.87+, 10.2 & v440.33+ or 11.0 & v450.36+
+
+    Apache Spark 3.0, 3.0.1, 3.1.1, Databricks 7.3 ML LTS Runtime, or GCP Dataproc 2.0
+
+    Apache Hadoop 2.10+ or 3.1.1+ (3.1.1 for nvidia-docker version 2)
+
+    Python 3.6+, Scala 2.12, Java 8
+
+### Download v0.4.0
+* [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/0.4.0/rapids-4-spark_2.12-0.4.0.jar)
+* [cuDF 11.0 Package](https://repo1.maven.org/maven2/ai/rapids/cudf/0.18/cudf-0.18-cuda11.jar)
+* [cuDF 10.2 Package](https://repo1.maven.org/maven2/ai/rapids/cudf/0.18/cudf-0.18-cuda10-2.jar)
+* [cuDF 10.1 Package](https://repo1.maven.org/maven2/ai/rapids/cudf/0.18/cudf-0.18-cuda10-1.jar)
+
 ## Release v0.3.0
 
 This release includes additional performance improvements, including
 * Use of per thread default stream to make more efficient use of the GPU
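As an illustration of the query shapes the new 0.4 functionality covers, the sketch below exercises `array_contains`, the Murmur3 `hash` function, and windowed `collect_list`/`average`. It assumes a session named `spark` with the plugin enabled; the data is made up for the example:

```scala
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

// Toy data, used only to exercise the newly supported expressions.
import spark.implicits._
val df = Seq((1, "a", Seq(1, 2)), (1, "b", Seq(3)), (2, "c", Seq(2, 4)))
  .toDF("id", "name", "vals")

val byId = Window.partitionBy("id")
df.select(
  array_contains($"vals", 2).as("has_two"),      // list operation
  hash($"id", $"name").as("murmur3"),            // Murmur3 hash
  collect_list($"name").over(byId).as("names"),  // windowed collect_list
  avg($"id").over(byId).as("avg_id")             // windowed average
).show()
```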
diff --git a/docs/get-started/Dockerfile.cuda b/docs/get-started/Dockerfile.cuda
index d503b455456..f1c504baaf6 100644
--- a/docs/get-started/Dockerfile.cuda
+++ b/docs/get-started/Dockerfile.cuda
@@ -53,8 +53,8 @@ COPY spark-3.0.1-bin-hadoop3.2/examples /opt/spark/examples
 COPY spark-3.0.1-bin-hadoop3.2/kubernetes/tests /opt/spark/tests
 COPY spark-3.0.1-bin-hadoop3.2/data /opt/spark/data
 
-COPY cudf-0.18-SNAPSHOT-cuda10-1.jar /opt/sparkRapidsPlugin
-COPY rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar /opt/sparkRapidsPlugin
+COPY cudf-0.18-cuda10-1.jar /opt/sparkRapidsPlugin
+COPY rapids-4-spark_2.12-0.4.0.jar /opt/sparkRapidsPlugin
 COPY getGpusResources.sh /opt/sparkRapidsPlugin
 
 RUN mkdir /opt/spark/python
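Once an image built from this Dockerfile (or any cluster carrying the same jars) is running, a quick sanity check is to look for GPU operators in a physical plan. Operator names such as `GpuHashAggregate` are what the plugin typically reports, though exact plan output varies by version, so treat this as a hedged sketch:

```scala
// With spark.plugins=com.nvidia.spark.SQLPlugin active, supported operations show
// up as Gpu* operators in the physical plan; plain CPU operator names mean that
// part of the query stayed on the CPU.
val counts = spark.range(0, 1000000L)
  .selectExpr("id % 10 AS key")
  .groupBy("key")
  .count()

counts.explain()  // expect e.g. GpuHashAggregate rather than HashAggregate
```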
diff --git a/docs/get-started/getting-started-gcp.md b/docs/get-started/getting-started-gcp.md
index 2f07d9dfdec..13e652e3df6 100644
--- a/docs/get-started/getting-started-gcp.md
+++ b/docs/get-started/getting-started-gcp.md
@@ -33,7 +33,7 @@ gcloud services enable storage-api.googleapis.com
 
 After the command line environment is setup, log in to your GCP account. You can now create a
 Dataproc cluster with the configuration shown below. The configuration will allow users to run any
-of the [notebook demos](https://github.com/NVIDIA/spark-rapids/tree/branch-0.2/docs/demo/GCP) on
+of the [notebook demos](https://github.com/NVIDIA/spark-rapids/tree/main/docs/demo/GCP) on
 GCP. Alternatively, users can also start 2*2T4 worker nodes.
 
 The script below will initialize with the following:
@@ -60,7 +60,7 @@ The script below will initialize with the following:
 gcloud dataproc clusters create $CLUSTER_NAME \
     --region $REGION \
-    --image-version=preview-ubuntu18 \
+    --image-version=2.0-ubuntu18 \
     --master-machine-type n1-standard-16 \
     --num-workers $NUM_WORKERS \
     --worker-accelerator type=nvidia-tesla-t4,count=$NUM_GPUS \
@@ -96,7 +96,7 @@ configuration the first stage should take ~110 seconds (1/3 of CPU execution tim
 and the second stage takes ~170 seconds (1/7 of CPU execution time with same config). The notebook
 depends on the pre-compiled [Spark RAPIDS SQL
 plugin](https://mvnrepository.com/artifact/com.nvidia/rapids-4-spark) and
-[cuDF](https://mvnrepository.com/artifact/ai.rapids/cudf/0.15), which are pre-downloaded by the GCP
+[cuDF](https://mvnrepository.com/artifact/ai.rapids/cudf), which are pre-downloaded by the GCP
 Dataproc [RAPIDS init
 script](https://github.com/GoogleCloudDataproc/initialization-actions/tree/master/rapids).
diff --git a/docs/get-started/getting-started-on-prem.md b/docs/get-started/getting-started-on-prem.md
index 0cca91095be..ae6277d175e 100644
--- a/docs/get-started/getting-started-on-prem.md
+++ b/docs/get-started/getting-started-on-prem.md
@@ -55,16 +55,16 @@ CUDA and will not run on other versions. The jars use a maven classifier to keep
 - CUDA 11.0 => classifier cuda11
 
 For example, here is a sample version of the jars and cudf with CUDA 10.1 support:
-- cudf-0.18-SNAPSHOT-cuda10-1.jar
-- rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar
+- cudf-0.18-cuda10-1.jar
+- rapids-4-spark_2.12-0.4.0.jar
 
 For simplicity export the location to these jars. This example assumes the sample jars above have
 been placed in the `/opt/sparkRapidsPlugin` directory:
 ```shell
 export SPARK_RAPIDS_DIR=/opt/sparkRapidsPlugin
-export SPARK_CUDF_JAR=${SPARK_RAPIDS_DIR}/cudf-0.18-SNAPSHOT-cuda10-1.jar
-export SPARK_RAPIDS_PLUGIN_JAR=${SPARK_RAPIDS_DIR}/rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar
+export SPARK_CUDF_JAR=${SPARK_RAPIDS_DIR}/cudf-0.18-cuda10-1.jar
+export SPARK_RAPIDS_PLUGIN_JAR=${SPARK_RAPIDS_DIR}/rapids-4-spark_2.12-0.4.0.jar
 ```
 
 ## Install the GPU Discovery Script
@@ -526,7 +526,7 @@ To enable _GPU Scheduling for Pandas UDF_, you need to configure your spark job
 On Standalone, you need to add
 ```shell
 ...
-  --conf spark.executorEnv.PYTHONPATH=rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar \
+  --conf spark.executorEnv.PYTHONPATH=rapids-4-spark_2.12-0.4.0.jar \
   --py-files ${SPARK_RAPIDS_PLUGIN_JAR}
 ```
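The GPU scheduling pieces configured in this guide (the discovery script plus the `spark.executor.resource.gpu.*` settings) can be sanity-checked from inside a job using Spark's resource API. A sketch, assuming executors were actually granted a `gpu` resource:

```scala
import org.apache.spark.TaskContext

// Runs a single task and prints the GPU address(es) Spark assigned to it.
// Requires the executor GPU resource configuration described above.
spark.range(1).foreachPartition { _: Iterator[java.lang.Long] =>
  val gpu = Option(TaskContext.get()).flatMap(_.resources().get("gpu"))
  println(s"gpu addresses: ${gpu.map(_.addresses.mkString(",")).getOrElse("none")}")
}
```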
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
index b5d09383c41..4edd122316a 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
@@ -916,7 +916,7 @@ object RapidsConf {
 |On startup use: `--conf [conf key]=[conf value]`. For example:
 |
 |```
-|${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar,cudf-0.18-SNAPSHOT-cuda10-1.jar' \
+|${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar' \
 |--conf spark.plugins=com.nvidia.spark.SQLPlugin \
 |--conf spark.rapids.sql.incompatibleOps.enabled=true
 |```

From d6ba43ce66e7cf3850a6cca8354368ade86d3451 Mon Sep 17 00:00:00 2001
From: Sameer Raheja
Date: Thu, 18 Feb 2021 22:32:57 -0800
Subject: [PATCH 2/3] Update link to point to 0.4 branch

Signed-off-by: Sameer Raheja
---
 docs/dev/testing.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/dev/testing.md b/docs/dev/testing.md
index 061d342ee35..1e499616443 100644
--- a/docs/dev/testing.md
+++ b/docs/dev/testing.md
@@ -6,4 +6,4 @@ parent: Developer Overview
 ---
 An overview of testing can be found within the repository at:
 * [Unit tests](https://github.com/NVIDIA/spark-rapids/tree/branch-0.4/tests)
-* [Integration testing](https://github.com/NVIDIA/spark-rapids/tree/main/integration_tests)
+* [Integration testing](https://github.com/NVIDIA/spark-rapids/tree/branch-0.4/integration_tests)

From ae84f41a1b5580549b473435bd5ea1baaa3cc861 Mon Sep 17 00:00:00 2001
From: Sameer Raheja
Date: Fri, 19 Feb 2021 13:06:11 -0800
Subject: [PATCH 3/3] Updated comment on decimal support, noted support for 3.0.2

Signed-off-by: Sameer Raheja
---
 docs/configs.md | 2 +-
 docs/download.md | 9 +++++----
 .../main/scala/com/nvidia/spark/rapids/RapidsConf.scala | 5 ++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/configs.md b/docs/configs.md
index 5b208acb03c..7dca31841c9 100644
--- a/docs/configs.md
+++ b/docs/configs.md
@@ -57,7 +57,7 @@ Name | Description | Default Value
 spark.rapids.sql.castStringToTimestamp.enabled|When set to true, casting from string to timestamp is supported on the GPU. The GPU only supports a subset of formats when casting strings to timestamps. Refer to the CAST documentation for more details.|false
 spark.rapids.sql.concurrentGpuTasks|Set the number of tasks that can execute concurrently per GPU. Tasks may temporarily block when the number of concurrent tasks in the executor exceeds this amount. Allowing too many concurrent tasks on the same GPU may lead to GPU out of memory errors.|1
 spark.rapids.sql.csvTimestamps.enabled|When set to true, enables the CSV parser to read timestamps. The default output format for Spark includes a timezone at the end. Anything except the UTC timezone is not supported. Timestamps after 2038 and before 1902 are also not supported.|false
-spark.rapids.sql.decimalType.enabled|Enable decimal type support on the GPU. Decimal support on the GPU is limited to less than 18 digits and is only supported by a small number of operations currently. This can result in a lot of data movement to and from the GPU, which can slow down processing in some cases.|false
+spark.rapids.sql.decimalType.enabled|Enable decimal type support on the GPU. Decimal support on the GPU is limited to less than 18 digits. This can result in a lot of data movement to and from the GPU, which can slow down processing in some cases.|false
 spark.rapids.sql.enabled|Enable (true) or disable (false) sql operations on the GPU|true
 spark.rapids.sql.explain|Explain why some parts of a query were not placed on a GPU or not. Possible values are ALL: print everything, NONE: print nothing, NOT_ON_GPU: print only parts of a query that did not go on the GPU|NONE
 spark.rapids.sql.format.csv.enabled|When set to false disables all csv input and output acceleration. (only input is currently supported anyways)|true
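The `spark.rapids.sql.explain` entry above is the usual way to find out why part of a query stayed on the CPU. A sketch, assuming an active session with the plugin: string-to-timestamp casts are GPU-enabled only behind `spark.rapids.sql.castStringToTimestamp.enabled`, so with the defaults the cast below should be reported as not placed on the GPU:

```scala
// Log only the parts of each plan that could not be placed on the GPU.
spark.conf.set("spark.rapids.sql.explain", "NOT_ON_GPU")

// With castStringToTimestamp disabled (the default), the plugin should emit
// "cannot run on GPU"-style notes for this expression in the driver output.
spark.range(1)
  .selectExpr("CAST(CAST(id AS STRING) AS TIMESTAMP) AS ts")
  .collect()
```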
diff --git a/docs/download.md b/docs/download.md
index 7f215af7a43..cae33496dae 100644
--- a/docs/download.md
+++ b/docs/download.md
@@ -7,13 +7,14 @@ nav_order: 3
 ## Release v0.4.0
 
 New functionality for the release includes
-* Decimal support up to 64 bit, including reading and writing decimal from Parquet
-* Ability to call native CUDA or cudf functions from Scala, Java or Hive UDFs
+* Decimal support up to 64 bit, including reading and writing decimal from Parquet (can be enabled
+  by setting `spark.rapids.sql.decimalType.enabled` to true)
+* Ability for users to provide GPU versions of Scala, Java or Hive UDFs
 * Shuffle and sort support for `struct` data types
 * `array_contains` for list operations
 * `collect_list` and `average` for windowing operations
 * Murmur3 `hash` operation
-* Reading a v2 datasource when the data being read is an `ArrowColumnVector`
+* Improved performance when reading from a DataSource v2 source that produces data in the Arrow format
 
 This release includes additional performance improvements, including
 * RAPIDS Shuffle with UCX performance improvements
@@ -38,7 +39,7 @@ Software Requirements:
 
     CUDA & Nvidia Drivers: 10.1.2 & v418.87+, 10.2 & v440.33+ or 11.0 & v450.36+
 
-    Apache Spark 3.0, 3.0.1, 3.1.1, Databricks 7.3 ML LTS Runtime, or GCP Dataproc 2.0
+    Apache Spark 3.0, 3.0.1, 3.0.2, 3.1.1, Databricks 7.3 ML LTS Runtime, or GCP Dataproc 2.0
 
     Apache Hadoop 2.10+ or 3.1.1+ (3.1.1 for nvidia-docker version 2)
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
index 4edd122316a..c1870d8ddf6 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
@@ -505,9 +505,8 @@ object RapidsConf {
 
   val DECIMAL_TYPE_ENABLED = conf("spark.rapids.sql.decimalType.enabled")
     .doc("Enable decimal type support on the GPU. Decimal support on the GPU is limited to " +
-      "less than 18 digits and is only supported by a small number of operations currently. " +
-      "This can result in a lot of data movement to and from the GPU, which can slow down " +
-      "processing in some cases.")
+      "less than 18 digits. This can result in a lot of data movement to and from the GPU, " +
+      "which can slow down processing in some cases.")
     .booleanConf
     .createWithDefault(false)
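As a closing sketch of the decimal support this patch documents: the flag is off by default, and the updated `.doc` text limits it to decimals below 18 digits of precision. The column names and output path are illustrative only:

```scala
// Enable the off-by-default decimal support described above.
spark.conf.set("spark.rapids.sql.decimalType.enabled", "true")

// Reading and writing decimal columns from Parquet is part of the new support;
// DECIMAL(10,2) stays well under the documented 18-digit limit.
val sales = spark.range(100)
  .selectExpr("id % 7 AS store", "CAST(id AS DECIMAL(10,2)) AS amount")

sales.write.mode("overwrite").parquet("/tmp/decimal-demo")  // illustrative path
spark.read.parquet("/tmp/decimal-demo").filter("amount > 50").show()
```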