From 82fdb6ee9746ff3e5b1374bf41a87af1ec6bd0bd Mon Sep 17 00:00:00 2001
From: Tim Liu
Date: Mon, 1 Mar 2021 00:58:03 +0800
Subject: [PATCH] Cleanup unused Jenkins files and scripts

Fix issue: https://github.com/NVIDIA/spark-rapids/issues/1568

Move Databricks scripts to GitLab so we can use the common scripts for
the nightly build job and the integration tests job

Remove unused Dockerfiles

Signed-off-by: Tim Liu
---
 jenkins/Dockerfile.integration.ubuntu16 |  36 ------
 jenkins/Dockerfile.ubuntu16             |  39 ------
 jenkins/databricks/build.sh             | 133 ---------------------
 jenkins/databricks/clusterutils.py      | 151 ------------------------
 jenkins/databricks/create.py            |  96 ---------------
 jenkins/databricks/deploy.sh            |  36 ------
 jenkins/databricks/run-tests.py         | 102 ----------------
 jenkins/databricks/shutdown.py          |  64 ----------
 8 files changed, 657 deletions(-)
 delete mode 100644 jenkins/Dockerfile.integration.ubuntu16
 delete mode 100644 jenkins/Dockerfile.ubuntu16
 delete mode 100755 jenkins/databricks/build.sh
 delete mode 100644 jenkins/databricks/clusterutils.py
 delete mode 100644 jenkins/databricks/create.py
 delete mode 100755 jenkins/databricks/deploy.sh
 delete mode 100644 jenkins/databricks/run-tests.py
 delete mode 100644 jenkins/databricks/shutdown.py

diff --git a/jenkins/Dockerfile.integration.ubuntu16 b/jenkins/Dockerfile.integration.ubuntu16
deleted file mode 100644
index 4027aa8fa77a..000000000000
--- a/jenkins/Dockerfile.integration.ubuntu16
+++ /dev/null
@@ -1,36 +0,0 @@
-#
-# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-###
-#
-# Arguments: CUDA_VER=10.1 or 10.2
-#
-###
-ARG CUDA_VER=10.1
-
-FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu16.04
-
-#Install java-8, maven, docker image
-RUN apt-get update -y && \
-    apt-get install -y software-properties-common
-RUN add-apt-repository ppa:deadsnakes/ppa && \
-    apt-get update -y && \
-    apt-get install -y maven \
-    openjdk-8-jdk python3.8 python3.8-distutils python3-setuptools
-RUN python3.8 -m easy_install pip
-
-RUN ln -s /usr/bin/python3.8 /usr/bin/python
-RUN python -m pip install pytest sre_yield
diff --git a/jenkins/Dockerfile.ubuntu16 b/jenkins/Dockerfile.ubuntu16
deleted file mode 100644
index b6915a9fd6fa..000000000000
--- a/jenkins/Dockerfile.ubuntu16
+++ /dev/null
@@ -1,39 +0,0 @@
-#
-# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-###
-#
-# Build the image for rapids-plugin development environment
-#
-# Arguments: CUDA_VER=10.1 or 10.2
-#
-###
-
-ARG CUDA_VER=10.1
-
-FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu16.04
-
-#Install java-8, maven, docker image
-RUN apt-get update -y && \
-    apt-get install -y software-properties-common
-RUN add-apt-repository ppa:deadsnakes/ppa && \
-    apt-get update -y && \
-    apt-get install -y maven \
-    openjdk-8-jdk python3.8 python3.8-distutils python3-setuptools tzdata git
-RUN python3.8 -m easy_install pip
-
-RUN ln -s /usr/bin/python3.8 /usr/bin/python
-RUN python -m pip install pytest sre_yield requests pandas pyarrow
diff --git a/jenkins/databricks/build.sh b/jenkins/databricks/build.sh
deleted file mode 100755
index c6c9bdabdd10..000000000000
--- a/jenkins/databricks/build.sh
+++ /dev/null
@@ -1,133 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-set -e
-
-SPARKSRCTGZ=$1
-# version of Apache Spark we are building against
-BASE_SPARK_VERSION=$2
-BUILD_PROFILES=$3
-
-echo "tgz is $SPARKSRCTGZ"
-echo "Base Spark version is $BASE_SPARK_VERSION"
-echo "build profiles $BUILD_PROFILES"
-
-sudo apt install -y maven
-
-# this has to match the Databricks init script
-DB_JAR_LOC=/databricks/jars/
-
-rm -rf spark-rapids
-mkdir spark-rapids
-echo "tar -zxvf $SPARKSRCTGZ -C spark-rapids"
-tar -zxvf $SPARKSRCTGZ -C spark-rapids
-cd spark-rapids
-export WORKSPACE=`pwd`
-
-SPARK_PLUGIN_JAR_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=project.version -DforceStdout`
-CUDF_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=cudf.version -DforceStdout`
-SCALA_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=scala.binary.version -DforceStdout`
-CUDA_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=cuda.version -DforceStdout`
-
-# the version of spark used when we install the databricks jars in .m2
-SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS=$BASE_SPARK_VERSION-databricks
-RAPIDS_BUILT_JAR=rapids-4-spark_$SCALA_VERSION-$SPARK_PLUGIN_JAR_VERSION.jar
-RAPIDS_UDF_JAR=rapids-4-spark-udf-examples_$SCALA_VERSION-$SPARK_PLUGIN_JAR_VERSION.jar
-
-echo "Scala version is: $SCALA_VERSION"
-mvn -B -P${BUILD_PROFILES} clean package -DskipTests || true
-# export 'M2DIR' so that shims can get the correct cudf/spark dependnecy info
-export M2DIR=/home/ubuntu/.m2/repository
-CUDF_JAR=${M2DIR}/ai/rapids/cudf/${CUDF_VERSION}/cudf-${CUDF_VERSION}-${CUDA_VERSION}.jar
-
-# pull normal Spark artifacts and ignore errors then install databricks jars, then build again
-JARDIR=/databricks/jars
-SQLJAR=----workspace_spark_3_0--sql--core--core-hive-2.3__hadoop-2.7_${SCALA_VERSION}_deploy.jar
-CATALYSTJAR=----workspace_spark_3_0--sql--catalyst--catalyst-hive-2.3__hadoop-2.7_${SCALA_VERSION}_deploy.jar
-ANNOTJAR=----workspace_spark_3_0--common--tags--tags-hive-2.3__hadoop-2.7_${SCALA_VERSION}_deploy.jar
-COREJAR=----workspace_spark_3_0--core--core-hive-2.3__hadoop-2.7_${SCALA_VERSION}_deploy.jar
-# install the 3.0.0 pom file so we get dependencies
-COREPOM=spark-core_${SCALA_VERSION}-${BASE_SPARK_VERSION}.pom
-COREPOMPATH=$M2DIR/org/apache/spark/spark-core_${SCALA_VERSION}/${BASE_SPARK_VERSION}
-mvn -B install:install-file \
-    -Dmaven.repo.local=$M2DIR \
-    -Dfile=$JARDIR/$COREJAR \
-    -DgroupId=org.apache.spark \
-    -DartifactId=spark-core_$SCALA_VERSION \
-    -Dversion=$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS \
-    -Dpackaging=jar \
-    -DpomFile=$COREPOMPATH/$COREPOM
-
-mvn -B install:install-file \
-    -Dmaven.repo.local=$M2DIR \
-    -Dfile=$JARDIR/$CATALYSTJAR \
-    -DgroupId=org.apache.spark \
-    -DartifactId=spark-catalyst_$SCALA_VERSION \
-    -Dversion=$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS \
-    -Dpackaging=jar
-
-mvn -B install:install-file \
-    -Dmaven.repo.local=$M2DIR \
-    -Dfile=$JARDIR/$SQLJAR \
-    -DgroupId=org.apache.spark \
-    -DartifactId=spark-sql_$SCALA_VERSION \
-    -Dversion=$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS \
-    -Dpackaging=jar
-
-mvn -B install:install-file \
-    -Dmaven.repo.local=$M2DIR \
-    -Dfile=$JARDIR/$ANNOTJAR \
-    -DgroupId=org.apache.spark \
-    -DartifactId=spark-annotation_$SCALA_VERSION \
-    -Dversion=$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS \
-    -Dpackaging=jar
-
-mvn -B -P${BUILD_PROFILES} clean package -DskipTests
-
-# Copy so we pick up new built jar and latest CuDF jar. Note that the jar names have to be
-# exactly what is in the statically setup Databricks cluster we use.
-echo "Copying rapids jars: dist/target/$RAPIDS_BUILT_JAR udf-examples/target/$RAPIDS_UDF_JAR $DB_JAR_LOC"
-sudo cp dist/target/$RAPIDS_BUILT_JAR udf-examples/target/$RAPIDS_UDF_JAR $DB_JAR_LOC
-echo "Copying cudf jars: $CUDF_JAR $DB_JAR_LOC"
-sudo cp $CUDF_JAR $DB_JAR_LOC
-
-# tests
-export PATH=/databricks/conda/envs/databricks-ml-gpu/bin:/databricks/conda/condabin:$PATH
-sudo /databricks/conda/envs/databricks-ml-gpu/bin/pip install pytest sre_yield requests pandas \
-    pyarrow findspark pytest-xdist
-cd /home/ubuntu/spark-rapids/integration_tests
-export SPARK_HOME=/databricks/spark
-# change to not point at databricks confs so we don't conflict with their settings
-export SPARK_CONF_DIR=$PWD
-export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/pyspark/:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip
-sudo ln -s /databricks/jars/ $SPARK_HOME/jars || true
-sudo chmod 777 /databricks/data/logs/
-sudo chmod 777 /databricks/data/logs/*
-echo { \"port\":\"15002\" } > ~/.databricks-connect
-if [ `ls $DB_JAR_LOC/rapids* | wc -l` -gt 2 ]; then
-    echo "ERROR: Too many rapids jars in $DB_JAR_LOC"
-    ls $DB_JAR_LOC/rapids*
-    exit 1
-fi
-if [ `ls $DB_JAR_LOC/cudf* | wc -l` -gt 1 ]; then
-    echo "ERROR: Too many cudf jars in $DB_JAR_LOC"
-    ls $DB_JAR_LOC/cudf*
-    exit 1
-fi
-bash run_pyspark_from_build.sh --runtime_env="databricks"
-cd /home/ubuntu
-tar -zcvf spark-rapids-built.tgz spark-rapids
diff --git a/jenkins/databricks/clusterutils.py b/jenkins/databricks/clusterutils.py
deleted file mode 100644
index af085ee0f584..000000000000
--- a/jenkins/databricks/clusterutils.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import time
-import json
-import time
-import os
-import requests
-import sys
-
-class ClusterUtils(object):
-
-    @staticmethod
-    def generate_create_templ(sshKey, cluster_name, runtime, idle_timeout,
-            num_workers, driver_node_type, worker_node_type,
-            printLoc=sys.stdout):
-        timeStr = str(int(time.time()))
-        uniq_name = cluster_name + "-" + timeStr
-        templ = {}
-        templ['cluster_name'] = uniq_name
-        print("cluster name is going to be %s" % uniq_name, file=printLoc)
-        templ['spark_version'] = runtime
-        templ['aws_attributes'] = {
-            "zone_id": "us-west-2a",
-            "first_on_demand": 1,
-            "availability": "SPOT_WITH_FALLBACK",
-            "spot_bid_price_percent": 100,
-            "ebs_volume_count": 0
-        }
-        templ['autotermination_minutes'] = idle_timeout
-        templ['enable_elastic_disk'] = 'false'
-        templ['enable_local_disk_encryption'] = 'false'
-        templ['node_type_id'] = worker_node_type
-        templ['driver_node_type_id'] = driver_node_type
-        templ['ssh_public_keys'] = [ sshKey ]
-        templ['num_workers'] = num_workers
-        return templ
-
-
-    @staticmethod
-    def create_cluster(workspace, jsonCreateTempl, token, printLoc=sys.stdout):
-        resp = requests.post(workspace + "/api/2.0/clusters/create", headers={'Authorization': 'Bearer %s' % token}, json=jsonCreateTempl)
-        print("create response is %s" % resp.text, file=printLoc)
-        clusterid = resp.json()['cluster_id']
-        print("cluster id is %s" % clusterid, file=printLoc)
-        return clusterid
-
-
-    @staticmethod
-    def wait_for_cluster_start(workspace, clusterid, token, retries=20, printLoc=sys.stdout):
-        p = 0
-        waiting = True
-        jsonout = None
-        while waiting:
-            time.sleep(30)
-            jsonout = ClusterUtils.cluster_state(workspace, clusterid, token, printLoc=printLoc)
-            current_state = jsonout['state']
-            print(clusterid + " state:" + current_state, file=printLoc)
-            if current_state in ['RUNNING']:
-                break
-            if current_state in ['INTERNAL_ERROR', 'SKIPPED', 'TERMINATED'] or p >= 60:
-                if p >= retries:
-                    print("Waited %d times already, stopping" % p)
-                    sys.exit(4)
-            p = p + 1
-        print("Done starting cluster", file=printLoc)
-        return jsonout
-
-
-    @staticmethod
-    def is_cluster_running(jsonout):
-        current_state = jsonout['state']
-        if current_state in ['RUNNING', 'RESIZING']:
-            return True
-        else:
-            return False
-
-
-    @staticmethod
-    def terminate_cluster(workspace, clusterid, token, printLoc=sys.stdout):
-        jsonout = ClusterUtils.cluster_state(workspace, clusterid, token, printLoc=printLoc)
-        if not ClusterUtils.is_cluster_unning(jsonout):
-            print("Cluster is not running", file=printLoc)
-            sys.exit(1)
-
-        print("Stopping cluster: " + clusterid, file=printLoc)
-        resp = requests.post(workspace + "/api/2.0/clusters/delete", headers={'Authorization': 'Bearer %s' % token}, json={'cluster_id': clusterid})
-        print("stop response is %s" % resp.text, file=printLoc)
-        print("Done stopping cluster", file=printLoc)
-
-
-    @staticmethod
-    def delete_cluster(workspace, clusterid, token, printLoc=sys.stdout):
-        print("Deleting cluster: " + clusterid, file=printLoc)
-        resp = requests.post(workspace + "/api/2.0/clusters/permanent-delete", headers={'Authorization': 'Bearer %s' % token}, json={'cluster_id': clusterid})
-        print("delete response is %s" % resp.text, file=printLoc)
-        print("Done deleting cluster", file=printLoc)
-
-
-    @staticmethod
-    def start_existing_cluster(workspace, clusterid, token, printLoc=sys.stdout):
-        print("Starting cluster: " + clusterid, file=printLoc)
-        resp = requests.post(workspace + "/api/2.0/clusters/start", headers={'Authorization': 'Bearer %s' % token}, json={'cluster_id': clusterid})
-        print("start response is %s" % resp.text, file=printLoc)
-
-
-    @staticmethod
-    def cluster_state(workspace, clusterid, token, printLoc=sys.stdout):
-        clusterresp = requests.get(workspace + "/api/2.0/clusters/get?cluster_id=%s" % clusterid, headers={'Authorization': 'Bearer %s' % token})
-        clusterjson = clusterresp.text
-        print("cluster response is %s" % clusterjson, file=printLoc)
-        jsonout = json.loads(clusterjson)
-        return jsonout
-
-
-    @staticmethod
-    def get_master_addr_from_json(jsonout):
-        master_addr = None
-        if ClusterUtils.is_cluster_running(jsonout):
-            driver = jsonout['driver']
-            master_addr = driver["public_dns"]
-        return master_addr
-
-
-    @staticmethod
-    def cluster_list(workspace, token, printLoc=sys.stdout):
-        clusterresp = requests.get(workspace + "/api/2.0/clusters/list", headers={'Authorization': 'Bearer %s' % token})
-        clusterjson = clusterresp.text
-        print("cluster list is %s" % clusterjson, file=printLoc)
-        jsonout = json.loads(clusterjson)
-        return jsonout
-
-
-    @staticmethod
-    def cluster_get_master_addr(workspace, clusterid, token, printLoc=sys.stdout):
-        jsonout = ClusterUtils.cluster_state(workspace, clusterid, token, printLoc=printLoc)
-        addr = ClusterUtils.get_master_addr_from_json(jsonout)
-        print("master addr is %s" % addr, file=printLoc)
-        return addr
-
diff --git a/jenkins/databricks/create.py b/jenkins/databricks/create.py
deleted file mode 100644
index e50765deb176..000000000000
--- a/jenkins/databricks/create.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from clusterutils import ClusterUtils
-import getopt
-import sys
-
-# This scripts create and starts a Databricks cluster and waits for it to be running.
-#
-# The name parameter is meant to be a unique name used when creating the cluster. Note we
-# append the epoch time to the end of it to help prevent collisions.
-#
-# Returns cluster id to stdout, all other logs default to stderr
-#
-# User is responsible for removing cluster if a failure or when done with cluster.
-def main():
-    workspace = 'https://dbc-9ff9942e-a9c4.cloud.databricks.com'
-    token = ''
-    sshkey = ''
-    cluster_name = 'CI-GPU-databricks-0.4.0-SNAPSHOT'
-    idletime = 240
-    runtime = '7.0.x-gpu-ml-scala2.12'
-    num_workers = 1
-    worker_type = 'g4dn.xlarge'
-    driver_type = 'g4dn.xlarge'
-
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], 'hw:t:k:n:i:r:o:d:e:',
-                                   ['workspace=', 'token=', 'sshkey=', 'clustername=', 'idletime=',
-                                    'runtime=', 'workertype=', 'drivertype=', 'numworkers='])
-    except getopt.GetoptError:
-        print(
-            'create.py -w -t -k -n -i -r -o -d -e ')
-        sys.exit(2)
-
-    for opt, arg in opts:
-        if opt == '-h':
-            print(
-                'create.py -w -t -k -n -i -r -o -d -e ')
-            sys.exit()
-        elif opt in ('-w', '--workspace'):
-            workspace = arg
-        elif opt in ('-t', '--token'):
-            token = arg
-        elif opt in ('-k', '--sshkey'):
-            sshkey = arg
-        elif opt in ('-n', '--clustername'):
-            cluster_name = arg
-        elif opt in ('-i', '--idletime'):
-            idletime = arg
-        elif opt in ('-r', '--runtime'):
-            runtime = arg
-        elif opt in ('-o', '--workertype'):
-            worker_type = arg
-        elif opt in ('-d', '--drivertype'):
-            driver_type = arg
-        elif opt in ('-e', '--numworkers'):
-            num_workers = arg
-
-    print('-w is ' + workspace, file=sys.stderr)
-    print('-k is ' + sshkey, file=sys.stderr)
-    print('-n is ' + cluster_name, file=sys.stderr)
-    print('-i is ' + str(idletime), file=sys.stderr)
-    print('-r is ' + runtime, file=sys.stderr)
-    print('-o is ' + worker_type, file=sys.stderr)
-    print('-d is ' + driver_type, file=sys.stderr)
-    print('-e is ' + str(num_workers), file=sys.stderr)
-
-    if not sshkey:
-        print("You must specify an sshkey!", file=sys.stderr)
-        sys.exit(2)
-
-    if not token:
-        print("You must specify an token!", file=sys.stderr)
-        sys.exit(2)
-
-    templ = ClusterUtils.generate_create_templ(sshkey, cluster_name, runtime, idletime,
-            num_workers, driver_type, worker_type, printLoc=sys.stderr)
-    clusterid = ClusterUtils.create_cluster(workspace, templ, token, printLoc=sys.stderr)
-    ClusterUtils.wait_for_cluster_start(workspace, clusterid, token, printLoc=sys.stderr)
-
-    # only print the clusterid to stdout so a calling script can get it easily
-    print(clusterid, file=sys.stdout)
-
-if __name__ == '__main__':
-    main()
diff --git a/jenkins/databricks/deploy.sh b/jenkins/databricks/deploy.sh
deleted file mode 100755
index 6c8a755ab769..000000000000
--- a/jenkins/databricks/deploy.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-set -e
-rm -rf deploy
-mkdir -p deploy
-cd deploy
-tar -zxvf ../spark-rapids-built.tgz
-cd spark-rapids
-echo "Maven mirror is $MVN_URM_MIRROR"
-SERVER_ID='snapshots'
-SERVER_URL="$URM_URL-local"
-SCALA_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=scala.binary.version -DforceStdout`
-# remove the periods so change something like 3.0.0 to 300
-VERSION_NUM=${BASE_SPARK_VERSION//.}
-SPARK_VERSION_STR=spark$VERSION_NUM
-SPARK_PLUGIN_JAR_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=project.version -DforceStdout`
-DB_SHIM_DIRECTORY=${SPARK_VERSION_STR}db
-DBJARFPATH=./shims/${DB_SHIM_DIRECTORY}/target/rapids-4-spark-shims-$SPARK_VERSION_STR-databricks_$SCALA_VERSION-$SPARK_PLUGIN_JAR_VERSION.jar
-echo "Databricks jar is: $DBJARFPATH"
-mvn -B deploy:deploy-file $MVN_URM_MIRROR '-P!snapshot-shims' -Durl=$SERVER_URL -DrepositoryId=$SERVER_ID \
-    -Dfile=$DBJARFPATH -DpomFile=shims/${DB_SHIM_DIRECTORY}/pom.xml
diff --git a/jenkins/databricks/run-tests.py b/jenkins/databricks/run-tests.py
deleted file mode 100644
index 0337a2ab3112..000000000000
--- a/jenkins/databricks/run-tests.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import json
-import requests
-import sys
-import getopt
-import time
-import os
-import subprocess
-from clusterutils import ClusterUtils
-
-
-def main():
-    workspace = 'https://dbc-9ff9942e-a9c4.cloud.databricks.com'
-    token = ''
-    private_key_file = "~/.ssh/id_rsa"
-    local_script = 'build.sh'
-    script_dest = '/home/ubuntu/build.sh'
-    source_tgz = 'spark-rapids-ci.tgz'
-    tgz_dest = '/home/ubuntu/spark-rapids-ci.tgz'
-    base_spark_pom_version = '3.0.0'
-    clusterid = ''
-    build_profiles = 'databricks,!snapshot-shims'
-
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], 'hw:t:c:p:l:d:z:m:v:b:',
-                                   ['workspace=', 'token=', 'clusterid=', 'private=', 'localscript=', 'dest=', 'sparktgz=', 'basesparkpomversion=', 'buildprofiles='])
-    except getopt.GetoptError:
-        print(
-            'run-tests.py -s -t -c -p -l -d -z -v -b ')
-        sys.exit(2)
-
-    for opt, arg in opts:
-        if opt == '-h':
-            print(
-                'run-tests.py -s -t -c -p -n -l -d , -z -v -b ')
-            sys.exit()
-        elif opt in ('-w', '--workspace'):
-            workspace = arg
-        elif opt in ('-t', '--token'):
-            token = arg
-        elif opt in ('-c', '--clusterid'):
-            clusterid = arg
-        elif opt in ('-p', '--private'):
-            private_key_file = arg
-        elif opt in ('-l', '--localscript'):
-            local_script = arg
-        elif opt in ('-d', '--dest'):
-            script_dest = arg
-        elif opt in ('-z', '--sparktgz'):
-            source_tgz = arg
-        elif opt in ('-v', '--basesparkpomversion'):
-            base_spark_pom_version = arg
-        elif opt in ('-b', '--bulidprofiles'):
-            build_profiles = arg
-
-    print('-w is ' + workspace)
-    print('-c is ' + clusterid)
-    print('-p is ' + private_key_file)
-    print('-l is ' + local_script)
-    print('-d is ' + script_dest)
-    print('-z is ' + source_tgz)
-    print('-v is ' + base_spark_pom_version)
-    print('-b is ' + build_profiles)
-
-    master_addr = ClusterUtils.cluster_get_master_addr(workspace, clusterid, token)
-    if master_addr is None:
-        print("Error, didn't get master address")
-        sys.exit(1)
-    print("Master node address is: %s" % master_addr)
-    print("Copying script")
-    rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (private_key_file, local_script, master_addr, script_dest)
-    print("rsync command: %s" % rsync_command)
-    subprocess.check_call(rsync_command, shell = True)
-
-    print("Copying source")
-    rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (private_key_file, source_tgz, master_addr, tgz_dest)
-    print("rsync command: %s" % rsync_command)
-    subprocess.check_call(rsync_command, shell = True)
-
-    ssh_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@%s -p 2200 -i %s %s %s %s %s 2>&1 | tee buildout; if [ `echo ${PIPESTATUS[0]}` -ne 0 ]; then false; else true; fi" % (master_addr, private_key_file, script_dest, tgz_dest, base_spark_pom_version, build_profiles)
-    print("ssh command: %s" % ssh_command)
-    subprocess.check_call(ssh_command, shell = True)
-
-    print("Copying built tarball back")
-    rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" ubuntu@%s:/home/ubuntu/spark-rapids-built.tgz ./" % (private_key_file, master_addr)
-    print("rsync command to get built tarball: %s" % rsync_command)
-    subprocess.check_call(rsync_command, shell = True)
-
-if __name__ == '__main__':
-    main()
diff --git a/jenkins/databricks/shutdown.py b/jenkins/databricks/shutdown.py
deleted file mode 100644
index 654594ae53a8..000000000000
--- a/jenkins/databricks/shutdown.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from clusterutils import ClusterUtils
-import getopt
-import sys
-
-# shutdown or delete a databricks cluster
-def main():
-    workspace = 'https://dbc-9ff9942e-a9c4.cloud.databricks.com'
-    token = ''
-    clusterid = '0617-140138-umiak14'
-    delete = False
-
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], 'hs:t:c:d',
-                                   ['workspace=', 'token=', 'clusterid=', 'delete'])
-    except getopt.GetoptError:
-        print(
-            'shutdown.py -s -t -c ')
-        sys.exit(2)
-
-    for opt, arg in opts:
-        if opt == '-h':
-            print(
-                'shutdown.py -s -t -c ')
-            sys.exit()
-        elif opt in ('-s', '--workspace'):
-            workspace = arg
-        elif opt in ('-t', '--token'):
-            token = arg
-        elif opt in ('-c', '--clusterid'):
-            clusterid = arg
-        elif opt in ('-d', '--delete'):
-            delete = True
-
-    print('-s is ' + workspace)
-    print('-c is ' + clusterid)
-
-    if not clusterid:
-        print("You must specify clusterid!")
-        sys.exit(1)
-
-    if not token:
-        print("You must specify token!")
-        sys.exit(1)
-
-    if delete:
-        ClusterUtils.delete_cluster(workspace, clusterid, token)
-    else:
-        ClusterUtils.terminate_cluster(workspace, clusterid, token)
-
-if __name__ == '__main__':
-    main()