diff --git a/jenkins/databricks/build.sh b/jenkins/databricks/build.sh
index e79c70e5de3..4a84ac00b2a 100755
--- a/jenkins/databricks/build.sh
+++ b/jenkins/databricks/build.sh
@@ -103,36 +103,5 @@ mvn -B install:install-file \
 
 mvn -B -P${BUILD_PROFILES} clean package -DskipTests
 
-# Copy so we pick up new built jar and latest CuDF jar. Note that the jar names have to be
-# exactly what is in the statically setup Databricks cluster we use.
-echo "Copying rapids jars: dist/target/$RAPIDS_BUILT_JAR udf-examples/target/$RAPIDS_UDF_JAR $DB_JAR_LOC"
-sudo cp dist/target/$RAPIDS_BUILT_JAR udf-examples/target/$RAPIDS_UDF_JAR $DB_JAR_LOC
-echo "Copying cudf jars: $CUDF_JAR $DB_JAR_LOC"
-sudo cp $CUDF_JAR $DB_JAR_LOC
-
-# tests
-export PATH=/databricks/conda/envs/databricks-ml-gpu/bin:/databricks/conda/condabin:$PATH
-sudo /databricks/conda/envs/databricks-ml-gpu/bin/pip install pytest sre_yield requests pandas \
-    pyarrow findspark pytest-xdist
-cd /home/ubuntu/spark-rapids/integration_tests
-export SPARK_HOME=/databricks/spark
-# change to not point at databricks confs so we don't conflict with their settings
-export SPARK_CONF_DIR=$PWD
-export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/pyspark/:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip
-sudo ln -s /databricks/jars/ $SPARK_HOME/jars || true
-sudo chmod 777 /databricks/data/logs/
-sudo chmod 777 /databricks/data/logs/*
-echo { \"port\":\"15002\" } > ~/.databricks-connect
-if [ `ls $DB_JAR_LOC/rapids* | wc -l` -gt 2 ]; then
-    echo "ERROR: Too many rapids jars in $DB_JAR_LOC"
-    ls $DB_JAR_LOC/rapids*
-    exit 1
-fi
-if [ `ls $DB_JAR_LOC/cudf* | wc -l` -gt 1 ]; then
-    echo "ERROR: Too many cudf jars in $DB_JAR_LOC"
-    ls $DB_JAR_LOC/cudf*
-    exit 1
-fi
-bash run_pyspark_from_build.sh --runtime_env="databricks"
 
 cd /home/ubuntu
 tar -zcvf spark-rapids-built.tgz spark-rapids
diff --git a/jenkins/databricks/clusterutils.py b/jenkins/databricks/clusterutils.py
index af085ee0f58..fdb937014ef 100644
--- a/jenkins/databricks/clusterutils.py
+++ b/jenkins/databricks/clusterutils.py
@@ -23,7 +23,7 @@ class ClusterUtils(object):
 
     @staticmethod
     def generate_create_templ(sshKey, cluster_name, runtime, idle_timeout,
-            num_workers, driver_node_type, worker_node_type,
+            num_workers, driver_node_type, worker_node_type, cloud_provider,
             printLoc=sys.stdout):
         timeStr = str(int(time.time()))
         uniq_name = cluster_name + "-" + timeStr
@@ -31,13 +31,14 @@ def generate_create_templ(sshKey, cluster_name, runtime, idle_timeout,
         templ['cluster_name'] = uniq_name
         print("cluster name is going to be %s" % uniq_name, file=printLoc)
         templ['spark_version'] = runtime
-        templ['aws_attributes'] = {
-            "zone_id": "us-west-2a",
-            "first_on_demand": 1,
-            "availability": "SPOT_WITH_FALLBACK",
-            "spot_bid_price_percent": 100,
-            "ebs_volume_count": 0
-        }
+        if cloud_provider == 'aws':
+            templ['aws_attributes'] = {
+                "zone_id": "us-west-2a",
+                "first_on_demand": 1,
+                "availability": "SPOT_WITH_FALLBACK",
+                "spot_bid_price_percent": 100,
+                "ebs_volume_count": 0
+            }
         templ['autotermination_minutes'] = idle_timeout
         templ['enable_elastic_disk'] = 'false'
         templ['enable_local_disk_encryption'] = 'false'
diff --git a/jenkins/databricks/create.py b/jenkins/databricks/create.py
index 10b1d99c2b4..ef632bf1722 100644
--- a/jenkins/databricks/create.py
+++ b/jenkins/databricks/create.py
@@ -33,20 +33,21 @@ def main():
     num_workers = 1
     worker_type = 'g4dn.xlarge'
     driver_type = 'g4dn.xlarge'
+    cloud_provider = 'aws'
 
     try:
-        opts, args = getopt.getopt(sys.argv[1:], 'hw:t:k:n:i:r:o:d:e:',
+        opts, args = 
+        opts, args = getopt.getopt(sys.argv[1:], 'hw:t:k:n:i:r:o:d:e:s:',
                                    ['workspace=', 'token=', 'sshkey=', 'clustername=', 'idletime=',
-                                    'runtime=', 'workertype=', 'drivertype=', 'numworkers='])
+                                    'runtime=', 'workertype=', 'drivertype=', 'numworkers=', 'cloudprovider='])
     except getopt.GetoptError:
         print(
-            'create.py -w <workspace> -t <token> -k <sshkey> -n <clustername> -i <idletime> -r <runtime> -o <workertype> -d <drivertype> -e <numworkers>')
+            'create.py -w <workspace> -t <token> -k <sshkey> -n <clustername> -i <idletime> -r <runtime> -o <workertype> -d <drivertype> -e <numworkers> -s <cloudprovider>')
         sys.exit(2)
 
     for opt, arg in opts:
         if opt == '-h':
             print(
-                'create.py -w <workspace> -t <token> -k <sshkey> -n <clustername> -i <idletime> -r <runtime> -o <workertype> -d <drivertype> -e <numworkers>')
+                'create.py -w <workspace> -t <token> -k <sshkey> -n <clustername> -i <idletime> -r <runtime> -o <workertype> -d <drivertype> -e <numworkers> -s <cloudprovider>')
             sys.exit()
         elif opt in ('-w', '--workspace'):
             workspace = arg
@@ -66,6 +67,8 @@ def main():
             driver_type = arg
         elif opt in ('-e', '--numworkers'):
             num_workers = arg
+        elif opt in ('-s', '--cloudprovider'):
+            cloud_provider = arg
 
     print('-w is ' + workspace, file=sys.stderr)
     print('-k is ' + sshkey, file=sys.stderr)
@@ -75,6 +78,7 @@ def main():
     print('-o is ' + worker_type, file=sys.stderr)
     print('-d is ' + driver_type, file=sys.stderr)
     print('-e is ' + str(num_workers), file=sys.stderr)
+    print('-s is ' + cloud_provider, file=sys.stderr)
 
     if not sshkey:
         print("You must specify an sshkey!", file=sys.stderr)
@@ -85,7 +89,7 @@ def main():
         sys.exit(2)
 
     templ = ClusterUtils.generate_create_templ(sshkey, cluster_name, runtime, idletime,
-            num_workers, driver_type, worker_type, printLoc=sys.stderr)
+            num_workers, driver_type, worker_type, cloud_provider, printLoc=sys.stderr)
     clusterid = ClusterUtils.create_cluster(workspace, templ, token, printLoc=sys.stderr)
     ClusterUtils.wait_for_cluster_start(workspace, clusterid, token, printLoc=sys.stderr)
 
diff --git a/jenkins/databricks/params.py b/jenkins/databricks/params.py
new file mode 100644
index 00000000000..e96acded2b4
--- /dev/null
+++ b/jenkins/databricks/params.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
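+#
+# Shared command-line parsing for the Databricks build and test drivers:
+# importing this module parses sys.argv once and exposes the parsed values
+# (workspace, token, clusterid, jar_path, etc.) as module-level attributes.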
+
+import sys
+import getopt
+
+workspace = 'https://dbc-9ff9942e-a9c4.cloud.databricks.com'
+token = ''
+private_key_file = "~/.ssh/id_rsa"
+local_script = 'build.sh'
+script_dest = '/home/ubuntu/build.sh'
+source_tgz = 'spark-rapids-ci.tgz'
+tgz_dest = '/home/ubuntu/spark-rapids-ci.tgz'
+base_spark_pom_version = '3.0.0'
+clusterid = ''
+build_profiles = 'databricks,!snapshot-shims'
+jar_path = ''
+
+try:
+    opts, args = getopt.getopt(sys.argv[1:], 'hw:t:c:p:l:d:z:m:v:b:j:',
+                               ['workspace=', 'token=', 'clusterid=', 'private=', 'localscript=', 'dest=', 'sparktgz=', 'basesparkpomversion=', 'buildprofiles=', 'jarpath='])
+except getopt.GetoptError:
+    print(
+        'run-tests.py -w <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -l <localscript> -d <scriptdestination> -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles> -j <jarpath>')
+    sys.exit(2)
+
+for opt, arg in opts:
+    if opt == '-h':
+        print(
+            'run-tests.py -w <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -l <localscript> -d <scriptdestination> -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles> -j <jarpath>')
+        sys.exit()
+    elif opt in ('-w', '--workspace'):
+        workspace = arg
+    elif opt in ('-t', '--token'):
+        token = arg
+    elif opt in ('-c', '--clusterid'):
+        clusterid = arg
+    elif opt in ('-p', '--private'):
+        private_key_file = arg
+    elif opt in ('-l', '--localscript'):
+        local_script = arg
+    elif opt in ('-d', '--dest'):
+        script_dest = arg
+    elif opt in ('-z', '--sparktgz'):
+        source_tgz = arg
+    elif opt in ('-v', '--basesparkpomversion'):
+        base_spark_pom_version = arg
+    elif opt in ('-b', '--buildprofiles'):
+        build_profiles = arg
+    elif opt in ('-j', '--jarpath'):
+        jar_path = arg
+
+print('-w is ' + workspace)
+print('-c is ' + clusterid)
+print('-p is ' + private_key_file)
+print('-l is ' + local_script)
+print('-d is ' + script_dest)
+print('-z is ' + source_tgz)
+print('-v is ' + base_spark_pom_version)
+print('-b is ' + build_profiles)
+print('-j is ' + jar_path)
diff --git a/jenkins/databricks/run-build.py b/jenkins/databricks/run-build.py
new file mode 100644
index 00000000000..3793f3b41d5
--- /dev/null
+++ b/jenkins/databricks/run-build.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
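+#
+# Copies build.sh and the spark-rapids source tarball to the Databricks cluster
+# master over ssh (port 2200), runs the build there, and rsyncs the resulting
+# spark-rapids-built.tgz back into the local workspace.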
+
+import sys
+import subprocess
+from clusterutils import ClusterUtils
+import params
+
+def main():
+    master_addr = ClusterUtils.cluster_get_master_addr(params.workspace, params.clusterid, params.token)
+    if master_addr is None:
+        print("Error, didn't get master address")
+        sys.exit(1)
+    print("Master node address is: %s" % master_addr)
+
+    print("Copying script")
+    rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (params.private_key_file, params.local_script, master_addr, params.script_dest)
+    print("rsync command: %s" % rsync_command)
+    subprocess.check_call(rsync_command, shell=True)
+
+    print("Copying source")
+    rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (params.private_key_file, params.source_tgz, master_addr, params.tgz_dest)
+    print("rsync command: %s" % rsync_command)
+    subprocess.check_call(rsync_command, shell=True)
+
+    ssh_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@%s -p 2200 -i %s %s %s %s %s 2>&1 | tee buildout; if [ `echo ${PIPESTATUS[0]}` -ne 0 ]; then false; else true; fi" % (master_addr, params.private_key_file, params.script_dest, params.tgz_dest, params.base_spark_pom_version, params.build_profiles)
+    print("ssh command: %s" % ssh_command)
+    subprocess.check_call(ssh_command, shell=True)
+
+    print("Copying built tarball back")
+    rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" ubuntu@%s:/home/ubuntu/spark-rapids-built.tgz ./" % (params.private_key_file, master_addr)
+    print("rsync command to get built tarball: %s" % rsync_command)
+    subprocess.check_call(rsync_command, shell=True)
+
+if __name__ == '__main__':
+    main()
diff --git a/jenkins/databricks/run-tests.py b/jenkins/databricks/run-tests.py
index 0337a2ab311..3e33d7215ab 100644
--- a/jenkins/databricks/run-tests.py
+++ b/jenkins/databricks/run-tests.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -19,84 +19,25 @@
 import os
 import subprocess
 from clusterutils import ClusterUtils
+import params
 
 def main():
-    workspace = 'https://dbc-9ff9942e-a9c4.cloud.databricks.com'
-    token = ''
-    private_key_file = "~/.ssh/id_rsa"
-    local_script = 'build.sh'
-    script_dest = '/home/ubuntu/build.sh'
-    source_tgz = 'spark-rapids-ci.tgz'
-    tgz_dest = '/home/ubuntu/spark-rapids-ci.tgz'
-    base_spark_pom_version = '3.0.0'
-    clusterid = ''
-    build_profiles = 'databricks,!snapshot-shims'
-
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], 'hw:t:c:p:l:d:z:m:v:b:',
-                                   ['workspace=', 'token=', 'clusterid=', 'private=', 'localscript=', 'dest=', 'sparktgz=', 'basesparkpomversion=', 'buildprofiles='])
-    except getopt.GetoptError:
-        print(
-            'run-tests.py -s <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -l <localscript> -d <scriptdestination> -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles>')
-        sys.exit(2)
-
-    for opt, arg in opts:
-        if opt == '-h':
-            print(
-                'run-tests.py -s <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -n -l <localscript> -d <scriptdestination>, -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles>')
-            sys.exit()
-        elif opt in ('-w', '--workspace'):
-            workspace = arg
-        elif opt in ('-t', '--token'):
-            token = arg
-        elif opt in ('-c', '--clusterid'):
-            clusterid = arg
-        elif opt in ('-p', '--private'):
-            private_key_file = arg
-        elif opt in ('-l', '--localscript'):
-            local_script = arg
-        elif opt in ('-d', '--dest'):
-            script_dest = arg
-        elif opt in ('-z', '--sparktgz'):
-            source_tgz = arg
-        elif opt in ('-v', '--basesparkpomversion'):
-            base_spark_pom_version = arg
-        elif opt in ('-b', '--bulidprofiles'):
-            build_profiles = arg
-
-    print('-w is ' + workspace)
-    print('-c is ' + clusterid)
-    print('-p is ' + private_key_file)
-    print('-l is ' + local_script)
-    print('-d is ' + script_dest)
-    print('-z is ' + source_tgz)
-    print('-v is ' + base_spark_pom_version)
-    print('-b is ' + build_profiles)
-
-    master_addr = ClusterUtils.cluster_get_master_addr(workspace, clusterid, token)
+    master_addr = ClusterUtils.cluster_get_master_addr(params.workspace, params.clusterid, params.token)
     if master_addr is None:
         print("Error, didn't get master address")
         sys.exit(1)
     print("Master node address is: %s" % master_addr)
 
-    print("Copying script")
-    rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (private_key_file, local_script, master_addr, script_dest)
-    print("rsync command: %s" % rsync_command)
-    subprocess.check_call(rsync_command, shell = True)
-
-    print("Copying source")
-    rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (private_key_file, source_tgz, master_addr, tgz_dest)
+    print("Copying script")
+    rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (params.private_key_file, params.local_script, master_addr, params.script_dest)
     print("rsync command: %s" % rsync_command)
     subprocess.check_call(rsync_command, shell = True)
 
-    ssh_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@%s -p 2200 -i %s %s %s %s %s 2>&1 | tee buildout; if [ `echo ${PIPESTATUS[0]}` -ne 0 ]; then false; else true; fi" % (master_addr, private_key_file, script_dest, tgz_dest, base_spark_pom_version, build_profiles)
+    ssh_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@%s -p 2200 -i %s %s %s 2>&1 | tee testout; if [ `echo ${PIPESTATUS[0]}` -ne 0 ]; then false; else true; fi" % (master_addr, params.private_key_file, params.script_dest, params.jar_path)
     print("ssh command: %s" % ssh_command)
     subprocess.check_call(ssh_command, shell = True)
 
print("Copying built tarball back") - rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" ubuntu@%s:/home/ubuntu/spark-rapids-built.tgz ./" % (private_key_file, master_addr) - print("rsync command to get built tarball: %s" % rsync_command) - subprocess.check_call(rsync_command, shell = True) - if __name__ == '__main__': main() diff --git a/jenkins/databricks/test.sh b/jenkins/databricks/test.sh new file mode 100755 index 00000000000..baefe8b6da7 --- /dev/null +++ b/jenkins/databricks/test.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# +# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +LOCAL_JAR_PATH=$1 + +# tests +export PATH=/databricks/conda/envs/databricks-ml-gpu/bin:/databricks/conda/condabin:$PATH +sudo /databricks/conda/envs/databricks-ml-gpu/bin/pip install pytest sre_yield requests pandas \ + pyarrow findspark pytest-xdist + +export SPARK_HOME=/databricks/spark +# change to not point at databricks confs so we don't conflict with their settings +export SPARK_CONF_DIR=$PWD +export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/pyspark/:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip +sudo ln -s /databricks/jars/ $SPARK_HOME/jars || true +sudo chmod 777 /databricks/data/logs/ +sudo chmod 777 /databricks/data/logs/* +echo { \"port\":\"15002\" } > ~/.databricks-connect + +if [ -d "$LOCAL_JAR_PATH" ]; then + ## Run tests with jars in the LOCAL_JAR_PATH dir downloading from the denpedency repo + LOCAL_JAR_PATH=$LOCAL_JAR_PATH bash $LOCAL_JAR_PATH/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" +else + ## Run tests with jars building from the spark-rapids source code + bash /home/ubuntu/spark-rapids/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" +fi