Commit

Split DB scripts to make them common for the build and IT pipeline (NVIDIA#1933)

* Split DB scripts to make them common for the build and IT pipeline

Signed-off-by: Tim Liu <timl@nvidia.com>

* Update getopt for jar_path

* Add 'cluster_type' parameter to make create.py common for aws and azure Databricks

Signed-off-by: Tim Liu <timl@nvidia.com>

* Change to use a more readable var name

Change the var name from cluster_type to cloud_provider.

Signed-off-by: Tim Liu <timl@nvidia.com>
NvTimLiu authored Mar 16, 2021
1 parent d54b86f commit 442b518
Showing 7 changed files with 188 additions and 109 deletions.
31 changes: 0 additions & 31 deletions jenkins/databricks/build.sh
@@ -103,36 +103,5 @@ mvn -B install:install-file \

mvn -B -P${BUILD_PROFILES} clean package -DskipTests

# Copy so we pick up new built jar and latest CuDF jar. Note that the jar names have to be
# exactly what is in the statically setup Databricks cluster we use.
echo "Copying rapids jars: dist/target/$RAPIDS_BUILT_JAR udf-examples/target/$RAPIDS_UDF_JAR $DB_JAR_LOC"
sudo cp dist/target/$RAPIDS_BUILT_JAR udf-examples/target/$RAPIDS_UDF_JAR $DB_JAR_LOC
echo "Copying cudf jars: $CUDF_JAR $DB_JAR_LOC"
sudo cp $CUDF_JAR $DB_JAR_LOC

# tests
export PATH=/databricks/conda/envs/databricks-ml-gpu/bin:/databricks/conda/condabin:$PATH
sudo /databricks/conda/envs/databricks-ml-gpu/bin/pip install pytest sre_yield requests pandas \
pyarrow findspark pytest-xdist
cd /home/ubuntu/spark-rapids/integration_tests
export SPARK_HOME=/databricks/spark
# change to not point at databricks confs so we don't conflict with their settings
export SPARK_CONF_DIR=$PWD
export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/pyspark/:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip
sudo ln -s /databricks/jars/ $SPARK_HOME/jars || true
sudo chmod 777 /databricks/data/logs/
sudo chmod 777 /databricks/data/logs/*
echo { \"port\":\"15002\" } > ~/.databricks-connect
if [ `ls $DB_JAR_LOC/rapids* | wc -l` -gt 2 ]; then
echo "ERROR: Too many rapids jars in $DB_JAR_LOC"
ls $DB_JAR_LOC/rapids*
exit 1
fi
if [ `ls $DB_JAR_LOC/cudf* | wc -l` -gt 1 ]; then
echo "ERROR: Too many cudf jars in $DB_JAR_LOC"
ls $DB_JAR_LOC/cudf*
exit 1
fi
bash run_pyspark_from_build.sh --runtime_env="databricks"
cd /home/ubuntu
tar -zcvf spark-rapids-built.tgz spark-rapids
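
Note: with the test steps factored out into the new test.sh (added below), build.sh now stops after staging the jars. A minimal sketch of how the script is driven on the cluster master, mirroring the ssh command assembled in run-build.py below; the tarball path, Spark POM version, and profile list are the params.py defaults and are placeholders here:

# argument order matches the ssh command in run-build.py
./build.sh /home/ubuntu/spark-rapids-ci.tgz 3.0.0 'databricks,!snapshot-shims'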
17 changes: 9 additions & 8 deletions jenkins/databricks/clusterutils.py
@@ -23,21 +23,22 @@ class ClusterUtils(object):

@staticmethod
def generate_create_templ(sshKey, cluster_name, runtime, idle_timeout,
num_workers, driver_node_type, worker_node_type,
num_workers, driver_node_type, worker_node_type, cloud_provider,
printLoc=sys.stdout):
timeStr = str(int(time.time()))
uniq_name = cluster_name + "-" + timeStr
templ = {}
templ['cluster_name'] = uniq_name
print("cluster name is going to be %s" % uniq_name, file=printLoc)
templ['spark_version'] = runtime
templ['aws_attributes'] = {
"zone_id": "us-west-2a",
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0
}
if cloud_provider == 'aws':
templ['aws_attributes'] = {
"zone_id": "us-west-2a",
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0
}
templ['autotermination_minutes'] = idle_timeout
templ['enable_elastic_disk'] = 'false'
templ['enable_local_disk_encryption'] = 'false'
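
Note: the create template now includes aws_attributes only when the provider is aws. A quick check of that branch, run from jenkins/databricks with the module's Python dependencies available; the ssh key and runtime string are illustrative placeholders:

python - <<'EOF'
from clusterutils import ClusterUtils
for provider in ('aws', 'azure'):
    templ = ClusterUtils.generate_create_templ(
        'ssh-rsa AAAA...placeholder', 'ci-test', '7.3.x-gpu-ml-scala2.12',
        120, 1, 'g4dn.xlarge', 'g4dn.xlarge', provider)
    print(provider, '->', 'aws_attributes' in templ)  # True only for aws
EOF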
14 changes: 9 additions & 5 deletions jenkins/databricks/create.py
@@ -33,20 +33,21 @@ def main():
num_workers = 1
worker_type = 'g4dn.xlarge'
driver_type = 'g4dn.xlarge'
cloud_provider = 'aws'

try:
opts, args = getopt.getopt(sys.argv[1:], 'hw:t:k:n:i:r:o:d:e:',
opts, args = getopt.getopt(sys.argv[1:], 'hw:t:k:n:i:r:o:d:e:s:',
['workspace=', 'token=', 'sshkey=', 'clustername=', 'idletime=',
'runtime=', 'workertype=', 'drivertype=', 'numworkers='])
'runtime=', 'workertype=', 'drivertype=', 'numworkers=', 'cloudprovider='])
except getopt.GetoptError:
print(
'create.py -w <workspace> -t <token> -k <sshkey> -n <clustername> -i <idletime> -r <runtime> -o <workernodetype> -d <drivernodetype> -e <numworkers>')
'create.py -w <workspace> -t <token> -k <sshkey> -n <clustername> -i <idletime> -r <runtime> -o <workernodetype> -d <drivernodetype> -e <numworkers> -s <cloudprovider>')
sys.exit(2)

for opt, arg in opts:
if opt == '-h':
print(
'create.py -w <workspace> -t <token> -k <sshkey> -n <clustername> -i <idletime> -r <runtime> -o <workernodetype> -d <drivernodetype> -e <numworkers>')
'create.py -w <workspace> -t <token> -k <sshkey> -n <clustername> -i <idletime> -r <runtime> -o <workernodetype> -d <drivernodetype> -e <numworkers> -s <cloudprovider>')
sys.exit()
elif opt in ('-w', '--workspace'):
workspace = arg
@@ -66,6 +67,8 @@ def main():
driver_type = arg
elif opt in ('-e', '--numworkers'):
num_workers = arg
elif opt in ('-s', '--cloudprovider'):
cloud_provider = arg

print('-w is ' + workspace, file=sys.stderr)
print('-k is ' + sshkey, file=sys.stderr)
@@ -75,6 +78,7 @@ def main():
print('-o is ' + worker_type, file=sys.stderr)
print('-d is ' + driver_type, file=sys.stderr)
print('-e is ' + str(num_workers), file=sys.stderr)
print('-s is ' + cloud_provider, file=sys.stderr)

if not sshkey:
print("You must specify an sshkey!", file=sys.stderr)
@@ -85,7 +89,7 @@ def main():
sys.exit(2)

templ = ClusterUtils.generate_create_templ(sshkey, cluster_name, runtime, idletime,
num_workers, driver_type, worker_type, printLoc=sys.stderr)
num_workers, driver_type, worker_type, cloud_provider, printLoc=sys.stderr)
clusterid = ClusterUtils.create_cluster(workspace, templ, token, printLoc=sys.stderr)
ClusterUtils.wait_for_cluster_start(workspace, clusterid, token, printLoc=sys.stderr)

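
Note: -s/--cloudprovider defaults to aws, so existing AWS jobs keep working unchanged; an Azure pipeline passes the flag explicitly. A hedged invocation sketch where the workspace URL, token, and ssh key values are placeholders:

# AWS (default): the create template carries the aws_attributes block
python create.py -w "$WORKSPACE" -t "$TOKEN" -k "$SSHKEY" -n ci-cluster -s aws
# Azure: the AWS-only attributes are omitted from the template
python create.py -w "$WORKSPACE" -t "$TOKEN" -k "$SSHKEY" -n ci-cluster -s azure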
71 changes: 71 additions & 0 deletions jenkins/databricks/params.py
@@ -0,0 +1,71 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import getopt

workspace = 'https://dbc-9ff9942e-a9c4.cloud.databricks.com'
token = ''
private_key_file = "~/.ssh/id_rsa"
local_script = 'build.sh'
script_dest = '/home/ubuntu/build.sh'
source_tgz = 'spark-rapids-ci.tgz'
tgz_dest = '/home/ubuntu/spark-rapids-ci.tgz'
base_spark_pom_version = '3.0.0'
clusterid = ''
build_profiles = 'databricks,!snapshot-shims'
jar_path = ''

try:
opts, args = getopt.getopt(sys.argv[1:], 'hw:t:c:p:l:d:z:m:v:b:j:',
['workspace=', 'token=', 'clusterid=', 'private=', 'localscript=', 'dest=', 'sparktgz=', 'basesparkpomversion=', 'buildprofiles=', 'jarpath='])
except getopt.GetoptError:
print(
'run-tests.py -w <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -l <localscript> -d <scriptdestination> -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles> -j <jarpath>')
sys.exit(2)

for opt, arg in opts:
if opt == '-h':
print(
'run-tests.py -w <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -l <localscript> -d <scriptdestination> -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles> -j <jarpath>')
sys.exit()
elif opt in ('-w', '--workspace'):
workspace = arg
elif opt in ('-t', '--token'):
token = arg
elif opt in ('-c', '--clusterid'):
clusterid = arg
elif opt in ('-p', '--private'):
private_key_file = arg
elif opt in ('-l', '--localscript'):
local_script = arg
elif opt in ('-d', '--dest'):
script_dest = arg
elif opt in ('-z', '--sparktgz'):
source_tgz = arg
elif opt in ('-v', '--basesparkpomversion'):
base_spark_pom_version = arg
elif opt in ('-b', '--buildprofiles'):
build_profiles = arg
elif opt in ('-j', '--jarpath'):
jar_path = arg

print('-w is ' + workspace)
print('-c is ' + clusterid)
print('-p is ' + private_key_file)
print('-l is ' + local_script)
print('-d is ' + script_dest)
print('-z is ' + source_tgz)
print('-v is ' + base_spark_pom_version)
print('-j is ' + jar_path)
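
Note: both pipeline drivers below import this module, so they share one flag vocabulary and Jenkins can reuse a common argument list, varying only the per-stage pieces. A hedged sketch with placeholder values:

COMMON_ARGS=(-w "$WORKSPACE" -t "$TOKEN" -c "$CLUSTER_ID" -p ~/.ssh/id_rsa -v 3.0.0)
python run-build.py "${COMMON_ARGS[@]}" -l build.sh -z spark-rapids-ci.tgz
python run-tests.py "${COMMON_ARGS[@]}" -l test.sh -d /home/ubuntu/test.sh -j "$JAR_PATH"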
51 changes: 51 additions & 0 deletions jenkins/databricks/run-build.py
@@ -0,0 +1,51 @@
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import requests
import sys
import getopt
import time
import os
import subprocess
from clusterutils import ClusterUtils
import params

def main():
master_addr = ClusterUtils.cluster_get_master_addr(params.workspace, params.clusterid, params.token)
if master_addr is None:
print("Error, didn't get master address")
sys.exit(1)
print("Master node address is: %s" % master_addr)

print("Copying script")
rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (params.private_key_file, params.local_script, master_addr, params.script_dest)
print("rsync command: %s" % rsync_command)
subprocess.check_call(rsync_command, shell = True)

print("Copying source")
rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (params.private_key_file, params.source_tgz, master_addr, params.tgz_dest)
print("rsync command: %s" % rsync_command)
subprocess.check_call(rsync_command, shell = True)

ssh_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@%s -p 2200 -i %s %s %s %s %s 2>&1 | tee buildout; if [ `echo ${PIPESTATUS[0]}` -ne 0 ]; then false; else true; fi" % (master_addr, params.private_key_file, params.script_dest, params.tgz_dest, params.base_spark_pom_version, params.build_profiles)
print("ssh command: %s" % ssh_command)
subprocess.check_call(ssh_command, shell = True)

print("Copying built tarball back")
rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" ubuntu@%s:/home/ubuntu/spark-rapids-built.tgz ./" % (params.private_key_file, master_addr)
print("rsync command to get built tarball: %s" % rsync_command)
subprocess.check_call(rsync_command, shell = True)

if __name__ == '__main__':
main()
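
Note: the ssh command pipes remote output through tee buildout, and tee's exit status would otherwise mask a failed build; reading PIPESTATUS[0] restores the remote script's status. The bash idiom in isolation:

false 2>&1 | tee buildout      # tee exits 0, so $? alone would report success
test "${PIPESTATUS[0]}" -eq 0  # fails, preserving the real exit status of 'false'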
71 changes: 6 additions & 65 deletions jenkins/databricks/run-tests.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -19,84 +19,25 @@
import os
import subprocess
from clusterutils import ClusterUtils
import params


def main():
workspace = 'https://dbc-9ff9942e-a9c4.cloud.databricks.com'
token = ''
private_key_file = "~/.ssh/id_rsa"
local_script = 'build.sh'
script_dest = '/home/ubuntu/build.sh'
source_tgz = 'spark-rapids-ci.tgz'
tgz_dest = '/home/ubuntu/spark-rapids-ci.tgz'
base_spark_pom_version = '3.0.0'
clusterid = ''
build_profiles = 'databricks,!snapshot-shims'

try:
opts, args = getopt.getopt(sys.argv[1:], 'hw:t:c:p:l:d:z:m:v:b:',
['workspace=', 'token=', 'clusterid=', 'private=', 'localscript=', 'dest=', 'sparktgz=', 'basesparkpomversion=', 'buildprofiles='])
except getopt.GetoptError:
print(
'run-tests.py -s <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -l <localscript> -d <scriptdestinatino> -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles>')
sys.exit(2)

for opt, arg in opts:
if opt == '-h':
print(
'run-tests.py -s <workspace> -t <token> -c <clusterid> -p <privatekeyfile> -n <skipstartingcluster> -l <localscript> -d <scriptdestinatino>, -z <sparktgz> -v <basesparkpomversion> -b <buildprofiles>')
sys.exit()
elif opt in ('-w', '--workspace'):
workspace = arg
elif opt in ('-t', '--token'):
token = arg
elif opt in ('-c', '--clusterid'):
clusterid = arg
elif opt in ('-p', '--private'):
private_key_file = arg
elif opt in ('-l', '--localscript'):
local_script = arg
elif opt in ('-d', '--dest'):
script_dest = arg
elif opt in ('-z', '--sparktgz'):
source_tgz = arg
elif opt in ('-v', '--basesparkpomversion'):
base_spark_pom_version = arg
elif opt in ('-b', '--bulidprofiles'):
build_profiles = arg

print('-w is ' + workspace)
print('-c is ' + clusterid)
print('-p is ' + private_key_file)
print('-l is ' + local_script)
print('-d is ' + script_dest)
print('-z is ' + source_tgz)
print('-v is ' + base_spark_pom_version)
print('-b is ' + build_profiles)

master_addr = ClusterUtils.cluster_get_master_addr(workspace, clusterid, token)
master_addr = ClusterUtils.cluster_get_master_addr(params.workspace, params.clusterid, params.token)
if master_addr is None:
print("Error, didn't get master address")
sys.exit(1)
print("Master node address is: %s" % master_addr)
print("Copying script")
rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (private_key_file, local_script, master_addr, script_dest)
print("rsync command: %s" % rsync_command)
subprocess.check_call(rsync_command, shell = True)

print("Copying source")
rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (private_key_file, source_tgz, master_addr, tgz_dest)
print("Copying script")
rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" %s ubuntu@%s:%s" % (params.private_key_file, params.local_script, master_addr, params.script_dest)
print("rsync command: %s" % rsync_command)
subprocess.check_call(rsync_command, shell = True)

ssh_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@%s -p 2200 -i %s %s %s %s %s 2>&1 | tee buildout; if [ `echo ${PIPESTATUS[0]}` -ne 0 ]; then false; else true; fi" % (master_addr, private_key_file, script_dest, tgz_dest, base_spark_pom_version, build_profiles)
ssh_command = "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@%s -p 2200 -i %s %s %s 2>&1 | tee testout; if [ `echo ${PIPESTATUS[0]}` -ne 0 ]; then false; else true; fi" % (master_addr, params.private_key_file, params.script_dest, params.jar_path)
print("ssh command: %s" % ssh_command)
subprocess.check_call(ssh_command, shell = True)

print("Copying built tarball back")
rsync_command = "rsync -I -Pave \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 2200 -i %s\" ubuntu@%s:/home/ubuntu/spark-rapids-built.tgz ./" % (private_key_file, master_addr)
print("rsync command to get built tarball: %s" % rsync_command)
subprocess.check_call(rsync_command, shell = True)

if __name__ == '__main__':
main()
42 changes: 42 additions & 0 deletions jenkins/databricks/test.sh
@@ -0,0 +1,42 @@
#!/bin/bash
#
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e

LOCAL_JAR_PATH=$1

# tests
export PATH=/databricks/conda/envs/databricks-ml-gpu/bin:/databricks/conda/condabin:$PATH
sudo /databricks/conda/envs/databricks-ml-gpu/bin/pip install pytest sre_yield requests pandas \
pyarrow findspark pytest-xdist

export SPARK_HOME=/databricks/spark
# change to not point at databricks confs so we don't conflict with their settings
export SPARK_CONF_DIR=$PWD
export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/pyspark/:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip
sudo ln -s /databricks/jars/ $SPARK_HOME/jars || true
sudo chmod 777 /databricks/data/logs/
sudo chmod 777 /databricks/data/logs/*
echo { \"port\":\"15002\" } > ~/.databricks-connect

if [ -d "$LOCAL_JAR_PATH" ]; then
## Run tests with jars in the LOCAL_JAR_PATH dir, downloaded from the dependency repo
LOCAL_JAR_PATH=$LOCAL_JAR_PATH bash $LOCAL_JAR_PATH/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks"
else
## Run tests with jars built from the spark-rapids source code
bash /home/ubuntu/spark-rapids/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks"
fi
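
Note: on the cluster, test.sh either runs the integration tests from a directory of pre-staged artifacts (when its first argument is an existing directory, which must contain integration_tests/run_pyspark_from_build.sh) or falls back to the source tree that build.sh left under /home/ubuntu/spark-rapids. A hedged sketch of both paths on a Databricks driver node; the jar directory is a placeholder:

./test.sh /home/ubuntu/spark-rapids-jars   # pre-staged jars + test sources
./test.sh                                  # jars built from source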
