
Commit

Databricks CI improvements and support runtime env parameter to xfail certain tests (NVIDIA#297)

* Add a profile to build for Databricks

* Change xfail to be an annotation

Co-authored-by: Thomas Graves <tgraves@nvidia.com>
tgravescs authored Jun 29, 2020
1 parent 542945c commit 6aeb15a
Showing 12 changed files with 178 additions and 105 deletions.
3 changes: 3 additions & 0 deletions integration_tests/conftest.py
@@ -41,3 +41,6 @@ def pytest_addoption(parser):
parser.addoption(
"--debug_tmp_path", action='store_true', default=False, help="if true don't delete tmp_path contents for debugging"
)
parser.addoption(
"--runtime_env", action='store', default="Apache", help="the runtime environment for the tests - apache or databricks"
)
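For reference, a minimal sketch of driving the suite programmatically with the new option set; the test path and the pytest.main invocation are illustrative assumptions, not part of this commit. The value is effectively case-insensitive because the conftest shown next lower-cases it in runtime_env().

import pytest

# Hypothetical programmatic run of the integration tests against a Databricks runtime.
exit_code = pytest.main([
    "integration_tests/src/main/python",  # assumed location of the test modules
    "--runtime_env=databricks",
])
print("pytest exited with", exit_code)
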
14 changes: 14 additions & 0 deletions integration_tests/src/main/python/conftest.py
@@ -48,6 +48,17 @@ def is_allowing_any_non_gpu():
def get_non_gpu_allowed():
return _non_gpu_allowed

_runtime_env = "apache"

def runtime_env():
return _runtime_env.lower()

def is_apache_runtime():
return runtime_env() == "apache"

def is_databricks_runtime():
return runtime_env() == "databricks"

_limit = -1

def get_limit():
@@ -112,6 +123,9 @@ def pytest_runtest_setup(item):
else:
_limit = -1

def pytest_configure(config):
global _runtime_env
_runtime_env = config.getoption('runtime_env')

def pytest_collection_modifyitems(config, items):
for item in items:
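Because pytest_configure runs before test modules are collected, helpers such as is_databricks_runtime() already reflect the --runtime_env value by the time decorators are evaluated at import time. A minimal sketch of using the helpers in a test module (hypothetical test, mirroring the pattern used in parquet_test.py below):

import pytest
from conftest import is_apache_runtime, is_databricks_runtime

# Hypothetical test: expected to fail on a Databricks runtime, runs normally on Apache Spark.
@pytest.mark.xfail(condition=is_databricks_runtime(),
                   reason='hypothetical Databricks-specific limitation')
def test_runtime_dependent_behavior():
    assert is_apache_runtime() or is_databricks_runtime()
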
3 changes: 3 additions & 0 deletions integration_tests/src/main/python/parquet_test.py
@@ -15,6 +15,7 @@
import pytest

from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_and_cpu_writes_are_equal_collect, assert_gpu_fallback_collect
from conftest import is_databricks_runtime
from datetime import date, datetime, timezone
from data_gen import *
from marks import *
@@ -145,6 +146,8 @@ def test_simple_partitioned_read(spark_tmp_path):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : spark.read.parquet(data_path))

@pytest.mark.xfail(condition=is_databricks_runtime(),
reason='https://github.com/NVIDIA/spark-rapids/issues/192')
def test_read_merge_schema(spark_tmp_path):
# Once https://github.com/NVIDIA/spark-rapids/issues/133 and https://github.com/NVIDIA/spark-rapids/issues/132 are fixed
# we should go with a more standard set of generators
19 changes: 10 additions & 9 deletions integration_tests/src/main/python/qa_nightly_sql.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from conftest import is_databricks_runtime
import pytest

SELECT_SQL = [
@@ -665,15 +666,15 @@
("SELECT COUNT(byteF) as count, (AVG(intF) * 5.0) as avg, (SUM(intF) + MAX(shortF * 3)) as summax FROM test_table GROUP BY intF*3", "COUNT(byteF), AVG(intF) * 5.0, SUM(intF) + MAX(shortF * 3) GROUP BY intF*3"),
("SELECT COUNT(*) as count, (AVG(intF) * 5.0) as avg, (SUM(intF) + MAX(shortF * 3)) as summax FROM test_table GROUP BY intF*3", "COUNT(*), AVG(intF) * 5.0, SUM(intF) + MAX(shortF * 3) GROUP BY intF*3"),
# ("SELECT SUM(intF) OVER (PARTITION BY byteF ORDER BY shortF) as sum_total FROM test_table", "SUM(intF) OVER (PARTITION BY byteF ORDER BY shortF) as sum_total"),
("SELECT ROW_NUMBER() OVER (PARTITION BY byteF ORDER BY byteF) row_num, byteF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY byteF ORDER BY byteF) row_num, byteF"),
("SELECT ROW_NUMBER() OVER (PARTITION BY shortF ORDER BY shortF) row_num, shortF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY shortF ORDER BY shortF) row_num, shortF"),
("SELECT ROW_NUMBER() OVER (PARTITION BY intF ORDER BY intF) row_num, intF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY intF ORDER BY intF) row_num, intF"),
("SELECT ROW_NUMBER() OVER (PARTITION BY longF ORDER BY longF) row_num, longF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY longF ORDER BY longF) row_num, longF"),
("SELECT ROW_NUMBER() OVER (PARTITION BY floatF ORDER BY floatF) row_num, floatF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY floatF ORDER BY floatF) row_num, floatF"),
("SELECT ROW_NUMBER() OVER (PARTITION BY booleanF ORDER BY booleanF) row_num, booleanF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY booleanF ORDER BY booleanF) row_num, booleanF"),
("SELECT ROW_NUMBER() OVER (PARTITION BY strF ORDER BY strF) row_num, strF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY strF ORDER BY strF) row_num, strF"),
("SELECT ROW_NUMBER() OVER (PARTITION BY dateF ORDER BY dateF) row_num, dateF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY dateF ORDER BY dateF) row_num, dateF"),
("SELECT ROW_NUMBER() OVER (PARTITION BY timestampF ORDER BY timestampF) row_num, timestampF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY timestampF ORDER BY timestampF) row_num, timestampF"),
pytest.param(("SELECT ROW_NUMBER() OVER (PARTITION BY byteF ORDER BY byteF) row_num, byteF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY byteF ORDER BY byteF) row_num, byteF"), marks=pytest.mark.xfail(is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/203')),
pytest.param(("SELECT ROW_NUMBER() OVER (PARTITION BY shortF ORDER BY shortF) row_num, shortF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY shortF ORDER BY shortF) row_num, shortF"), marks=pytest.mark.xfail(is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/203')),
pytest.param(("SELECT ROW_NUMBER() OVER (PARTITION BY intF ORDER BY intF) row_num, intF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY intF ORDER BY intF) row_num, intF"), marks=pytest.mark.xfail(is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/203')),
pytest.param(("SELECT ROW_NUMBER() OVER (PARTITION BY longF ORDER BY longF) row_num, longF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY longF ORDER BY longF) row_num, longF"), marks=pytest.mark.xfail(is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/203')),
pytest.param(("SELECT ROW_NUMBER() OVER (PARTITION BY floatF ORDER BY floatF) row_num, floatF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY floatF ORDER BY floatF) row_num, floatF"), marks=pytest.mark.xfail(is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/203')),
pytest.param(("SELECT ROW_NUMBER() OVER (PARTITION BY booleanF ORDER BY booleanF) row_num, booleanF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY booleanF ORDER BY booleanF) row_num, booleanF"), marks=pytest.mark.xfail(is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/203')),
pytest.param(("SELECT ROW_NUMBER() OVER (PARTITION BY strF ORDER BY strF) row_num, strF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY strF ORDER BY strF) row_num, strF"), marks=pytest.mark.xfail(is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/203')),
pytest.param(("SELECT ROW_NUMBER() OVER (PARTITION BY dateF ORDER BY dateF) row_num, dateF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY dateF ORDER BY dateF) row_num, dateF"), marks=pytest.mark.xfail(is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/203')),
pytest.param(("SELECT ROW_NUMBER() OVER (PARTITION BY timestampF ORDER BY timestampF) row_num, timestampF FROM test_table", "ROW_NUMBER() OVER (PARTITION BY timestampF ORDER BY timestampF) row_num, timestampF"), marks=pytest.mark.xfail(is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/203')),
# ("window/row/range (need change)", "window/row/range (need change)"),
#("SELECT byteF, SUM(byteF) OVER (PARTITION BY byteF ORDER BY byteF RANGE BETWEEN 20 PRECEDING AND 10 FOLLOWING ) as sum_total FROM test_table", "byteF, SUM(byteF) OVER (PARTITION BY byteF ORDER BY byteF RANGE BETWEEN 20 PRECEDING AND 10 FOLLOWING ) as sum_total"),
#("SELECT SUM(intF) OVER (PARTITION BY byteF ORDER BY byteF RANGE BETWEEN 20 PRECEDING AND 10 FOLLOWING ) as sum_total FROM test_table", "SUM(intF) OVER (PARTITION BY byteF ORDER BY byteF RANGE BETWEEN 20 PRECEDING AND 10 FOLLOWING ) as sum_total"),
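These pytest.param entries are later consumed by a parametrized test in the nightly suite; a minimal sketch (hypothetical test function and a trimmed-down list) of how an entry carrying an xfail mark flows through @pytest.mark.parametrize while plain tuples run everywhere:

import pytest
from conftest import is_databricks_runtime

# Trimmed-down stand-in for SELECT_SQL: each entry is (query, readable label).
SELECT_SQL_SKETCH = [
    ("SELECT COUNT(*) FROM test_table", "COUNT(*)"),
    pytest.param(
        ("SELECT ROW_NUMBER() OVER (PARTITION BY intF ORDER BY intF) row_num, intF FROM test_table",
         "ROW_NUMBER() OVER (PARTITION BY intF ORDER BY intF) row_num, intF"),
        marks=pytest.mark.xfail(is_databricks_runtime(),
                                reason='https://github.com/NVIDIA/spark-rapids/issues/203')),
]

@pytest.mark.parametrize('sql_query_line', SELECT_SQL_SKETCH, ids=lambda p: p[1])
def test_select_sketch(sql_query_line):  # hypothetical test function
    sql, _label = sql_query_line
    assert sql.upper().startswith("SELECT")
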
62 changes: 26 additions & 36 deletions jenkins/Jenkinsfile.databricksnightly
@@ -22,7 +22,15 @@
*/

pipeline {
agent { label 'vanilla' }
agent {
docker {
label 'docker-gpu'
image 'urm.nvidia.com/sw-spark-docker/plugin:dev-ubuntu16-cuda10.1'
args '--runtime=nvidia -v ${HOME}/.m2:${HOME}/.m2:rw \
-v ${HOME}/.zinc:${HOME}/.zinc:rw \
-v /etc/passwd:/etc/passwd -v /etc/group:/etc/group'
}
}

options {
ansiColor('xterm')
@@ -33,6 +41,14 @@ pipeline {
parameters {
choice(name: 'DEPLOY_TO', choices: ['Urm', 'Local'],
description: 'Where to deploy artifacts to')
string(name: 'DATABRICKS_VERSION',
defaultValue: '0.2-databricks-SNAPSHOT', description: 'Version to set')
string(name: 'CUDF_VERSION',
defaultValue: '0.15-SNAPSHOT', description: 'Cudf version to use')
string(name: 'CUDA_VERSION',
defaultValue: 'cuda10-1', description: 'cuda version to use')
string(name: 'CLUSTER_ID',
defaultValue: '0617-140138-umiak14', description: 'databricks cluster id')
string(name: 'REF', defaultValue: 'branch-0.2', description: 'Commit to build')
}

@@ -42,6 +58,10 @@
LIBCUDF_KERNEL_CACHE_PATH='/tmp'
URM_CREDS = credentials("svcngcc_artifactory")
DATABRICKS_TOKEN = credentials("SPARK_DATABRICKS_TOKEN")
SCALA_VERSION = '2.12'
SPARK_VERSION = '3.0.0'
CI_RAPIDS_JAR = 'rapids-4-spark_2.12-0.1-SNAPSHOT-ci.jar'
CI_CUDF_JAR = 'cudf-0.14-cuda10-1.jar'
}

triggers {
@@ -50,54 +70,24 @@

stages {
stage('Ubuntu16 CUDA10.1') {
agent {
dockerfile {
label 'docker-gpu'
filename 'Dockerfile.ubuntu16'
dir "jenkins"
args '--runtime=nvidia -v ${HOME}/.m2:${HOME}/.m2:rw \
-v ${HOME}/.zinc:${HOME}/.zinc:rw \
-v /etc/passwd:/etc/passwd -v /etc/group:/etc/group'
}
}
steps {
script {
sshagent(credentials : ['svcngcc_pubpriv']) {
sh "mvn versions:set -DnewVersion=0.2.0-databricks-SNAPSHOT && git clean -d -f"
sh "mvn versions:set -DnewVersion=$DATABRICKS_VERSION && git clean -d -f"
sh "mvn dependency:get -Dartifact=ai.rapids:cudf:$CUDF_VERSION -Ddest=./"
sh "patch -p1 < ./jenkins/databricks/dbimports.patch"
sh "tar -zcvf spark-rapids-ci.tgz * || true"
sh "python3.6 ./jenkins/databricks/run-tests.py -z ./spark-rapids-ci.tgz -t $DATABRICKS_TOKEN -p /home/svcngcc/.ssh/id_rsa -l ./jenkins/databricks/build.sh"
sh "tar -zcvf spark-rapids-ci.tgz *"
sh "python3.6 ./jenkins/databricks/run-tests.py -c $CLUSTER_ID -z ./spark-rapids-ci.tgz -t $DATABRICKS_TOKEN -p /home/svcngcc/.ssh/id_rsa -l ./jenkins/databricks/build.sh -j $CI_RAPIDS_JAR -b $DATABRICKS_VERSION -k $SPARK_VERSION -a $SCALA_VERSION -f $CUDF_VERSION -u $CUDA_VERSION -m $CI_CUDF_JAR"
sh "./jenkins/databricks/deploy.sh"
}
}
}
}
stage('cleanup') {
agent {
dockerfile {
label 'docker-gpu'
filename 'Dockerfile.ubuntu16'
dir "jenkins"
args '--runtime=nvidia -v ${HOME}/.m2:${HOME}/.m2:rw \
-v ${HOME}/.zinc:${HOME}/.zinc:rw \
-v /etc/passwd:/etc/passwd -v /etc/group:/etc/group'
}
}
steps {
script {
sh "python3.6 ./jenkins/databricks/shutdown.py -t $DATABRICKS_TOKEN"
}
}
}
} // end of stages
post {
always {
script {
if (currentBuild.currentResult == "SUCCESS") {
slack("#rapidsai-spark-cicd", "Success", color: "#33CC33")
} else {
slack("#rapidsai-spark-cicd", "Failed", color: "#FF0000")
}
sh "python3.6 ./jenkins/databricks/shutdown.py -c $CLUSTER_ID -t $DATABRICKS_TOKEN || true"
}
}
}
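All of the new parameters and environment variables above are threaded through the single run-tests.py call. A rough Python sketch of the same invocation (the pipeline actually issues it via a Jenkins sh step; this assumes the variables are exported in the environment):

import os
import subprocess

env = os.environ  # assumes CLUSTER_ID, DATABRICKS_TOKEN, CI_RAPIDS_JAR, etc. are set
cmd = [
    "python3.6", "./jenkins/databricks/run-tests.py",
    "-c", env["CLUSTER_ID"],            # Databricks cluster id
    "-z", "./spark-rapids-ci.tgz",      # source tarball created in the previous step
    "-t", env["DATABRICKS_TOKEN"],
    "-p", "/home/svcngcc/.ssh/id_rsa",
    "-l", "./jenkins/databricks/build.sh",
    "-j", env["CI_RAPIDS_JAR"],
    "-b", env["DATABRICKS_VERSION"],
    "-k", env["SPARK_VERSION"],
    "-a", env["SCALA_VERSION"],
    "-f", env["CUDF_VERSION"],
    "-u", env["CUDA_VERSION"],
    "-m", env["CI_CUDF_JAR"],
]
subprocess.run(cmd, check=True)  # raise if the remote build or tests fail
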
16 changes: 13 additions & 3 deletions jenkins/Jenkinsfile.databricksrelease
@@ -28,7 +28,7 @@ def SERVERS_MAP = [

def SEC_IDS = [
Local: ['local-gpg-passphrase', 'local-gpg-private-key', 'local-username-password'],
Sonatype: ['rapids-gpg-passphrase', 'rapids-gpg-private-key', 'sonatype-username-password']
Sonatype: ['SPARK_RAPIDS_GPG_PASSPHRASE', 'SPARK_RAPIDS_GPG_PRIVATE_KEY', 'SPARK_SONATYPE_USERPASS']
]

pipeline {
@@ -52,6 +52,12 @@ pipeline {
parameters {
choice(name: 'DEPLOY_TO', choices: ['Sonatype'],
description: 'Where to deploy artifacts to')
string(name: 'DATABRICKS_VERSION',
defaultValue: '0.2-databricks-SNAPSHOT', description: 'Version to set')
string(name: 'CUDF_VERSION',
defaultValue: '0.15-SNAPSHOT', description: 'Cudf version to use')
string(name: 'CUDA_VERSION',
defaultValue: 'cuda10-1', description: 'cuda version to use')
string(name: 'REF', defaultValue: 'branch-0.2', description: 'Commit to build')
}

@@ -64,17 +70,21 @@
DATABRICKS_TOKEN = credentials("SPARK_DATABRICKS_TOKEN")
DIST_PL='dist'
SQL_PL='sql-plugin'
SCALA_VERSION = '2.12'
SPARK_VERSION = '3.0.0'
CI_RAPIDS_JAR = 'rapids-4-spark_2.12-0.1-SNAPSHOT-ci.jar'
CI_CUDF_JAR = 'cudf-0.14-cuda10-1.jar'
}

stages {
stage('Build') {
steps {
script {
sshagent(credentials : ['svcngcc_pubpriv']) {
sh "mvn versions:set -DnewVersion=0.1.0-databricks && git clean -d -f"
sh "mvn versions:set -DnewVersion=0.2.0-databricks && git clean -d -f"
sh "patch -p1 < ./jenkins/databricks/dbimports.patch"
sh "tar -zcvf spark-rapids-ci.tgz * || true"
sh "python3.6 ./jenkins/databricks/run-tests.py -z ./spark-rapids-ci.tgz -t $DATABRICKS_TOKEN -p /home/svcngcc/.ssh/id_rsa -l ./jenkins/databricks/build.sh"
sh "python3.6 ./jenkins/databricks/run-tests.py -z ./spark-rapids-ci.tgz -t $DATABRICKS_TOKEN -p /home/svcngcc/.ssh/id_rsa -l ./jenkins/databricks/build.sh -j $CI_RAPIDS_JAR -b $DATABRICKS_VERSION -k $SPARK_VERSION -a $SCALA_VERSION -f $CUDF_VERSION -u $CUDA_VERSION -m $CI_CUDF_JAR"
}
}
}
83 changes: 48 additions & 35 deletions jenkins/databricks/build.sh
@@ -17,85 +17,98 @@

set -e

SPARKTGZ=/home/ubuntu/spark-rapids-ci.tgz
if [ "$1" != "" ]; then
SPARKTGZ=$1
fi
SPARKTGZ=$1
DATABRICKS_VERSION=$2
SCALA_VERSION=$3
CI_RAPIDS_JAR=$4
SPARK_VERSION=$5
CUDF_VERSION=$6
CUDA_VERSION=$7
CI_CUDF_JAR=$8

echo "Spark version is $SPARK_VERSION"
echo "scala version is: $SCALA_VERSION"

# this has to match the Databricks init script
DB_JAR_LOC=/databricks/jars
DB_RAPIDS_JAR_LOC=$DB_JAR_LOC/$CI_RAPIDS_JAR
DB_CUDF_JAR_LOC=$DB_JAR_LOC/$CI_CUDF_JAR
RAPIDS_BUILT_JAR=rapids-4-spark_$SCALA_VERSION-$DATABRICKS_VERSION.jar

sudo apt install -y maven
rm -rf spark-rapids
mkdir spark-rapids
tar -zxvf $SPARKTGZ -C spark-rapids
cd spark-rapids
# pull 3.0.0 artifacts and ignore errors then install databricks jars, then build again
mvn clean package || true
M2DIR=/home/ubuntu/.m2/repository
CUDF_JAR=./cudf-${CUDF_VERSION}.jar
mvn install:install-file \
-Dmaven.repo.local=$M2DIR \
-Dfile=./$CUDF_JAR \
-DgroupId=ai.rapids \
-DartifactId=cudf \
-Dversion=$CUDF_VERSION \
-Dclassifier=$CUDA_VERSION \
-Dpackaging=jar

# pull normal Spark artifacts and ignore errors then install databricks jars, then build again
JARDIR=/databricks/jars
SQLJAR=----workspace_spark_3_0--sql--core--core-hive-2.3__hadoop-2.7_2.12_deploy.jar
CATALYSTJAR=----workspace_spark_3_0--sql--catalyst--catalyst-hive-2.3__hadoop-2.7_2.12_deploy.jar
ANNOTJAR=----workspace_spark_3_0--common--tags--tags-hive-2.3__hadoop-2.7_2.12_deploy.jar
COREJAR=----workspace_spark_3_0--core--core-hive-2.3__hadoop-2.7_2.12_deploy.jar
VERSIONJAR=----workspace_spark_3_0--core--libcore_generated_resources.jar
VERSION=3.0.0
SQLJAR=----workspace_spark_3_0--sql--core--core-hive-2.3__hadoop-2.7_${SCALA_VERSION}_deploy.jar
CATALYSTJAR=----workspace_spark_3_0--sql--catalyst--catalyst-hive-2.3__hadoop-2.7_${SCALA_VERSION}_deploy.jar
ANNOTJAR=----workspace_spark_3_0--common--tags--tags-hive-2.3__hadoop-2.7_${SCALA_VERSION}_deploy.jar
COREJAR=----workspace_spark_3_0--core--core-hive-2.3__hadoop-2.7_${SCALA_VERSION}_deploy.jar
mvn install:install-file \
-Dmaven.repo.local=$M2DIR \
-Dfile=$JARDIR/$COREJAR \
-DgroupId=org.apache.spark \
-DartifactId=spark-core_2.12 \
-Dversion=$VERSION \
-DartifactId=spark-core_$SCALA_VERSION \
-Dversion=$SPARK_VERSION \
-Dpackaging=jar

mvn install:install-file \
-Dmaven.repo.local=$M2DIR \
-Dfile=$JARDIR/$CATALYSTJAR \
-DgroupId=org.apache.spark \
-DartifactId=spark-catalyst_2.12 \
-Dversion=$VERSION \
-DartifactId=spark-catalyst_$SCALA_VERSION \
-Dversion=$SPARK_VERSION \
-Dpackaging=jar

mvn install:install-file \
-Dmaven.repo.local=$M2DIR \
-Dfile=$JARDIR/$SQLJAR \
-DgroupId=org.apache.spark \
-DartifactId=spark-sql_2.12 \
-Dversion=$VERSION \
-DartifactId=spark-sql_$SCALA_VERSION \
-Dversion=$SPARK_VERSION \
-Dpackaging=jar

mvn install:install-file \
-Dmaven.repo.local=$M2DIR \
-Dfile=$JARDIR/$ANNOTJAR \
-DgroupId=org.apache.spark \
-DartifactId=spark-annotation_2.12 \
-Dversion=$VERSION \
-DartifactId=spark-annotation_$SCALA_VERSION \
-Dversion=$SPARK_VERSION \
-Dpackaging=jar

mvn install:install-file \
-Dmaven.repo.local=$M2DIR \
-Dfile=$JARDIR/$VERSIONJAR \
-DgroupId=org.apache.spark \
-DartifactId=spark-version_2.12 \
-Dversion=$VERSION \
-Dpackaging=jar

mvn -Pdatabricks clean verify -DskipTests
mvn -Pdatabricks clean package -DskipTests

# Copy so we pick up new built jar. Note that the jar name rapids-4-spark_2.12-0.1-SNAPSHOT-ci.jar has to be
# exactly that because its based on the staticly setup Databricks cluster we use. That cluster specifically
# installs the jar with the name rapids-4-spark_2.12-0.1-SNAPSHOT-ci.jar. Do not change that name
# without changing the Databricks cluster setup.
sudo cp dist/target/rapids-4-spark_2.12-*-SNAPSHOT.jar /databricks/jars/rapids-4-spark_2.12-0.1-SNAPSHOT-ci.jar
# Copy so we pick up the newly built jar and the latest cuDF jar. Note that the jar names have to be
# exactly what is installed on the statically set up Databricks cluster we use.
sudo cp dist/target/$RAPIDS_BUILT_JAR $DB_RAPIDS_JAR_LOC
sudo cp ./$CUDF_JAR $DB_CUDF_JAR_LOC

# tests
export PATH=/databricks/conda/envs/databricks-ml-gpu/bin:/databricks/conda/condabin:$PATH
sudo /databricks/conda/envs/databricks-ml-gpu/bin/pip install pytest sre_yield
cd /home/ubuntu/spark-rapids/integration_tests
export SPARK_HOME=/databricks/spark
# change to not point at databricks confs so we don't conflict with their settings
export SPARK_CONF_DIR=$PWD
export PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/pyspark/:$SPARK_HOME/python/lib/py4j-0.10.9-src.zip
sudo ln -s /databricks/jars/ $SPARK_HOME/jars || true
sudo chmod 777 /databricks/data/logs/
sudo chmod 777 /databricks/data/logs/*
echo { \"port\":\"15002\" } > ~/.databricks-connect
$SPARK_HOME/bin/spark-submit ./runtests.py 2>&1 | tee out

$SPARK_HOME/bin/spark-submit ./runtests.py --runtime_env="databricks"
cd /home/ubuntu
tar -zcvf spark-rapids-built.tgz spark-rapids
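build.sh is launched on the Databricks node by run-tests.py and unpacks eight positional arguments in the order shown at the top of the script. A hedged sketch of an equivalent direct invocation using the CI defaults (the tarball path is an assumption taken from the script's previous default):

import subprocess

# Positional order must match $1..$8 in build.sh: tarball, DATABRICKS_VERSION,
# SCALA_VERSION, CI_RAPIDS_JAR, SPARK_VERSION, CUDF_VERSION, CUDA_VERSION, CI_CUDF_JAR.
subprocess.run([
    "bash", "./jenkins/databricks/build.sh",
    "/home/ubuntu/spark-rapids-ci.tgz",           # assumed tarball location
    "0.2-databricks-SNAPSHOT",
    "2.12",
    "rapids-4-spark_2.12-0.1-SNAPSHOT-ci.jar",
    "3.0.0",
    "0.15-SNAPSHOT",
    "cuda10-1",
    "cudf-0.14-cuda10-1.jar",
], check=True)
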
