From 5b4c575cbb54d72c96b8d15b122792cd2a0d7bbe Mon Sep 17 00:00:00 2001
From: Tim Liu
Date: Tue, 23 Jan 2024 23:11:06 +0800
Subject: [PATCH] Smoke test with '--packages' to fetch the plugin jar (#10238)

* Smoke test with '--packages' to fetch the plugin jar

To fix https://github.com/NVIDIA/spark-rapids/issues/10160

Run SPARK_SHELL_SMOKE_TEST with '--packages' to fetch the plugin jar.
The plugin jar can be downloaded from the artifact repository specified
by '--repositories'. This test can fetch the plugin jar from an internal
Maven repo, Maven Central, or a Sonatype staging repo.

Signed-off-by: Tim Liu

* Update integration_tests/run_pyspark_from_build.sh

This suggestion is reasonable

Co-authored-by: Jason Lowe

* Do not nest the 'repositories' parameter

Signed-off-by: Tim Liu

---------

Signed-off-by: Tim Liu
Co-authored-by: Jason Lowe
---
 integration_tests/run_pyspark_from_build.sh | 7 ++++++-
 jenkins/spark-tests.sh                      | 5 +++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index f6e32c72161..cc983d49b3c 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -333,10 +333,15 @@ EOF
             --driver-class-path "${PYSP_TEST_spark_driver_extraClassPath}"
             --conf spark.executor.extraClassPath="${PYSP_TEST_spark_driver_extraClassPath}"
         )
+    elif [[ -n "$PYSP_TEST_spark_jars_packages" ]]; then
+        SPARK_SHELL_ARGS_ARR+=(--packages "${PYSP_TEST_spark_jars_packages}")
     else
         SPARK_SHELL_ARGS_ARR+=(--jars "${PYSP_TEST_spark_jars}")
     fi
+    if [[ -n "$PYSP_TEST_spark_jars_repositories" ]]; then
+        SPARK_SHELL_ARGS_ARR+=(--repositories "${PYSP_TEST_spark_jars_repositories}")
+    fi
 
     # NOTE grep is used not only for checking the output but also
     # to workaround the fact that spark-shell catches all failures.
     # In this test it exits not because of the failure but because it encounters
diff --git a/jenkins/spark-tests.sh b/jenkins/spark-tests.sh
index 368a62ac1e8..0a455afcb10 100755
--- a/jenkins/spark-tests.sh
+++ b/jenkins/spark-tests.sh
@@ -304,6 +304,11 @@ if [[ $TEST_MODE == "DEFAULT" ]]; then
         PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.${SHUFFLE_SPARK_SHIM}.RapidsShuffleManager \
         ./run_pyspark_from_build.sh
 
+    SPARK_SHELL_SMOKE_TEST=1 \
+        PYSP_TEST_spark_jars_packages=com.nvidia:rapids-4-spark_${SCALA_BINARY_VER}:${PROJECT_VER} \
+        PYSP_TEST_spark_jars_repositories=${PROJECT_REPO} \
+        ./run_pyspark_from_build.sh
+
     # ParquetCachedBatchSerializer cache_test
     PYSP_TEST_spark_sql_cache_serializer=com.nvidia.spark.ParquetCachedBatchSerializer \
         ./run_pyspark_from_build.sh -k cache_test
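
For context, a minimal sketch of invoking the new smoke-test path by hand;
the Scala binary version, plugin version, and repository URL below are
illustrative placeholders, not values taken from this patch:

    # Fetch the plugin jar via --packages instead of a local --jars path.
    # The Maven coordinates and repository URL here are assumptions for
    # illustration; substitute the values for your build.
    SPARK_SHELL_SMOKE_TEST=1 \
    PYSP_TEST_spark_jars_packages=com.nvidia:rapids-4-spark_2.12:24.02.0 \
    PYSP_TEST_spark_jars_repositories=https://oss.sonatype.org/content/repositories/staging \
    ./integration_tests/run_pyspark_from_build.sh

With these variables set, run_pyspark_from_build.sh passes --packages (and,
when set, --repositories) to spark-shell, which resolves the jar through
Spark's Ivy-based dependency resolution rather than requiring a locally
built artifact.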