Skip to content

Build / Spark Connect Python-only (master, Python 3.11) #61

Build / Spark Connect Python-only (master, Python 3.11)

Build / Spark Connect Python-only (master, Python 3.11) #61

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
name: Build / Spark Connect Python-only (master, Python 3.11)
on:
schedule:
- cron: '0 19 * * *'
jobs:
# Build: build Spark and run the tests for specified modules using SBT
build:
name: "Build modules: pyspark-connect"
runs-on: ubuntu-latest
timeout-minutes: 300
if: github.repository == 'apache/spark'
steps:
- name: Checkout Spark repository
uses: actions/checkout@v4
- name: Cache Scala, SBT and Maven
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-spark-connect-python-only-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-spark-connect-python-only-
- name: Cache Coursier local repository
uses: actions/cache@v4
with:
path: ~/.cache/coursier
key: coursier-build-spark-connect-python-only-${{ hashFiles('**/pom.xml') }}
restore-keys: |
coursier-build-spark-connect-python-only-
- name: Install Java 17
uses: actions/setup-java@v4
with:
distribution: zulu
java-version: 17
- name: Install Python 3.11
uses: actions/setup-python@v5
with:
python-version: '3.11'
architecture: x64
- name: Build Spark
run: |
./build/sbt -Phive test:package
- name: Install pure Python package (pyspark-connect)
env:
SPARK_TESTING: 1
run: |
cd python
python packaging/connect/setup.py sdist
cd dist
pip install pyspark-connect-*.tar.gz
pip install scikit-learn torch torchvision torcheval
- name: Run tests
env:
SPARK_CONNECT_TESTING_REMOTE: sc://localhost
SPARK_TESTING: 1
run: |
# Start a Spark Connect server
./sbin/start-connect-server.sh --jars `find connector/connect/server/target -name spark-connect*SNAPSHOT.jar`
# Remove Py4J and PySpark zipped library to make sure there is no JVM connection
rm python/lib/*
rm -r python/pyspark
./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-connect,pyspark-ml-connect
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v4
with:
name: test-results-spark-connect-python-only
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v4
with:
name: unit-tests-log-spark-connect-python-only
path: "**/target/unit-tests.log"