Skip to content

Commit

Permalink
Add support for docker image with Spark connect server (#9759)
Browse files Browse the repository at this point in the history
Summary:
Add support for docker image with Spark connect server for fuzzer test to use.
#9270

Pull Request resolved: #9759

Reviewed By: Yuhta

Differential Revision: D57726964

Pulled By: kgpai

fbshipit-source-id: 41a1e93dcf31c1ac1ab7e1529992ca8efc26c7ab
  • Loading branch information
rui-mo authored and facebook-github-bot committed May 29, 2024
1 parent 44a8175 commit 33b23f8
Show file tree
Hide file tree
Showing 13 changed files with 96 additions and 5 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ jobs:
file: "scripts/prestojava-container.dockerfile"
args: "PRESTO_VERSION=0.284"
tags: "ghcr.io/facebookincubator/velox-dev:presto-java"
# Image build entry for the Spark Connect server container: built from
# scripts/spark-container.dockerfile and published under the
# velox-dev:spark-server tag.
# NOTE(review): `args` is presumably forwarded as docker build args by the
# steps below; keep SPARK_VERSION in sync with docker-compose.yml.
- name: Spark server
file: "scripts/spark-container.dockerfile"
args: "SPARK_VERSION=3.5.1"
tags: "ghcr.io/facebookincubator/velox-dev:spark-server"

steps:
- name: Login to GitHub Container Registry
Expand Down
21 changes: 21 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,24 @@ services:
- .:/velox:delegated
working_dir: /velox
command: /velox/scripts/docker-command.sh

spark-server:
# Container image that bundles a Spark distribution plus the Spark Connect
# jar (see scripts/spark-container.dockerfile).
#
# Usage:
# docker-compose pull spark-server or docker-compose build spark-server
# docker-compose run --rm spark-server
# or
# docker-compose run -e NUM_THREADS=<NUMBER_OF_THREADS_TO_USE> --rm spark-server
# to set the number of threads used during compilation
image: ghcr.io/facebookincubator/velox-dev:spark-server
build:
args:
# Spark release downloaded while building the image.
- SPARK_VERSION=3.5.1
# Build context is the repository root so the dockerfile can copy scripts/.
context: .
dockerfile: scripts/spark-container.dockerfile
environment:
NUM_THREADS: 8 # default value for NUM_THREADS
# ccache directory lives inside the bind-mounted checkout below.
CCACHE_DIR: "/velox/.ccache"
volumes:
# Mount the local checkout at /velox inside the container.
- .:/velox:delegated
working_dir: /velox
command: /velox/scripts/docker-command.sh
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
10 changes: 5 additions & 5 deletions scripts/prestojava-container.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ RUN dnf install -y java-11-openjdk less procps python3 tzdata \
# detailed here : https://github.com/facebookincubator/velox/issues/8127
ENV TZ=America/Los_Angeles

COPY scripts/etc/config.properties.example $PRESTO_HOME/etc/config.properties
COPY scripts/etc/jvm.config.example $PRESTO_HOME/etc/jvm.config
COPY scripts/etc/node.properties $PRESTO_HOME/etc/node.properties
COPY scripts/etc/hive.properties $PRESTO_HOME/etc/catalog
COPY scripts/start-prestojava.sh /opt
COPY scripts/presto/etc/config.properties.example $PRESTO_HOME/etc/config.properties
COPY scripts/presto/etc/jvm.config.example $PRESTO_HOME/etc/jvm.config
COPY scripts/presto/etc/node.properties $PRESTO_HOME/etc/node.properties
COPY scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
COPY scripts/presto/start-prestojava.sh /opt

WORKDIR /velox
44 changes: 44 additions & 0 deletions scripts/spark-container.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Build the container that hosts a Spark Connect server, used by the Velox
# fuzzer tests.
#
FROM ghcr.io/facebookincubator/velox-dev:centos8

# Spark release to install; override with --build-arg SPARK_VERSION=<x.y.z>.
ARG SPARK_VERSION=3.5.1
# Names of the downloaded artifacts, derived from the selected Spark version.
ARG SPARK_PKG=spark-${SPARK_VERSION}-bin-hadoop3.tgz
ARG SPARK_CONNECT_JAR=spark-connect_2.12-${SPARK_VERSION}.jar

ENV SPARK_HOME="/opt/spark-server"

# Install the runtime dependencies and Spark in a single layer so that the
# package-manager cache and the downloaded tarball are removed in the same
# layer that created them and never end up in the final image.
RUN dnf install -y java-11-openjdk less procps python3 tzdata \
    && dnf clean all \
    && ln -s $(which python3) /usr/bin/python \
    && wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PKG} \
    && wget https://repo1.maven.org/maven2/org/apache/spark/spark-connect_2.12/${SPARK_VERSION}/${SPARK_CONNECT_JAR} \
    && tar -zxf ${SPARK_PKG} \
    && rm -f ${SPARK_PKG} \
    && mv ./spark-${SPARK_VERSION}-bin-hadoop3 ${SPARK_HOME} \
    && mv ./${SPARK_CONNECT_JAR} ${SPARK_HOME}/jars/

# We set the timezone to America/Los_Angeles due to issue
# detailed here : https://github.com/facebookincubator/velox/issues/8127
ENV TZ=America/Los_Angeles

# Copied after the expensive download layer so that editing a script does not
# invalidate the cached Spark installation.
COPY scripts /velox/scripts/

COPY scripts/spark/conf/spark-defaults.conf.example $SPARK_HOME/conf/spark-defaults.conf
COPY scripts/spark/conf/spark-env.sh.example $SPARK_HOME/conf/spark-env.sh
COPY scripts/spark/conf/workers.example $SPARK_HOME/conf/workers
COPY scripts/spark/start-spark.sh /opt

WORKDIR /velox
1 change: 1 addition & 0 deletions scripts/spark/conf/spark-defaults.conf.example
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Run Spark in local mode inside the container; `local[*]` uses as many worker
# threads as there are logical cores on the machine.
spark.master local[*]
1 change: 1 addition & 0 deletions scripts/spark/conf/spark-env.sh.example
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Memory setting for Spark daemon processes.
# NOTE(review): Spark documents SPARK_DAEMON_MEMORY for the standalone
# master/worker daemons; confirm it is honored by the Connect server started
# via sbin/start-connect-server.sh (driver memory may need to be set instead).
export SPARK_DAEMON_MEMORY=5g
1 change: 1 addition & 0 deletions scripts/spark/conf/workers.example
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Hosts on which Spark worker daemons are started, one per line.
# NOTE(review): likely unused while spark.master is local[*] — confirm.
localhost
19 changes: 19 additions & 0 deletions scripts/spark/start-spark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/sh
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Starts the Spark Connect server from the Spark installation at $SPARK_HOME.
set -e

# SPARK_HOME must point at the Spark installation; fail with a clear message
# instead of silently changing into the wrong directory.
: "${SPARK_HOME:?SPARK_HOME must be set to the Spark installation directory}"

cd "$SPARK_HOME"

# Locate the Spark Connect jar by pattern rather than hard-coding the Spark
# version, so the script keeps working when the image is built with a
# different SPARK_VERSION. If several jars match, the first one in glob order
# is used.
set -- "$SPARK_HOME"/jars/spark-connect_2.12-*.jar
if [ ! -f "$1" ]; then
  echo "No spark-connect jar found under $SPARK_HOME/jars" >&2
  exit 1
fi

./sbin/start-connect-server.sh --jars "$1"

0 comments on commit 33b23f8

Please sign in to comment.