diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index a820376afd85..f79def97fef8 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -95,6 +95,10 @@ jobs:
             file: "scripts/prestojava-container.dockerfile"
             args: "PRESTO_VERSION=0.284"
             tags: "ghcr.io/facebookincubator/velox-dev:presto-java"
+          - name: Spark server
+            file: "scripts/spark-container.dockerfile"
+            args: "SPARK_VERSION=3.5.1"
+            tags: "ghcr.io/facebookincubator/velox-dev:spark-server"
 
     steps:
       - name: Login to GitHub Container Registry
diff --git a/docker-compose.yml b/docker-compose.yml
index 80fe1bb0d318..e5a91c901a78 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -74,3 +74,24 @@ services:
       - .:/velox:delegated
     working_dir: /velox
     command: /velox/scripts/docker-command.sh
+
+  spark-server:
+    # Usage:
+    #   docker-compose pull spark-server or docker-compose build spark-server
+    #   docker-compose run --rm spark-server
+    #   or
+    #   docker-compose run -e NUM_THREADS=<N> --rm spark-server
+    #   to set the number of threads used during compilation
+    image: ghcr.io/facebookincubator/velox-dev:spark-server
+    build:
+      args:
+        - SPARK_VERSION=3.5.1
+      context: .
+      dockerfile: scripts/spark-container.dockerfile
+    environment:
+      NUM_THREADS: 8 # default value for NUM_THREADS
+      CCACHE_DIR: "/velox/.ccache"
+    volumes:
+      - .:/velox:delegated
+    working_dir: /velox
+    command: /velox/scripts/docker-command.sh
diff --git a/scripts/etc/config.properties.example b/scripts/presto/etc/config.properties.example
similarity index 100%
rename from scripts/etc/config.properties.example
rename to scripts/presto/etc/config.properties.example
diff --git a/scripts/etc/hive.properties b/scripts/presto/etc/hive.properties
similarity index 100%
rename from scripts/etc/hive.properties
rename to scripts/presto/etc/hive.properties
diff --git a/scripts/etc/jvm.config.example b/scripts/presto/etc/jvm.config.example
similarity index 100%
rename from scripts/etc/jvm.config.example
rename to scripts/presto/etc/jvm.config.example
diff --git a/scripts/etc/node.properties b/scripts/presto/etc/node.properties
similarity index 100%
rename from scripts/etc/node.properties
rename to scripts/presto/etc/node.properties
diff --git a/scripts/start-prestojava.sh b/scripts/presto/start-prestojava.sh
similarity index 100%
rename from scripts/start-prestojava.sh
rename to scripts/presto/start-prestojava.sh
diff --git a/scripts/prestojava-container.dockerfile b/scripts/prestojava-container.dockerfile
index f11f46830a8b..005d481b5740 100644
--- a/scripts/prestojava-container.dockerfile
+++ b/scripts/prestojava-container.dockerfile
@@ -42,10 +42,10 @@ RUN dnf install -y java-11-openjdk less procps python3 tzdata \
 # detailed here : https://github.com/facebookincubator/velox/issues/8127
 ENV TZ=America/Los_Angeles
 
-COPY scripts/etc/config.properties.example $PRESTO_HOME/etc/config.properties
-COPY scripts/etc/jvm.config.example $PRESTO_HOME/etc/jvm.config
-COPY scripts/etc/node.properties $PRESTO_HOME/etc/node.properties
-COPY scripts/etc/hive.properties $PRESTO_HOME/etc/catalog
-COPY scripts/start-prestojava.sh /opt
+COPY scripts/presto/etc/config.properties.example $PRESTO_HOME/etc/config.properties
+COPY scripts/presto/etc/jvm.config.example $PRESTO_HOME/etc/jvm.config
+COPY scripts/presto/etc/node.properties $PRESTO_HOME/etc/node.properties
+COPY scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
+COPY scripts/presto/start-prestojava.sh /opt
 
 WORKDIR /velox
diff --git a/scripts/spark-container.dockerfile b/scripts/spark-container.dockerfile
new file mode 100644
index 000000000000..579edeb2d4b5
--- /dev/null
+++ b/scripts/spark-container.dockerfile
@@ -0,0 +1,49 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Build the container that provides a Spark Connect server installation.
+#
+FROM ghcr.io/facebookincubator/velox-dev:centos8
+
+# Spark release to install; override with --build-arg SPARK_VERSION=<version>.
+ARG SPARK_VERSION=3.5.1
+ARG SPARK_PKG=spark-${SPARK_VERSION}-bin-hadoop3.tgz
+ARG SPARK_CONNECT_JAR=spark-connect_2.12-${SPARK_VERSION}.jar
+
+ENV SPARK_HOME="/opt/spark-server"
+
+# Install, download, unpack and clean up in a single layer so that neither the
+# dnf cache nor the Spark tarball is retained in the image.
+RUN dnf install -y java-11-openjdk less procps python3 tzdata \
+    && dnf clean all \
+    && ln -s $(which python3) /usr/bin/python \
+    && wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_PKG} \
+    && wget https://repo1.maven.org/maven2/org/apache/spark/spark-connect_2.12/${SPARK_VERSION}/${SPARK_CONNECT_JAR} \
+    && tar -zxf ${SPARK_PKG} \
+    && rm -f ${SPARK_PKG} \
+    && mv ./spark-${SPARK_VERSION}-bin-hadoop3 $SPARK_HOME \
+    && mv ./${SPARK_CONNECT_JAR} ${SPARK_HOME}/jars/
+
+# We set the timezone to America/Los_Angeles due to issue
+# detailed here : https://github.com/facebookincubator/velox/issues/8127
+ENV TZ=America/Los_Angeles
+
+# Copy the scripts after the download layer so that editing a script does not
+# invalidate it; COPY (not ADD) is sufficient for plain local files.
+COPY scripts /velox/scripts/
+COPY scripts/spark/conf/spark-defaults.conf.example $SPARK_HOME/conf/spark-defaults.conf
+COPY scripts/spark/conf/spark-env.sh.example $SPARK_HOME/conf/spark-env.sh
+COPY scripts/spark/conf/workers.example $SPARK_HOME/conf/workers
+COPY scripts/spark/start-spark.sh /opt
+
+WORKDIR /velox
diff --git a/scripts/spark/conf/spark-defaults.conf.example b/scripts/spark/conf/spark-defaults.conf.example
new file mode 100644
index 000000000000..5b008b448014
--- /dev/null
+++ b/scripts/spark/conf/spark-defaults.conf.example
@@ -0,0 +1 @@
+spark.master local[*]
diff --git a/scripts/spark/conf/spark-env.sh.example b/scripts/spark/conf/spark-env.sh.example
new file mode 100644
index 000000000000..8cd004a86130
--- /dev/null
+++ b/scripts/spark/conf/spark-env.sh.example
@@ -0,0 +1 @@
+export SPARK_DAEMON_MEMORY=5g
diff --git a/scripts/spark/conf/workers.example b/scripts/spark/conf/workers.example
new file mode 100644
index 000000000000..2fbb50c4a8dc
--- /dev/null
+++ b/scripts/spark/conf/workers.example
@@ -0,0 +1 @@
+localhost
diff --git a/scripts/spark/start-spark.sh b/scripts/spark/start-spark.sh
new file mode 100755
index 000000000000..20794364700c
--- /dev/null
+++ b/scripts/spark/start-spark.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Starts the Spark Connect server from the installation at $SPARK_HOME.
+
+set -e
+
+# Fail with a clear message instead of cd-ing to $HOME when SPARK_HOME is unset.
+: "${SPARK_HOME:?SPARK_HOME must point to the Spark installation}"
+cd "$SPARK_HOME"
+
+# Locate the spark-connect jar by pattern rather than by a hard-coded version,
+# so the script keeps working when the image is built with another SPARK_VERSION.
+connect_jar=""
+for candidate in "$SPARK_HOME"/jars/spark-connect_2.12-*.jar; do
+  if [ -f "$candidate" ]; then
+    connect_jar="$candidate"
+    break
+  fi
+done
+if [ -z "$connect_jar" ]; then
+  echo "No spark-connect jar found under $SPARK_HOME/jars" >&2
+  exit 1
+fi
+
+./sbin/start-connect-server.sh --jars "$connect_jar"