forked from apache/systemds
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SYSTEMDS-31] Shell and Python scripts to run SystemDS locally
[SYSTEMDS-32] Shell script to run SystemDS with spark-submit
- Loading branch information
1 parent
2b4ab23
commit 7239c9b
Showing
5 changed files
with
492 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#!/bin/bash
#-------------------------------------------------------------
#
# Copyright 2019 Graz University of Technology
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#-------------------------------------------------------------

#set -x

# This script is a simplified version of sparkDML.sh in order to
# allow a simple drop-in replacement for 'hadoop jar' without
# the need to change any command line arguments.

#export HADOOP_CONF_DIR=/etc/hadoop/conf
#SPARK_HOME=../spark-2.3.1-bin-hadoop2.7
#export HADOOP_HOME=${HADOOP_HOME:-/usr/hdp/2.5.0.0-1245/hadoop}
#HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/usr/hdp/2.5.0.0-1245/hadoop/conf}

export SPARK_MAJOR_VERSION=2

# Forward all arguments unchanged to spark-submit on YARN.
# NOTE: --conf requires the form PROP=VALUE; the '=' between the property
# name and its value is mandatory (a space-separated pair is rejected).
#$SPARK_HOME/bin/spark-submit \
spark-submit \
     --master yarn \
     --driver-memory 80g \
     --num-executors 1 \
     --executor-memory 60g \
     --executor-cores 19 \
     --conf "spark.yarn.am.extraJavaOptions=-Dhdp.version=2.5.0.0-1245" \
     "$@"

# # run spark submit locally
# spark-submit \
#      "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
#!/usr/bin/env python | ||
#------------------------------------------------------------- | ||
# | ||
# Copyright 2019 Graz University of Technology | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
import os | ||
import sys | ||
from os.path import join | ||
import argparse | ||
import platform | ||
from utils import get_env_systemds_root, find_dml_file, log4j_path, config_path | ||
|
||
|
||
def default_classpath(systemds_root):
    """
    Build the classpath entries required for execution.

    systemds_root: String
        Root directory of the SystemDS tree (must contain 'target').
    return: Tuple[str, str, str, str]
        Locations of the build artifacts, bundled libraries,
        hadoop libraries, and the SystemDS jar.
    """
    target_dir = join(systemds_root, 'target')
    return (join(target_dir, '*'),
            join(target_dir, 'lib', '*'),
            join(target_dir, 'lib', 'hadoop', '*'),
            join(target_dir, 'SystemDS.jar'))
|
||
|
||
def standalone_execution_entry(nvargs, args, config, explain, debug, stats, gpu, heapmem, f):
    """
    Build and run the java command line that executes a DML script in
    singlenode mode via os.system.

    nvargs:  List of 'name=value' pairs passed to the DML script, or None
    args:    List of positional argument values for the DML script, or None
    config:  Path to a SystemDS configuration file, or None for the default
    explain: Explain level string ('hops', 'runtime', ...), or None
    debug:   True to pass -debug to DMLScript
    stats:   Heavy-hitter count for -stats as a string, or None
    gpu:     Value for -gpu (e.g. 'force'), or None
    heapmem: Maximum JVM heap size, e.g. '8g'
    f:       DML file to execute
    return:  Value of os.system for the java invocation (raw wait status
             on Unix; 0 indicates success)
    """
    systemds_root = get_env_systemds_root()
    script_file = find_dml_file(systemds_root, f)

    # Classpath separator differs between Windows (';') and Unix (':').
    if platform.system() == 'Windows':
        default_cp = ';'.join(default_classpath(systemds_root))
    else:
        default_cp = ':'.join(default_classpath(systemds_root))

    java_memory = '-Xmx' + heapmem + ' -Xms4g -Xmn1g'

    # Log4j configuration shipped with SystemDS.
    log4j = log4j_path(systemds_root)
    log4j_properties_path = '-Dlog4j.configuration=file:{}'.format(log4j)

    # Fall back to the default config file when none was given.
    if config is None:
        default_config = config_path(systemds_root)
    else:
        default_config = config

    # Translate the parsed command-line options into DMLScript options.
    ds_options = []
    if nvargs is not None:
        ds_options.append('-nvargs')
        ds_options.append(' '.join(nvargs))
    if args is not None:
        ds_options.append('-args')
        ds_options.append(' '.join(args))
    if explain is not None:
        ds_options.append('-explain')
        ds_options.append(explain)
    if debug:  # plain flag, no value
        ds_options.append('-debug')
    if stats is not None:
        ds_options.append('-stats')
        ds_options.append(stats)
    if gpu is not None:
        ds_options.append('-gpu')
        ds_options.append(gpu)

    # NOTE(review): dummy HADOOP_HOME, mirroring the shell launcher —
    # presumably to satisfy hadoop's native-library lookup; confirm.
    os.environ['HADOOP_HOME'] = '/tmp/systemds'

    # NOTE(review): the command is assembled as a single shell string, so
    # paths containing spaces will break; consider subprocess.run with a
    # list of arguments instead of os.system.
    cmd = ['java', java_memory, log4j_properties_path,
           '-cp', default_cp, 'org.tugraz.sysds.api.DMLScript',
           '-f', script_file, '-exec', 'singlenode', '-config', default_config,
           ' '.join(ds_options)]

    cmd = ' '.join(cmd)
    print(cmd)

    return_code = os.system(cmd)
    return return_code
|
||
|
||
if __name__ == '__main__':

    # Print the directory this launcher was started from (debug aid).
    # os.path.dirname is portable; the previous fn[:fn.rfind('/')] chopped
    # the last character when the path contained no '/' and broke on
    # Windows path separators.
    fn = sys.argv[0]
    if os.path.exists(fn):
        print(os.path.dirname(fn))

    cparser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                      description='System-DS Standalone Script')

    # SYSTEM-DS Options
    cparser.add_argument('-nvargs', help='List of attributeName-attributeValue pairs', nargs='+', metavar='')
    cparser.add_argument('-args', help='List of positional argument values', metavar='', nargs='+')
    cparser.add_argument('-config', help='System-DS configuration file (e.g SystemDS-config.xml)', metavar='')
    cparser.add_argument('-explain', help='explains plan levels can be hops, runtime, '
                                          'recompile_hops, recompile_runtime', nargs='?', const='runtime', metavar='')
    cparser.add_argument('-debug', help='runs in debug mode', action='store_true')
    cparser.add_argument('-stats', help='Monitor and report caching/recompilation statistics, '
                                        'heavy hitter <count> is 10 unless overridden', nargs='?', const='10',
                         metavar='')
    cparser.add_argument('-gpu', help='uses CUDA instructions when reasonable, '
                                      'set <force> option to skip conservative memory estimates '
                                      'and use GPU wherever possible', nargs='?')
    cparser.add_argument('-heapmem', help='maximum JVM heap memory', metavar='', default='8g')
    cparser.add_argument('-f', required=True, help='specifies dml file to execute; '
                                                   'path can be local/hdfs/gpfs', metavar='')

    args = cparser.parse_args()
    arg_dict = vars(args)
    return_code = standalone_execution_entry(**arg_dict)

    if return_code != 0:
        print('Failed to run SystemDS. Exit code :' + str(return_code))
        # Propagate failure to the calling shell; os.system returns a raw
        # wait status on Unix (possibly > 255), so use a plain 1 here.
        sys.exit(1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
#!/usr/bin/env bash | ||
#------------------------------------------------------------- | ||
# | ||
# Copyright 2019 Graz University of Technology | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
#------------------------------------------------------------- | ||
|
||
|
||
# Emit a minimal usage hint on stdout and terminate unsuccessfully.
printSimpleUsage()
{
  cat << EOF
Usage: $0 <dml-filename> [arguments] [-help]
    -help     - Print detailed help message
EOF
  exit 1
}
|
||
# Script internally invokes 'java -Xmx4g -Xms4g -Xmn400m [Custom-Java-Options] -jar StandaloneSystemDS.jar -f <dml-filename> -exec singlenode -config=SystemDS-config.xml [Optional-Arguments]'

# A DML file name is mandatory.
if [ -z "$1" ] ; then
  echo "Wrong Usage.";
  printSimpleUsage
fi

# Resolve the project root: prefer an explicit SYSTEMDS_ROOT, otherwise
# derive it from this script's location (quoted to tolerate spaces).
if [ ! -z "$SYSTEMDS_ROOT" ]; then
  PROJECT_ROOT_DIR="$SYSTEMDS_ROOT"
  echo "SYSTEMDS_ROOT is set to:" $SYSTEMDS_ROOT
else
  # find the systemDS root path which contains the bin folder, the script folder and the target folder
  # tolerate path with spaces
  SCRIPT_DIR=$( dirname "$0" )
  PROJECT_ROOT_DIR=$( cd "${SCRIPT_DIR}/.." ; pwd -P )
fi

USER_DIR=$PWD

BUILD_DIR=${PROJECT_ROOT_DIR}/target
HADOOP_LIB_DIR=${BUILD_DIR}/lib
DML_SCRIPT_CLASS=${BUILD_DIR}/classes/org/tugraz/sysds/api/DMLScript.class

BUILD_ERR_MSG="You must build the project before running this script."
BUILD_DIR_ERR_MSG="Could not find target directory \"${BUILD_DIR}\". ${BUILD_ERR_MSG}"
HADOOP_LIB_ERR_MSG="Could not find required libraries \"${HADOOP_LIB_DIR}/*\". ${BUILD_ERR_MSG}"
DML_SCRIPT_ERR_MSG="Could not find \"${DML_SCRIPT_CLASS}\". ${BUILD_ERR_MSG}"

# check if the project had been built and the jar files exist
if [ ! -d "${BUILD_DIR}" ]; then echo "${BUILD_DIR_ERR_MSG}"; exit 1; fi
if [ ! -d "${HADOOP_LIB_DIR}" ]; then echo "${HADOOP_LIB_ERR_MSG}"; exit 1; fi
if [ ! -f "${DML_SCRIPT_CLASS}" ]; then echo "${DML_SCRIPT_ERR_MSG}"; exit 1; fi
|
||
|
||
echo "================================================================================"

# When invoked from the project root or its bin folder, redirect output
# into a temp directory so the source tree stays clean.
if [ "$USER_DIR" = "$PROJECT_ROOT_DIR" ] || [ "$USER_DIR" = "$PROJECT_ROOT_DIR/bin" ]
then
  USER_DIR=${PROJECT_ROOT_DIR}/temp
  echo "Output dir: $USER_DIR"
fi

# Materialize SystemDS-config.xml from its template on first run.
if [ ! -f "${PROJECT_ROOT_DIR}/conf/SystemDS-config.xml" ]
then
  cp "${PROJECT_ROOT_DIR}/conf/SystemDS-config.xml.template" \
     "${PROJECT_ROOT_DIR}/conf/SystemDS-config.xml"
  echo "... created ${PROJECT_ROOT_DIR}/conf/SystemDS-config.xml"
fi

# Materialize log4j.properties from its template on first run.
if [ ! -f "${PROJECT_ROOT_DIR}/conf/log4j.properties" ]
then
  cp "${PROJECT_ROOT_DIR}/conf/log4j.properties.template" \
     "${PROJECT_ROOT_DIR}/conf/log4j.properties"
  echo "... created ${PROJECT_ROOT_DIR}/conf/log4j.properties"
fi
|
||
|
||
|
||
|
||
# Assemble the classpath from the hadoop libraries produced by the build
# and the compiled classes directory. The backslash-escaped quotes become
# literal quote characters in the variable and survive until the final
# 'eval', so the '*' wildcard is expanded by java, not by the shell.
CLASSPATH=\"${BUILD_DIR}/lib/*\"

#SYSTEM_DS_JAR=$( find $PROJECT_ROOT_DIR/target/system-ds-*-SNAPSHOT.jar )
SYSTEM_DS_JAR=\"${BUILD_DIR}/classes\"

CLASSPATH=${CLASSPATH}:${SYSTEM_DS_JAR}

echo "================================================================================"

# Default Java options; paths are single-quoted so spaces survive the eval.
SYSTEMDS_DEFAULT_JAVA_OPTS="\
-Xmx8g -Xms4g -Xmn1g \
-cp $CLASSPATH \
-Dlog4j.configuration=file:'$PROJECT_ROOT_DIR/conf/log4j.properties' \
-Duser.dir='$USER_DIR'"

# Append user-supplied Java options from the environment, overriding
# defaults as necessary (last occurrence wins for most JVM flags).
if [ ! -z "${SYSTEMDS_JAVA_OPTS}" ]; then
  SYSTEMDS_DEFAULT_JAVA_OPTS+=" ${SYSTEMDS_JAVA_OPTS}"
  unset SYSTEMDS_JAVA_OPTS
fi

# Likewise, source an optional environment file from conf/ and append any
# Java options it defines.
if [ -f "${PROJECT_ROOT_DIR}/conf/systemds-env.sh" ]; then
  . "${PROJECT_ROOT_DIR}/conf/systemds-env.sh"
  if [ ! -z "${SYSTEMDS_JAVA_OPTS}" ]; then
    SYSTEMDS_DEFAULT_JAVA_OPTS+=" ${SYSTEMDS_JAVA_OPTS}"
  fi
fi
|
||
|
||
# Run DMLScript with -help to print the detailed usage text, then exit
# successfully.
printUsageExit()
{
  CMD="\
java ${SYSTEMDS_DEFAULT_JAVA_OPTS} \
org.tugraz.sysds.api.DMLScript \
-help"
  eval ${CMD}
  exit 0
}
|
||
# Minimal option scan: any help-like flag prints the full usage text.
# Optstring is "hf:" — '-h' takes no argument; the previous "h:f:" made
# '-h' demand one and only worked by falling through to the '\?' case.
while getopts "hf:" options; do
  case $options in
    h ) echo Warning: Help requested. Will exit after usage message
        printUsageExit
        ;;
    \? ) echo Warning: Help requested. Will exit after usage message
        printUsageExit
        ;;
    f ) #echo "Shifting args due to -f"
        shift
        ;;
    * ) echo Error: Unexpected error while processing options
  esac
done
|
||
# Peel off first argument so that $@ contains arguments to DML script.
SCRIPT_FILE=$1
shift

# If the given path does not exist, try to locate a script of the same
# name under the bundled scripts directory. Quote $SCRIPT_FILE so paths
# containing spaces (explicitly tolerated above) do not word-split.
if [ ! -f "$SCRIPT_FILE" ]
then
  SCRIPT_FILE_NAME=$(basename "$SCRIPT_FILE")
  SCRIPT_FILE_FOUND=$(find "$PROJECT_ROOT_DIR/scripts" -name "$SCRIPT_FILE_NAME")
  if [ ! "$SCRIPT_FILE_FOUND" ]
  then
    echo "Could not find DML script: $SCRIPT_FILE"
    printSimpleUsage
  else
    SCRIPT_FILE=$SCRIPT_FILE_FOUND
    echo "DML script: $SCRIPT_FILE"
  fi
fi
|
||
|
||
# Invoke DMLScript in singlenode mode with all remaining arguments.
CMD="\
java ${SYSTEMDS_DEFAULT_JAVA_OPTS} \
org.tugraz.sysds.api.DMLScript \
-f '$SCRIPT_FILE' \
-exec singlenode \
-config '$PROJECT_ROOT_DIR/conf/SystemDS-config.xml' \
$@"

# NOTE(review): dummy HADOOP_HOME — presumably to satisfy hadoop's
# native-library lookup on systems without a hadoop install; confirm.
export HADOOP_HOME=/tmp/systemds
eval ${CMD}

RETURN_CODE=$?

# if there was an error, display the full java command (in case some of
# the variable substitutions broke it), one token per line
if [ $RETURN_CODE -ne 0 ]
then
  echo "Failed to run SystemDS. Exit code: $RETURN_CODE"
  LF=$'\n'
  echo "  ${CMD// /$LF  }"
fi

# Propagate the java exit status to the caller; previously the script
# always exited 0 after reporting the failure.
exit $RETURN_CODE
|
Oops, something went wrong.