Skip to content

Commit

Permalink
[SYSTEMML-688] Update dml file packaging in artifacts
Browse files Browse the repository at this point in the history
Move dml to scripts dir for main, in-memory, sources, and standalone jars.
Include algorithms, datagen, and utils directories.
Exclude obsolete algorithms, perftest, and staging.
Update distrib.xml and standalone.xml to reflect similar packaging.
Update ScriptsUtils.scala and LogisticRegression.scala for these changes.

Closes apache#167.
  • Loading branch information
deroneriksson committed May 19, 2016
1 parent 17aac21 commit 71733fd
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 136 deletions.
27 changes: 18 additions & 9 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,24 @@
</properties>

<build>

<resources>
<resource>
<directory>scripts/algorithms</directory>
<includes>
<include>**/*.dml</include>
</includes>
</resource>
</resources>

<!-- Adds scripts to main jar, in-memory jar, sources jar, and standalone jar -->
<resources>
<resource>
<directory>scripts</directory>
<excludes>
<exclude>algorithms/obsolete/*</exclude>
<exclude>algorithms/obsolete</exclude>
<exclude>perftest/*</exclude>
<exclude>perftest</exclude>
<exclude>staging/**/*</exclude>
<exclude>staging</exclude>
<!-- <exclude>*.sh</exclude> --> <!-- applies to sparkDML.sh -->
</excludes>
<targetPath>scripts</targetPath>
</resource>
</resources>

<plugins>

<plugin>
Expand Down
68 changes: 10 additions & 58 deletions src/assembly/distrib.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,64 +42,16 @@

<fileSet>
<directory>${basedir}/scripts</directory>
<includes>
<include>sparkDML.sh</include>
</includes>
<outputDirectory>.</outputDirectory>
</fileSet>

<fileSet>
<directory>${basedir}/scripts/algorithms</directory>
<includes>
<include>ALS_predict.dml</include>
<include>ALS_topk_predict.dml</include>
<include>ALS.dml</include>
<include>apply-transform.dml</include>
<include>bivar-stats.dml</include>
<include>Cox-predict.dml</include>
<include>Cox.dml</include>
<include>decision-tree-predict.dml</include>
<include>decision-tree.dml</include>
<include>GLM-predict.dml</include>
<include>GLM.dml</include>
<include>KM.dml</include>
<include>Kmeans-predict.dml</include>
<include>Kmeans.dml</include>
<include>l2-svm-predict.dml</include>
<include>l2-svm.dml</include>
<include>LinearRegCG.dml</include>
<include>LinearRegDS.dml</include>
<include>m-svm-predict.dml</include>
<include>m-svm.dml</include>
<include>MultiLogReg.dml</include>
<include>naive-bayes-predict.dml</include>
<include>naive-bayes.dml</include>
<include>PCA.dml</include>
<include>random-forest-predict.dml</include>
<include>random-forest.dml</include>
<include>StepGLM.dml</include>
<include>StepLinearRegDS.dml</include>
<include>stratstats.dml</include>
<include>transform.dml</include>
<include>Univar-Stats.dml</include>
</includes>
<outputDirectory>./algorithms</outputDirectory>
</fileSet>

<fileSet>
<directory>${basedir}/scripts/utils</directory>
<includes>
<include>cbind.dml</include>
<include>csv2bin.dml</include>
<include>head.dml</include>
<include>project.dml</include>
<include>rowIndexMax.dml</include>
<include>sample.dml</include>
<include>splitXY-dummy.dml</include>
<include>splitXY.dml</include>
<include>write.dml</include>
</includes>
<outputDirectory>./algorithms/utils</outputDirectory>
<excludes>
<exclude>algorithms/obsolete/*</exclude>
<exclude>algorithms/obsolete</exclude>
<exclude>perftest/*</exclude>
<exclude>perftest</exclude>
<exclude>staging/**/*</exclude>
<exclude>staging</exclude>
<!-- <exclude>*.sh</exclude> --> <!-- applies to sparkDML.sh -->
</excludes>
<outputDirectory>scripts</outputDirectory>
</fileSet>

<fileSet>
Expand Down
72 changes: 12 additions & 60 deletions src/assembly/standalone.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
<!-- Assembly file for the "standalone" SystemML release for running on a standalone machine. -->
<!-- Assembly file for the "standalone" tar.gz and zip SystemML releases for running on a standalone machine. -->
<id>standalone</id>

<formats>
Expand All @@ -33,65 +33,17 @@

<fileSets>
<fileSet>
<directory>${basedir}/scripts/algorithms</directory>
<includes>
<include>ALS_predict.dml</include>
<include>ALS_topk_predict.dml</include>
<include>ALS.dml</include>
<include>apply-transform.dml</include>
<include>bivar-stats.dml</include>
<include>Cox-predict.dml</include>
<include>Cox.dml</include>
<include>decision-tree-predict.dml</include>
<include>decision-tree.dml</include>
<include>GLM-predict.dml</include>
<include>GLM.dml</include>
<include>KM.dml</include>
<include>Kmeans-predict.dml</include>
<include>Kmeans.dml</include>
<include>l2-svm-predict.dml</include>
<include>l2-svm.dml</include>
<include>LinearRegCG.dml</include>
<include>LinearRegDS.dml</include>
<include>m-svm-predict.dml</include>
<include>m-svm.dml</include>
<include>MultiLogReg.dml</include>
<include>naive-bayes-predict.dml</include>
<include>naive-bayes.dml</include>
<include>PCA.dml</include>
<include>random-forest-predict.dml</include>
<include>random-forest.dml</include>
<include>StepGLM.dml</include>
<include>StepLinearRegDS.dml</include>
<include>stratstats.dml</include>
<include>transform.dml</include>
<include>Univar-Stats.dml</include>
</includes>
<outputDirectory>./scripts/algorithms</outputDirectory>
</fileSet>

<fileSet>
<directory>${basedir}/scripts/datagen</directory>
<includes>
<include>genLinearRegressionData.dml</include>
</includes>
<outputDirectory>./scripts/datagen</outputDirectory>
</fileSet>

<fileSet>
<directory>${basedir}/scripts/utils</directory>
<includes>
<include>cbind.dml</include>
<include>csv2bin.dml</include>
<include>head.dml</include>
<include>project.dml</include>
<include>rowIndexMax.dml</include>
<include>sample.dml</include>
<include>splitXY-dummy.dml</include>
<include>splitXY.dml</include>
<include>write.dml</include>
</includes>
<outputDirectory>./scripts/utils</outputDirectory>
<directory>${basedir}/scripts</directory>
<excludes>
<exclude>algorithms/obsolete/*</exclude>
<exclude>algorithms/obsolete</exclude>
<exclude>perftest/*</exclude>
<exclude>perftest</exclude>
<exclude>staging/**/*</exclude>
<exclude>staging</exclude>
<!-- <exclude>*.sh</exclude> --> <!-- applies to sparkDML.sh -->
</excludes>
<outputDirectory>scripts</outputDirectory>
</fileSet>

<fileSet>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.apache.sysml.api.ml

import java.io.File
import org.apache.sysml.api.{ MLContext, MLOutput }
import org.apache.sysml.runtime.matrix.MatrixCharacteristics
import org.apache.sysml.runtime.instructions.spark.utils.{ RDDConverterUtilsExt => RDDConverterUtils }
Expand Down Expand Up @@ -60,7 +61,7 @@ trait HasRegParam extends Params {
final def getRegParam: Double = $(regParam)
}
object LogisticRegression {
final val scriptPath = "MultiLogReg.dml"
final val scriptPath = "scripts" + File.separator + "algorithms" + File.separator + "MultiLogReg.dml"
}

/**
Expand Down Expand Up @@ -107,7 +108,7 @@ class LogisticRegression(override val uid: String, val sc: SparkContext) extends
}
}
object LogisticRegressionModel {
final val scriptPath = "GLM-predict.dml"
final val scriptPath = "scripts" + File.separator + "algorithms" + File.separator + "GLM-predict.dml"
}

/**
Expand Down
15 changes: 8 additions & 7 deletions src/main/scala/org/apache/sysml/api/ml/ScriptsUtils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,24 @@ object ScriptsUtils {
/*
* Internal function to get dml path
*/
private[sysml] def resolvePath(filename: String): String = {
private[sysml] def resolvePath(scriptPath: String): String = {
import java.io.File
ScriptsUtils.systemmlHome + File.separator + "algorithms" + File.separator + filename
ScriptsUtils.systemmlHome + File.separator + scriptPath
}

/*
/*
* Internal function to get dml string from jar
*/
private[sysml] def getDMLScript(algorithmFileName: String): String = {
private[sysml] def getDMLScript(scriptPath: String): String = {
var reader: BufferedReader = null
val out = new StringBuilder()
try {
val in = {
if (systemmlHome == null || systemmlHome.equals("")) {
classOf[LogisticRegression].getClassLoader().getResourceAsStream(algorithmFileName)
val resourcePath = "/" + scriptPath.replace("\\", "/")
classOf[LogisticRegression].getResourceAsStream(resourcePath)
} else {
new java.io.FileInputStream(resolvePath(algorithmFileName))
new java.io.FileInputStream(resolvePath(scriptPath))
}
}
var reader = new BufferedReader(new InputStreamReader(in))
Expand All @@ -65,7 +66,7 @@ object ScriptsUtils {
}
} catch {
case ex: Exception =>
throw new DMLRuntimeException("Cannot read the algorithm file " + algorithmFileName, ex)
throw new DMLRuntimeException("Cannot read the script file " + scriptPath, ex)
} finally {
if (reader != null)
reader.close();
Expand Down

0 comments on commit 71733fd

Please sign in to comment.