From 64c154cafa47a55ecb720a992dcf4fb5ffbda184 Mon Sep 17 00:00:00 2001
From: WenboZhao <wenbo.fisher.zhao@gmail.com>
Date: Tue, 24 Jan 2017 15:16:51 -0500
Subject: [PATCH] Update Spark build instruction (#8)

(cherry picked from commit b406258e64675b1d7db772705fe3c9a6502e5e0d)
(cherry picked from commit 898cde59707615d5deffebc13f0fb6f902ff1aab)
(cherry picked from commit 109289d0644b9807481e26d51999923741d7b23e)
---
 README.md    | 28 ++++++++++++++--------------
 core/pom.xml |  4 ++--
 2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/README.md b/README.md
index 69ae8986ad894..c04d1154aa42f 100644
--- a/README.md
+++ b/README.md
@@ -7,32 +7,32 @@ In order to build this package, you need to build and install `cook jobclient` f
 git clone https://github.com/twosigma/Cook.git
 cd Cook/jobclient
 mvn package
-mvn org.apache.maven.plugins:maven-install-plugin:2.5.2:install-file -Dfile=target/cook-jobclient-0.1.0.jar -DpomFile=pom.xml
+mvn org.apache.maven.plugins:maven-install-plugin:2.5.2:install-file \
+  -Dfile=target/cook-jobclient-0.1.2-snapshot.jar \
+  -DpomFile=pom.xml
 ```
 
-Now, we are ready to build the Spark distribution as follows.
-
+Now, we are ready to build the Spark distribution as follows. Note that if you are using Java 7,
+you will probably need to increase the heap size used by Maven. If you are on Java 8, you can
+skip the following step.
 ```
-# Install package to local m2 repository
-build/mvn install -DskipTests=true -Dscala-2.11 -Phadoop-2.6 -Dhadoop.version=2.6.0-cdh5.4.4jco
-
-# Build jar for release without hive support
-./make-distribution.sh --tgz --skip-java-test --scala-version 2.11 -Phadoop-2.6 -Dhadoop.version=2.6.0-cdh5.4.4jco
-
-# Build jar for release with hive support
-./make-distribution.sh --tgz --skip-java-test --scala-version 2.11 -Phive -Phive-thriftserver -Phadoop-2.6 -Dhadoop.version=2.6.0-cdh5.4.4jco
+export MAVEN_OPTS="-Xmx4g -XX:MaxPermSize=1024M -XX:ReservedCodeCacheSize=1024m"
+```
+Then, build the distribution:
+```
+./dev/make-distribution.sh --tgz --name hadoop-provided-scala2.11 -Dscala-2.11 -Phadoop-2.6,hadoop-provided,hive -DskipTests
 ```
 
 The tarball will be created with the hadoop version and scala version
 embedded in the tarball name.  Additionally, we use `git describe
 --tags` to create the spark version, rather than just taking what's in
-the pom.xml files.  This way, we get a tarball name that looks like
+the pom.xml files. This way, we get a tarball name that looks like
 
-    spark-1.6.1-31-g9dc4df0-bin-hadoop2.6.0-cdh5.4.4jco-scala2.10.tgz
+    spark-2.0.2-31-g9dc4df0-bin-hadoop-provided-scala2.11.tgz
 
 rather than
 
-    spark-1.6.1-bin-2.6.0-cdh5.4.4jco.tgz
+    spark-2.0.2-bin-hadoop-provided-scala2.11.tgz
 
 and thus we can manage multiple internal releases on the same upstream
 version, and also manage our scala version dependencies appropriately.
diff --git a/core/pom.xml b/core/pom.xml
index 06a4b3087f818..026186c0c1f3e 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -34,9 +34,9 @@
   <url>http://spark.apache.org/</url>
   <dependencies>
     <dependency>
-      <groupId>com.twosigma</groupId>
+      <groupId>twosigma</groupId>
       <artifactId>cook-jobclient</artifactId>
-      <version>0.1.0</version>
+      <version>0.1.2-snapshot</version>
     </dependency>
     <dependency>
       <groupId>org.apache.avro</groupId>