Skip to content

Commit

Permalink
Consolidate Spark vendor shim dependency management [databricks] (#9182)
Browse files Browse the repository at this point in the history
Replace numerous instance of duplicate dependency definitions for cloudera and databricks shims by aggregated definitions.

Verification along the lines :
`buildall` and unjar all jars in separate dirs
```bash
cd before
find . -path '*/target/*.jar' | grep -v 'dist/target/deps' | xargs -n 1 bash -c 'jar_dir=.jars/$(basename $1); mkdir -p $jar_dir; unzip -d $jar_dir $1 \*.class' _
...
diff -r before/spark-rapids/.jars after/spark-rapids/.jars
Only in before/spark-rapids/.jars/rapids-4-spark-integration-tests_2.12-23.10.0-SNAPSHOT-spark330db-jar-with-dependencies.jar/org/apache: arrow
``` 

The diff is because of the previous special-case compile-scope for arrow in integraion_tests just in the databricks prfoile. I think it may no longer be necessary. If a post-merge test breaks, will fix in a follow-up PR. 

Signed-off-by: Gera Shegalov <gera@apache.org>
  • Loading branch information
gerashegalov authored Sep 6, 2023
1 parent cdf5433 commit 0e2fc80
Show file tree
Hide file tree
Showing 11 changed files with 452 additions and 1,262 deletions.
50 changes: 4 additions & 46 deletions api_validation/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -77,53 +77,11 @@
</activation>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark321cdh.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
</exclusion>
</exclusions>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark321cdh.version}</version>
<exclusions>
<!-- spark-core tries to pull a curator-recipes version we don't want -->
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
</exclusion>
</exclusions>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
<version>${arrow.cdh.version}</version>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-cdh-bom</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-common</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
Expand Down
191 changes: 12 additions & 179 deletions datagen/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -71,54 +71,12 @@
</activation>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark321cdh.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
</exclusion>
</exclusions>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-cdh-bom</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark321cdh.version}</version>
<exclusions>
<!-- spark-core tries to pull a curator-recipes version we don't want -->
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
</exclusion>
</exclusions>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
<version>${arrow.cdh.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-common</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
<profile>
Expand All @@ -131,54 +89,12 @@
</activation>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark330cdh.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
</exclusion>
</exclusions>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-cdh-bom</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark330cdh.version}</version>
<exclusions>
<!-- spark-core tries to pull a curator-recipes version we don't want -->
<exclusion>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
</exclusion>
</exclusions>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
<version>${arrow.cdh.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-common</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</profile>
<profile>
Expand All @@ -196,93 +112,10 @@
</activation>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-annotation_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-unsafe_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-reflect</artifactId>
<version>${scala.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.esotericsoftware.kryo</groupId>
<artifactId>kryo-shaded-db</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-format</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-io</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.client.version}</version>
<groupId>com.nvidia</groupId>
<artifactId>rapids-4-spark-db-bom</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>provided</scope>
</dependency>
</dependencies>
Expand Down
Loading

0 comments on commit 0e2fc80

Please sign in to comment.