Skip to content

Commit

Permalink
Log DBR BuildInfo [databricks] (#11455)
Browse files Browse the repository at this point in the history
Fixes #8587
 
- Match the dbrVersion from the binaries
- Log the build info exposed in current_version
- Fix the WITH_BLOOP build

```
2024-09-10 16:21:42,019 [Thread-6] WARN  com.nvidia.spark.rapids.DatabricksShimServiceProvider - Databricks Runtime Build Info match: SUCCESS
        DBR_VERSION: 11.3.x-snapshot-gpu-ml-scala2.12
        spark.BuildInfo.gitHash: d65fb2374451fd10bf416297dc22549bcbaf2702
        databricks.BuildInfo.gitHash: b7fd9d058866e0f98f78304bf90c690198e2b208
        databricks.BuildInfo.gitTimestamp: 20240820204043
```
    
Signed-off-by: Gera Shegalov <gera@apache.org>
  • Loading branch information
gerashegalov authored Sep 10, 2024
1 parent 9fc0667 commit a92bfbf
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 22 deletions.
8 changes: 6 additions & 2 deletions jenkins/databricks/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,15 @@ else
fi

if [[ "$WITH_BLOOP" == "1" ]]; then
MVN_OPT="ch.epfl.scala:bloop-maven-plugin:bloopInstall $MVN_OPT"
MVN_OPT="-DbloopInstall $MVN_OPT"
MVN_PHASES="clean install"
export JAVA_HOME="/usr/lib/jvm/zulu11"
else
MVN_PHASES="clean package"
fi

# Build the RAPIDS plugin by running package command for databricks
$MVN_CMD -B -Ddatabricks -Dbuildver=$BUILDVER clean package -DskipTests $MVN_OPT
$MVN_CMD -B -Ddatabricks -Dbuildver=$BUILDVER $MVN_PHASES -DskipTests $MVN_OPT

if [[ "$WITH_DEFAULT_UPSTREAM_SHIM" != "0" ]]; then
echo "Building the default Spark shim and creating a two-shim dist jar"
Expand Down
13 changes: 8 additions & 5 deletions jenkins/databricks/install_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ def define_deps(spark_version, scala_version):
f'{spark_prefix}--common--tags--tags-{spark_suffix}_deploy.jar'),
Artifact('org.apache.spark', f'spark-core_{scala_version}',
f'{spark_prefix}--core--core-{spark_suffix}_deploy.jar'),

Artifact('org.apache.spark', f'spark-versions_{scala_version}',
f'spark--versions--*--shim_{scala_version}_deploy.jar'),
Artifact('org.apache.spark', f'databricks-versions_{scala_version}',
f'common--build-info--build-info-spark_*_{scala_version}_deploy.jar'),
# Spark Hive Patches
Artifact('org.apache.spark', f'spark-hive_{scala_version}',
f'{spark_prefix}--sql--hive--hive-{spark_suffix}_*.jar'),
Expand Down Expand Up @@ -132,17 +135,17 @@ def define_deps(spark_version, scala_version):
# Parquet
if spark_version.startswith('3.4'):
deps += [
Artifact('org.apache.parquet', 'parquet-hadoop',
Artifact('org.apache.parquet', 'parquet-hadoop',
f'{spark_prefix}--third_party--parquet-mr--parquet-hadoop--parquet-hadoop-shaded--*--libparquet-hadoop-internal.jar'),
Artifact('org.apache.parquet', 'parquet-common',
Artifact('org.apache.parquet', 'parquet-common',
f'{spark_prefix}--third_party--parquet-mr--parquet-common--parquet-common-shaded--*--libparquet-common-internal.jar'),
Artifact('org.apache.parquet', 'parquet-column',
f'{spark_prefix}--third_party--parquet-mr--parquet-column--parquet-column-shaded--*--libparquet-column-internal.jar'),
Artifact('org.apache.parquet', 'parquet-format',
f'{spark_prefix}--third_party--parquet-mr--parquet-format-structures--parquet-format-structures-shaded--*--libparquet-format-structures-internal.jar'),
Artifact('shaded.parquet.org.apache.thrift', f'shaded-parquet-thrift_{scala_version}',
Artifact('shaded.parquet.org.apache.thrift', f'shaded-parquet-thrift_{scala_version}',
f'{spark_prefix}--third_party--parquet-mr--parquet-format-structures--parquet-format-structures-shaded--*--org.apache.thrift__libthrift__0.16.0.jar'),
Artifact('org.apache.parquet', f'parquet-format-internal_{scala_version}',
Artifact('org.apache.parquet', f'parquet-format-internal_{scala_version}',
f'{spark_prefix}--third_party--parquet-mr--parquet-format-structures--parquet-format-structures-shaded--*--libparquet-thrift.jar')
]
else:
Expand Down
12 changes: 12 additions & 0 deletions scala2.13/shim-deps/databricks/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -291,5 +291,17 @@
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-versions_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>databricks-versions_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
</dependencies>
</project>
12 changes: 12 additions & 0 deletions shim-deps/databricks/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -291,5 +291,17 @@
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-versions_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>databricks-versions_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>compile</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*** spark-rapids-shim-json-lines
{"spark": "330db"}
{"spark": "332db"}
{"spark": "341db"}
spark-rapids-shim-json-lines ***/
package com.nvidia.spark.rapids

import scala.util.Try

object DatabricksShimServiceProvider {
  // Strip the trailing '$' so the logger is named after the plain object name.
  val log = org.slf4j.LoggerFactory.getLogger(getClass().getName().stripSuffix("$"))

  /**
   * Detects whether the current JVM is running on the Databricks Runtime whose
   * version starts with `dbrVersion` (e.g. "11.3.x"), by probing the
   * `dbrVersion` field exposed by the Databricks `org.apache.spark.BuildInfo`.
   *
   * Side effect: logs the build info (spark/databricks git hashes and
   * timestamp) — at WARN on a successful match so it is visible in default
   * logs, at DEBUG otherwise.
   *
   * @param dbrVersion expected DBR version prefix to match against
   * @return true if the runtime's dbrVersion starts with `dbrVersion`,
   *         false on mismatch or when detection fails for any reason
   */
  def matchesVersion(dbrVersion: String): Boolean = {
    // Use try/catch instead of scala.util.Try: Try.apply only intercepts
    // NonFatal throwables, but when the Databricks BuildInfo classes are
    // absent (non-DBR environment) the failure surfaces as a fatal
    // NoClassDefFoundError, which must also be treated as "not Databricks"
    // rather than crashing shim detection.
    try {
      val sparkBuildInfo = org.apache.spark.BuildInfo
      val databricksBuildInfo = com.databricks.BuildInfo
      val matchRes = sparkBuildInfo.dbrVersion.startsWith(dbrVersion)
      val matchStatus = if (matchRes) "SUCCESS" else "FAILURE"
      val logMessage =
        s"""Databricks Runtime Build Info match: $matchStatus
           |\tDBR_VERSION: ${sparkBuildInfo.dbrVersion}
           |\tspark.BuildInfo.gitHash: ${sparkBuildInfo.gitHash}
           |\tdatabricks.BuildInfo.gitHash: ${databricksBuildInfo.gitHash}
           |\tdatabricks.BuildInfo.gitTimestamp: ${databricksBuildInfo.gitTimestamp}"""
          .stripMargin
      // WARN on success is deliberate: make a positive detection visible
      // under the default log level; mismatches are only interesting when
      // debugging shim selection.
      if (matchRes) {
        log.warn(logMessage)
      } else {
        log.debug(logMessage)
      }
      matchRes
    } catch {
      case x: Throwable =>
        log.debug("Databricks detection failed: " + x, x)
        false
    }
  }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,9 +19,7 @@
spark-rapids-shim-json-lines ***/
package com.nvidia.spark.rapids.shims.spark330db

import com.nvidia.spark.rapids.{DatabricksShimVersion, ShimVersion}

import org.apache.spark.SparkEnv
import com.nvidia.spark.rapids._

object SparkShimServiceProvider {
val VERSION = DatabricksShimVersion(3, 3, 0)
Expand All @@ -32,6 +30,6 @@ class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceP
override def getShimVersion: ShimVersion = SparkShimServiceProvider.VERSION

def matchesVersion(version: String): Boolean = {
SparkEnv.get.conf.get("spark.databricks.clusterUsageTags.sparkVersion", "").startsWith("11.3.")
DatabricksShimServiceProvider.matchesVersion("11.3.x")
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,9 +19,7 @@
spark-rapids-shim-json-lines ***/
package com.nvidia.spark.rapids.shims.spark332db

import com.nvidia.spark.rapids.{DatabricksShimVersion, ShimVersion}

import org.apache.spark.SparkEnv
import com.nvidia.spark.rapids._

object SparkShimServiceProvider {
val VERSION = DatabricksShimVersion(3, 3, 2)
Expand All @@ -32,6 +30,6 @@ class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceP
override def getShimVersion: ShimVersion = SparkShimServiceProvider.VERSION

def matchesVersion(version: String): Boolean = {
SparkEnv.get.conf.get("spark.databricks.clusterUsageTags.sparkVersion", "").startsWith("12.2.")
DatabricksShimServiceProvider.matchesVersion("12.2.x")
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,9 +19,7 @@
spark-rapids-shim-json-lines ***/
package com.nvidia.spark.rapids.shims.spark341db

import com.nvidia.spark.rapids.{DatabricksShimVersion, ShimVersion}

import org.apache.spark.SparkEnv
import com.nvidia.spark.rapids._

object SparkShimServiceProvider {
val VERSION = DatabricksShimVersion(3, 4, 1)
Expand All @@ -32,6 +30,6 @@ class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceP
override def getShimVersion: ShimVersion = SparkShimServiceProvider.VERSION

def matchesVersion(version: String): Boolean = {
SparkEnv.get.conf.get("spark.databricks.clusterUsageTags.sparkVersion", "").startsWith("13.3.")
DatabricksShimServiceProvider.matchesVersion("13.3.x")
}
}

0 comments on commit a92bfbf

Please sign in to comment.