Skip to content

Commit

Permalink
Merge branch 'branch-21.08' into reader-semaphore-perf-fix
Browse files Browse the repository at this point in the history
  • Loading branch information
jlowe committed Jul 7, 2021
2 parents 4b6ec23 + cd1481d commit 8ca06a6
Show file tree
Hide file tree
Showing 6 changed files with 24 additions and 4 deletions.
6 changes: 3 additions & 3 deletions scripts/audit-spark-3.2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@
# This script generates the commits that went in Apache Spark for audit.
# Audit is required to evaluate if the code needs to be updated based
# on new commits merged in Apache Spark. This currently audits changes for
# Spark-3.2 (master branch).
# Spark branch-3.2
# Arguments:
# lastcommit - File which contains the latest commit hash when this script ran last.
# basebranch - branch in Apache Spark for which commits need to be audited.
# Currently it's master as Spark-3.2 branch is not cut yet.
# Currently it's Apache Spark's branch-3.2.
# tag - tag until which the commits are audited


set -ex
ABSOLUTE_PATH=$(cd $(dirname $0) && pwd)
lastcommit=""
basebranch="master"
basebranch="branch-3.2"
tag="v3.1.1-rc3"
REF=${REF:-"main"}
REF=main
Expand Down
2 changes: 1 addition & 1 deletion tools/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ GPU generated event logs.
- Spark 3.0.1 or newer, the Qualification tool just needs the Spark jars and the Profiling tool
runs a Spark application so needs the Spark runtime.
- Java 8 or above
- Complete Spark event log(s) from Spark 3.0 or above version.
- Spark event log(s) from Spark 2.0 or later.
Support both rolled and compressed event logs with `.lz4`, `.lzf`, `.snappy` and `.zstd` suffixes.
Also support Databricks-specific rolled and compressed (.gz) event logs.
The tool does not support nested directories, event log files or event log directories should be
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
App Name,App ID,Score,Potential Problems,SQL Dataframe Duration,App Duration,Executor CPU Time Percent,App Duration Estimated,SQL Duration with Potential Problems,SQL Ids with Failures
Spark shell,local-1624892957956,21.07,"",3751,17801,58.47,false,0,""
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,18 @@ class ApplicationInfoSuite extends FunSuite with Logging {
}
}

test("test spark2 eventlog") {
  // Profile a Spark 2.x event log and verify the parsed application metadata
  // (version, GPU mode, job start events, and stage 0 task count).
  val logs = Array(s"$logDir/spark2-eventlog.zstd")
  val apps = ToolTestUtils.processProfileApps(logs, sparkSession)
  assert(apps.size == 1)
  val app = apps.head
  assert(app.sparkVersion == "2.2.3")
  assert(app.gpuMode == false)
  assert(app.jobStart.size == 1)
  assert(app.jobStart.head.jobID == 0)
  // Stage 0 of this event log was submitted with exactly 6 tasks.
  val stageZero = app.stageSubmitted.filter(_.stageId == 0)
  assert(stageZero.head.numTasks == 6)
}

test("malformed json eventlog") {
val eventLog = s"$logDir/malformed_json_eventlog.zstd"
TrampolineUtil.withTempDir { tempDir =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ class QualificationSuite extends FunSuite with BeforeAndAfterEach with Logging {
runQualificationTest(logFiles, "nds_q86_test_expectation.csv")
}

test("spark2 eventlog") {
  // The qualification tool should process a Spark 2.x event log end to end
  // and produce output matching the recorded expectation CSV.
  val eventLog =
    s"${ToolTestUtils.getTestResourcePath("spark-events-profiling")}/spark2-eventlog.zstd"
  runQualificationTest(Array(eventLog), "spark2_expectation.csv")
}

test("test udf event logs") {
val logFiles = Array(
s"$logDir/dataset_eventlog",
Expand Down

0 comments on commit 8ca06a6

Please sign in to comment.