Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Qualification: fix sorting and add unit-tests script #5869

Merged
merged 5 commits into from
Jun 24, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tools/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ event_log_profiling.log
**/ui/assets/
**/ui-dependencies-cache/
**/ui/js/data-output.js
## ignore output folders of the test scripts
**/dev/qualification-output/
Original file line number Diff line number Diff line change
Expand Up @@ -69,37 +69,35 @@ class Qualification(outputDir: String, numRows: Int, hadoopConf: Configuration,
val qWriter = new QualOutputWriter(getReportOutputPath, reportReadSchema, printStdout)
// The sort order and limit only apply to the report summary text file;
// for the csv file we write the entire data in descending order.
val estimatedSorted = sortForExecutiveSummary(allAppsSum.map(_.estimatedInfo), order)
qWriter.writeReport(allAppsSum, estimatedSorted, numRows)
val sortedDetailed = sortForCSVDetailedReport(allAppsSum)
qWriter.writeDetailedReport(sortedDetailed)
val sortedDescDetailed = sortDescForDetailedReport(allAppsSum)
qWriter.writeReport(allAppsSum, sortForExecutiveSummary(sortedDescDetailed, order), numRows)
qWriter.writeDetailedReport(sortedDescDetailed)
qWriter.writeExecReport(allAppsSum, order)
qWriter.writeStageReport(allAppsSum, order)
if (uiEnabled) {
QualificationReportGenerator.generateDashBoard(outputDir, allAppsSum)
}
sortedDetailed
sortedDescDetailed
}

private def sortForCSVDetailedReport(
private def sortDescForDetailedReport(
allAppsSum: Seq[QualificationSummaryInfo]): Seq[QualificationSummaryInfo] = {
// Default sorting for of the csv files.
// Default sorting of the csv files. Use the endTime to break the tie.
allAppsSum.sortBy(sum => {
(sum.estimatedInfo.recommendation, sum.estimatedInfo.estimatedGpuSpeedup)
(sum.estimatedInfo.recommendation, sum.estimatedInfo.estimatedGpuSpeedup,
sum.estimatedInfo.estimatedGpuTimeSaved, sum.startTime + sum.estimatedInfo.appDur)
}).reverse
}

// Sorting for the pretty printed executive summary
private def sortForExecutiveSummary(sumsToWrite: Seq[EstimatedSummaryInfo],
// Sorting for the pretty printed executive summary.
// The input elements are already sorted in descending order, so we only need to reverse them
// if the order is ascending.
private def sortForExecutiveSummary(appsSumDesc: Seq[QualificationSummaryInfo],
order: String): Seq[EstimatedSummaryInfo] = {
if (QualificationArgs.isOrderAsc(order)) {
sumsToWrite.sortBy(sum => {
(sum.recommendation, sum.estimatedGpuSpeedup, sum.estimatedGpuTimeSaved)
})
appsSumDesc.reverse.map(_.estimatedInfo)
} else {
sumsToWrite.sortBy(sum => {
(sum.recommendation, sum.estimatedGpuSpeedup, sum.estimatedGpuTimeSaved)
}).reverse
appsSumDesc.map(_.estimatedInfo)
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
App Name,App ID,Recommendation,Estimated GPU Speedup,Estimated GPU Duration,Estimated GPU Time Saved,SQL DF Duration,SQL Dataframe Task Duration,App Duration,GPU Opportunity,Executor CPU Time Percent,SQL Ids with Failures,Unsupported Read File Formats and Types,Unsupported Write Data Format,Complex Types,Nested Complex Types,Potential Problems,Longest SQL Duration,NONSQL Task Duration Plus Overhead,Unsupported Task Duration,Supported SQL DF Task Duration,Task Speedup Factor,App Duration Estimated
Rapids Spark Profiling Tool Unit Tests,local-1622043423018,Recommended,1.71,9497.79,6821.2,12434,132257,16319,10589,37.7,"","",JSON,"","","",7143,4717,19616,112641,2.81,false
Rapids Spark Profiling Tool Unit Tests,local-1623281204390,Not Recommended,1.0,6239.7,0.29,2032,4666,6240,0,46.27,"",JSON[string:bigint:int],JSON,"","",UDF,1209,5793,4664,2,1.5,false
Spark shell,local-1651187225439,Not Recommended,1.0,355496.4,140.59,760,180,355637,350,87.88,"",JSON[string:bigint:int],"","","","",498,343411,97,83,1.67,false
Spark shell,local-1651188809790,Not Recommended,1.0,166192.46,22.53,911,283,166215,45,81.18,"",JSON[string:bigint:int],"","","",UDF,715,133608,269,14,2.0,false
Rapids Spark Profiling Tool Unit Tests,local-1623281204390,Not Recommended,1.0,6239.7,0.29,2032,4666,6240,0,46.27,"",JSON[string:bigint:int],JSON,"","",UDF,1209,5793,4664,2,1.5,false
Loading