From 86fc104551a8de7b999d9658f0e781275ea0ad65 Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Mon, 22 Feb 2021 19:09:55 +0800 Subject: [PATCH 01/28] spillable cache for GpuCartesianRDD Signed-off-by: sperlingxx --- .../sql/rapids/GpuCartesianProductExec.scala | 50 ++++++++++++------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala index fc6c9b89f28..64916825a2c 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql.rapids import java.io.{IOException, ObjectInputStream, ObjectOutputStream} +import scala.collection.mutable + import ai.rapids.cudf.{JCudfSerialization, NvtxColor, NvtxRange, Table} -import com.nvidia.spark.rapids.{Arm, GpuBindReferences, GpuBuildLeft, GpuColumnVector, GpuExec, GpuExpression, GpuMetric, GpuSemaphore, MetricsLevel} +import com.nvidia.spark.rapids.{Arm, GpuBindReferences, GpuBuildLeft, GpuColumnVector, GpuExec, GpuExpression, GpuMetric, GpuSemaphore, MetricsLevel, SpillableColumnarBatch, SpillPriorities} import com.nvidia.spark.rapids.RapidsPluginImplicits._ import org.apache.spark.{Dependency, NarrowDependency, Partition, SparkContext, TaskContext} @@ -141,27 +143,41 @@ class GpuCartesianRDD( override def compute(split: Partition, context: TaskContext): Iterator[ColumnarBatch] = { val currSplit = split.asInstanceOf[GpuCartesianPartition] + + // create a buffer to cache stream-side data in a spillable manner + val spillBatchBuffer = mutable.ArrayBuffer[SpillableColumnarBatch]() + closeOnExcept(spillBatchBuffer) { buffer => + rdd2.iterator(currSplit.s2, context).foreach { cb => + // TODO: is it necessary to create a specific spill priorities for spillBatchBuffer? 
+ buffer += SpillableColumnarBatch( + cb.getBatch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY) + } + } + rdd1.iterator(currSplit.s1, context).flatMap { lhs => val table = withResource(lhs) { lhs => GpuColumnVector.from(lhs.getBatch) } - // Ideally instead of looping through and recomputing rdd2 for - // each batch in rdd1 we would instead cache rdd2 in a way that - // it could spill to disk so we can avoid re-computation - val ret = GpuBroadcastNestedLoopJoinExecBase.innerLikeJoin( - rdd2.iterator(currSplit.s2, context).map(i => i.getBatch), - table, - GpuBuildLeft, - boundCondition, - outputSchema, - joinTime, - joinOutputRows, - numOutputRows, - numOutputBatches, - filterTime, - totalTime) + val ret = closeOnExcept(spillBatchBuffer) { buffer => + GpuBroadcastNestedLoopJoinExecBase.innerLikeJoin( + // fetch stream-side data from buffer in case of re-computation + buffer.toIterator.map(spill => spill.getColumnarBatch()), + table, + GpuBuildLeft, + boundCondition, + outputSchema, + joinTime, + joinOutputRows, + numOutputRows, + numOutputBatches, + filterTime, + totalTime) + } - CompletionIterator[ColumnarBatch, Iterator[ColumnarBatch]](ret, table.close()) + CompletionIterator[ColumnarBatch, Iterator[ColumnarBatch]](ret, { + table.close() + spillBatchBuffer.safeClose() + }) } } From 07b2d1585ea918d21c668653f8429096ece21f0b Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Tue, 23 Feb 2021 18:05:22 +0800 Subject: [PATCH 02/28] lazy cache --- .../sql/rapids/GpuCartesianProductExec.scala | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala index 64916825a2c..e88b44e4a82 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala @@ -146,34 +146,40 @@ class GpuCartesianRDD( // create a buffer to cache stream-side data in a spillable manner val spillBatchBuffer = mutable.ArrayBuffer[SpillableColumnarBatch]() - closeOnExcept(spillBatchBuffer) { buffer => - rdd2.iterator(currSplit.s2, context).foreach { cb => - // TODO: is it necessary to create a specific spill priorities for spillBatchBuffer? 
-        buffer += SpillableColumnarBatch(
-          cb.getBatch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY)
-      }
-    }
-
-    rdd1.iterator(currSplit.s1, context).flatMap { lhs =>
+    rdd1.iterator(currSplit.s1, context).zipWithIndex.flatMap { case (lhs, index) =>
       val table = withResource(lhs) { lhs =>
         GpuColumnVector.from(lhs.getBatch)
       }
-      val ret = closeOnExcept(spillBatchBuffer) { buffer =>
-        GpuBroadcastNestedLoopJoinExecBase.innerLikeJoin(
-          // fetch stream-side data from buffer in case of re-computation
-          buffer.toIterator.map(spill => spill.getColumnarBatch()),
-          table,
-          GpuBuildLeft,
-          boundCondition,
-          outputSchema,
-          joinTime,
-          joinOutputRows,
-          numOutputRows,
-          numOutputBatches,
-          filterTime,
-          totalTime)
+
+      val streamIterator = if (index == 0) {
+        // lazily compute and cache stream-side data
+        rdd2.iterator(currSplit.s2, context).map { serializableBatch =>
+          closeOnExcept(spillBatchBuffer) { buffer =>
+            val batch = SpillableColumnarBatch(
+              serializableBatch.getBatch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY)
+            buffer += batch
+            batch.getColumnarBatch()
+          }
+        }
+      } else {
+        // fetch stream-side data directly if they are cached
+        spillBatchBuffer.toIterator.map(_.getColumnarBatch())
       }
 
+      val ret = GpuBroadcastNestedLoopJoinExecBase.innerLikeJoin(
+        streamIterator,
+        table,
+        GpuBuildLeft,
+        boundCondition,
+        outputSchema,
+        joinTime,
+        joinOutputRows,
+        numOutputRows,
+        numOutputBatches,
+        filterTime,
+        totalTime)
+
       CompletionIterator[ColumnarBatch, Iterator[ColumnarBatch]](ret, {
         table.close()
         spillBatchBuffer.safeClose()

From 72b2e12b172e1f7ede7f5bfb6ddd3188b5556918 Mon Sep 17 00:00:00 2001
From: NvTimLiu <50287591+NvTimLiu@users.noreply.github.com>
Date: Tue, 2 Mar 2021 00:20:03 +0800
Subject: [PATCH 03/28] Update cudf dependency to 0.18 (#1828)

* Depend on the cuDF v0.18

Change rapids branch-0.4 to depend on cuDF v0.18 release jars

Prepare for the rapids v0.4.0 release

Signed-off-by: Tim Liu

* cudf 0.17-SNAPSHOT to 0.17
---
 jenkins/Dockerfile-blossom.integration.centos7 | 2 +-
 jenkins/printJarVersion.sh                     | 2 +-
 jenkins/version-def.sh                         | 2 +-
 pom.xml                                        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/jenkins/Dockerfile-blossom.integration.centos7 b/jenkins/Dockerfile-blossom.integration.centos7
index a95d8939c87..928b53a3e6b 100644
--- a/jenkins/Dockerfile-blossom.integration.centos7
+++ b/jenkins/Dockerfile-blossom.integration.centos7
@@ -18,7 +18,7 @@
 #
 # Arguments:
 #    CUDA_VER=10.1 or 10.2
-#    CUDF_VER=0.16, 0.17-SNAPSHOT, or 0.18-SNAPSHOT
+#    CUDF_VER=0.16, 0.17, or 0.18
 #    URM_URL=
 ###
 ARG CUDA_VER=10.1
diff --git a/jenkins/printJarVersion.sh b/jenkins/printJarVersion.sh
index 666c63218cd..949dad420dc 100755
--- a/jenkins/printJarVersion.sh
+++ b/jenkins/printJarVersion.sh
@@ -24,7 +24,7 @@ function print_ver(){
     SERVER_ID=$5

     # Collect snapshot dependency info only in Jenkins build
-    # In dev build, print 'SNAPSHOT' tag without time stamp, e.g.: cudf-0.18-SNAPSHOT.jar
+    # In dev build, print 'SNAPSHOT' tag without time stamp, e.g.: cudf-0.18.jar
     if [[ "$VERSION" == *"-SNAPSHOT" && -n "$JENKINS_URL" ]]; then
         PREFIX=${VERSION%-SNAPSHOT}
         # List the latest SNAPSHOT jar file in the maven repo
diff --git a/jenkins/version-def.sh b/jenkins/version-def.sh
index d172606a7cd..ecfbe6fc748 100755
--- a/jenkins/version-def.sh
+++ b/jenkins/version-def.sh
@@ -26,7 +26,7 @@ for VAR in $OVERWRITE_PARAMS;do
 done
 IFS=$PRE_IFS

-CUDF_VER=${CUDF_VER:-"0.18-SNAPSHOT"}
+CUDF_VER=${CUDF_VER:-"0.18"}
 CUDA_CLASSIFIER=${CUDA_CLASSIFIER:-"cuda10-1"}
 PROJECT_VER=${PROJECT_VER:-"0.4.0-SNAPSHOT"}
 PROJECT_TEST_VER=${PROJECT_TEST_VER:-"0.4.0-SNAPSHOT"}
diff --git a/pom.xml b/pom.xml
index 03000cf6f95..5e4e4724467 100644
--- a/pom.xml
+++ b/pom.xml
@@ -147,7 +147,7 @@
         1.8
         ${spark300.version}
         cuda10-1
-        0.18-SNAPSHOT
+        0.18
         2.12
         2.12.8
         1.5.8

From bb0353519009daec9555e1f8f72c5071429aa63e Mon Sep 17 00:00:00 2001
From: NvTimLiu <50287591+NvTimLiu@users.noreply.github.com>
Date: Tue, 2 Mar 2021 00:50:57 +0800
Subject: [PATCH 04/28] Update mortgage tests to support reading multiple dataset formats (#1808)

* mortgage support multiple dataset formats

change mortgage sample class to support dataset formats csv/orc/parquet

Signed-off-by: Tim Liu

* Update
1, copyright 2021
2, throw an error if there are more than 5 arguments
3, match-case optimize

Signed-off-by: Tim Liu

* Update
1, print some helpful info for the input arguments
2, exit instead of throwing an exception when arguments are wrongly set

* fix typo

* Fix Nothing value in 'case _ =>'

* update
---
 .../rapids/tests/mortgage/MortgageSpark.scala | 20 ++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSpark.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSpark.scala
index 514a2da0b2d..04b9350ff05 100644
--- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSpark.scala
+++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSpark.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -422,6 +422,10 @@ object AggregatesWithJoin { object Main { def main(args: Array[String]): Unit = { + if (args.length < 4 || args.length > 5) { + System.err.println("Usage: [csv|orc|parquet]") + System.exit(1) + } val perfPath = args(1) val acqPath = args(2) val output = args(3) @@ -430,8 +434,18 @@ object Main { .appName("MortgageJob") .getOrCreate() - 0.until(10).foreach { _ => - Run.parquet(session, perfPath, acqPath).write.mode("overwrite").parquet(output) + // extend args to support csv/orc/parquet dataset + val dataFrameFormatMap = Map( + "csv" -> Run.csv(session, perfPath, acqPath), + "orc" -> Run.orc(session, perfPath, acqPath), + "parquet" -> Run.parquet(session, perfPath, acqPath) + ) + val format = args.lift(4).getOrElse("parquet") + if (!dataFrameFormatMap.contains(format)) { + System.err.println(s"Invalid input format $format, expected one of csv, orc, parquet") + System.exit(1) } + + 0.until(10).foreach( _ => dataFrameFormatMap(format).write.mode("overwrite").parquet(output)) } } From 17657fed2c59296b777756e7f926fb82d527abef Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 1 Mar 2021 11:47:00 -0600 Subject: [PATCH 05/28] Remove benchmarks (#1826) Signed-off-by: Jason Lowe --- README.md | 10 - docs/benchmarks.md | 212 - docs/get-started/Dockerfile.cuda | 3 +- integration_tests/README.md | 20 - integration_tests/conftest.py | 19 - .../src/main/python/benchmark.py | 155 - integration_tests/src/main/python/conftest.py | 138 - .../src/main/python/tpcds_test.py | 51 - .../src/main/python/tpch_test.py | 178 - .../src/main/python/tpcxbb_test.py | 43 - .../spark/rapids/tests/BenchmarkRunner.scala | 334 -- .../rapids/tests/common/BenchUtils.scala | 853 --- .../rapids/tests/common/CompareResults.scala | 83 - .../rapids/tests/tpcds/TpcdsLikeBench.scala | 44 - .../rapids/tests/tpcds/TpcdsLikeSpark.scala | 4777 ----------------- .../rapids/tests/tpch/TpchLikeBench.scala | 70 - .../rapids/tests/tpch/TpchLikeSpark.scala | 1190 ---- .../rapids/tests/tpcxbb/TpcxbbLikeBench.scala | 86 - .../rapids/tests/tpcxbb/TpcxbbLikeSpark.scala | 2132 -------- ...488b-81f2-00813dd4bfbe-c000.snappy.parquet | Bin 87932 -> 0 bytes ...47ae-a8c6-b4665bea5c5d-c000.snappy.parquet | Bin 49579 -> 0 bytes ...438a-bb6e-a0fd2ab56fd4-c000.snappy.parquet | Bin 3023 -> 0 bytes ...4550-994b-1a1553b26438-c000.snappy.parquet | Bin 46429 -> 0 bytes ...49a4-b942-b9c4d0774ded-c000.snappy.parquet | Bin 39424 -> 0 bytes ...462b-a69a-2a356b8d9f37-c000.snappy.parquet | Bin 56736 -> 0 bytes ...4a7c-b9df-259eba52db22-c000.snappy.parquet | Bin 1502 -> 0 bytes ...4a39-88ab-b1fbd89f0b09-c000.snappy.parquet | Bin 11630 -> 0 bytes .../rapids/tests/common/BenchUtilsSuite.scala | 155 - .../tpch/TpchLikeAdaptiveSparkSuite.scala | 24 - .../tests/tpch/TpchLikeSparkSuite.scala | 213 - tests/README.md | 32 +- .../rapids/SparkQueryCompareTestSuite.scala | 7 +- 32 files changed, 10 insertions(+), 10819 deletions(-) delete mode 100644 docs/benchmarks.md delete mode 100644 integration_tests/src/main/python/benchmark.py delete mode 100644 integration_tests/src/main/python/tpcds_test.py delete mode 100644 integration_tests/src/main/python/tpch_test.py delete mode 100644 integration_tests/src/main/python/tpcxbb_test.py delete mode 100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/BenchmarkRunner.scala delete mode 100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/common/BenchUtils.scala delete mode 100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/common/CompareResults.scala delete mode 
100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcds/TpcdsLikeBench.scala delete mode 100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcds/TpcdsLikeSpark.scala delete mode 100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeBench.scala delete mode 100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeSpark.scala delete mode 100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcxbb/TpcxbbLikeBench.scala delete mode 100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcxbb/TpcxbbLikeSpark.scala delete mode 100644 integration_tests/src/test/resources/tpch/customer.tbl/part-00000-5e3bf4cd-c6e9-488b-81f2-00813dd4bfbe-c000.snappy.parquet delete mode 100644 integration_tests/src/test/resources/tpch/lineitem.tbl/part-00000-6de1e1da-3f9b-47ae-a8c6-b4665bea5c5d-c000.snappy.parquet delete mode 100644 integration_tests/src/test/resources/tpch/nation.tbl/part-00000-d9f4400b-9af3-438a-bb6e-a0fd2ab56fd4-c000.snappy.parquet delete mode 100644 integration_tests/src/test/resources/tpch/orders.tbl/part-00000-eac6ea97-a06e-4550-994b-1a1553b26438-c000.snappy.parquet delete mode 100644 integration_tests/src/test/resources/tpch/part.tbl/part-00000-3364eea2-f1b6-49a4-b942-b9c4d0774ded-c000.snappy.parquet delete mode 100644 integration_tests/src/test/resources/tpch/partsupp.tbl/part-00000-458bd63a-12e2-462b-a69a-2a356b8d9f37-c000.snappy.parquet delete mode 100644 integration_tests/src/test/resources/tpch/region.tbl/part-00000-03defe09-3d7c-4a7c-b9df-259eba52db22-c000.snappy.parquet delete mode 100644 integration_tests/src/test/resources/tpch/supplier.tbl/part-00000-317059ee-49a7-4a39-88ab-b1fbd89f0b09-c000.snappy.parquet delete mode 100644 integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/common/BenchUtilsSuite.scala delete mode 100644 integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeAdaptiveSparkSuite.scala delete mode 100644 integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeSparkSuite.scala diff --git a/README.md b/README.md index 45cf522c619..2b758c3665b 100644 --- a/README.md +++ b/README.md @@ -5,18 +5,8 @@ The RAPIDS Accelerator for Apache Spark provides a set of plugins for [Apache Spark](https://spark.apache.org) that leverage GPUs to accelerate processing via the [RAPIDS](https://rapids.ai) libraries and [UCX](https://www.openucx.org/). -![TPCxBB Like query results](./docs/img/tpcxbb-like-results.png "TPCxBB Like Query Results") - -The chart above shows results from running ETL queries based off of the -[TPCxBB benchmark](http://www.tpc.org/tpcx-bb/default.asp). These are **not** official results in -any way. It uses a 10TB Dataset (scale factor 10,000), stored in parquet. The processing happened on -a two node DGX-2 cluster. Each node has 96 CPU cores, 1.5TB host memory, 16 V100 GPUs, and 512 GB -GPU memory. - To get started and try the plugin out use the [getting started guide](./docs/get-started/getting-started.md). -For more information about these benchmarks, see the [benchmark guide](./docs/benchmarks.md). - ## Compatibility The SQL plugin tries to produce results that are bit for bit identical with Apache Spark. 
diff --git a/docs/benchmarks.md b/docs/benchmarks.md deleted file mode 100644 index 6e52e722f24..00000000000 --- a/docs/benchmarks.md +++ /dev/null @@ -1,212 +0,0 @@ ---- -layout: page -title: Benchmarks -nav_order: 8 ---- -# Benchmarks - -The `integration_test` module contains benchmarks derived from the -[TPC-DS](http://www.tpc.org/tpcds/), [TPC-H](http://www.tpc.org/tpch/), and -[TPCx-BB](http://www.tpc.org/tpcx-bb/default5.asp) benchmarks. These are not official TPC -benchmarks and are only intended to be used to compare relative performance between CPU and GPU -and to help catch performance regressions in the plugin. - -## Data Generation - -For each of these benchmarks, source data must be generated by a utility that can generate data at -any scale factor, where the scale factor is an integer representing approximately how many -gigabytes of data will be generated, with scale factor 1 meaning ~1 GB, and scale factor 1000 meaning ~1 TB, -for example. - -Further information on data generation can be found using the following links: - -- [TPC-DS Data Generator](https://github.com/databricks/tpcds-kit) -- [TPC-H Data Generator](https://github.com/electrum/tpch-dbgen) -- [TPCx-BB Data Generator](http://www.tpc.org/tpc_documents_current_versions/current_specifications5.asp) - -The remainder of this document is based on the TPC-DS benchmark but the steps are very similar for -the other benchmarks. The main difference is that the package and class name is different for each -benchmark. - -| Benchmark | Package | Class Names | -|-----------|--------------------------------------|----------------------------------| -| TPC-DS | com.nvidia.spark.rapids.tests.tpcds | ConvertFiles, TpcdsLikeBench | -| TPC-xBB | com.nvidia.spark.rapids.tests.tpcxbb | ConvertFiles, TpcxbbLikeBench | -| TPC-H | com.nvidia.spark.rapids.tests.tpch | ConvertFiles, TpchLikeBench | - -## Spark Shell - -The integration test jar needs to be added to the `--jars` configuration option when launching the -Spark shell. This jar can be found in the `integration_tests/target` directory after running -`mvn package`, with a filename matching `rapids-4-spark-integration-tests_2.12-0.4.0.jar`. - -To run benchmarks on the GPU, the RAPIDS Accelerator for Apache Spark must also be installed, -following the instructions provided in the [Getting Started](get-started/getting-started.md) guide. - -## Converting to Parquet - -Although it is possible to run benchmarks directly against the CSV data generated by the TPC data -generators, it is common to convert the data to Parquet format and run benchmarks against the -Parquet files instead. - -The `integration_test` module contains code for converting the CSV data sets to Parquet. - -The following commands can be entered into spark-shell to perform the conversion. - -```scala -import com.nvidia.spark.rapids.tests.tpcds._ -TpcdsLikeSpark.csvToParquet(spark, "/path/to/input", "/path/to/output") -``` - -Note that the code for converting CSV to Parquet does not explicitly specify the number of -partitions to write, so the size of the resulting parquet files will vary depending on the value -for `spark.default.parallelism`, which by default is based on the number of available executor -cores. However, the file conversion methods accept `coalesce` and `repartition` arguments to -better control the size of the partitions on a per-table basis. - -Example using `coalesce` and `repartition` options to control the number and size of partitions -for specific tables. 
- -```scala -TpcdsLikeSpark.csvToParquet(spark, "/path/to/input", "/path/to/output", - coalesce=Map("customer_address" -> 1), repartition=Map("web_sales" -> 256)) -``` - -It is also possible to use `spark-submit` to run the file conversion process. - -```bash -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER_URL \ - --jars $SPARK_RAPIDS_PLUGIN_JAR,$CUDF_JAR \ - --class com.nvidia.spark.rapids.tests.tpcds.ConvertFiles \ - $SPARK_RAPIDS_PLUGIN_INTEGRATION_TEST_JAR \ - --input /path/to/input \ - --output /path/to/output \ - --output-format parquet \ - --coalesce customer_address=1 \ - --repartition web_sales=256 inventory=128 -``` - -It should also be noted that no decimal types will be output. The conversion code uses explicit -schemas to ensure that decimal types are converted to floating-point types instead because the -plugin does not yet support decimal types but these will be supported in a future release. - -## Running Benchmarks from a Spark shell - -The benchmarks can be executed in two modes currently: - -- Execute the query and collect the results to the driver -- Execute the query and write the results to disk (in Parquet, CSV, or ORC format) - -The following commands can be entered into spark-shell to register the data files that the -benchmark will query. - -```scala -import com.nvidia.spark.rapids.tests.tpcds._ -TpcdsLikeSpark.setupAllParquet(spark, "/path/to/tpcds") -``` - -The benchmark can be executed with the following syntax to execute the query and collect the -results to the driver. - -```scala -import com.nvidia.spark.rapids.tests._ -val benchmark = new BenchmarkRunner(new TpcdsLikeBench()) -benchmark.collect(spark, "q5", iterations=3) -``` - -The benchmark can be executed with the following syntax to execute the query and write the results -to Parquet. There are also `writeCsv` and `writeOrc` methods for writing the output to CSV or ORC -files. - -```scala -import com.nvidia.spark.rapids.tests._ -val benchmark = new BenchmarkRunner(new TpcdsLikeBench()) -benchmark.writeParquet(spark, "q5", "/data/output/tpcds/q5", iterations=3) -``` - -## Running Benchmarks from spark-submit - -The benchmark runner has a command-line interface, allowing it to be submitted -to Spark using `spark-submit` which can be more practical than using the Spark shell when -running a series of benchmarks using automation. - -Here is an example `spark-submit` command for running TPC-DS query 5, reading from Parquet and -writing results to Parquet. The `--output` and `--output-format` arguments can be omitted to -have the benchmark call `collect()` on the results instead. 
- -```bash -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER_URL \ - --jars $SPARK_RAPIDS_PLUGIN_JAR,$CUDF_JAR \ - --class com.nvidia.spark.rapids.tests.BenchmarkRunner \ - $SPARK_RAPIDS_PLUGIN_INTEGRATION_TEST_JAR \ - --benchmark tpcds \ - --query q5 \ - --input /raid/tpcds-3TB-parquet-largefiles \ - --input-format parquet \ - --output /raid/tpcds-output/tpcds-q5-cpu \ - --output-format parquet \ - --summary-file-prefix tpcds-q5-cpu \ - --iterations 1 -``` - -## Benchmark JSON Output - -Each benchmark run produces a JSON file containing information about the environment and the query, -including the following items: - -- Spark version -- Spark configuration -- Environment variables -- Logical and physical query plan -- SQL metrics for the executed plan -- Timing information for each query iteration - -Care should be taken to ensure that no sensitive information is captured from the environment -before sharing these JSON files. Environment variables with names containing the words `PASSWORD`, -`TOKEN`, or `SECRET` are filtered out, but this may not be sufficient to prevent leaking secrets. - -## Automating Benchmarks - -For convenience, the [benchmark.py](../integration_tests/src/main/python/benchmark.py) script is -provided, allowing benchmarks to be run in an automated way with multiple configurations. Example -usage is provided in the documentation within the script. - -## Verifying Results - -It is important to verify that queries actually produced the correct output, especially when -comparing between CPU and GPU benchmarks. A utility is provided to help with this. - -This is a simple utility that pulls results down to the driver for comparison so will only work for -data sets that can fit in the driver's memory. - -If data needs sorting before comparison, this is delegated to Spark before collecting the results. - -Example usage from spark-shell: - -```scala -val cpu = spark.read.parquet("/data/tpcxbb/q5-cpu") -val gpu = spark.read.parquet("/data/tpcxbb/q5-gpu") -import com.nvidia.spark.rapids.tests.common._ -BenchUtils.compareResults(cpu, gpu, inputFormat="parquet", ignoreOrdering=true, epsilon=0.0001) -``` - -This will report on any differences between the two dataframes. - -The verification utility can also be run using `spark-submit` using the following syntax. - -```bash -$SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER_URL \ - --jars $SPARK_RAPIDS_PLUGIN_JAR,$CUDF_JAR \ - --class com.nvidia.spark.rapids.tests.common.CompareResults \ - $SPARK_RAPIDS_PLUGIN_INTEGRATION_TEST_JAR \ - --input1 /path/to/result1 \ - --input2 /path/to/result2 \ - --input-format parquet -``` - -## Performance Tuning - -Please refer to the [Tuning Guide](tuning-guide.md) for information on performance tuning. diff --git a/docs/get-started/Dockerfile.cuda b/docs/get-started/Dockerfile.cuda index f1c504baaf6..6e5a3688bd0 100644 --- a/docs/get-started/Dockerfile.cuda +++ b/docs/get-started/Dockerfile.cuda @@ -1,5 +1,5 @@ # -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -35,7 +35,6 @@ RUN set -ex && \ ln -s /lib /lib64 && \ mkdir -p /opt/spark && \ mkdir -p /opt/spark/jars && \ - mkdir -p /opt/tpch && \ mkdir -p /opt/spark/examples && \ mkdir -p /opt/spark/work-dir && \ mkdir -p /opt/sparkRapidsPlugin && \ diff --git a/integration_tests/README.md b/integration_tests/README.md index 7ae2b7d11ec..7b8ec0501b5 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -171,26 +171,6 @@ any GPU resources on the cluster. For standalone, Mesos, and Kubernetes you can of executors you want to use per application. The extra core is for the driver. Dynamic allocation can mess with these settings under YARN and even though it is off by default you probably want to be sure it is disabled (spark.dynamicAllocation.enabled=false). -### Enabling TPCxBB/TPCH/TPCDS/Mortgage Tests - -The TPCxBB, TPCH, TPCDS, and Mortgage tests in this framework can be enabled by providing a couple of options: - - * TPCxBB `tpcxbb-format` (optional, defaults to "parquet"), and `tpcxbb-path` (required, path to the TPCxBB data). - * TPCH `tpch-format` (optional, defaults to "parquet"), and `tpch-path` (required, path to the TPCH data). - * TPCDS `tpcds-format` (optional, defaults to "parquet"), and `tpcds-path` (required, path to the TPCDS data). - * Mortgage `mortgage-format` (optional, defaults to "parquet"), and `mortgage-path` (required, path to the Mortgage data). - -As an example, here is the `spark-submit` command with the TPCxBB parameters on CUDA 10.1: - -``` -$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0.jar,rapids-4-spark-udf-examples_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar,rapids-4-spark-tests_2.12-0.4.0.jar" ./runtests.py --tpcxbb_format="csv" --tpcxbb_path="/path/to/tpcxbb/csv" -``` - -Be aware that running these tests with read data requires at least an entire GPU, and preferable several GPUs/executors -in your cluster so please be careful when enabling these tests. Also some of these test actually produce non-deterministic -results when run in a real cluster. If you do see failures when running these tests please contact us so we can investigate -them and possibly tag the tests appropriately when running on an actual cluster. - ### Enabling cudf_udf Tests The cudf_udf tests in this framework are testing Pandas UDF(user-defined function) with cuDF. They are disabled by default not only because of the complicated environment setup, but also because GPU resources scheduling for Pandas UDF is an experimental feature now, the performance may not always be better. 
diff --git a/integration_tests/conftest.py b/integration_tests/conftest.py index 751f112397e..109e0477fdd 100644 --- a/integration_tests/conftest.py +++ b/integration_tests/conftest.py @@ -14,24 +14,6 @@ def pytest_addoption(parser): """Pytest hook to define command line options for pytest""" - parser.addoption( - "--tpcxbb_format", action="store", default="parquet", help="format of TPCXbb data" - ) - parser.addoption( - "--tpcxbb_path", action="store", default=None, help="path to TPCXbb data" - ) - parser.addoption( - "--tpcds_format", action="store", default="parquet", help="format of TPC-DS data" - ) - parser.addoption( - "--tpcds_path", action="store", default=None, help="path to TPC-DS data" - ) - parser.addoption( - "--tpch_format", action="store", default="parquet", help="format of TPCH data" - ) - parser.addoption( - "--tpch_path", action="store", default=None, help="path to TPCH data" - ) parser.addoption( "--mortgage_format", action="store", default="parquet", help="format of Mortgage data" ) @@ -61,4 +43,3 @@ def pytest_addoption(parser): "--test_type", action='store', default="developer", help="the type of tests that are being run to help check all the correct tests are run - developer, pre-commit, or nightly" ) - diff --git a/integration_tests/src/main/python/benchmark.py b/integration_tests/src/main/python/benchmark.py deleted file mode 100644 index dbbc3ffa3c8..00000000000 --- a/integration_tests/src/main/python/benchmark.py +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import sys - -def main(): - """Iterate over a series of configurations and run benchmarks for each of the specified - queries using that configuration. - - Example usage: - - python benchmark.py \ - --template /path/to/template \ - --benchmark tpcds \ - --input /path/to/input \ - --input-format parquet \ - --output /path/to/output \ - --output-format parquet \ - --configs cpu gpu-ucx-on \ - --query q4 q5 - - In this example, configuration key-value pairs will be loaded from cpu.properties and - gpu-ucx-on.properties and appended to a spark-submit-template.txt to build the spark-submit - commands to run the benchmark. These configuration property files simply contain key-value - pairs in the format key=value with one pair per line. For example: - - spark.executor.cores=2 - spark.rapids.sql.enabled=true - spark.sql.adaptive.enabled=true - - A template file must be provided, containing the command to call spark-submit along - with any cluster-specific configuration options and any spark configuration settings that - will be common to all benchmark runs. The template should end with a line-continuation - symbol since additional --conf options will be appended for each benchmark run. 
- - Example template: - - $SPARK_HOME/bin/spark-submit \ - --master $SPARK_MASTER_URL \ - --conf spark.plugins=com.nvidia.spark.SQLPlugin \ - --conf spark.eventLog.enabled=true \ - --conf spark.eventLog.dir=./spark-event-logs \ - - The output and output-format arguments can be omitted to run the benchmark and collect - results to the driver rather than write the query output to disk. - - This benchmark script assumes that the following environment variables have been set for - the location of the relevant JAR files to be used: - - - SPARK_RAPIDS_PLUGIN_JAR - - SPARK_RAPIDS_PLUGIN_INTEGRATION_TEST_JAR - - CUDF_JAR - - """ - - parser = argparse.ArgumentParser(description='Run TPC benchmarks.') - parser.add_argument('--benchmark', required=True, - help='Name of benchmark to run (tpcds, tpcxbb, tpch)') - parser.add_argument('--template', required=True, - help='Path to a template script that invokes spark-submit') - parser.add_argument('--input', required=True, - help='Path to source data set') - parser.add_argument('--input-format', required=True, - help='Format of input data set (parquet or csv)') - parser.add_argument('--append-dat', required=False, action='store_true', - help='Append .dat to path (for tpcds only)') - parser.add_argument('--output', required=False, - help='Path to write query output to') - parser.add_argument('--output-format', required=False, - help='Format to write to (parquet or orc)') - parser.add_argument('--configs', required=True, type=str, nargs='+', - help='One or more configuration filenames to run') - parser.add_argument('--query', required=True, type=str, nargs='+', - help='Queries to run') - parser.add_argument('--iterations', required=False, - help='The number of iterations to run (defaults to 1)') - parser.add_argument('--gc-between-runs', required=False, action='store_true', - help='Whether to call System.gc between iterations') - parser.add_argument('--upload-uri', required=False, - help='Upload URI for summary output') - - args = parser.parse_args() - - with open(args.template, "r") as myfile: - template = myfile.read() - - for config_name in args.configs: - config = load_properties(config_name + ".properties") - for query in args.query: - summary_file_prefix = "{}-{}".format(args.benchmark, config_name) - - cmd = ['--conf spark.app.name="' + summary_file_prefix + '"'] - for k, v in config.items(): - cmd.append("--conf " + k + "=" + v) - - cmd.append("--jars $SPARK_RAPIDS_PLUGIN_JAR,$CUDF_JAR") - cmd.append("--class com.nvidia.spark.rapids.tests.BenchmarkRunner") - cmd.append("$SPARK_RAPIDS_PLUGIN_INTEGRATION_TEST_JAR") - cmd.append("--benchmark " + args.benchmark) - cmd.append("--query " + query) - cmd.append("--input " + args.input) - cmd.append("--input-format {}".format(args.input_format)) - - if args.append_dat is True: - cmd.append("--append-dat ") - - if args.output is not None: - cmd.append("--output " + args.output + "/" + config_name + "/" + query) - - if args.output_format is not None: - cmd.append("--output-format {}".format(args.output_format)) - - cmd.append("--summary-file-prefix " + summary_file_prefix) - - if args.gc_between_runs is True: - cmd.append("--gc-between-runs ") - - if args.upload_uri is not None: - cmd.append("--upload-uri " + args.upload_uri) - - if args.iterations is None: - cmd.append("--iterations 1") - else: - cmd.append("--iterations {}".format(args.iterations)) - - cmd = template.strip() + "\n " + " ".join(cmd).strip() - - # run spark-submit - print(cmd) - os.system(cmd) - - -def load_properties(filename): - myvars = {} - 
with open(filename) as myfile: - for line in myfile: - name, var = line.partition("=")[::2] - myvars[name.strip()] = var.strip() - return myvars - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/integration_tests/src/main/python/conftest.py b/integration_tests/src/main/python/conftest.py index 24a36700af3..05a0c6529d5 100644 --- a/integration_tests/src/main/python/conftest.py +++ b/integration_tests/src/main/python/conftest.py @@ -279,109 +279,6 @@ def _get_jvm(spark): def spark_jvm(): return _get_jvm(get_spark_i_know_what_i_am_doing()) -class TpchRunner: - def __init__(self, tpch_format, tpch_path): - self.tpch_format = tpch_format - self.tpch_path = tpch_path - self.setup(get_spark_i_know_what_i_am_doing()) - - def setup(self, spark): - jvm_session = _get_jvm_session(spark) - jvm = _get_jvm(spark) - formats = { - "csv": jvm.com.nvidia.spark.rapids.tests.tpch.TpchLikeSpark.setupAllCSV, - "parquet": jvm.com.nvidia.spark.rapids.tests.tpch.TpchLikeSpark.setupAllParquet, - "orc": jvm.com.nvidia.spark.rapids.tests.tpch.TpchLikeSpark.setupAllOrc - } - if not self.tpch_format in formats: - raise RuntimeError("{} is not a supported tpch input type".format(self.tpch_format)) - formats.get(self.tpch_format)(jvm_session, self.tpch_path) - - def do_test_query(self, query): - spark = get_spark_i_know_what_i_am_doing() - jvm_session = _get_jvm_session(spark) - jvm = _get_jvm(spark) - tests = { - "q1": jvm.com.nvidia.spark.rapids.tests.tpch.Q1Like, - "q2": jvm.com.nvidia.spark.rapids.tests.tpch.Q2Like, - "q3": jvm.com.nvidia.spark.rapids.tests.tpch.Q3Like, - "q4": jvm.com.nvidia.spark.rapids.tests.tpch.Q4Like, - "q5": jvm.com.nvidia.spark.rapids.tests.tpch.Q5Like, - "q6": jvm.com.nvidia.spark.rapids.tests.tpch.Q6Like, - "q7": jvm.com.nvidia.spark.rapids.tests.tpch.Q7Like, - "q8": jvm.com.nvidia.spark.rapids.tests.tpch.Q8Like, - "q9": jvm.com.nvidia.spark.rapids.tests.tpch.Q9Like, - "q10": jvm.com.nvidia.spark.rapids.tests.tpch.Q10Like, - "q11": jvm.com.nvidia.spark.rapids.tests.tpch.Q11Like, - "q12": jvm.com.nvidia.spark.rapids.tests.tpch.Q12Like, - "q13": jvm.com.nvidia.spark.rapids.tests.tpch.Q13Like, - "q14": jvm.com.nvidia.spark.rapids.tests.tpch.Q14Like, - "q15": jvm.com.nvidia.spark.rapids.tests.tpch.Q15Like, - "q16": jvm.com.nvidia.spark.rapids.tests.tpch.Q16Like, - "q17": jvm.com.nvidia.spark.rapids.tests.tpch.Q17Like, - "q18": jvm.com.nvidia.spark.rapids.tests.tpch.Q18Like, - "q19": jvm.com.nvidia.spark.rapids.tests.tpch.Q19Like, - "q20": jvm.com.nvidia.spark.rapids.tests.tpch.Q20Like, - "q21": jvm.com.nvidia.spark.rapids.tests.tpch.Q21Like, - "q22": jvm.com.nvidia.spark.rapids.tests.tpch.Q22Like - } - df = tests.get(query).apply(jvm_session) - return DataFrame(df, spark.getActiveSession()) - -@pytest.fixture(scope="session") -def tpch(request): - tpch_format = request.config.getoption("tpch_format") - tpch_path = request.config.getoption("tpch_path") - if tpch_path is None: - std_path = request.config.getoption("std_input_path") - if std_path is None: - skip_unless_precommit_tests("TPCH is not configured to run") - else: - tpch_path = std_path + '/tpch/' - tpch_format = 'parquet' - yield TpchRunner(tpch_format, tpch_path) - -class TpcxbbRunner: - def __init__(self, tpcxbb_format, tpcxbb_path): - self.tpcxbb_format = tpcxbb_format - self.tpcxbb_path = tpcxbb_path - self.setup(get_spark_i_know_what_i_am_doing()) - - def setup(self, spark): - jvm_session = _get_jvm_session(spark) - jvm = _get_jvm(spark) - formats = { - "csv": 
jvm.com.nvidia.spark.rapids.tests.tpcxbb.TpcxbbLikeSpark.setupAllCSV, - "parquet": jvm.com.nvidia.spark.rapids.tests.tpcxbb.TpcxbbLikeSpark.setupAllParquet, - "orc": jvm.com.nvidia.spark.rapids.tests.tpcxbb.TpcxbbLikeSpark.setupAllOrc - } - if not self.tpcxbb_format in formats: - raise RuntimeError("{} is not a supported tpcxbb input type".format(self.tpcxbb_format)) - formats.get(self.tpcxbb_format)(jvm_session,self.tpcxbb_path) - - def do_test_query(self, query): - spark = get_spark_i_know_what_i_am_doing() - jvm_session = _get_jvm_session(spark) - jvm = _get_jvm(spark) - tests = { - "q5": jvm.com.nvidia.spark.rapids.tests.tpcxbb.Q5Like, - "q16": jvm.com.nvidia.spark.rapids.tests.tpcxbb.Q16Like, - "q21": jvm.com.nvidia.spark.rapids.tests.tpcxbb.Q21Like, - "q22": jvm.com.nvidia.spark.rapids.tests.tpcxbb.Q22Like - } - df = tests.get(query).apply(jvm_session) - return DataFrame(df, spark.getActiveSession()) - -@pytest.fixture(scope="session") -def tpcxbb(request): - tpcxbb_format = request.config.getoption("tpcxbb_format") - tpcxbb_path = request.config.getoption("tpcxbb_path") - if tpcxbb_path is None: - # TPCxBB is not required for any test runs - pytest.skip("TPCxBB not configured to run") - else: - yield TpcxbbRunner(tpcxbb_format, tpcxbb_path) - class MortgageRunner: def __init__(self, mortgage_format, mortgage_acq_path, mortgage_perf_path): self.mortgage_format = mortgage_format @@ -418,41 +315,6 @@ def mortgage(request): else: yield MortgageRunner(mortgage_format, mortgage_path + '/acq', mortgage_path + '/perf') -class TpcdsRunner: - def __init__(self, tpcds_format, tpcds_path): - self.tpcds_format = tpcds_format - self.tpcds_path = tpcds_path - self.setup(get_spark_i_know_what_i_am_doing()) - - def setup(self, spark): - jvm_session = _get_jvm_session(spark) - jvm = _get_jvm(spark) - formats = { - "csv": jvm.com.nvidia.spark.rapids.tests.tpcds.TpcdsLikeSpark.setupAllCSV, - "parquet": jvm.com.nvidia.spark.rapids.tests.tpcds.TpcdsLikeSpark.setupAllParquet, - "orc": jvm.com.nvidia.spark.rapids.tests.tpcds.TpcdsLikeSpark.setupAllOrc - } - if not self.tpcds_format in formats: - raise RuntimeError("{} is not a supported tpcds input type".format(self.tpcds_format)) - formats.get(self.tpcds_format)(jvm_session, self.tpcds_path, True, True) - - def do_test_query(self, query): - spark = get_spark_i_know_what_i_am_doing() - jvm_session = _get_jvm_session(spark) - jvm = _get_jvm(spark) - df = jvm.com.nvidia.spark.rapids.tests.tpcds.TpcdsLikeSpark.run(jvm_session, query) - return DataFrame(df, spark.getActiveSession()) - -@pytest.fixture(scope="session") -def tpcds(request): - tpcds_format = request.config.getoption("tpcds_format") - tpcds_path = request.config.getoption("tpcds_path") - if tpcds_path is None: - # TPC-DS is not required for any test runs - pytest.skip("TPC-DS not configured to run") - else: - yield TpcdsRunner(tpcds_format, tpcds_path) - @pytest.fixture(scope="session") def enable_cudf_udf(request): enable_udf_cudf = request.config.getoption("cudf_udf") diff --git a/integration_tests/src/main/python/tpcds_test.py b/integration_tests/src/main/python/tpcds_test.py deleted file mode 100644 index 5b8780af375..00000000000 --- a/integration_tests/src/main/python/tpcds_test.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import pytest - -from asserts import assert_gpu_and_cpu_are_equal_collect -from marks import incompat, ignore_order, allow_non_gpu, approximate_float - -queries = ['q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8', 'q9', - 'q10', 'q11', 'q12', 'q13', 'q14a', 'q14b', 'q15', 'q16', 'q17', 'q18', 'q19', - 'q20', 'q21', 'q22', 'q23a', 'q23b', 'q24a', 'q24b', 'q25', 'q26', 'q27', 'q28', 'q29', - 'q30', 'q31', 'q32', 'q33', 'q34', 'q35', 'q36', 'q37', 'q38', 'q39a', 'q39b', - 'q40', 'q41', 'q42', 'q43', 'q44', 'q45', 'q46', 'q47', 'q48', 'q49', - 'q50', 'q51', 'q52', 'q53', 'q54', 'q55', 'q56', 'q57', 'q58', 'q59', - 'q60', 'q61', 'q62', 'q63', 'q64', 'q65', 'q66', 'q68', 'q69', - 'q71', 'q72', 'q73', 'q74', 'q75', 'q76', 'q77', 'q78', 'q79', - 'q80', 'q81', 'q82', 'q83', 'q84', 'q85', 'q86', 'q87', 'q88', 'q89', - 'q90', 'q91', 'q92', 'q93', 'q94', 'q95', 'q96', 'q97', 'q98', 'q99', - 'ss_max', 'ss_maxb'] - -@incompat -@ignore_order -@approximate_float -@allow_non_gpu(any=True) -@pytest.mark.parametrize('query', queries) -def test_tpcds(tpcds, query): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpcds.do_test_query(query), - conf={'spark.rapids.sql.variableFloatAgg.enabled': 'true'}) - -no_var_agg_queries = ['q67', 'q70'] - -@incompat -@ignore_order -@approximate_float -@allow_non_gpu(any=True) -@pytest.mark.parametrize('query', no_var_agg_queries) -def test_tpcds_no_var_agg(tpcds, query): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpcds.do_test_query(query)) diff --git a/integration_tests/src/main/python/tpch_test.py b/integration_tests/src/main/python/tpch_test.py deleted file mode 100644 index fee8f665314..00000000000 --- a/integration_tests/src/main/python/tpch_test.py +++ /dev/null @@ -1,178 +0,0 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from asserts import assert_gpu_and_cpu_are_equal_collect -from conftest import is_databricks_runtime -from marks import approximate_float, incompat, ignore_order, allow_non_gpu, allow_non_gpu_databricks -from spark_session import with_spark_session - -_base_conf = {'spark.rapids.sql.variableFloatAgg.enabled': 'true', - 'spark.rapids.sql.hasNans': 'false', - 'spark.sql.adaptive.enabled': 'false'} - -_adaptive_conf = _base_conf.copy() -_adaptive_conf.update({'spark.sql.adaptive.enabled': 'true'}) - -_test_confs = [_base_conf] - -# don't run the AQE tests on databricks - https://github.com/NVIDIA/spark-rapids/issues/1059 -# for some reaosn xfail doesn't work, it causes other issues when those tests run and fail -if not is_databricks_runtime(): - _test_confs.append(_adaptive_conf) - -@approximate_float -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q1(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q1"), conf=conf) - -@approximate_float -@incompat -@allow_non_gpu('TakeOrderedAndProjectExec', 'SortOrder', 'AttributeReference') -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q2(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q2"), conf=conf) - -@approximate_float -@allow_non_gpu('TakeOrderedAndProjectExec', 'SortOrder', 'AttributeReference') -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q3(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q3"), conf=conf) - -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q4(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q4"), conf=conf) - -@approximate_float -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q5(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q5"), conf=conf) - -@approximate_float -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q6(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q6"), conf=conf) - -@approximate_float -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q7(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q7"), conf=conf) - -@approximate_float -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q8(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q8"), conf=conf) - -@approximate_float -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q9(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q9"), conf=conf) - -@incompat -@approximate_float -@allow_non_gpu('TakeOrderedAndProjectExec', 'SortOrder', 'AttributeReference') -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q10(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q10"), conf=conf) - -@approximate_float -@allow_non_gpu('FilterExec', 'And', 'IsNotNull', 'GreaterThan', 'AttributeReference', 'ScalarSubquery') -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q11(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q11"), conf=conf) - -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q12(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q12"), conf=conf) - -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q13(tpch, conf): - 
assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q13"), conf=conf) - -@approximate_float -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q14(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q14"), conf=conf) - -@approximate_float -#fp sum does not work on Q15 -@allow_non_gpu(any=True) -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q15(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q15")) - -@allow_non_gpu('BroadcastNestedLoopJoinExec', 'Or', 'IsNull', 'EqualTo', 'AttributeReference', 'BroadcastExchangeExec') -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q16(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q16"), conf=conf) - -@approximate_float -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q17(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q17"), conf=conf) - -@incompat -@approximate_float -@allow_non_gpu('TakeOrderedAndProjectExec', 'SortOrder', 'AttributeReference') -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q18(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q18"), conf=conf) - -@approximate_float -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q19(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q19"), conf=conf) - -@pytest.mark.parametrize('conf', _test_confs) -@allow_non_gpu_databricks('BroadcastHashJoinExec', 'BroadcastExchangeExec', 'GreaterThan', 'Cast') -def test_tpch_q20(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q20"), conf=conf) - -@allow_non_gpu('TakeOrderedAndProjectExec', 'SortOrder', 'AttributeReference', - 'SortMergeJoinExec', 'BroadcastHashJoinExec', 'BroadcastExchangeExec', - 'Alias', 'Not', 'EqualTo') -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q21(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q21"), conf=conf) - -@approximate_float -#Once ScalarSubqery if fixed the rest should just work -@allow_non_gpu('FilterExec', 'And', 'AttributeReference', 'IsNotNull', 'In', 'Substring', 'Literal', 'GreaterThan', 'ScalarSubquery') -@allow_non_gpu_databricks('EphemeralSubstring') -@pytest.mark.parametrize('conf', _test_confs) -def test_tpch_q22(tpch, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpch.do_test_query("q22"), conf=conf) diff --git a/integration_tests/src/main/python/tpcxbb_test.py b/integration_tests/src/main/python/tpcxbb_test.py deleted file mode 100644 index 4f8d2b95a9d..00000000000 --- a/integration_tests/src/main/python/tpcxbb_test.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import pytest - -from asserts import assert_gpu_and_cpu_are_equal_collect -from marks import incompat, ignore_order, allow_non_gpu, approximate_float - -@ignore_order -def test_tpcxbb_q5(tpcxbb): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpcxbb.do_test_query("q5")) - -@incompat -@approximate_float -@ignore_order -@allow_non_gpu(any=True) -def test_tpcxbb_q16(tpcxbb): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpcxbb.do_test_query("q16")) - -@ignore_order -@allow_non_gpu(any=True) -def test_tpcxbb_q21(tpcxbb): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpcxbb.do_test_query("q21")) - -@ignore_order -@allow_non_gpu(any=True) -def test_tpcxbb_q22(tpcxbb): - assert_gpu_and_cpu_are_equal_collect( - lambda spark : tpcxbb.do_test_query("q22")) diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/BenchmarkRunner.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/BenchmarkRunner.scala deleted file mode 100644 index fc8ca8384ae..00000000000 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/BenchmarkRunner.scala +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.nvidia.spark.rapids.tests - -import java.net.URI - -import scala.util.{Failure, Success, Try} - -import com.nvidia.spark.rapids.tests.common.{BenchmarkReport, BenchmarkSuite, BenchUtils} -import com.nvidia.spark.rapids.tests.tpcds.TpcdsLikeBench -import com.nvidia.spark.rapids.tests.tpch.TpchLikeBench -import com.nvidia.spark.rapids.tests.tpcxbb.TpcxbbLikeBench -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileSystem, Path} -import org.rogach.scallop.ScallopConf - -import org.apache.spark.sql.{SaveMode, SparkSession} - -/** - * The BenchmarkRunner can be submitted using spark-submit to run any of the TPC-* benchmarks. 
- */ -object BenchmarkRunner { - - def main(args: Array[String]): Unit = { - val conf = new BenchmarkConf(args) - - if (conf.appendDat() && !conf.benchmark().equalsIgnoreCase("tpcds")) { - System.err.println( - s"The --append-dat flag is not supported for benchmark ${conf.benchmark()}") - System.exit(-1) - } - - if (conf.query.isEmpty) { - System.err.println("At least one query must be specified") - System.exit(-1) - } - - val benchmarks = Map( - "tpcds" -> new TpcdsLikeBench(conf.appendDat()), - "tpch" -> new TpchLikeBench(), - "tpcxbb" -> new TpcxbbLikeBench() - ) - - benchmarks.get(conf.benchmark().toLowerCase) match { - case Some(bench) => - // only include the query name in the app name if a single query is being run - val appName = if (conf.query().size == 1) { - s"${bench.name()} Like Bench ${conf.query().head}" - } else { - s"${bench.name()} Like Bench" - } - val spark = SparkSession.builder.appName(appName).getOrCreate() - spark.sparkContext.setJobDescription("Register input tables") - conf.inputFormat().toLowerCase match { - case "parquet" => bench.setupAllParquet(spark, conf.input()) - case "csv" => bench.setupAllCSV(spark, conf.input()) - case "orc" => bench.setupAllOrc(spark, conf.input()) - case other => - System.err.println(s"Invalid input format: $other") - System.exit(-1) - } - - val runner = new BenchmarkRunner(bench) - - conf.query().foreach { query => - - println(s"*** RUNNING ${bench.name()} QUERY $query") - val summaryFilePrefixWithQuery = conf.summaryFilePrefix.toOption - .map(prefix => s"$prefix-$query") - val report = Try(conf.output.toOption match { - case Some(path) => conf.outputFormat().toLowerCase match { - case "parquet" => - runner.writeParquet( - spark, - query, - path, - iterations = conf.iterations(), - summaryFilePrefix = summaryFilePrefixWithQuery, - gcBetweenRuns = conf.gcBetweenRuns(), - generateDotGraph = conf.generateDot()) - case "csv" => - runner.writeCsv( - spark, - query, - path, - iterations = conf.iterations(), - summaryFilePrefix = summaryFilePrefixWithQuery, - gcBetweenRuns = conf.gcBetweenRuns(), - generateDotGraph = conf.generateDot()) - case "orc" => - runner.writeOrc( - spark, - query, - path, - iterations = conf.iterations(), - summaryFilePrefix = summaryFilePrefixWithQuery, - gcBetweenRuns = conf.gcBetweenRuns(), - generateDotGraph = conf.generateDot()) - case other => - throw new IllegalArgumentException(s"Invalid or unspecified output format: $other") - } - case _ => - runner.collect( - spark, - query, - conf.iterations(), - summaryFilePrefix = summaryFilePrefixWithQuery, - gcBetweenRuns = conf.gcBetweenRuns(), - generateDotGraph = conf.generateDot()) - }) - - report match { - case Success(report) => - if (conf.uploadUri.isSupplied) { - println(s"Uploading ${report.filename} to " + - s"${conf.uploadUri()}/${report.filename}") - - val hadoopConf = spark.sparkContext.hadoopConfiguration - val fs = FileSystem.newInstance(new URI(conf.uploadUri()), hadoopConf) - fs.copyFromLocalFile( - new Path(report.filename), - new Path(conf.uploadUri(), report.filename)) - } - - case Failure(e) => - System.err.println(e.getMessage) - System.exit(-1) - } - } - case _ => - System.err.println(s"Invalid benchmark name: ${conf.benchmark()}. Supported benchmarks " + - s"are ${benchmarks.keys.mkString(",")}") - System.exit(-1) - } - } -} - -/** - * This is a wrapper for a specific benchmark suite that provides methods for executing queries - * and collecting the results, or writing the results to one of the supported output formats. 
- * - * @param bench Benchmark suite (TpcdsLikeBench, TpcxbbLikeBench, or TpchLikeBench). - */ -class BenchmarkRunner(val bench: BenchmarkSuite) { - - /** - * This method performs a benchmark by executing a query and collecting the results to the - * driver and can be called from Spark shell using the following syntax: - * - * val benchmark = new BenchmarkRunner(TpcdsLikeBench) - * benchmark.collect(spark, "q5", 3) - * - * @param spark The Spark session - * @param query The name of the query to run e.g. "q5" - * @param iterations The number of times to run the query. - * @param summaryFilePrefix Optional prefix for the generated JSON summary file. - * @param gcBetweenRuns Whether to call `System.gc` between iterations to cause Spark to - * call `unregisterShuffle` - * @param generateDotGraph Boolean specifying whether to generate a query plan diagram in - * DOT format - */ - def collect( - spark: SparkSession, - query: String, - iterations: Int = 3, - summaryFilePrefix: Option[String] = None, - gcBetweenRuns: Boolean = false, - generateDotGraph: Boolean = false - ): BenchmarkReport = { - BenchUtils.collect( - spark, - spark => bench.createDataFrame(spark, query), - query, - summaryFilePrefix.getOrElse(s"${bench.shortName()}-$query-collect"), - iterations, - gcBetweenRuns, - generateDotGraph) - } - - /** - * This method performs a benchmark of executing a query and writing the results to CSV files - * and can be called from Spark shell using the following syntax: - * - * val benchmark = new BenchmarkRunner(TpcdsLikeBench) - * benchmark.writeCsv(spark, "q5", 3, "/path/to/write") - * - * @param spark The Spark session - * @param query The name of the query to run e.g. "q5" - * @param path The path to write the results to - * @param mode The SaveMode to use when writing the results - * @param writeOptions Write options - * @param iterations The number of times to run the query. - * @param summaryFilePrefix Optional prefix for the generated JSON summary file. - * @param gcBetweenRuns Whether to call `System.gc` between iterations to cause Spark to - * call `unregisterShuffle` - * @param generateDotGraph Boolean specifying whether to generate a query plan diagram in - * DOT format - */ - def writeCsv( - spark: SparkSession, - query: String, - path: String, - mode: SaveMode = SaveMode.Overwrite, - writeOptions: Map[String, String] = Map.empty, - iterations: Int = 3, - summaryFilePrefix: Option[String] = None, - gcBetweenRuns: Boolean = false, - generateDotGraph: Boolean = false): BenchmarkReport = { - BenchUtils.writeCsv( - spark, - spark => bench.createDataFrame(spark, query), - query, - summaryFilePrefix.getOrElse(s"${bench.shortName()}-$query-csv"), - iterations, - gcBetweenRuns, - path, - mode, - writeOptions, - generateDotGraph) - } - - /** - * This method performs a benchmark of executing a query and writing the results to ORC files - * and can be called from Spark shell using the following syntax: - * - * val benchmark = new BenchmarkRunner(TpcdsLikeBench) - * benchmark.writeOrc(spark, "q5", 3, "/path/to/write") - * - * @param spark The Spark session - * @param query The name of the query to run e.g. "q5" - * @param path The path to write the results to - * @param mode The SaveMode to use when writing the results - * @param writeOptions Write options - * @param iterations The number of times to run the query. - * @param summaryFilePrefix Optional prefix for the generated JSON summary file. 
- * @param gcBetweenRuns Whether to call `System.gc` between iterations to cause Spark to - * call `unregisterShuffle` - * @param generateDotGraph Boolean specifying whether to generate a query plan diagram in - * DOT format - */ - def writeOrc( - spark: SparkSession, - query: String, - path: String, - mode: SaveMode = SaveMode.Overwrite, - writeOptions: Map[String, String] = Map.empty, - iterations: Int = 3, - summaryFilePrefix: Option[String] = None, - gcBetweenRuns: Boolean = false, - generateDotGraph: Boolean = false): BenchmarkReport = { - BenchUtils.writeOrc( - spark, - spark => bench.createDataFrame(spark, query), - query, - summaryFilePrefix.getOrElse(s"${bench.shortName()}-$query-csv"), - iterations, - gcBetweenRuns, - path, - mode, - writeOptions, - generateDotGraph) - } - - /** - * This method performs a benchmark of executing a query and writing the results to Parquet files - * and can be called from Spark shell using the following syntax: - * - * val benchmark = new BenchmarkRunner(TpcdsLikeBench) - * benchmark.writeParquet(spark, "q5", 3, "/path/to/write") - * - * @param spark The Spark session - * @param query The name of the query to run e.g. "q5" - * @param path The path to write the results to - * @param mode The SaveMode to use when writing the results - * @param writeOptions Write options - * @param iterations The number of times to run the query - * @param summaryFilePrefix Optional prefix for the generated JSON summary file. - * @param gcBetweenRuns Whether to call `System.gc` between iterations to cause Spark to - * call `unregisterShuffle` - * @param generateDotGraph Boolean specifying whether to generate a query plan diagram in - * DOT format - */ - def writeParquet( - spark: SparkSession, - query: String, - path: String, - mode: SaveMode = SaveMode.Overwrite, - writeOptions: Map[String, String] = Map.empty, - iterations: Int = 3, - summaryFilePrefix: Option[String] = None, - gcBetweenRuns: Boolean = false, - generateDotGraph: Boolean = false): BenchmarkReport = { - BenchUtils.writeParquet( - spark, - spark => bench.createDataFrame(spark, query), - query, - summaryFilePrefix.getOrElse(s"${bench.shortName()}-$query-parquet"), - iterations, - gcBetweenRuns, - path, - mode, - writeOptions, - generateDotGraph) - } -} - -class BenchmarkConf(arguments: Seq[String]) extends ScallopConf(arguments) { - val benchmark = opt[String](required = true) - val input = opt[String](required = true) - val inputFormat = opt[String](required = true) - val appendDat = opt[Boolean](required = false, default = Some(false)) - val query = opt[List[String]](required = true) - val iterations = opt[Int](default = Some(3)) - val output = opt[String](required = false) - val outputFormat = opt[String](required = false) - val summaryFilePrefix = opt[String](required = false) - val gcBetweenRuns = opt[Boolean](required = false, default = Some(false)) - val uploadUri = opt[String](required = false) - val generateDot = opt[Boolean](required = false, default = Some(false)) - verify() -} diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/common/BenchUtils.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/common/BenchUtils.scala deleted file mode 100644 index 6870cc203b2..00000000000 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/common/BenchUtils.scala +++ /dev/null @@ -1,853 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.nvidia.spark.rapids.tests.common - -import java.io.{File, FileOutputStream, FileWriter, PrintWriter, StringWriter} -import java.time.Instant -import java.util.concurrent.TimeUnit -import java.util.concurrent.TimeUnit.NANOSECONDS - -import scala.collection.convert.ImplicitConversions.`iterator asScala` -import scala.collection.mutable.ListBuffer - -import org.json4s.DefaultFormats -import org.json4s.jackson.JsonMethods.parse -import org.json4s.jackson.Serialization.writePretty - -import org.apache.spark.{SPARK_BUILD_USER, SPARK_VERSION, Success, TaskEndReason} -import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} -import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession} -import org.apache.spark.sql.execution.{InputAdapter, QueryExecution, SparkPlan, WholeStageCodegenExec} -import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, QueryStageExec} -import org.apache.spark.sql.execution.datasources.FilePartition -import org.apache.spark.sql.functions.col -import org.apache.spark.sql.types.DataTypes -import org.apache.spark.sql.util.QueryExecutionListener - -object BenchUtils { - - val STATUS_COMPLETED = "Completed" - val STATUS_COMPLETED_WITH_TASK_FAILURES = "CompletedWithTaskFailures" - val STATUS_FAILED = "Failed" - - /** Perform benchmark of calling collect */ - def collect( - spark: SparkSession, - createDataFrame: SparkSession => DataFrame, - queryDescription: String, - filenameStub: String, - iterations: Int, - gcBetweenRuns: Boolean, - generateDotGraph: Boolean = false - ): BenchmarkReport = { - runBench( - spark, - createDataFrame, - Collect(), - queryDescription, - filenameStub, - iterations, - gcBetweenRuns, - generateDotGraph) - } - - /** Perform benchmark of writing results to CSV */ - def writeCsv( - spark: SparkSession, - createDataFrame: SparkSession => DataFrame, - queryDescription: String, - filenameStub: String, - iterations: Int, - gcBetweenRuns: Boolean, - path: String, - mode: SaveMode = SaveMode.Overwrite, - writeOptions: Map[String, String] = Map.empty, - generateDotGraph: Boolean = false): BenchmarkReport = { - runBench( - spark, - createDataFrame, - WriteCsv(path, mode, writeOptions), - queryDescription, - filenameStub, - iterations, - gcBetweenRuns, - generateDotGraph) - } - - /** Perform benchmark of writing results to ORC */ - def writeOrc( - spark: SparkSession, - createDataFrame: SparkSession => DataFrame, - queryDescription: String, - filenameStub: String, - iterations: Int, - gcBetweenRuns: Boolean, - path: String, - mode: SaveMode = SaveMode.Overwrite, - writeOptions: Map[String, String] = Map.empty, - generateDotGraph: Boolean = false): BenchmarkReport = { - runBench( - spark, - createDataFrame, - WriteOrc(path, mode, writeOptions), - queryDescription, - filenameStub, - iterations, - gcBetweenRuns, - generateDotGraph) - } - - /** Perform benchmark of writing results to Parquet */ - def writeParquet( - spark: SparkSession, - createDataFrame: 
SparkSession => DataFrame, - queryDescription: String, - filenameStub: String, - iterations: Int, - gcBetweenRuns: Boolean, - path: String, - mode: SaveMode = SaveMode.Overwrite, - writeOptions: Map[String, String] = Map.empty, - generateDotGraph: Boolean = false): BenchmarkReport = { - runBench( - spark, - createDataFrame, - WriteParquet(path, mode, writeOptions), - queryDescription, - filenameStub, - iterations, - gcBetweenRuns, - generateDotGraph: Boolean) - } - - /** - * Run the specified number of cold and hot runs and record the timings and summary of the - * query and results to file, including all Spark configuration options and environment - * variables. - * - * @param spark The Spark session - * @param createDataFrame Function to create a DataFrame from the Spark session. - * @param resultsAction Optional action to perform after creating the DataFrame, with default - * behavior of calling df.collect() but user could provide function to - * save results to CSV or Parquet instead. - * @param filenameStub The prefix for the output file. The current timestamp will be appended - * to ensure that filenames are unique and that results are not - * inadvertently overwritten. - * @param iterations The number of times to run the query. - * @param gcBetweenRuns Boolean specifying whether to run System.gc() between runs - * @param generateDotGraph Boolean specifying whether to generate a query plan diagram in - * DOT format - */ - def runBench( - spark: SparkSession, - createDataFrame: SparkSession => DataFrame, - resultsAction: ResultsAction, - queryDescription: String, - filenameStub: String, - iterations: Int, - gcBetweenRuns: Boolean, - generateDotGraph: Boolean - ): BenchmarkReport = { - - assert(iterations > 0) - - val queryStartTime = Instant.now() - val logPrefix = s"[BENCHMARK RUNNER] [$queryDescription]" - - val queryPlansWithMetrics = new ListBuffer[SparkPlanNode]() - val exceptions = new ListBuffer[String]() - - var df: DataFrame = null - val queryStatus = new ListBuffer[String]() - val queryTimes = new ListBuffer[Long]() - val rowCounts = new ListBuffer[Long]() - for (i <- 0 until iterations) { - spark.sparkContext.setJobDescription(s"Benchmark Run: query=$queryDescription; iteration=$i") - - // cause Spark to call unregisterShuffle - if (i > 0 && gcBetweenRuns) { - // we must null out the dataframe reference to allow - // GC to clean up the shuffle - df = null - System.gc() - System.gc() - } - - // capture spark plan metrics on the first run - if (i == 0) { - spark.listenerManager.register(new BenchmarkListener(queryPlansWithMetrics, exceptions)) - } - - println(s"$logPrefix Start iteration $i:") - val start = System.nanoTime() - val taskFailureListener = new TaskFailureListener - try { - spark.sparkContext.addSparkListener(taskFailureListener) - df = createDataFrame(spark) - - resultsAction match { - case Collect() => - val rows = df.collect() - rowCounts.append(rows.length) - case WriteCsv(path, mode, options) => - ensureValidColumnNames(df).write.mode(mode).options(options).csv(path) - case WriteOrc(path, mode, options) => - ensureValidColumnNames(df).write.mode(mode).options(options).orc(path) - case WriteParquet(path, mode, options) => - ensureValidColumnNames(df).write.mode(mode).options(options).parquet(path) - } - - val end = System.nanoTime() - val elapsed = NANOSECONDS.toMillis(end - start) - queryTimes.append(elapsed) - - val failureOpt = taskFailureListener.taskFailures.headOption - val status = failureOpt.map(_ => STATUS_COMPLETED_WITH_TASK_FAILURES) - 
.getOrElse(STATUS_COMPLETED) - failureOpt.foreach(failure => exceptions.append(failure.toString)) - - queryStatus.append(status) - println(s"$logPrefix Iteration $i took $elapsed msec. Status: $status.") - - } catch { - case e: Exception => - val end = System.nanoTime() - val elapsed = NANOSECONDS.toMillis(end - start) - println(s"$logPrefix Iteration $i failed after $elapsed msec.") - queryStatus.append(STATUS_FAILED) - queryTimes.append(elapsed) - exceptions.append(BenchUtils.stackTraceAsString(e)) - e.printStackTrace() - } finally { - spark.sparkContext.removeSparkListener(taskFailureListener) - } - } - - // only show query times if there were no failed queries - if (!queryStatus.contains(STATUS_FAILED)) { - - // summarize all query times - for (i <- 0 until iterations) { - println(s"$logPrefix Iteration $i took ${queryTimes(i)} msec. Status: ${queryStatus(i)}") - } - - // for multiple runs, summarize cold/hot timings - if (iterations > 1) { - println(s"$logPrefix Cold run: ${queryTimes(0)} msec.") - val hotRuns = queryTimes.drop(1) - val numHotRuns = hotRuns.length - println(s"$logPrefix Best of $numHotRuns hot run(s): ${hotRuns.min} msec.") - println(s"$logPrefix Worst of $numHotRuns hot run(s): ${hotRuns.max} msec.") - println(s"$logPrefix Average of $numHotRuns hot run(s): " + - s"${hotRuns.sum.toDouble / numHotRuns} msec.") - } - } - - // write results to file - val filename = s"$filenameStub-${queryStartTime.toEpochMilli}.json" - println(s"$logPrefix Saving benchmark report to $filename") - - // try not to leak secrets - val redacted = Seq("TOKEN", "SECRET", "PASSWORD") - val envVars: Map[String, String] = sys.env - .filterNot(entry => redacted.exists(entry._1.toUpperCase.contains)) - - val testConfiguration = TestConfiguration( - gcBetweenRuns - ) - - val environment = Environment( - envVars, - sparkConf = df.sparkSession.conf.getAll, - getSparkVersion) - - // if the query plan is invalid, referencing the `executedPlan` lazy val - // can throw an exception - val executedPlanStr = try { - df.queryExecution.executedPlan.toString() - } catch { - case e: Exception => - exceptions.append(stackTraceAsString(e)) - "Failed to capture executedPlan - see exceptions in report" - } - - val queryPlan = QueryPlan( - df.queryExecution.logical.toString(), - executedPlanStr - ) - - var report = BenchmarkReport( - filename, - queryStartTime.toEpochMilli, - environment, - testConfiguration, - "", - Map.empty, - queryDescription, - queryPlan, - queryPlansWithMetrics, - rowCounts, - queryTimes, - queryStatus, - exceptions) - - report = resultsAction match { - case Collect() => report.copy( - action = "collect") - - case w: WriteCsv => report.copy( - action = "csv", - writeOptions = w.writeOptions) - - case w: WriteOrc => report.copy( - action = "orc", - writeOptions = w.writeOptions) - - case w: WriteParquet => report.copy( - action = "parquet", - writeOptions = w.writeOptions) - } - - writeReport(report, filename) - - if (generateDotGraph) { - queryPlansWithMetrics.headOption match { - case Some(plan) => - val filename = s"$filenameStub-${queryStartTime.toEpochMilli}.dot" - println(s"$logPrefix Saving query plan diagram to $filename") - BenchUtils.generateDotGraph(plan, None, filename) - case _ => - println(s"$logPrefix Cannot generate query plan diagram because there are no query plans") - } - } - - report - } - - /** - * Replace any invalid column names with c0, c1, and so on so that there are no columns with - * names based on expressions such as "round((sun_sales1 / sun_sales2), 2)". 
This is necessary - * when writing query output to Parquet. - */ - private def ensureValidColumnNames(df: DataFrame): DataFrame = { - def isColumnStart(ch: Char) = ch.isLetter || ch == '_' - def isColumnPart(ch: Char) = ch.isLetterOrDigit || ch == '_' - def isValid(name: String) = name.length > 0 && - isColumnStart(name.charAt(0)) && - name.substring(1).toCharArray.forall(isColumnPart) - val renameColumnExprs = df.columns.zipWithIndex.map { - case (name, i) => if (isValid(name)) { - col(name) - } else { - col(name).as("c" + i) - } - } - df.select(renameColumnExprs: _*) - } - - def readReport(file: File): BenchmarkReport = { - implicit val formats = DefaultFormats - val json = parse(file) - json.extract[BenchmarkReport] - } - - def writeReport(report: BenchmarkReport, filename: String): Unit = { - implicit val formats = DefaultFormats - val os = new FileOutputStream(filename) - os.write(writePretty(report).getBytes) - os.close() - } - - def validateCoalesceRepartition( - coalesce: Map[String, Int], - repartition: Map[String, Int]): Unit = { - val duplicates = coalesce.keys.filter(name => repartition.contains(name)) - if (duplicates.nonEmpty) { - throw new IllegalArgumentException( - s"Cannot both coalesce and repartition the same table: ${duplicates.mkString(",")}") - } - } - - def applyCoalesceRepartition( - name: String, - df: DataFrame, - coalesce: Map[String, Int], - repartition: Map[String, Int]): DataFrame = { - (coalesce.get(name), repartition.get(name)) match { - case (Some(_), Some(_)) => - // this should be unreachable due to earlier validation - throw new IllegalArgumentException( - s"Cannot both coalesce and repartition the same table: $name") - case (Some(n), _) => df.coalesce(n) - case (_, Some(n)) => df.repartition(n) - case _ => df - } - } - - /** - * Generate a DOT graph for one query plan, or showing differences between two query plans. - * - * Diff mode is intended for comparing query plans that are expected to have the same - * structure, such as two different runs of the same query but with different tuning options. - * - * When running in diff mode, any differences in SQL metrics are shown. Also, if the plan - * starts to deviate then the graph will show where the plans deviate and will not recurse - * further. - * - * Example usage: - * - *
-   * val a = BenchUtils.readReport(new File("tpcxbb-q5-parquet-config1.json"))
-   * val b = BenchUtils.readReport(new File("tpcxbb-q5-parquet-config2.json"))
-   * BenchUtils.generateDotGraph(a.queryPlans.head, Some(b.queryPlans.head), "/tmp/graph.dot")
-   * 
- * - * Graphviz and other tools can be used to generate images from DOT files. - * - * See https://graphviz.org/pdf/dotguide.pdf for a description of DOT files. - */ - def generateDotGraph(a: SparkPlanNode, b: Option[SparkPlanNode], filename: String): Unit = { - - var nextId = 1 - - def isGpuPlan(plan: SparkPlanNode): Boolean = plan.name.startsWith("Gpu") - - /** Recursively graph the operator nodes in the spark plan */ - def writeGraph( - w: PrintWriter, - a: SparkPlanNode, - b: SparkPlanNode, - id: Int = 0): Unit = { - if (a.name == b.name && a.children.length == b.children.length) { - val metricNames = (a.metrics.map(_.name) ++ b.metrics.map(_.name)).distinct.sorted - val metrics = metricNames.map(name => { - val l = a.metrics.find(_.name == name) - val r = b.metrics.find(_.name == name) - if (l.isDefined && r.isDefined) { - val metric1 = l.get - val metric2 = r.get - if (metric1.value == metric2.value) { - s"$name: ${metric1.value}" - } else { - metric1.metricType match { - case "nsTiming" => - val n1 = metric1.value.toString.toLong - val n2 = metric2.value.toString.toLong - val pct = (n2-n1) * 100.0 / n1 - val pctStr = if (pct < 0) { - f"$pct%.1f" - } else { - f"+$pct%.1f" - } - s"$name: ${TimeUnit.NANOSECONDS.toSeconds(n1)} / " + - s"${TimeUnit.NANOSECONDS.toSeconds(n2)} s ($pctStr %)" - case _ => - s"$name: ${metric1.value} / ${metric2.value}" - } - } - } - }).mkString("\n") - - val nvGreen = "#76b900" - val blue = "#0071c5" - val color = if (isGpuPlan(a)) { nvGreen } else { blue } - - w.println( - s"""node$id [shape=box,color="$color", - |label = "${a.name} #${a.id}\n - |$metrics"]; - | /* ${a.description} */ - |""".stripMargin) - a.children.indices.foreach(i => { - val childId = nextId - nextId += 1 - writeGraph(w, a.children(i), b.children(i), childId); - - val style = (isGpuPlan(a), isGpuPlan(a.children(i))) match { - case (true, true) => s"""color="$nvGreen"""" - case (false, false) => s"""color="$blue"""" - case _ => - // show emphasis on transitions between CPU and GPU - "color=red, style=bold" - } - w.println(s"node$childId -> node$id [$style];") - }) - } else { - // plans have diverged - cannot recurse further - w.println( - s"""node$id [shape=box, color=red, - |label = "plans diverge here: ${a.name} vs ${b.name}"];""".stripMargin) - } - } - - // write the dot graph to a file - val w = new PrintWriter(new FileWriter(filename)) - w.println("digraph G {") - writeGraph(w, a, b.getOrElse(a), 0) - w.println("}") - w.close() - } - - def getSparkVersion: String = { - // hack for databricks, try to find something more reliable? - if (SPARK_BUILD_USER.equals("Databricks")) { - SPARK_VERSION + "-databricks" - } else { - SPARK_VERSION - } - } - - /** - * Perform a diff of the results collected from two DataFrames, allowing for differences in - * precision. - * - * The intended usage is to run timed benchmarks that write results to file and then separately - * use this utility to compare those result sets. This code performs a sort and a collect and - * is only suitable for data sets that can fit in the driver's memory. For larger datasets, - * a better approach would be to convert the results to single files, download them locally - * and adapt this Scala code to read those files directly (without using Spark). - * - * Example usage: - * - *
-   * scala> val cpu = spark.read.parquet("/data/q5-cpu")
-   * scala> val gpu = spark.read.parquet("/data/q5-gpu")
-   * scala> import com.nvidia.spark.rapids.tests.common._
-   * scala> BenchUtils.compareResults(cpu, gpu, inputFormat="parquet", ignoreOrdering=true, epsilon=0.0)
-   * Collecting rows from DataFrame
-   * Collected 989754 rows in 7.701 seconds
-   * Collecting rows from DataFrame
-   * Collected 989754 rows in 2.325 seconds
-   * Results match
-   * 
- * - * @param df1 DataFrame to compare. - * @param df2 DataFrame to compare. - * @param readPathAction Function to create DataFrame from a path when reading individual - * partitions from a partitioned data source. - * @param ignoreOrdering Sort the data collected from the DataFrames before comparing them. - * @param useIterator When set to true, use `toLocalIterator` to load one partition at a time - * into driver memory, reducing memory usage at the cost of performance - * because processing will be single-threaded. - * @param maxErrors Maximum number of differences to report. - * @param epsilon Allow for differences in precision when comparing floating point values. - */ - def compareResults( - df1: DataFrame, - df2: DataFrame, - inputFormat: String, - ignoreOrdering: Boolean, - useIterator: Boolean = false, - maxErrors: Int = 10, - epsilon: Double = 0.00001): Unit = { - - val spark = df1.sparkSession - - val readPathAction = (path: String) => spark.read.format(inputFormat).load(path) - - val count1 = df1.count() - val count2 = df2.count() - - if (count1 == count2) { - println(s"Both DataFrames contain $count1 rows") - - val (result1, result2) = if (!ignoreOrdering && - (df1.rdd.getNumPartitions > 1 || df2.rdd.getNumPartitions > 1)) { - (collectPartitioned(df1, readPathAction), - collectPartitioned(df2, readPathAction)) - } else { - (collectResults(df1, ignoreOrdering, useIterator), - collectResults(df2, ignoreOrdering, useIterator)) - } - - var errors = 0 - var i = 0 - while (i < count1 && errors < maxErrors) { - val l = result1.next() - val r = result2.next() - if (!rowEqual(l, r, epsilon)) { - println(s"Row $i:\n${l.mkString(",")}\n${r.mkString(",")}\n") - errors += 1 - } - i += 1 - } - println(s"Processed $i rows") - - if (errors == maxErrors) { - println(s"Aborting comparison after reaching maximum of $maxErrors errors") - } else if (errors == 0) { - println(s"Results match") - } else { - println(s"There were $errors errors") - } - } else { - println(s"DataFrame row counts do not match: $count1 != $count2") - } - } - - private def collectResults( - df: DataFrame, - ignoreOrdering: Boolean, - useIterator: Boolean): Iterator[Seq[Any]] = { - - // apply sorting if specified - val resultDf = if (ignoreOrdering) { - // let Spark do the sorting, sorting by non-float columns first, then float columns - val nonFloatCols = df.schema.fields - .filter(field => !(field.dataType == DataTypes.FloatType || - field.dataType == DataTypes.DoubleType)) - .map(field => col(field.name)) - val floatCols = df.schema.fields - .filter(field => field.dataType == DataTypes.FloatType || - field.dataType == DataTypes.DoubleType) - .map(field => col(field.name)) - df.sort((nonFloatCols ++ floatCols): _*) - } else { - df - } - - val it: Iterator[Row] = if (useIterator) { - resultDf.toLocalIterator() - } else { - println("Collecting rows from DataFrame") - val t1 = System.currentTimeMillis() - val rows = resultDf.collect() - val t2 = System.currentTimeMillis() - println(s"Collected ${rows.length} rows in ${(t2-t1)/1000.0} seconds") - rows.toIterator - } - - // map Iterator[Row] to Iterator[Seq[Any]] - it.map(_.toSeq) - } - - /** - * Collect data from a partitioned data source, preserving order by reading files in - * alphabetical order. 
- */ - private def collectPartitioned( - df: DataFrame, - readPathAction: String => DataFrame): Iterator[Seq[Any]] = { - val files = df.rdd.partitions.flatMap { - case p: FilePartition => p.files - case other => - throw new RuntimeException(s"Expected FilePartition, found ${other.getClass}") - } - files.map(_.filePath).sorted.flatMap(path => { - readPathAction(path).collect() - }).toIterator.map(_.toSeq) - } - - private def rowEqual(row1: Seq[Any], row2: Seq[Any], epsilon: Double): Boolean = { - row1.zip(row2).forall { - case (l, r) => compare(l, r, epsilon) - } - } - - // this is copied from SparkQueryCompareTestSuite - private def compare(expected: Any, actual: Any, epsilon: Double = 0.0): Boolean = { - def doublesAreEqualWithinPercentage(expected: Double, actual: Double): (String, Boolean) = { - if (!compare(expected, actual)) { - if (expected != 0) { - val v = Math.abs((expected - actual) / expected) - (s"\n\nABS($expected - $actual) / ABS($actual) == $v is not <= $epsilon ", v <= epsilon) - } else { - val v = Math.abs(expected - actual) - (s"\n\nABS($expected - $actual) == $v is not <= $epsilon ", v <= epsilon) - } - } else { - ("SUCCESS", true) - } - } - (expected, actual) match { - case (a: Float, b: Float) if a.isNaN && b.isNaN => true - case (a: Double, b: Double) if a.isNaN && b.isNaN => true - case (null, null) => true - case (null, _) => false - case (_, null) => false - case (a: Array[_], b: Array[_]) => - a.length == b.length && a.zip(b).forall { case (l, r) => compare(l, r, epsilon) } - case (a: Map[_, _], b: Map[_, _]) => - a.size == b.size && a.keys.forall { aKey => - b.keys.find(bKey => compare(aKey, bKey)) - .exists(bKey => compare(a(aKey), b(bKey))) - } - case (a: Iterable[_], b: Iterable[_]) => - a.size == b.size && a.zip(b).forall { case (l, r) => compare(l, r, epsilon) } - case (a: Product, b: Product) => - compare(a.productIterator.toSeq, b.productIterator.toSeq, epsilon) - case (a: Row, b: Row) => - compare(a.toSeq, b.toSeq, epsilon) - // 0.0 == -0.0, turn float/double to bits before comparison, to distinguish 0.0 and -0.0. 
- case (a: Double, b: Double) if epsilon <= 0 => - java.lang.Double.doubleToRawLongBits(a) == java.lang.Double.doubleToRawLongBits(b) - case (a: Double, b: Double) if epsilon > 0 => - val ret = doublesAreEqualWithinPercentage(a, b) - if (!ret._2) { - System.err.println(ret._1 + " (double)") - } - ret._2 - case (a: Float, b: Float) if epsilon <= 0 => - java.lang.Float.floatToRawIntBits(a) == java.lang.Float.floatToRawIntBits(b) - case (a: Float, b: Float) if epsilon > 0 => - val ret = doublesAreEqualWithinPercentage(a, b) - if (!ret._2) { - System.err.println(ret._1 + " (float)") - } - ret._2 - case (a, b) => a == b - } - } - - def stackTraceAsString(e: Throwable): String = { - val sw = new StringWriter() - val w = new PrintWriter(sw) - e.printStackTrace(w) - w.close() - sw.toString - } -} - -class TaskFailureListener extends SparkListener { - - val taskFailures = new ListBuffer[TaskEndReason]() - - override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { - taskEnd.reason match { - case Success => - case reason => taskFailures += reason - } - super.onTaskEnd(taskEnd) - } - -} - -class BenchmarkListener( - queryPlans: ListBuffer[SparkPlanNode], - exceptions: ListBuffer[String]) extends QueryExecutionListener { - - override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { - addQueryPlan(qe) - } - - override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = { - addQueryPlan(qe) - exceptions += BenchUtils.stackTraceAsString(exception) - } - - private def addQueryPlan(qe: QueryExecution) = { - try { - queryPlans += toJson(qe.executedPlan) - } catch { - case e: Exception => - exceptions.append(BenchUtils.stackTraceAsString(e)) - } - } - - private def toJson(plan: SparkPlan): SparkPlanNode = { - plan match { - case WholeStageCodegenExec(child) => toJson(child) - case InputAdapter(child) => toJson(child) - case _ => - val children: Seq[SparkPlanNode] = plan match { - case s: AdaptiveSparkPlanExec => Seq(toJson(s.executedPlan)) - case s: QueryStageExec => Seq(toJson(s.plan)) - case _ => plan.children.map(child => toJson(child)) - } - val metrics: Seq[SparkSQLMetric] = plan.metrics - .map(m => SparkSQLMetric(m._1, m._2.metricType, m._2.value)).toSeq - - SparkPlanNode( - plan.id, - plan.nodeName, - plan.simpleStringWithNodeId(), - metrics, - children) - } - } -} - -trait BenchmarkSuite { - def name(): String - def shortName(): String - def setupAllParquet(spark: SparkSession, path: String) - def setupAllCSV(spark: SparkSession, path: String) - def setupAllOrc(spark: SparkSession, path: String) - def createDataFrame(spark: SparkSession, query: String): DataFrame -} - -/** Top level benchmark report class */ -case class BenchmarkReport( - filename: String, - startTime: Long, - env: Environment, - testConfiguration: TestConfiguration, - action: String, - writeOptions: Map[String, String], - query: String, - queryPlan: QueryPlan, - queryPlans: Seq[SparkPlanNode], - rowCounts: Seq[Long], - queryTimes: Seq[Long], - queryStatus: Seq[String], - exceptions: Seq[String]) - -/** Configuration options that affect how the tests are run */ -case class TestConfiguration( - gcBetweenRuns: Boolean -) - -/** Details about the query plan */ -case class QueryPlan( - logical: String, - executedPlan: String) - -case class SparkPlanNode( - id: Int, - name: String, - description: String, - metrics: Seq[SparkSQLMetric], - children: Seq[SparkPlanNode]) - -case class SparkSQLMetric( - name: String, - metricType: String, - value: Any) - -/** Details about the 
environment where the benchmark ran */ -case class Environment( - envVars: Map[String, String], - sparkConf: Map[String, String], - sparkVersion: String) - -sealed trait ResultsAction - -case class Collect() extends ResultsAction - -case class WriteCsv( - path: String, - mode: SaveMode, - writeOptions: Map[String, String]) extends ResultsAction - -case class WriteOrc( - path: String, - mode: SaveMode, - writeOptions: Map[String, String]) extends ResultsAction - -case class WriteParquet( - path: String, - mode: SaveMode, - writeOptions: Map[String, String]) extends ResultsAction diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/common/CompareResults.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/common/CompareResults.scala deleted file mode 100644 index bdae797af9b..00000000000 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/common/CompareResults.scala +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.nvidia.spark.rapids.tests.common - -import org.rogach.scallop.ScallopConf - -import org.apache.spark.sql.SparkSession - -/** - * Utility for comparing two csv or parquet files, such as the output from a benchmark, to - * verify that they match, allowing for differences in precision. - * - * This utility is intended to be run via spark-submit. - * - * Example usage: - * - *
- * \$SPARK_HOME/bin/spark-submit --jars \$SPARK_RAPIDS_PLUGIN_JAR,\$CUDF_JAR \
- *   --master local[*] \
- *   --class com.nvidia.spark.rapids.tests.common.CompareResults \
- *   \$SPARK_RAPIDS_PLUGIN_INTEGRATION_TEST_JAR \
- *   --input1 /path/to/result1 \
- *   --input2 /path/to/result2 \
- *   --input-format parquet
- * 
- */ -object CompareResults { - def main(arg: Array[String]): Unit = { - val conf = new Conf(arg) - - val spark = SparkSession.builder - .appName("CompareResults") - // disable plugin so that we can see FilePartition rather than DatasourceRDDPartition and - // can retrieve individual partition filenames - .config("spark.rapids.sql.enabled", "false") - .getOrCreate() - - val dfReader = spark.read.format(conf.inputFormat()) - - BenchUtils.compareResults( - dfReader.load(conf.input1()), - dfReader.load(conf.input2()), - conf.inputFormat(), - conf.ignoreOrdering(), - conf.useIterator(), - conf.maxErrors(), - conf.epsilon()) - } -} - -class Conf(arguments: Seq[String]) extends ScallopConf(arguments) { - /** Path to first data set */ - val input1 = opt[String](required = true) - /** Path to second data set */ - val input2 = opt[String](required = true) - /** Input format (csv, parquet or orc) */ - val inputFormat = opt[String](required = true) - /** Sort the data collected from the DataFrames before comparing them. */ - val ignoreOrdering = opt[Boolean](required = false, default = Some(false)) - /** - * When set to true, use `toLocalIterator` to load one partition at a time into driver memory, - * reducing memory usage at the cost of performance because processing will be single-threaded. - */ - val useIterator = opt[Boolean](required = false, default = Some(false)) - /** Maximum number of differences to report */ - val maxErrors = opt[Int](required = false, default = Some(10)) - /** Allow for differences in precision when comparing floating point values */ - val epsilon = opt[Double](required = false, default = Some(0.00001)) - verify() -} \ No newline at end of file diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcds/TpcdsLikeBench.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcds/TpcdsLikeBench.scala deleted file mode 100644 index 369b32ef483..00000000000 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcds/TpcdsLikeBench.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.nvidia.spark.rapids.tests.tpcds - -import com.nvidia.spark.rapids.tests.common.BenchmarkSuite - -import org.apache.spark.sql.{DataFrame, SparkSession} - -class TpcdsLikeBench(val appendDat: Boolean = false) extends BenchmarkSuite { - override def name(): String = "TPC-DS" - - override def shortName(): String = "tpcds" - - override def setupAllParquet(spark: SparkSession, path: String): Unit = { - TpcdsLikeSpark.setupAllParquet(spark, path, appendDat) - } - - override def setupAllCSV(spark: SparkSession, path: String): Unit = { - TpcdsLikeSpark.setupAllCSV(spark, path, appendDat) - } - - override def setupAllOrc(spark: SparkSession, path: String): Unit = { - TpcdsLikeSpark.setupAllOrc(spark, path, appendDat) - } - - override def createDataFrame(spark: SparkSession, query: String): DataFrame = { - TpcdsLikeSpark.run(spark, query) - } -} - diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcds/TpcdsLikeSpark.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcds/TpcdsLikeSpark.scala deleted file mode 100644 index 5878f7fca81..00000000000 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcds/TpcdsLikeSpark.scala +++ /dev/null @@ -1,4777 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.nvidia.spark.rapids.tests.tpcds - -import com.nvidia.spark.rapids.tests.common.BenchUtils -import com.nvidia.spark.rapids.tests.tpcds.TpcdsLikeSpark.{csvToOrc, csvToParquet} -import org.rogach.scallop.ScallopConf - -import org.apache.spark.sql.{DataFrame, DataFrameWriter, Row, SparkSession} -import org.apache.spark.sql.types.{DataType, DateType, DecimalType, DoubleType, IntegerType, LongType, StringType, StructField, StructType} - -case class Table( - name: String, // also the base name for the data - partitionColumns: Seq[String], - schema: StructType) { - - private[this] def path(basePath: String, appendDat: Boolean) = { - val rest = if (appendDat) { - ".dat" - } else { - "" - } - basePath + "/" + name + rest - } - - def readCSV(spark: SparkSession, basePath: String, appendDat: Boolean): DataFrame = - spark.read.option("delimiter", "|") - .schema(schema) - .csv(path(basePath, appendDat)) - - def setupCSV(spark: SparkSession, basePath: String, appendDat: Boolean): Unit = - readCSV(spark, basePath, appendDat).createOrReplaceTempView(name) - - def setupParquet(spark: SparkSession, basePath: String, appendDat: Boolean): Unit = - spark.read.parquet(path(basePath, appendDat)).createOrReplaceTempView(name) - - def setupOrc(spark: SparkSession, basePath: String, appendDat: Boolean): Unit = - spark.read.orc(path(basePath, appendDat)).createOrReplaceTempView(name) - - def setup( - spark: SparkSession, - basePath: String, - format: String, - appendDat: Boolean): Unit = { - spark.read.format(format).load(path(basePath, appendDat)).createOrReplaceTempView(name) - } - - private def setupWrite( - spark: SparkSession, - name: String, - inputBase: String, - coalesce: Map[String, Int], - repartition: Map[String, Int], - writePartitioning: Boolean): DataFrameWriter[Row] = { - val df = readCSV(spark, inputBase, appendDat = true) - val repart = BenchUtils.applyCoalesceRepartition(name, df, coalesce, repartition) - val tmp = repart.write.mode("overwrite") - if (writePartitioning && partitionColumns.nonEmpty) { - tmp.partitionBy(partitionColumns: _*) - } else { - tmp - } - } - - def csvToParquet( - spark: SparkSession, - inputBase: String, - outputBase: String, - coalesce: Map[String, Int], - repartition: Map[String, Int], - writePartitioning: Boolean, - appendDat: Boolean): Unit = - setupWrite(spark, name, inputBase, coalesce, repartition, writePartitioning) - .parquet(path(outputBase, appendDat)) - - def csvToOrc( - spark: SparkSession, - inputBase: String, - outputBase: String, - coalesce: Map[String, Int], - repartition: Map[String, Int], - writePartitioning: Boolean, - appendDat: Boolean): Unit = - setupWrite(spark, name, inputBase, coalesce, repartition, writePartitioning) - .orc(path(outputBase, appendDat)) -} - -case class Query(name: String, query: String) { - def apply(spark: SparkSession): DataFrame = spark.sql(query) -} - -/** - * A set of tests based off of TPC-DS queries. The schemas and the queries are based off of - * the data bricks code at https://github.com/databricks/spark-sql-perf with a few modifications - * to some of the quires so that the names of columns returned is not ambiguous. This lets our - * correctness tests auto sort the data to account for ambiguous ordering. 
- */ -object TpcdsLikeSpark { - def csvToParquet( - spark: SparkSession, - baseInput: String, - baseOutput: String, - coalesce: Map[String, Int] = Map.empty, - repartition: Map[String, Int] = Map.empty, - writePartitioning: Boolean = false, - appendDat: Boolean = true, - useDecimalType: Boolean = false): Unit = { - tables(useDecimalType).foreach(_.csvToParquet( - spark, - baseInput, - baseOutput, - coalesce, - repartition, - writePartitioning, - appendDat)) - } - - def csvToOrc( - spark: SparkSession, - baseInput: String, - baseOutput: String, - coalesce: Map[String, Int] = Map.empty, - repartition: Map[String, Int] = Map.empty, - writePartitioning: Boolean = false, - appendDat: Boolean = true, - useDecimalType: Boolean = false): Unit = { - tables(useDecimalType).foreach(_.csvToOrc( - spark, - baseInput, - baseOutput, - coalesce, - repartition, - writePartitioning, - appendDat)) - } - - def setupAllCSV( - spark: SparkSession, - basePath: String, - appendDat: Boolean = true, - useDecimalType: Boolean = false): Unit = { - tables(useDecimalType).foreach(_.setupCSV(spark, basePath, appendDat)) - } - - def setupAllParquet( - spark: SparkSession, - basePath: String, - appendDat: Boolean = true, - useDecimalType: Boolean = false): Unit = { - tables(useDecimalType).foreach(_.setupParquet(spark, basePath, appendDat)) - } - - def setupAllOrc( - spark: SparkSession, - basePath: String, - appendDat: Boolean = true, - useDecimalType: Boolean = false): Unit = { - tables(useDecimalType).foreach(_.setupOrc(spark, basePath, appendDat)) - } - - def setupAll( - spark: SparkSession, - basePath: String, - format: String, - appendDat: Boolean = true, - useDecimalType: Boolean = false): Unit = { - tables(useDecimalType).foreach(_.setup(spark, basePath, format, appendDat)) - } - - private def decimalType(useDecimalType: Boolean, p: Int, s: Int): DataType = { - if (useDecimalType) { - DecimalType(p, s) - } else { - DoubleType - } - } - - private def tables(useDecimalType: Boolean) = { - Array( - Table( - "catalog_sales", - Seq("cs_sold_date_sk"), - StructType(Array( - StructField("cs_sold_date_sk", IntegerType), - StructField("cs_sold_time_sk", IntegerType), - StructField("cs_ship_date_sk", IntegerType), - StructField("cs_bill_customer_sk", IntegerType), - StructField("cs_bill_cdemo_sk", IntegerType), - StructField("cs_bill_hdemo_sk", IntegerType), - StructField("cs_bill_addr_sk", IntegerType), - StructField("cs_ship_customer_sk", IntegerType), - StructField("cs_ship_cdemo_sk", IntegerType), - StructField("cs_ship_hdemo_sk", IntegerType), - StructField("cs_ship_addr_sk", IntegerType), - StructField("cs_call_center_sk", IntegerType), - StructField("cs_catalog_page_sk", IntegerType), - StructField("cs_ship_mode_sk", IntegerType), - StructField("cs_warehouse_sk", IntegerType), - StructField("cs_item_sk", IntegerType), - StructField("cs_promo_sk", IntegerType), - StructField("cs_order_number", LongType), - StructField("cs_quantity", IntegerType), - StructField("cs_wholesale_cost", decimalType(useDecimalType, 7, 2)), - StructField("cs_list_price", decimalType(useDecimalType, 7, 2)), - StructField("cs_sales_price", decimalType(useDecimalType, 7, 2)), - StructField("cs_ext_discount_amt", decimalType(useDecimalType, 7, 2)), - StructField("cs_ext_sales_price", decimalType(useDecimalType, 7, 2)), - StructField("cs_ext_wholesale_cost", decimalType(useDecimalType, 7, 2)), - StructField("cs_ext_list_price", decimalType(useDecimalType, 7, 2)), - StructField("cs_ext_tax", decimalType(useDecimalType, 7, 2)), - 
StructField("cs_coupon_amt", decimalType(useDecimalType, 7, 2)), - StructField("cs_ext_ship_cost", decimalType(useDecimalType, 7, 2)), - StructField("cs_net_paid", decimalType(useDecimalType, 7, 2)), - StructField("cs_net_paid_inc_tax", decimalType(useDecimalType, 7, 2)), - StructField("cs_net_paid_inc_ship", decimalType(useDecimalType, 7, 2)), - StructField("cs_net_paid_inc_ship_tax", decimalType(useDecimalType, 7, 2)), - StructField("cs_net_profit", decimalType(useDecimalType, 7, 2)) - ))), - Table( - "catalog_returns", - Seq("cr_returned_date_sk"), - StructType(Array( - StructField("cr_returned_date_sk", IntegerType), - StructField("cr_returned_time_sk", IntegerType), - StructField("cr_item_sk", IntegerType), - StructField("cr_refunded_customer_sk", IntegerType), - StructField("cr_refunded_cdemo_sk", IntegerType), - StructField("cr_refunded_hdemo_sk", IntegerType), - StructField("cr_refunded_addr_sk", IntegerType), - StructField("cr_returning_customer_sk", IntegerType), - StructField("cr_returning_cdemo_sk", IntegerType), - StructField("cr_returning_hdemo_sk", IntegerType), - StructField("cr_returning_addr_sk", IntegerType), - StructField("cr_call_center_sk", IntegerType), - StructField("cr_catalog_page_sk", IntegerType), - StructField("cr_ship_mode_sk", IntegerType), - StructField("cr_warehouse_sk", IntegerType), - StructField("cr_reason_sk", IntegerType), - StructField("cr_order_number", LongType), - StructField("cr_return_quantity", IntegerType), - StructField("cr_return_amount", decimalType(useDecimalType, 7, 2)), - StructField("cr_return_tax", decimalType(useDecimalType, 7, 2)), - StructField("cr_return_amt_inc_tax", decimalType(useDecimalType, 7, 2)), - StructField("cr_fee", decimalType(useDecimalType, 7, 2)), - StructField("cr_return_ship_cost", decimalType(useDecimalType, 7, 2)), - StructField("cr_refunded_cash", decimalType(useDecimalType, 7, 2)), - StructField("cr_reversed_charge", decimalType(useDecimalType, 7, 2)), - StructField("cr_store_credit", decimalType(useDecimalType, 7, 2)), - StructField("cr_net_loss", decimalType(useDecimalType, 7, 2)) - ))), - Table( - "inventory", - Seq("inv_date_sk"), - StructType(Array( - StructField("inv_date_sk", IntegerType), - StructField("inv_item_sk", IntegerType), - StructField("inv_warehouse_sk", IntegerType), - StructField("inv_quantity_on_hand", IntegerType) - ))), - Table( - "store_sales", - Seq("ss_sold_date_sk"), - StructType(Array( - StructField("ss_sold_date_sk", IntegerType), - StructField("ss_sold_time_sk", IntegerType), - StructField("ss_item_sk", IntegerType), - StructField("ss_customer_sk", IntegerType), - StructField("ss_cdemo_sk", IntegerType), - StructField("ss_hdemo_sk", IntegerType), - StructField("ss_addr_sk", IntegerType), - StructField("ss_store_sk", IntegerType), - StructField("ss_promo_sk", IntegerType), - StructField("ss_ticket_number", LongType), - StructField("ss_quantity", IntegerType), - StructField("ss_wholesale_cost", decimalType(useDecimalType, 7, 2)), - StructField("ss_list_price", decimalType(useDecimalType, 7, 2)), - StructField("ss_sales_price", decimalType(useDecimalType, 7, 2)), - StructField("ss_ext_discount_amt", decimalType(useDecimalType, 7, 2)), - StructField("ss_ext_sales_price", decimalType(useDecimalType, 7, 2)), - StructField("ss_ext_wholesale_cost", decimalType(useDecimalType, 7, 2)), - StructField("ss_ext_list_price", decimalType(useDecimalType, 7, 2)), - StructField("ss_ext_tax", decimalType(useDecimalType, 7, 2)), - StructField("ss_coupon_amt", decimalType(useDecimalType, 7, 2)), - 
StructField("ss_net_paid", decimalType(useDecimalType, 7, 2)), - StructField("ss_net_paid_inc_tax", decimalType(useDecimalType, 7, 2)), - StructField("ss_net_profit", decimalType(useDecimalType, 7, 2)) - ))), - Table( - "store_returns", - Seq("sr_returned_date_sk"), - StructType(Array( - StructField("sr_returned_date_sk", IntegerType), - StructField("sr_return_time_sk", IntegerType), - StructField("sr_item_sk", IntegerType), - StructField("sr_customer_sk", IntegerType), - StructField("sr_cdemo_sk", IntegerType), - StructField("sr_hdemo_sk", IntegerType), - StructField("sr_addr_sk", IntegerType), - StructField("sr_store_sk", IntegerType), - StructField("sr_reason_sk", IntegerType), - StructField("sr_ticket_number", LongType), - StructField("sr_return_quantity", IntegerType), - StructField("sr_return_amt", decimalType(useDecimalType, 7, 2)), - StructField("sr_return_tax", decimalType(useDecimalType, 7, 2)), - StructField("sr_return_amt_inc_tax", decimalType(useDecimalType, 7, 2)), - StructField("sr_fee", decimalType(useDecimalType, 7, 2)), - StructField("sr_return_ship_cost", decimalType(useDecimalType, 7, 2)), - StructField("sr_refunded_cash", decimalType(useDecimalType, 7, 2)), - StructField("sr_reversed_charge", decimalType(useDecimalType, 7, 2)), - StructField("sr_store_credit", decimalType(useDecimalType, 7, 2)), - StructField("sr_net_loss", decimalType(useDecimalType, 7, 2)) - ))), - Table( - "web_sales", - Seq("ws_sold_date_sk"), - StructType(Array( - StructField("ws_sold_date_sk", IntegerType), - StructField("ws_sold_time_sk", IntegerType), - StructField("ws_ship_date_sk", IntegerType), - StructField("ws_item_sk", IntegerType), - StructField("ws_bill_customer_sk", IntegerType), - StructField("ws_bill_cdemo_sk", IntegerType), - StructField("ws_bill_hdemo_sk", IntegerType), - StructField("ws_bill_addr_sk", IntegerType), - StructField("ws_ship_customer_sk", IntegerType), - StructField("ws_ship_cdemo_sk", IntegerType), - StructField("ws_ship_hdemo_sk", IntegerType), - StructField("ws_ship_addr_sk", IntegerType), - StructField("ws_web_page_sk", IntegerType), - StructField("ws_web_site_sk", IntegerType), - StructField("ws_ship_mode_sk", IntegerType), - StructField("ws_warehouse_sk", IntegerType), - StructField("ws_promo_sk", IntegerType), - StructField("ws_order_number", LongType), - StructField("ws_quantity", IntegerType), - StructField("ws_wholesale_cost", decimalType(useDecimalType, 7, 2)), - StructField("ws_list_price", decimalType(useDecimalType, 7, 2)), - StructField("ws_sales_price", decimalType(useDecimalType, 7, 2)), - StructField("ws_ext_discount_amt", decimalType(useDecimalType, 7, 2)), - StructField("ws_ext_sales_price", decimalType(useDecimalType, 7, 2)), - StructField("ws_ext_wholesale_cost", decimalType(useDecimalType, 7, 2)), - StructField("ws_ext_list_price", decimalType(useDecimalType, 7, 2)), - StructField("ws_ext_tax", decimalType(useDecimalType, 7, 2)), - StructField("ws_coupon_amt", decimalType(useDecimalType, 7, 2)), - StructField("ws_ext_ship_cost", decimalType(useDecimalType, 7, 2)), - StructField("ws_net_paid", decimalType(useDecimalType, 7, 2)), - StructField("ws_net_paid_inc_tax", decimalType(useDecimalType, 7, 2)), - StructField("ws_net_paid_inc_ship", decimalType(useDecimalType, 7, 2)), - StructField("ws_net_paid_inc_ship_tax", decimalType(useDecimalType, 7, 2)), - StructField("ws_net_profit", decimalType(useDecimalType, 7, 2)) - ))), - Table( - "web_returns", - Seq("wr_returned_date_sk"), - StructType(Array( - StructField("wr_returned_date_sk", IntegerType), 
- StructField("wr_returned_time_sk", IntegerType), - StructField("wr_item_sk", IntegerType), - StructField("wr_refunded_customer_sk", IntegerType), - StructField("wr_refunded_cdemo_sk", IntegerType), - StructField("wr_refunded_hdemo_sk", IntegerType), - StructField("wr_refunded_addr_sk", IntegerType), - StructField("wr_returning_customer_sk", IntegerType), - StructField("wr_returning_cdemo_sk", IntegerType), - StructField("wr_returning_hdemo_sk", IntegerType), - StructField("wr_returning_addr_sk", IntegerType), - StructField("wr_web_page_sk", IntegerType), - StructField("wr_reason_sk", IntegerType), - StructField("wr_order_number", LongType), - StructField("wr_return_quantity", IntegerType), - StructField("wr_return_amt", decimalType(useDecimalType, 7, 2)), - StructField("wr_return_tax", decimalType(useDecimalType, 7, 2)), - StructField("wr_return_amt_inc_tax", decimalType(useDecimalType, 7, 2)), - StructField("wr_fee", decimalType(useDecimalType, 7, 2)), - StructField("wr_return_ship_cost", decimalType(useDecimalType, 7, 2)), - StructField("wr_refunded_cash", decimalType(useDecimalType, 7, 2)), - StructField("wr_reversed_charge", decimalType(useDecimalType, 7, 2)), - StructField("wr_account_credit", decimalType(useDecimalType, 7, 2)), - StructField("wr_net_loss", decimalType(useDecimalType, 7, 2)) - ))), - Table( - "call_center", - Seq(), - StructType(Array( - StructField("cc_call_center_sk", IntegerType), - StructField("cc_call_center_id", StringType), - StructField("cc_rec_start_date", DateType), - StructField("cc_rec_end_date", DateType), - StructField("cc_closed_date_sk", IntegerType), - StructField("cc_open_date_sk", IntegerType), - StructField("cc_name", StringType), - StructField("cc_class", StringType), - StructField("cc_employees", IntegerType), - StructField("cc_sq_ft", IntegerType), - StructField("cc_hours", StringType), - StructField("cc_manager", StringType), - StructField("cc_mkt_id", IntegerType), - StructField("cc_mkt_class", StringType), - StructField("cc_mkt_desc", StringType), - StructField("cc_market_manager", StringType), - StructField("cc_division", IntegerType), - StructField("cc_division_name", StringType), - StructField("cc_company", IntegerType), - StructField("cc_company_name", StringType), - StructField("cc_street_number", StringType), - StructField("cc_street_name", StringType), - StructField("cc_street_type", StringType), - StructField("cc_suite_number", StringType), - StructField("cc_city", StringType), - StructField("cc_county", StringType), - StructField("cc_state", StringType), - StructField("cc_zip", StringType), - StructField("cc_country", StringType), - StructField("cc_gmt_offset", decimalType(useDecimalType, 5, 2)), - StructField("cc_tax_percentage", decimalType(useDecimalType, 5, 2)) - ))), - Table( - "catalog_page", - Seq(), - StructType(Array( - StructField("cp_catalog_page_sk", IntegerType), - StructField("cp_catalog_page_id", StringType), - StructField("cp_start_date_sk", IntegerType), - StructField("cp_end_date_sk", IntegerType), - StructField("cp_department", StringType), - StructField("cp_catalog_number", IntegerType), - StructField("cp_catalog_page_number", IntegerType), - StructField("cp_description", StringType), - StructField("cp_type", StringType) - ))), - Table( - "customer", - Seq(), - StructType(Array( - StructField("c_customer_sk", IntegerType), - StructField("c_customer_id", StringType), - StructField("c_current_cdemo_sk", IntegerType), - StructField("c_current_hdemo_sk", IntegerType), - StructField("c_current_addr_sk", IntegerType), 
- StructField("c_first_shipto_date_sk", IntegerType), - StructField("c_first_sales_date_sk", IntegerType), - StructField("c_salutation", StringType), - StructField("c_first_name", StringType), - StructField("c_last_name", StringType), - StructField("c_preferred_cust_flag", StringType), - StructField("c_birth_day", IntegerType), - StructField("c_birth_month", IntegerType), - StructField("c_birth_year", IntegerType), - StructField("c_birth_country", StringType), - StructField("c_login", StringType), - StructField("c_email_address", StringType), - StructField("c_last_review_date", StringType) - ))), - Table( - "customer_address", - Seq(), - StructType(Array( - StructField("ca_address_sk", IntegerType), - StructField("ca_address_id", StringType), - StructField("ca_street_number", StringType), - StructField("ca_street_name", StringType), - StructField("ca_street_type", StringType), - StructField("ca_suite_number", StringType), - StructField("ca_city", StringType), - StructField("ca_county", StringType), - StructField("ca_state", StringType), - StructField("ca_zip", StringType), - StructField("ca_country", StringType), - StructField("ca_gmt_offset", decimalType(useDecimalType, 5, 2)), - StructField("ca_location_type", StringType) - ))), - Table( - "customer_demographics", - Seq(), - StructType(Array( - StructField("cd_demo_sk", IntegerType), - StructField("cd_gender", StringType), - StructField("cd_marital_status", StringType), - StructField("cd_education_status", StringType), - StructField("cd_purchase_estimate", IntegerType), - StructField("cd_credit_rating", StringType), - StructField("cd_dep_count", IntegerType), - StructField("cd_dep_employed_count", IntegerType), - StructField("cd_dep_college_count", IntegerType) - ))), - Table( - "date_dim", - Seq(), - StructType(Array( - StructField("d_date_sk", IntegerType), - StructField("d_date_id", StringType), - StructField("d_date", DateType), - StructField("d_month_seq", IntegerType), - StructField("d_week_seq", IntegerType), - StructField("d_quarter_seq", IntegerType), - StructField("d_year", IntegerType), - StructField("d_dow", IntegerType), - StructField("d_moy", IntegerType), - StructField("d_dom", IntegerType), - StructField("d_qoy", IntegerType), - StructField("d_fy_year", IntegerType), - StructField("d_fy_quarter_seq", IntegerType), - StructField("d_fy_week_seq", IntegerType), - StructField("d_day_name", StringType), - StructField("d_quarter_name", StringType), - StructField("d_holiday", StringType), - StructField("d_weekend", StringType), - StructField("d_following_holiday", StringType), - StructField("d_first_dom", IntegerType), - StructField("d_last_dom", IntegerType), - StructField("d_same_day_ly", IntegerType), - StructField("d_same_day_lq", IntegerType), - StructField("d_current_day", StringType), - StructField("d_current_week", StringType), - StructField("d_current_month", StringType), - StructField("d_current_quarter", StringType), - StructField("d_current_year", StringType) - ))), - Table( - "household_demographics", - Seq(), - StructType(Array( - StructField("hd_demo_sk", IntegerType), - StructField("hd_income_band_sk", IntegerType), - StructField("hd_buy_potential", StringType), - StructField("hd_dep_count", IntegerType), - StructField("hd_vehicle_count", IntegerType) - ))), - Table( - "income_band", - Seq(), - StructType(Array( - StructField("ib_income_band_sk", IntegerType), - StructField("ib_lower_bound", IntegerType), - StructField("ib_upper_bound", IntegerType) - ))), - Table( - "item", - Seq(), - StructType(Array( - 
StructField("i_item_sk", IntegerType), - StructField("i_item_id", StringType), - StructField("i_rec_start_date", DateType), - StructField("i_rec_end_date", DateType), - StructField("i_item_desc", StringType), - StructField("i_current_price", decimalType(useDecimalType, 7, 2)), - StructField("i_wholesale_cost", decimalType(useDecimalType, 7, 2)), - StructField("i_brand_id", IntegerType), - StructField("i_brand", StringType), - StructField("i_class_id", IntegerType), - StructField("i_class", StringType), - StructField("i_category_id", IntegerType), - StructField("i_category", StringType), - StructField("i_manufact_id", IntegerType), - StructField("i_manufact", StringType), - StructField("i_size", StringType), - StructField("i_formulation", StringType), - StructField("i_color", StringType), - StructField("i_units", StringType), - StructField("i_container", StringType), - StructField("i_manager_id", IntegerType), - StructField("i_product_name", StringType) - ))), - Table( - "promotion", - Seq(), - StructType(Array( - StructField("p_promo_sk", IntegerType), - StructField("p_promo_id", StringType), - StructField("p_start_date_sk", IntegerType), - StructField("p_end_date_sk", IntegerType), - StructField("p_item_sk", IntegerType), - StructField("p_cost", DoubleType), // should be decimalType(useDecimalType, 15, 2) - StructField("p_response_target", IntegerType), - StructField("p_promo_name", StringType), - StructField("p_channel_dmail", StringType), - StructField("p_channel_email", StringType), - StructField("p_channel_catalog", StringType), - StructField("p_channel_tv", StringType), - StructField("p_channel_radio", StringType), - StructField("p_channel_press", StringType), - StructField("p_channel_event", StringType), - StructField("p_channel_demo", StringType), - StructField("p_channel_details", StringType), - StructField("p_purpose", StringType), - StructField("p_discount_active", StringType) - ))), - Table( - "reason", - Seq(), - StructType(Array( - StructField("r_reason_sk", IntegerType), - StructField("r_reason_id", StringType), - StructField("r_reason_desc", StringType) - ))), - Table( - "ship_mode", - Seq(), - StructType(Array( - StructField("sm_ship_mode_sk", IntegerType), - StructField("sm_ship_mode_id", StringType), - StructField("sm_type", StringType), - StructField("sm_code", StringType), - StructField("sm_carrier", StringType), - StructField("sm_contract", StringType) - ))), - Table( - "store", - Seq(), - StructType(Array( - StructField("s_store_sk", IntegerType), - StructField("s_store_id", StringType), - StructField("s_rec_start_date", DateType), - StructField("s_rec_end_date", DateType), - StructField("s_closed_date_sk", IntegerType), - StructField("s_store_name", StringType), - StructField("s_number_employees", IntegerType), - StructField("s_floor_space", IntegerType), - StructField("s_hours", StringType), - StructField("s_manager", StringType), - StructField("s_market_id", IntegerType), - StructField("s_geography_class", StringType), - StructField("s_market_desc", StringType), - StructField("s_market_manager", StringType), - StructField("s_division_id", IntegerType), - StructField("s_division_name", StringType), - StructField("s_company_id", IntegerType), - StructField("s_company_name", StringType), - StructField("s_street_number", StringType), - StructField("s_street_name", StringType), - StructField("s_street_type", StringType), - StructField("s_suite_number", StringType), - StructField("s_city", StringType), - StructField("s_county", StringType), - StructField("s_state", 
StringType), - StructField("s_zip", StringType), - StructField("s_country", StringType), - StructField("s_gmt_offset", decimalType(useDecimalType, 5, 2)), - StructField("s_tax_precentage", decimalType(useDecimalType, 5, 2)) - ))), - Table( - "time_dim", - Seq(), - StructType(Array( - StructField("t_time_sk", IntegerType), - StructField("t_time_id", StringType), - StructField("t_time", IntegerType), - StructField("t_hour", IntegerType), - StructField("t_minute", IntegerType), - StructField("t_second", IntegerType), - StructField("t_am_pm", StringType), - StructField("t_shift", StringType), - StructField("t_sub_shift", StringType), - StructField("t_meal_time", StringType) - ))), - Table( - "warehouse", - Seq(), - StructType(Array( - StructField("w_warehouse_sk", IntegerType), - StructField("w_warehouse_id", StringType), - StructField("w_warehouse_name", StringType), - StructField("w_warehouse_sq_ft", IntegerType), - StructField("w_street_number", StringType), - StructField("w_street_name", StringType), - StructField("w_street_type", StringType), - StructField("w_suite_number", StringType), - StructField("w_city", StringType), - StructField("w_county", StringType), - StructField("w_state", StringType), - StructField("w_zip", StringType), - StructField("w_country", StringType), - StructField("w_gmt_offset", decimalType(useDecimalType, 5, 2)) - ))), - Table( - "web_page", - Seq(), - StructType(Array( - StructField("wp_web_page_sk", IntegerType), - StructField("wp_web_page_id", StringType), - StructField("wp_rec_start_date", DateType), - StructField("wp_rec_end_date", DateType), - StructField("wp_creation_date_sk", IntegerType), - StructField("wp_access_date_sk", IntegerType), - StructField("wp_autogen_flag", StringType), - StructField("wp_customer_sk", IntegerType), - StructField("wp_url", StringType), - StructField("wp_type", StringType), - StructField("wp_char_count", IntegerType), - StructField("wp_link_count", IntegerType), - StructField("wp_image_count", IntegerType), - StructField("wp_max_ad_count", IntegerType) - ))), - Table( - "web_site", - Seq(), - StructType(Array( - StructField("web_site_sk", IntegerType), - StructField("web_site_id", StringType), - StructField("web_rec_start_date", DateType), - StructField("web_rec_end_date", DateType), - StructField("web_name", StringType), - StructField("web_open_date_sk", IntegerType), - StructField("web_close_date_sk", IntegerType), - StructField("web_class", StringType), - StructField("web_manager", StringType), - StructField("web_mkt_id", IntegerType), - StructField("web_mkt_class", StringType), - StructField("web_mkt_desc", StringType), - StructField("web_market_manager", StringType), - StructField("web_company_id", IntegerType), - StructField("web_company_name", StringType), - StructField("web_street_number", StringType), - StructField("web_street_name", StringType), - StructField("web_street_type", StringType), - StructField("web_suite_number", StringType), - StructField("web_city", StringType), - StructField("web_county", StringType), - StructField("web_state", StringType), - StructField("web_zip", StringType), - StructField("web_country", StringType), - StructField("web_gmt_offset", decimalType(useDecimalType, 5, 2)), - StructField("web_tax_percentage", decimalType(useDecimalType, 5, 2)) - ))) - ) - } - - // scalastyle:off line.size.limit - - val queries : Map[String, Query] = Array( - Query("q1", - """ - | WITH customer_total_return AS - | (SELECT sr_customer_sk AS ctr_customer_sk, sr_store_sk AS ctr_store_sk, - | sum(sr_return_amt) AS 
ctr_total_return - | FROM store_returns, date_dim - | WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000 - | GROUP BY sr_customer_sk, sr_store_sk) - | SELECT c_customer_id - | FROM customer_total_return ctr1, store, customer - | WHERE ctr1.ctr_total_return > - | (SELECT avg(ctr_total_return)*1.2 - | FROM customer_total_return ctr2 - | WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk) - | AND s_store_sk = ctr1.ctr_store_sk - | AND s_state = 'TN' - | AND ctr1.ctr_customer_sk = c_customer_sk - | ORDER BY c_customer_id LIMIT 100 - | - |""".stripMargin), - Query("q2", - """ - | WITH wscs as - | (SELECT sold_date_sk, sales_price - | FROM (SELECT ws_sold_date_sk sold_date_sk, ws_ext_sales_price sales_price - | FROM web_sales - | UNION ALL - | SELECT cs_sold_date_sk sold_date_sk, cs_ext_sales_price sales_price - | FROM catalog_sales) x), - | wswscs AS - | (SELECT d_week_seq, - | sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, - | sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, - | sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, - | sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, - | sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, - | sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, - | sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales - | FROM wscs, date_dim - | WHERE d_date_sk = sold_date_sk - | GROUP BY d_week_seq) - | SELECT d_week_seq1 - | ,round(sun_sales1/sun_sales2,2) - | ,round(mon_sales1/mon_sales2,2) - | ,round(tue_sales1/tue_sales2,2) - | ,round(wed_sales1/wed_sales2,2) - | ,round(thu_sales1/thu_sales2,2) - | ,round(fri_sales1/fri_sales2,2) - | ,round(sat_sales1/sat_sales2,2) - | FROM - | (SELECT wswscs.d_week_seq d_week_seq1 - | ,sun_sales sun_sales1 - | ,mon_sales mon_sales1 - | ,tue_sales tue_sales1 - | ,wed_sales wed_sales1 - | ,thu_sales thu_sales1 - | ,fri_sales fri_sales1 - | ,sat_sales sat_sales1 - | FROM wswscs,date_dim - | WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y, - | (SELECT wswscs.d_week_seq d_week_seq2 - | ,sun_sales sun_sales2 - | ,mon_sales mon_sales2 - | ,tue_sales tue_sales2 - | ,wed_sales wed_sales2 - | ,thu_sales thu_sales2 - | ,fri_sales fri_sales2 - | ,sat_sales sat_sales2 - | FROM wswscs, date_dim - | WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1) z - | WHERE d_week_seq1=d_week_seq2-53 - | ORDER BY d_week_seq1 - | - |""".stripMargin), - Query("q3", - """ - | SELECT dt.d_year, item.i_brand_id brand_id, item.i_brand brand,SUM(ss_ext_sales_price) sum_agg - | FROM date_dim dt, store_sales, item - | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND item.i_manufact_id = 128 - | AND dt.d_moy=11 - | GROUP BY dt.d_year, item.i_brand, item.i_brand_id - | ORDER BY dt.d_year, sum_agg desc, brand_id - | LIMIT 100 - | - |""".stripMargin), - Query("q4", - """ - |WITH year_total AS ( - | SELECT c_customer_id customer_id, - | c_first_name customer_first_name, - | c_last_name customer_last_name, - | c_preferred_cust_flag customer_preferred_cust_flag, - | c_birth_country customer_birth_country, - | c_login customer_login, - | c_email_address customer_email_address, - | d_year dyear, - | sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total, - | 's' sale_type - | FROM customer, store_sales, date_dim - | WHERE 
c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk - | GROUP BY c_customer_id, - | c_first_name, - | c_last_name, - | c_preferred_cust_flag, - | c_birth_country, - | c_login, - | c_email_address, - | d_year - | UNION ALL - | SELECT c_customer_id customer_id, - | c_first_name customer_first_name, - | c_last_name customer_last_name, - | c_preferred_cust_flag customer_preferred_cust_flag, - | c_birth_country customer_birth_country, - | c_login customer_login, - | c_email_address customer_email_address, - | d_year dyear, - | sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total, - | 'c' sale_type - | FROM customer, catalog_sales, date_dim - | WHERE c_customer_sk = cs_bill_customer_sk AND cs_sold_date_sk = d_date_sk - | GROUP BY c_customer_id, - | c_first_name, - | c_last_name, - | c_preferred_cust_flag, - | c_birth_country, - | c_login, - | c_email_address, - | d_year - | UNION ALL - | SELECT c_customer_id customer_id - | ,c_first_name customer_first_name - | ,c_last_name customer_last_name - | ,c_preferred_cust_flag customer_preferred_cust_flag - | ,c_birth_country customer_birth_country - | ,c_login customer_login - | ,c_email_address customer_email_address - | ,d_year dyear - | ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total - | ,'w' sale_type - | FROM customer, web_sales, date_dim - | WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk - | GROUP BY c_customer_id, - | c_first_name, - | c_last_name, - | c_preferred_cust_flag, - | c_birth_country, - | c_login, - | c_email_address, - | d_year) - | SELECT - | t_s_secyear.customer_id, - | t_s_secyear.customer_first_name, - | t_s_secyear.customer_last_name, - | t_s_secyear.customer_preferred_cust_flag - | FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_c_firstyear, - | year_total t_c_secyear, year_total t_w_firstyear, year_total t_w_secyear - | WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id - | and t_s_firstyear.customer_id = t_c_secyear.customer_id - | and t_s_firstyear.customer_id = t_c_firstyear.customer_id - | and t_s_firstyear.customer_id = t_w_firstyear.customer_id - | and t_s_firstyear.customer_id = t_w_secyear.customer_id - | and t_s_firstyear.sale_type = 's' - | and t_c_firstyear.sale_type = 'c' - | and t_w_firstyear.sale_type = 'w' - | and t_s_secyear.sale_type = 's' - | and t_c_secyear.sale_type = 'c' - | and t_w_secyear.sale_type = 'w' - | and t_s_firstyear.dyear = 2001 - | and t_s_secyear.dyear = 2001+1 - | and t_c_firstyear.dyear = 2001 - | and t_c_secyear.dyear = 2001+1 - | and t_w_firstyear.dyear = 2001 - | and t_w_secyear.dyear = 2001+1 - | and t_s_firstyear.year_total > 0 - | and t_c_firstyear.year_total > 0 - | and t_w_firstyear.year_total > 0 - | and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end - | > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - | and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end - | > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - | ORDER BY - | t_s_secyear.customer_id, - | t_s_secyear.customer_first_name, - | t_s_secyear.customer_last_name, - | t_s_secyear.customer_preferred_cust_flag - | LIMIT 100 - | - |""".stripMargin), - Query("q5", - """ - | WITH ssr AS - | (SELECT s_store_id, - | 
sum(sales_price) as sales, - | sum(profit) as profit, - | sum(return_amt) as returns, - | sum(net_loss) as profit_loss - | FROM - | (SELECT ss_store_sk as store_sk, - | ss_sold_date_sk as date_sk, - | ss_ext_sales_price as sales_price, - | ss_net_profit as profit, - | cast(0 as decimal(7,2)) as return_amt, - | cast(0 as decimal(7,2)) as net_loss - | FROM store_sales - | UNION ALL - | SELECT sr_store_sk as store_sk, - | sr_returned_date_sk as date_sk, - | cast(0 as decimal(7,2)) as sales_price, - | cast(0 as decimal(7,2)) as profit, - | sr_return_amt as return_amt, - | sr_net_loss as net_loss - | FROM store_returns) - | salesreturns, date_dim, store - | WHERE date_sk = d_date_sk - | and d_date between cast('2000-08-23' as date) - | and ((cast('2000-08-23' as date) + interval '14' day)) - | and store_sk = s_store_sk - | GROUP BY s_store_id), - | csr AS - | (SELECT cp_catalog_page_id, - | sum(sales_price) as sales, - | sum(profit) as profit, - | sum(return_amt) as returns, - | sum(net_loss) as profit_loss - | FROM - | (SELECT cs_catalog_page_sk as page_sk, - | cs_sold_date_sk as date_sk, - | cs_ext_sales_price as sales_price, - | cs_net_profit as profit, - | cast(0 as decimal(7,2)) as return_amt, - | cast(0 as decimal(7,2)) as net_loss - | FROM catalog_sales - | UNION ALL - | SELECT cr_catalog_page_sk as page_sk, - | cr_returned_date_sk as date_sk, - | cast(0 as decimal(7,2)) as sales_price, - | cast(0 as decimal(7,2)) as profit, - | cr_return_amount as return_amt, - | cr_net_loss as net_loss - | from catalog_returns - | ) salesreturns, date_dim, catalog_page - | WHERE date_sk = d_date_sk - | and d_date between cast('2000-08-23' as date) - | and ((cast('2000-08-23' as date) + interval '14' day)) - | and page_sk = cp_catalog_page_sk - | GROUP BY cp_catalog_page_id) - | , - | wsr AS - | (SELECT web_site_id, - | sum(sales_price) as sales, - | sum(profit) as profit, - | sum(return_amt) as returns, - | sum(net_loss) as profit_loss - | from - | (select ws_web_site_sk as wsr_web_site_sk, - | ws_sold_date_sk as date_sk, - | ws_ext_sales_price as sales_price, - | ws_net_profit as profit, - | cast(0 as decimal(7,2)) as return_amt, - | cast(0 as decimal(7,2)) as net_loss - | from web_sales - | union all - | select ws_web_site_sk as wsr_web_site_sk, - | wr_returned_date_sk as date_sk, - | cast(0 as decimal(7,2)) as sales_price, - | cast(0 as decimal(7,2)) as profit, - | wr_return_amt as return_amt, - | wr_net_loss as net_loss - | FROM web_returns LEFT OUTER JOIN web_sales on - | ( wr_item_sk = ws_item_sk - | and wr_order_number = ws_order_number) - | ) salesreturns, date_dim, web_site - | WHERE date_sk = d_date_sk - | and d_date between cast('2000-08-23' as date) - | and ((cast('2000-08-23' as date) + interval '14' day)) - | and wsr_web_site_sk = web_site_sk - | GROUP BY web_site_id) - | SELECT channel, - | id, - | sum(sales) as sales, - | sum(returns) as returns, - | sum(profit) as profit - | from - | (select 'store channel' as channel, - | concat('store', s_store_id) as id, - | sales, - | returns, - | (profit - profit_loss) as profit - | FROM ssr - | UNION ALL - | select 'catalog channel' as channel, - | concat('catalog_page', cp_catalog_page_id) as id, - | sales, - | returns, - | (profit - profit_loss) as profit - | FROM csr - | UNION ALL - | SELECT 'web channel' as channel, - | concat('web_site', web_site_id) as id, - | sales, - | returns, - | (profit - profit_loss) as profit - | FROM wsr - | ) x - | GROUP BY ROLLUP (channel, id) - | ORDER BY channel, id - | LIMIT 100 - | - |""".stripMargin - ), - 
Query("q6", - """ - |SELECT state, cnt FROM ( - | SELECT a.ca_state state, count(*) cnt - | FROM - | customer_address a, customer c, store_sales s, date_dim d, item i - | WHERE a.ca_address_sk = c.c_current_addr_sk - | AND c.c_customer_sk = s.ss_customer_sk - | AND s.ss_sold_date_sk = d.d_date_sk - | AND s.ss_item_sk = i.i_item_sk - | AND d.d_month_seq = - | (SELECT distinct (d_month_seq) FROM date_dim - | WHERE d_year = 2001 AND d_moy = 1) - | AND i.i_current_price > 1.2 * - | (SELECT avg(j.i_current_price) FROM item j - | WHERE j.i_category = i.i_category) - | GROUP BY a.ca_state - |) x - |WHERE cnt >= 10 - |ORDER BY cnt LIMIT 100 - |""".stripMargin), - Query("q7", - """ - | SELECT i_item_id, - | avg(ss_quantity) agg1, - | avg(ss_list_price) agg2, - | avg(ss_coupon_amt) agg3, - | avg(ss_sales_price) agg4 - | FROM store_sales, customer_demographics, date_dim, item, promotion - | WHERE ss_sold_date_sk = d_date_sk AND - | ss_item_sk = i_item_sk AND - | ss_cdemo_sk = cd_demo_sk AND - | ss_promo_sk = p_promo_sk AND - | cd_gender = 'M' AND - | cd_marital_status = 'S' AND - | cd_education_status = 'College' AND - | (p_channel_email = 'N' or p_channel_event = 'N') AND - | d_year = 2000 - | GROUP BY i_item_id - | ORDER BY i_item_id LIMIT 100 - |""".stripMargin), - Query("q8", - """ - | select s_store_name, sum(ss_net_profit) - | from store_sales, date_dim, store, - | (SELECT ca_zip - | from ( - | (SELECT substr(ca_zip,1,5) ca_zip FROM customer_address - | WHERE substr(ca_zip,1,5) IN ( - | '24128','76232','65084','87816','83926','77556','20548', - | '26231','43848','15126','91137','61265','98294','25782', - | '17920','18426','98235','40081','84093','28577','55565', - | '17183','54601','67897','22752','86284','18376','38607', - | '45200','21756','29741','96765','23932','89360','29839', - | '25989','28898','91068','72550','10390','18845','47770', - | '82636','41367','76638','86198','81312','37126','39192', - | '88424','72175','81426','53672','10445','42666','66864', - | '66708','41248','48583','82276','18842','78890','49448', - | '14089','38122','34425','79077','19849','43285','39861', - | '66162','77610','13695','99543','83444','83041','12305', - | '57665','68341','25003','57834','62878','49130','81096', - | '18840','27700','23470','50412','21195','16021','76107', - | '71954','68309','18119','98359','64544','10336','86379', - | '27068','39736','98569','28915','24206','56529','57647', - | '54917','42961','91110','63981','14922','36420','23006', - | '67467','32754','30903','20260','31671','51798','72325', - | '85816','68621','13955','36446','41766','68806','16725', - | '15146','22744','35850','88086','51649','18270','52867', - | '39972','96976','63792','11376','94898','13595','10516', - | '90225','58943','39371','94945','28587','96576','57855', - | '28488','26105','83933','25858','34322','44438','73171', - | '30122','34102','22685','71256','78451','54364','13354', - | '45375','40558','56458','28286','45266','47305','69399', - | '83921','26233','11101','15371','69913','35942','15882', - | '25631','24610','44165','99076','33786','70738','26653', - | '14328','72305','62496','22152','10144','64147','48425', - | '14663','21076','18799','30450','63089','81019','68893', - | '24996','51200','51211','45692','92712','70466','79994', - | '22437','25280','38935','71791','73134','56571','14060', - | '19505','72425','56575','74351','68786','51650','20004', - | '18383','76614','11634','18906','15765','41368','73241', - | '76698','78567','97189','28545','76231','75691','22246', - | 
'51061','90578','56691','68014','51103','94167','57047', - | '14867','73520','15734','63435','25733','35474','24676', - | '94627','53535','17879','15559','53268','59166','11928', - | '59402','33282','45721','43933','68101','33515','36634', - | '71286','19736','58058','55253','67473','41918','19515', - | '36495','19430','22351','77191','91393','49156','50298', - | '87501','18652','53179','18767','63193','23968','65164', - | '68880','21286','72823','58470','67301','13394','31016', - | '70372','67030','40604','24317','45748','39127','26065', - | '77721','31029','31880','60576','24671','45549','13376', - | '50016','33123','19769','22927','97789','46081','72151', - | '15723','46136','51949','68100','96888','64528','14171', - | '79777','28709','11489','25103','32213','78668','22245', - | '15798','27156','37930','62971','21337','51622','67853', - | '10567','38415','15455','58263','42029','60279','37125', - | '56240','88190','50308','26859','64457','89091','82136', - | '62377','36233','63837','58078','17043','30010','60099', - | '28810','98025','29178','87343','73273','30469','64034', - | '39516','86057','21309','90257','67875','40162','11356', - | '73650','61810','72013','30431','22461','19512','13375', - | '55307','30625','83849','68908','26689','96451','38193', - | '46820','88885','84935','69035','83144','47537','56616', - | '94983','48033','69952','25486','61547','27385','61860', - | '58048','56910','16807','17871','35258','31387','35458', - | '35576')) - | INTERSECT - | (select ca_zip - | FROM - | (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt - | FROM customer_address, customer - | WHERE ca_address_sk = c_current_addr_sk and - | c_preferred_cust_flag='Y' - | group by ca_zip - | having count(*) > 10) A1) - | ) A2 - | ) V1 - | where ss_store_sk = s_store_sk - | and ss_sold_date_sk = d_date_sk - | and d_qoy = 2 and d_year = 1998 - | and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) - | group by s_store_name - | order by s_store_name LIMIT 100 - |""".stripMargin), - Query("q9", - """ - |select case when (select count(*) from store_sales - | where ss_quantity between 1 and 20) > 74129 - | then (select avg(ss_ext_discount_amt) from store_sales - | where ss_quantity between 1 and 20) - | else (select avg(ss_net_paid) from store_sales - | where ss_quantity between 1 and 20) end bucket1 , - | case when (select count(*) from store_sales - | where ss_quantity between 21 and 40) > 122840 - | then (select avg(ss_ext_discount_amt) from store_sales - | where ss_quantity between 21 and 40) - | else (select avg(ss_net_paid) from store_sales - | where ss_quantity between 21 and 40) end bucket2, - | case when (select count(*) from store_sales - | where ss_quantity between 41 and 60) > 56580 - | then (select avg(ss_ext_discount_amt) from store_sales - | where ss_quantity between 41 and 60) - | else (select avg(ss_net_paid) from store_sales - | where ss_quantity between 41 and 60) end bucket3, - | case when (select count(*) from store_sales - | where ss_quantity between 61 and 80) > 10097 - | then (select avg(ss_ext_discount_amt) from store_sales - | where ss_quantity between 61 and 80) - | else (select avg(ss_net_paid) from store_sales - | where ss_quantity between 61 and 80) end bucket4, - | case when (select count(*) from store_sales - | where ss_quantity between 81 and 100) > 165306 - | then (select avg(ss_ext_discount_amt) from store_sales - | where ss_quantity between 81 and 100) - | else (select avg(ss_net_paid) from store_sales - | where ss_quantity between 81 and 100) end bucket5 - |from reason - |where 
r_reason_sk = 1 - |""".stripMargin), - Query("q10", - """ - | select - | cd_gender, cd_marital_status, cd_education_status, count(*) cnt1, - | cd_purchase_estimate, count(*) cnt2, cd_credit_rating, count(*) cnt3, - | cd_dep_count, count(*) cnt4, cd_dep_employed_count, count(*) cnt5, - | cd_dep_college_count, count(*) cnt6 - | from - | customer c, customer_address ca, customer_demographics - | where - | c.c_current_addr_sk = ca.ca_address_sk and - | ca_county in ('Rush County','Toole County','Jefferson County', - | 'Dona Ana County','La Porte County') and - | cd_demo_sk = c.c_current_cdemo_sk AND - | exists (select * from store_sales, date_dim - | where c.c_customer_sk = ss_customer_sk AND - | ss_sold_date_sk = d_date_sk AND - | d_year = 2002 AND - | d_moy between 1 AND 1+3) AND - | (exists (select * from web_sales, date_dim - | where c.c_customer_sk = ws_bill_customer_sk AND - | ws_sold_date_sk = d_date_sk AND - | d_year = 2002 AND - | d_moy between 1 AND 1+3) or - | exists (select * from catalog_sales, date_dim - | where c.c_customer_sk = cs_ship_customer_sk AND - | cs_sold_date_sk = d_date_sk AND - | d_year = 2002 AND - | d_moy between 1 AND 1+3)) - | group by cd_gender, - | cd_marital_status, - | cd_education_status, - | cd_purchase_estimate, - | cd_credit_rating, - | cd_dep_count, - | cd_dep_employed_count, - | cd_dep_college_count - | order by cd_gender, - | cd_marital_status, - | cd_education_status, - | cd_purchase_estimate, - | cd_credit_rating, - | cd_dep_count, - | cd_dep_employed_count, - | cd_dep_college_count - |LIMIT 100 - |""".stripMargin), - Query("q11", - """ - |with year_total as ( - | select c_customer_id customer_id - | ,c_first_name customer_first_name - | ,c_last_name customer_last_name - | ,c_preferred_cust_flag customer_preferred_cust_flag - | ,c_birth_country customer_birth_country - | ,c_login customer_login - | ,c_email_address customer_email_address - | ,d_year dyear - | ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total - | ,'s' sale_type - | from customer, store_sales, date_dim - | where c_customer_sk = ss_customer_sk - | and ss_sold_date_sk = d_date_sk - | group by c_customer_id - | ,c_first_name - | ,c_last_name - | ,c_preferred_cust_flag - | ,c_birth_country - | ,c_login - | ,c_email_address - | ,d_year - | union all - | select c_customer_id customer_id - | ,c_first_name customer_first_name - | ,c_last_name customer_last_name - | ,c_preferred_cust_flag customer_preferred_cust_flag - | ,c_birth_country customer_birth_country - | ,c_login customer_login - | ,c_email_address customer_email_address - | ,d_year dyear - | ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total - | ,'w' sale_type - | from customer, web_sales, date_dim - | where c_customer_sk = ws_bill_customer_sk - | and ws_sold_date_sk = d_date_sk - | group by - | c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, - | c_login, c_email_address, d_year) - | select - | t_s_secyear.customer_id - | ,t_s_secyear.customer_first_name - | ,t_s_secyear.customer_last_name - | ,t_s_secyear.customer_preferred_cust_flag - | from year_total t_s_firstyear - | ,year_total t_s_secyear - | ,year_total t_w_firstyear - | ,year_total t_w_secyear - | where t_s_secyear.customer_id = t_s_firstyear.customer_id - | and t_s_firstyear.customer_id = t_w_secyear.customer_id - | and t_s_firstyear.customer_id = t_w_firstyear.customer_id - | and t_s_firstyear.sale_type = 's' - | and t_w_firstyear.sale_type = 'w' - | and t_s_secyear.sale_type = 's' - | and t_w_secyear.sale_type = 'w' - | and 
t_s_firstyear.dyear = 2001 - | and t_s_secyear.dyear = 2001+1 - | and t_w_firstyear.dyear = 2001 - | and t_w_secyear.dyear = 2001+1 - | and t_s_firstyear.year_total > 0 - | and t_w_firstyear.year_total > 0 - | and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end - | > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end - | order by - | t_s_secyear.customer_id - | ,t_s_secyear.customer_first_name - | ,t_s_secyear.customer_last_name - | , - |t_s_secyear.customer_preferred_cust_flag - | LIMIT 100 - |""".stripMargin), - Query("q12", - """ - | select i_item_id, - | i_item_desc, i_category, i_class, i_current_price, - | sum(ws_ext_sales_price) as itemrevenue, - | sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over - | (partition by i_class) as revenueratio - | from - | web_sales, item, date_dim - | where - | ws_item_sk = i_item_sk - | and i_category in ('Sports', 'Books', 'Home') - | and ws_sold_date_sk = d_date_sk - | and d_date between cast('1999-02-22' as date) - | and (cast('1999-02-22' as date) + interval '30' day) - | group by - | i_item_id, i_item_desc, i_category, i_class, i_current_price - | order by - | i_category, i_class, i_item_id, i_item_desc, revenueratio - | LIMIT 100 - |""".stripMargin), - Query("q13", - """ - | select avg(ss_quantity) - | ,avg(ss_ext_sales_price) - | ,avg(ss_ext_wholesale_cost) - | ,sum(ss_ext_wholesale_cost) - | from store_sales - | ,store - | ,customer_demographics - | ,household_demographics - | ,customer_address - | ,date_dim - | where s_store_sk = ss_store_sk - | and ss_sold_date_sk = d_date_sk and d_year = 2001 - | and((ss_hdemo_sk=hd_demo_sk - | and cd_demo_sk = ss_cdemo_sk - | and cd_marital_status = 'M' - | and cd_education_status = 'Advanced Degree' - | and ss_sales_price between 100.00 and 150.00 - | and hd_dep_count = 3 - | )or - | (ss_hdemo_sk=hd_demo_sk - | and cd_demo_sk = ss_cdemo_sk - | and cd_marital_status = 'S' - | and cd_education_status = 'College' - | and ss_sales_price between 50.00 and 100.00 - | and hd_dep_count = 1 - | ) or - | (ss_hdemo_sk=hd_demo_sk - | and cd_demo_sk = ss_cdemo_sk - | and cd_marital_status = 'W' - | and cd_education_status = '2 yr Degree' - | and ss_sales_price between 150.00 and 200.00 - | and hd_dep_count = 1 - | )) - | and((ss_addr_sk = ca_address_sk - | and ca_country = 'United States' - | and ca_state in ('TX', 'OH', 'TX') - | and ss_net_profit between 100 and 200 - | ) or - | (ss_addr_sk = ca_address_sk - | and ca_country = 'United States' - | and ca_state in ('OR', 'NM', 'KY') - | and ss_net_profit between 150 and 300 - | ) or - | (ss_addr_sk = ca_address_sk - | and ca_country = 'United States' - | and ca_state in ('VA', 'TX', 'MS') - | and ss_net_profit between 50 and 250 - | )) - |""".stripMargin), - Query("q14a", - """ - |with cross_items as - | (select i_item_sk ss_item_sk - | from item, - | (select iss.i_brand_id brand_id, iss.i_class_id class_id, iss.i_category_id category_id - | from store_sales, item iss, date_dim d1 - | where ss_item_sk = iss.i_item_sk - | and ss_sold_date_sk = d1.d_date_sk - | and d1.d_year between 1999 AND 1999 + 2 - | intersect - | select ics.i_brand_id, ics.i_class_id, ics.i_category_id - | from catalog_sales, item ics, date_dim d2 - | where cs_item_sk = ics.i_item_sk - | and cs_sold_date_sk = d2.d_date_sk - | and d2.d_year between 1999 AND 1999 + 2 - | intersect - | select iws.i_brand_id, iws.i_class_id, iws.i_category_id - | from web_sales, item iws, 
date_dim d3 - | where ws_item_sk = iws.i_item_sk - | and ws_sold_date_sk = d3.d_date_sk - | and d3.d_year between 1999 AND 1999 + 2) x - | where i_brand_id = brand_id - | and i_class_id = class_id - | and i_category_id = category_id - |), - | avg_sales as - | (select avg(quantity*list_price) average_sales - | from ( - | select ss_quantity quantity, ss_list_price list_price - | from store_sales, date_dim - | where ss_sold_date_sk = d_date_sk - | and d_year between 1999 and 2001 - | union all - | select cs_quantity quantity, cs_list_price list_price - | from catalog_sales, date_dim - | where cs_sold_date_sk = d_date_sk - | and d_year between 1999 and 1999 + 2 - | union all - | select ws_quantity quantity, ws_list_price list_price - | from web_sales, date_dim - | where ws_sold_date_sk = d_date_sk - | and d_year between 1999 and 1999 + 2) x) - | select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) - | from( - | select 'store' channel, i_brand_id,i_class_id - | ,i_category_id,sum(ss_quantity*ss_list_price) sales - | , count(*) number_sales - | from store_sales, item, date_dim - | where ss_item_sk in (select ss_item_sk from cross_items) - | and ss_item_sk = i_item_sk - | and ss_sold_date_sk = d_date_sk - | and d_year = 1999+2 - | and d_moy = 11 - | group by i_brand_id,i_class_id,i_category_id - | having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) - | union all - | select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales - | from catalog_sales, item, date_dim - | where cs_item_sk in (select ss_item_sk from cross_items) - | and cs_item_sk = i_item_sk - | and cs_sold_date_sk = d_date_sk - | and d_year = 1999+2 - | and d_moy = 11 - | group by i_brand_id,i_class_id,i_category_id - | having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) - | union all - | select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales - | from web_sales, item, date_dim - | where ws_item_sk in (select ss_item_sk from cross_items) - | and ws_item_sk = i_item_sk - | and ws_sold_date_sk = d_date_sk - | and d_year = 1999+2 - | and d_moy = 11 - | group by i_brand_id,i_class_id,i_category_id - | having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) - | ) y - | group by rollup (channel, i_brand_id,i_class_id,i_category_id) - | order by channel,i_brand_id,i_class_id,i_category_id - | limit 100 - |""".stripMargin), - Query("q14b", - """ - |with cross_items as - | (select i_item_sk ss_item_sk - | from item, - | (select iss.i_brand_id brand_id, iss.i_class_id class_id, iss.i_category_id category_id - | from store_sales, item iss, date_dim d1 - | where ss_item_sk = iss.i_item_sk - | and ss_sold_date_sk = d1.d_date_sk - | and d1.d_year between 1999 AND 1999 + 2 - | intersect - | select ics.i_brand_id, ics.i_class_id, ics.i_category_id - | from catalog_sales, item ics, date_dim d2 - | where cs_item_sk = ics.i_item_sk - | and cs_sold_date_sk = d2.d_date_sk - | and d2.d_year between 1999 AND 1999 + 2 - | intersect - | select iws.i_brand_id, iws.i_class_id, iws.i_category_id - | from web_sales, item iws, date_dim d3 - | where ws_item_sk = iws.i_item_sk - | and ws_sold_date_sk = d3.d_date_sk - | and d3.d_year between 1999 AND 1999 + 2) x - | where i_brand_id = brand_id - | and i_class_id = class_id - | and i_category_id = category_id - | ), - | avg_sales as - | (select avg(quantity*list_price) average_sales - | from 
(select ss_quantity quantity, ss_list_price list_price - | from store_sales, date_dim - | where ss_sold_date_sk = d_date_sk and d_year between 1999 and 1999 + 2 - | union all - | select cs_quantity quantity, cs_list_price list_price - | from catalog_sales, date_dim - | where cs_sold_date_sk = d_date_sk and d_year between 1999 and 1999 + 2 - | union all - | select ws_quantity quantity, ws_list_price list_price - | from web_sales, date_dim - | where ws_sold_date_sk = d_date_sk and d_year between 1999 and 1999 + 2) x) - | - | select this_year.channel as ty_channel, - | last_year.channel as ly_channel, - | this_year.i_brand_id as ty_i_brand_id, - | last_year.i_brand_id as ly_i_branch_id, - | this_year.i_category_id as ty_i_category_id, - | last_year.i_category_id as ly_i_category_id, - | this_year.i_class_id as ty_i_class_id, - | last_year.i_class_id as ly_i_class_id, - | this_year.number_sales as ty_number_sales, - | last_year.number_sales as ly_number_sales, - | this_year.sales as ty_sales, - | last_year.sales as ly_sales from - | (select 'store' channel, i_brand_id,i_class_id,i_category_id - | ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales - | from store_sales, item, date_dim - | where ss_item_sk in (select ss_item_sk from cross_items) - | and ss_item_sk = i_item_sk - | and ss_sold_date_sk = d_date_sk - | and d_week_seq = (select d_week_seq from date_dim - | where d_year = 1999 + 1 and d_moy = 12 and d_dom = 11) - | group by i_brand_id,i_class_id,i_category_id - | having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, - | (select 'store' channel, i_brand_id,i_class_id - | ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales - | from store_sales, item, date_dim - | where ss_item_sk in (select ss_item_sk from cross_items) - | and ss_item_sk = i_item_sk - | and ss_sold_date_sk = d_date_sk - | and d_week_seq = (select d_week_seq from date_dim - | where d_year = 1999 and d_moy = 12 and d_dom = 11) - | group by i_brand_id,i_class_id,i_category_id - | having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year - | where this_year.i_brand_id= last_year.i_brand_id - | and this_year.i_class_id = last_year.i_class_id - | and this_year.i_category_id = last_year.i_category_id - | order by ty_channel, ty_i_brand_id, ty_i_class_id, ty_i_category_id - | limit 100 - |""".stripMargin), - Query("q15", - """ - | - | select ca_zip, sum(cs_sales_price) - | from catalog_sales, customer, customer_address, date_dim - | where cs_bill_customer_sk = c_customer_sk - | and c_current_addr_sk = ca_address_sk - | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', - | '85392', '85460', '80348', '81792') - | or ca_state in ('CA','WA','GA') - | or cs_sales_price > 500) - | and cs_sold_date_sk = d_date_sk - | and d_qoy = 2 and d_year = 2001 - | group by ca_zip - | order by ca_zip - | limit 100 - |""".stripMargin), - Query("q16", - """ - | select - | count(distinct cs_order_number) as `order count`, - | sum(cs_ext_ship_cost) as `total shipping cost`, - | sum(cs_net_profit) as `total net profit` - | from - | catalog_sales cs1, date_dim, customer_address, call_center - | where - | d_date between cast ('2002-02-01' as date) and (cast('2002-02-01' as date) + interval '60' day) - | and cs1.cs_ship_date_sk = d_date_sk - | and cs1.cs_ship_addr_sk = ca_address_sk - | and ca_state = 'GA' - | and cs1.cs_call_center_sk = cc_call_center_sk - | and cc_county in ('Williamson County','Williamson County','Williamson 
County','Williamson County', 'Williamson County') - | and exists (select * - | from catalog_sales cs2 - | where cs1.cs_order_number = cs2.cs_order_number - | and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) - | and not exists(select * - | from catalog_returns cr1 - | where cs1.cs_order_number = cr1.cr_order_number) - | order by count(distinct cs_order_number) - | limit 100 - |""".stripMargin), - Query("q17", - """ - |select i_item_id - | ,i_item_desc - | ,s_state - | ,count(ss_quantity) as store_sales_quantitycount - | ,avg(ss_quantity) as store_sales_quantityave - | ,stddev_samp(ss_quantity) as store_sales_quantitystdev - | ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov - | ,count(sr_return_quantity) as_store_returns_quantitycount - | ,avg(sr_return_quantity) as_store_returns_quantityave - | ,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev - | ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov - | ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave - | ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitystdev - | ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov - | from store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item - | where d1.d_quarter_name = '2001Q1' - | and d1.d_date_sk = ss_sold_date_sk - | and i_item_sk = ss_item_sk - | and s_store_sk = ss_store_sk - | and ss_customer_sk = sr_customer_sk - | and ss_item_sk = sr_item_sk - | and ss_ticket_number = sr_ticket_number - | and sr_returned_date_sk = d2.d_date_sk - | and d2.d_quarter_name in ('2001Q1','2001Q2','2001Q3') - | and sr_customer_sk = cs_bill_customer_sk - | and sr_item_sk = cs_item_sk - | and cs_sold_date_sk = d3.d_date_sk - | and d3.d_quarter_name in ('2001Q1','2001Q2','2001Q3') - | group by i_item_id, i_item_desc, s_state - | order by i_item_id, i_item_desc, s_state - | limit 100 - |""".stripMargin), - Query("q18", - """ - |select i_item_id, - | ca_country, - | ca_state, - | ca_county, - | avg( cast(cs_quantity as decimal(12,2))) agg1, - | avg( cast(cs_list_price as decimal(12,2))) agg2, - | avg( cast(cs_coupon_amt as decimal(12,2))) agg3, - | avg( cast(cs_sales_price as decimal(12,2))) agg4, - | avg( cast(cs_net_profit as decimal(12,2))) agg5, - | avg( cast(c_birth_year as decimal(12,2))) agg6, - | avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 - | from catalog_sales, customer_demographics cd1, - | customer_demographics cd2, customer, customer_address, date_dim, item - | where cs_sold_date_sk = d_date_sk and - | cs_item_sk = i_item_sk and - | cs_bill_cdemo_sk = cd1.cd_demo_sk and - | cs_bill_customer_sk = c_customer_sk and - | cd1.cd_gender = 'F' and - | cd1.cd_education_status = 'Unknown' and - | c_current_cdemo_sk = cd2.cd_demo_sk and - | c_current_addr_sk = ca_address_sk and - | c_birth_month in (1,6,8,9,12,2) and - | d_year = 1998 and - | ca_state in ('MS','IN','ND','OK','NM','VA','MS') - | group by rollup (i_item_id, ca_country, ca_state, ca_county) - | order by ca_country, ca_state, ca_county, i_item_id - | LIMIT 100 - |""".stripMargin), - Query("q19", - """ - | select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, - | sum(ss_ext_sales_price) ext_price - | from date_dim, store_sales, item,customer,customer_address,store - | where d_date_sk = ss_sold_date_sk - | and ss_item_sk = i_item_sk - | and i_manager_id = 8 - | and d_moy = 11 - | and d_year = 1998 - | and ss_customer_sk = c_customer_sk - | and 
c_current_addr_sk = ca_address_sk - | and substr(ca_zip,1,5) <> substr(s_zip,1,5) - | and ss_store_sk = s_store_sk - | group by i_brand, i_brand_id, i_manufact_id, i_manufact - | order by ext_price desc, brand, brand_id, i_manufact_id, i_manufact - | limit 100 - |""".stripMargin), - Query("q20", - """ - |select i_item_id, i_item_desc - | ,i_category - | ,i_class - | ,i_current_price - | ,sum(cs_ext_sales_price) as itemrevenue - | ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over - | (partition by i_class) as revenueratio - | from catalog_sales, item, date_dim - | where cs_item_sk = i_item_sk - | and i_category in ('Sports', 'Books', 'Home') - | and cs_sold_date_sk = d_date_sk - | and d_date between cast('1999-02-22' as date) - | and (cast('1999-02-22' as date) + interval '30' day) - | group by i_item_id, i_item_desc, i_category, i_class, i_current_price - | order by i_category, i_class, i_item_id, i_item_desc, revenueratio - | limit 100 - |""".stripMargin), - Query("q21", - """ - | select * from( - | select w_warehouse_name, i_item_id, - | sum(case when (cast(d_date as date) < cast ('2000-03-11' as date)) - | then inv_quantity_on_hand - | else 0 end) as inv_before, - | sum(case when (cast(d_date as date) >= cast ('2000-03-11' as date)) - | then inv_quantity_on_hand - | else 0 end) as inv_after - | from inventory, warehouse, item, date_dim - | where i_current_price between 0.99 and 1.49 - | and i_item_sk = inv_item_sk - | and inv_warehouse_sk = w_warehouse_sk - | and inv_date_sk = d_date_sk - | and d_date between (cast('2000-03-11' as date) - interval '30' day) - | and (cast('2000-03-11' as date) + interval '30' day) - | group by w_warehouse_name, i_item_id) x - | where (case when inv_before > 0 - | then inv_after / inv_before - | else null - | end) between 2.0/3.0 and 3.0/2.0 - | order by w_warehouse_name, i_item_id - | limit 100 - |""".stripMargin), - Query("q22", - """ - | select i_product_name, i_brand, i_class, i_category, avg(inv_quantity_on_hand) qoh - | from inventory, date_dim, item, warehouse - | where inv_date_sk=d_date_sk - | and inv_item_sk=i_item_sk - | and inv_warehouse_sk = w_warehouse_sk - | and d_month_seq between 1200 and 1200 + 11 - | group by rollup(i_product_name, i_brand, i_class, i_category) - | order by qoh, i_product_name, i_brand, i_class, i_category - | limit 100 - |""".stripMargin), - Query("q23a", - """ - |with frequent_ss_items as - | (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt - | from store_sales, date_dim, item - | where ss_sold_date_sk = d_date_sk - | and ss_item_sk = i_item_sk - | and d_year in (2000, 2000+1, 2000+2,2000+3) - | group by substr(i_item_desc,1,30),i_item_sk,d_date - | having count(*) >4), - | max_store_sales as - | (select max(csales) tpcds_cmax - | from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales - | from store_sales, customer, date_dim - | where ss_customer_sk = c_customer_sk - | and ss_sold_date_sk = d_date_sk - | and d_year in (2000, 2000+1, 2000+2,2000+3) - | group by c_customer_sk) x), - | best_ss_customer as - | (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales - | from store_sales, customer - | where ss_customer_sk = c_customer_sk - | group by c_customer_sk - | having sum(ss_quantity*ss_sales_price) > (95/100.0) * - | (select * from max_store_sales)) - | select sum(sales) - | from (select cs_quantity*cs_list_price sales - | from catalog_sales, date_dim - | where d_year = 2000 - | and d_moy = 2 - | and cs_sold_date_sk = d_date_sk - | and cs_item_sk 
in (select item_sk from frequent_ss_items) - | and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) - | union all - | (select ws_quantity*ws_list_price sales - | from web_sales, date_dim - | where d_year = 2000 - | and d_moy = 2 - | and ws_sold_date_sk = d_date_sk - | and ws_item_sk in (select item_sk from frequent_ss_items) - | and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) x - | limit 100 - |""".stripMargin), - Query("q23b", - """ - |with frequent_ss_items as - | (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt - | from store_sales, date_dim, item - | where ss_sold_date_sk = d_date_sk - | and ss_item_sk = i_item_sk - | and d_year in (2000, 2000+1, 2000+2,2000+3) - | group by substr(i_item_desc,1,30),i_item_sk,d_date - | having count(*) > 4), - | max_store_sales as - | (select max(csales) tpcds_cmax - | from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales - | from store_sales, customer, date_dim - | where ss_customer_sk = c_customer_sk - | and ss_sold_date_sk = d_date_sk - | and d_year in (2000, 2000+1, 2000+2,2000+3) - | group by c_customer_sk) x), - | best_ss_customer as - | (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales - | from store_sales - | ,customer - | where ss_customer_sk = c_customer_sk - | group by c_customer_sk - | having sum(ss_quantity*ss_sales_price) > (95/100.0) * - | (select * from max_store_sales)) - | select c_last_name,c_first_name,sales - | from ((select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales - | from catalog_sales, customer, date_dim - | where d_year = 2000 - | and d_moy = 2 - | and cs_sold_date_sk = d_date_sk - | and cs_item_sk in (select item_sk from frequent_ss_items) - | and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) - | and cs_bill_customer_sk = c_customer_sk - | group by c_last_name,c_first_name) - | union all - | (select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales - | from web_sales, customer, date_dim - | where d_year = 2000 - | and d_moy = 2 - | and ws_sold_date_sk = d_date_sk - | and ws_item_sk in (select item_sk from frequent_ss_items) - | and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) - | and ws_bill_customer_sk = c_customer_sk - | group by c_last_name,c_first_name)) y - | order by c_last_name,c_first_name,sales - | limit 100 - |""".stripMargin), - Query("q24a", - """ - | with ssales as - | (select c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, - | i_current_price, i_manager_id, i_units, i_size, sum(ss_net_paid) netpaid - | from store_sales, store_returns, store, item, customer, customer_address - | where ss_ticket_number = sr_ticket_number - | and ss_item_sk = sr_item_sk - | and ss_customer_sk = c_customer_sk - | and ss_item_sk = i_item_sk - | and ss_store_sk = s_store_sk - | and c_birth_country = upper(ca_country) - | and s_zip = ca_zip - | and s_market_id = 8 - | group by c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, - | i_current_price, i_manager_id, i_units, i_size) - | select c_last_name, c_first_name, s_store_name, sum(netpaid) paid - | from ssales - | where i_color = 'pale' - | group by c_last_name, c_first_name, s_store_name - | having sum(netpaid) > (select 0.05*avg(netpaid) from ssales) - |""".stripMargin), - Query("q24b", - """ - | with ssales as - | (select c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, - | i_current_price, i_manager_id, i_units, i_size, sum(ss_net_paid) 
netpaid - | from store_sales, store_returns, store, item, customer, customer_address - | where ss_ticket_number = sr_ticket_number - | and ss_item_sk = sr_item_sk - | and ss_customer_sk = c_customer_sk - | and ss_item_sk = i_item_sk - | and ss_store_sk = s_store_sk - | and c_birth_country = upper(ca_country) - | and s_zip = ca_zip - | and s_market_id = 8 - | group by c_last_name, c_first_name, s_store_name, ca_state, s_state, - | i_color, i_current_price, i_manager_id, i_units, i_size) - | select c_last_name, c_first_name, s_store_name, sum(netpaid) paid - | from ssales - | where i_color = 'chiffon' - | group by c_last_name, c_first_name, s_store_name - | having sum(netpaid) > (select 0.05*avg(netpaid) from ssales) - |""".stripMargin), - Query("q25", - """ - |select i_item_id, i_item_desc, s_store_id, s_store_name, - | sum(ss_net_profit) as store_sales_profit, - | sum(sr_net_loss) as store_returns_loss, - | sum(cs_net_profit) as catalog_sales_profit - | from - | store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, - | store, item - | where - | d1.d_moy = 4 - | and d1.d_year = 2001 - | and d1.d_date_sk = ss_sold_date_sk - | and i_item_sk = ss_item_sk - | and s_store_sk = ss_store_sk - | and ss_customer_sk = sr_customer_sk - | and ss_item_sk = sr_item_sk - | and ss_ticket_number = sr_ticket_number - | and sr_returned_date_sk = d2.d_date_sk - | and d2.d_moy between 4 and 10 - | and d2.d_year = 2001 - | and sr_customer_sk = cs_bill_customer_sk - | and sr_item_sk = cs_item_sk - | and cs_sold_date_sk = d3.d_date_sk - | and d3.d_moy between 4 and 10 - | and d3.d_year = 2001 - | group by - | i_item_id, i_item_desc, s_store_id, s_store_name - | order by - | i_item_id, i_item_desc, s_store_id, s_store_name - | limit 100 - |""".stripMargin), - Query("q26", - """ - | select i_item_id, - | avg(cs_quantity) agg1, - | avg(cs_list_price) agg2, - | avg(cs_coupon_amt) agg3, - | avg(cs_sales_price) agg4 - | from catalog_sales, customer_demographics, date_dim, item, promotion - | where cs_sold_date_sk = d_date_sk and - | cs_item_sk = i_item_sk and - | cs_bill_cdemo_sk = cd_demo_sk and - | cs_promo_sk = p_promo_sk and - | cd_gender = 'M' and - | cd_marital_status = 'S' and - | cd_education_status = 'College' and - | (p_channel_email = 'N' or p_channel_event = 'N') and - | d_year = 2000 - | group by i_item_id - | order by i_item_id - | limit 100 - |""".stripMargin), - Query("q27", - """ - | select i_item_id, - | s_state, grouping(s_state) g_state, - | avg(ss_quantity) agg1, - | avg(ss_list_price) agg2, - | avg(ss_coupon_amt) agg3, - | avg(ss_sales_price) agg4 - | from store_sales, customer_demographics, date_dim, store, item - | where ss_sold_date_sk = d_date_sk and - | ss_item_sk = i_item_sk and - | ss_store_sk = s_store_sk and - | ss_cdemo_sk = cd_demo_sk and - | cd_gender = 'M' and - | cd_marital_status = 'S' and - | cd_education_status = 'College' and - | d_year = 2002 and - | s_state in ('TN','TN', 'TN', 'TN', 'TN', 'TN') - | group by rollup (i_item_id, s_state) - | order by i_item_id, s_state - | limit 100 - |""".stripMargin), - Query("q28", - """ - | select * - | from (select avg(ss_list_price) B1_LP - | ,count(ss_list_price) B1_CNT - | ,count(distinct ss_list_price) B1_CNTD - | from store_sales - | where ss_quantity between 0 and 5 - | and (ss_list_price between 8 and 8+10 - | or ss_coupon_amt between 459 and 459+1000 - | or ss_wholesale_cost between 57 and 57+20)) B1 cross join - | (select avg(ss_list_price) B2_LP - | ,count(ss_list_price) B2_CNT - | ,count(distinct 
ss_list_price) B2_CNTD - | from store_sales - | where ss_quantity between 6 and 10 - | and (ss_list_price between 90 and 90+10 - | or ss_coupon_amt between 2323 and 2323+1000 - | or ss_wholesale_cost between 31 and 31+20)) B2 cross join - | (select avg(ss_list_price) B3_LP - | ,count(ss_list_price) B3_CNT - | ,count(distinct ss_list_price) B3_CNTD - | from store_sales - | where ss_quantity between 11 and 15 - | and (ss_list_price between 142 and 142+10 - | or ss_coupon_amt between 12214 and 12214+1000 - | or ss_wholesale_cost between 79 and 79+20)) B3 cross join - | (select avg(ss_list_price) B4_LP - | ,count(ss_list_price) B4_CNT - | ,count(distinct ss_list_price) B4_CNTD - | from store_sales - | where ss_quantity between 16 and 20 - | and (ss_list_price between 135 and 135+10 - | or ss_coupon_amt between 6071 and 6071+1000 - | or ss_wholesale_cost between 38 and 38+20)) B4 cross join - | (select avg(ss_list_price) B5_LP - | ,count(ss_list_price) B5_CNT - | ,count(distinct ss_list_price) B5_CNTD - | from store_sales - | where ss_quantity between 21 and 25 - | and (ss_list_price between 122 and 122+10 - | or ss_coupon_amt between 836 and 836+1000 - | or ss_wholesale_cost between 17 and 17+20)) B5 cross join - | (select avg(ss_list_price) B6_LP - | ,count(ss_list_price) B6_CNT - | ,count(distinct ss_list_price) B6_CNTD - | from store_sales - | where ss_quantity between 26 and 30 - | and (ss_list_price between 154 and 154+10 - | or ss_coupon_amt between 7326 and 7326+1000 - | or ss_wholesale_cost between 7 and 7+20)) B6 - | limit 100 - |""".stripMargin), - Query("q29", - """ - | select - | i_item_id - | ,i_item_desc - | ,s_store_id - | ,s_store_name - | ,sum(ss_quantity) as store_sales_quantity - | ,sum(sr_return_quantity) as store_returns_quantity - | ,sum(cs_quantity) as catalog_sales_quantity - | from - | store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, - | date_dim d3, store, item - | where - | d1.d_moy = 9 - | and d1.d_year = 1999 - | and d1.d_date_sk = ss_sold_date_sk - | and i_item_sk = ss_item_sk - | and s_store_sk = ss_store_sk - | and ss_customer_sk = sr_customer_sk - | and ss_item_sk = sr_item_sk - | and ss_ticket_number = sr_ticket_number - | and sr_returned_date_sk = d2.d_date_sk - | and d2.d_moy between 9 and 9 + 3 - | and d2.d_year = 1999 - | and sr_customer_sk = cs_bill_customer_sk - | and sr_item_sk = cs_item_sk - | and cs_sold_date_sk = d3.d_date_sk - | and d3.d_year in (1999,1999+1,1999+2) - | group by - | i_item_id, i_item_desc, s_store_id, s_store_name - | order by - | i_item_id, i_item_desc, s_store_id, s_store_name - | limit 100 - |""".stripMargin), - Query("q30", - """ - | with customer_total_return as - | (select wr_returning_customer_sk as ctr_customer_sk - | ,ca_state as ctr_state, - | sum(wr_return_amt) as ctr_total_return - | from web_returns, date_dim, customer_address - | where wr_returned_date_sk = d_date_sk - | and d_year = 2002 - | and wr_returning_addr_sk = ca_address_sk - | group by wr_returning_customer_sk,ca_state) - | select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag - | ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - | ,c_last_review_date,ctr_total_return - | from customer_total_return ctr1, customer_address, customer - | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 - | from customer_total_return ctr2 - | where ctr1.ctr_state = ctr2.ctr_state) - | and ca_address_sk = c_current_addr_sk - | and ca_state = 'GA' - | and ctr1.ctr_customer_sk = 
c_customer_sk - | order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag - | ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - | ,c_last_review_date,ctr_total_return - | limit 100 - |""".stripMargin), - Query("q31", - """ - | with ss as - | (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales - | from store_sales,date_dim,customer_address - | where ss_sold_date_sk = d_date_sk - | and ss_addr_sk=ca_address_sk - | group by ca_county,d_qoy, d_year), - | ws as - | (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales - | from web_sales,date_dim,customer_address - | where ws_sold_date_sk = d_date_sk - | and ws_bill_addr_sk=ca_address_sk - | group by ca_county,d_qoy, d_year) - | select - | ss1.ca_county - | ,ss1.d_year - | ,ws2.web_sales/ws1.web_sales web_q1_q2_increase - | ,ss2.store_sales/ss1.store_sales store_q1_q2_increase - | ,ws3.web_sales/ws2.web_sales web_q2_q3_increase - | ,ss3.store_sales/ss2.store_sales store_q2_q3_increase - | from - | ss ss1, ss ss2, ss ss3, ws ws1, ws ws2, ws ws3 - | where - | ss1.d_qoy = 1 - | and ss1.d_year = 2000 - | and ss1.ca_county = ss2.ca_county - | and ss2.d_qoy = 2 - | and ss2.d_year = 2000 - | and ss2.ca_county = ss3.ca_county - | and ss3.d_qoy = 3 - | and ss3.d_year = 2000 - | and ss1.ca_county = ws1.ca_county - | and ws1.d_qoy = 1 - | and ws1.d_year = 2000 - | and ws1.ca_county = ws2.ca_county - | and ws2.d_qoy = 2 - | and ws2.d_year = 2000 - | and ws1.ca_county = ws3.ca_county - | and ws3.d_qoy = 3 - | and ws3.d_year = 2000 - | and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end - | > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end - | and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end - | > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end - | order by ss1.ca_county - |""".stripMargin), - Query("q32", - """ - | select sum(cs_ext_discount_amt) as `excess discount amount` - | from - | catalog_sales, item, date_dim - | where - | i_manufact_id = 977 - | and i_item_sk = cs_item_sk - | and d_date between cast ('2000-01-27' as date) and (cast('2000-01-27' as date) + interval '90' day) - | and d_date_sk = cs_sold_date_sk - | and cs_ext_discount_amt > ( - | select 1.3 * avg(cs_ext_discount_amt) - | from catalog_sales, date_dim - | where cs_item_sk = i_item_sk - | and d_date between cast ('2000-01-27' as date) and (cast('2000-01-27' as date) + interval '90' day) - | and d_date_sk = cs_sold_date_sk) - |limit 100 - |""".stripMargin), - Query("q33", - """ - | with ss as ( - | select - | i_manufact_id,sum(ss_ext_sales_price) total_sales - | from - | store_sales, date_dim, customer_address, item - | where - | i_manufact_id in (select i_manufact_id - | from item - | where i_category in ('Electronics')) - | and ss_item_sk = i_item_sk - | and ss_sold_date_sk = d_date_sk - | and d_year = 1998 - | and d_moy = 5 - | and ss_addr_sk = ca_address_sk - | and ca_gmt_offset = -5 - | group by i_manufact_id), cs as - | (select i_manufact_id, sum(cs_ext_sales_price) total_sales - | from catalog_sales, date_dim, customer_address, item - | where - | i_manufact_id in ( - | select i_manufact_id from item - | where - | i_category in ('Electronics')) - | and cs_item_sk = i_item_sk - | and cs_sold_date_sk = d_date_sk - | and d_year = 1998 - | and d_moy = 5 - | and cs_bill_addr_sk = ca_address_sk - | and ca_gmt_offset = -5 - | group by i_manufact_id), - | ws as ( - | select 
i_manufact_id,sum(ws_ext_sales_price) total_sales - | from - | web_sales, date_dim, customer_address, item - | where - | i_manufact_id in (select i_manufact_id from item - | where i_category in ('Electronics')) - | and ws_item_sk = i_item_sk - | and ws_sold_date_sk = d_date_sk - | and d_year = 1998 - | and d_moy = 5 - | and ws_bill_addr_sk = ca_address_sk - | and ca_gmt_offset = -5 - | group by i_manufact_id) - | select i_manufact_id ,sum(total_sales) total_sales - | from (select * from ss - | union all - | select * from cs - | union all - | select * from ws) tmp1 - | group by i_manufact_id - | order by total_sales - |limit 100 - |""".stripMargin), - Query("q34", - """ - | select c_last_name, c_first_name, c_salutation, c_preferred_cust_flag, ss_ticket_number, - | cnt - | FROM - | (select ss_ticket_number, ss_customer_sk, count(*) cnt - | from store_sales,date_dim,store,household_demographics - | where store_sales.ss_sold_date_sk = date_dim.d_date_sk - | and store_sales.ss_store_sk = store.s_store_sk - | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk - | and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) - | and (household_demographics.hd_buy_potential = '>10000' or - | household_demographics.hd_buy_potential = 'unknown') - | and household_demographics.hd_vehicle_count > 0 - | and (case when household_demographics.hd_vehicle_count > 0 - | then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count - | else null - | end) > 1.2 - | and date_dim.d_year in (1999, 1999+1, 1999+2) - | and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County', - | 'Williamson County','Williamson County','Williamson County','Williamson County') - | group by ss_ticket_number,ss_customer_sk) dn,customer - | where ss_customer_sk = c_customer_sk - | and cnt between 15 and 20 - | order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number - |""".stripMargin), - Query("q35", - """ - | select - | ca_state, - | cd_gender, - | cd_marital_status, - | cd_dep_count, - | count(*) cnt1, - | min(cd_dep_count), - | max(cd_dep_count), - | avg(cd_dep_count), - | cd_dep_employed_count, - | count(*) cnt2, - | min(cd_dep_employed_count), - | max(cd_dep_employed_count), - | avg(cd_dep_employed_count), - | cd_dep_college_count, - | count(*) cnt3, - | min(cd_dep_college_count), - | max(cd_dep_college_count), - | avg(cd_dep_college_count) - | from - | customer c,customer_address ca,customer_demographics - | where - | c.c_current_addr_sk = ca.ca_address_sk and - | cd_demo_sk = c.c_current_cdemo_sk and - | exists (select * from store_sales, date_dim - | where c.c_customer_sk = ss_customer_sk and - | ss_sold_date_sk = d_date_sk and - | d_year = 2002 and - | d_qoy < 4) and - | (exists (select * from web_sales, date_dim - | where c.c_customer_sk = ws_bill_customer_sk and - | ws_sold_date_sk = d_date_sk and - | d_year = 2002 and - | d_qoy < 4) or - | exists (select * from catalog_sales, date_dim - | where c.c_customer_sk = cs_ship_customer_sk and - | cs_sold_date_sk = d_date_sk and - | d_year = 2002 and - | d_qoy < 4)) - | group by ca_state, cd_gender, cd_marital_status, cd_dep_count, - | cd_dep_employed_count, cd_dep_college_count - | order by ca_state, cd_gender, cd_marital_status, cd_dep_count, - | cd_dep_employed_count, cd_dep_college_count - | limit 100 - |""".stripMargin), - Query("q36", - """ - | select - | sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin - | ,i_category - | ,i_class - | 
,grouping(i_category)+grouping(i_class) as lochierarchy - | ,rank() over ( - | partition by grouping(i_category)+grouping(i_class), - | case when grouping(i_class) = 0 then i_category end - | order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent - | from - | store_sales, date_dim d1, item, store - | where - | d1.d_year = 2001 - | and d1.d_date_sk = ss_sold_date_sk - | and i_item_sk = ss_item_sk - | and s_store_sk = ss_store_sk - | and s_state in ('TN','TN','TN','TN','TN','TN','TN','TN') - | group by rollup(i_category,i_class) - | order by - | lochierarchy desc - | ,case when lochierarchy = 0 then i_category end - | ,rank_within_parent - | limit 100 - |""".stripMargin), - Query("q37", - """ - | select i_item_id, i_item_desc, i_current_price - | from item, inventory, date_dim, catalog_sales - | where i_current_price between 68 and 68 + 30 - | and inv_item_sk = i_item_sk - | and d_date_sk=inv_date_sk - | and d_date between cast('2000-02-01' as date) and (cast('2000-02-01' as date) + interval '60' day) - | and i_manufact_id in (677,940,694,808) - | and inv_quantity_on_hand between 100 and 500 - | and cs_item_sk = i_item_sk - | group by i_item_id,i_item_desc,i_current_price - | order by i_item_id - | limit 100 - |""".stripMargin), - Query("q38", - """ - | select count(*) from ( - | select distinct c_last_name, c_first_name, d_date - | from store_sales, date_dim, customer - | where store_sales.ss_sold_date_sk = date_dim.d_date_sk - | and store_sales.ss_customer_sk = customer.c_customer_sk - | and d_month_seq between 1200 and 1200 + 11 - | intersect - | select distinct c_last_name, c_first_name, d_date - | from catalog_sales, date_dim, customer - | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk - | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk - | and d_month_seq between 1200 and 1200 + 11 - | intersect - | select distinct c_last_name, c_first_name, d_date - | from web_sales, date_dim, customer - | where web_sales.ws_sold_date_sk = date_dim.d_date_sk - | and web_sales.ws_bill_customer_sk = customer.c_customer_sk - | and d_month_seq between 1200 and 1200 + 11 - | ) hot_cust - | limit 100 - |""".stripMargin), - Query("q39a", - """ - | with inv as - | (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - | ,stdev,mean, case mean when 0 then null else stdev/mean end cov - | from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - | ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean - | from inventory, item, warehouse, date_dim - | where inv_item_sk = i_item_sk - | and inv_warehouse_sk = w_warehouse_sk - | and inv_date_sk = d_date_sk - | and d_year = 2001 - | group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo - | where case mean when 0 then 0 else stdev/mean end > 1) - | - | select inv1.w_warehouse_sk as inv1_w_warehouse_sk, - | inv1.i_item_sk as inv1_i_item_sk, - | inv1.d_moy as inv1_d_moy, - | inv1.mean as inv1_mean, - | inv1.cov as inv1_cov, - | inv2.w_warehouse_sk as inv2_w_warehouse_sk, - | inv2.i_item_sk as inv2_i_item_sk, - | inv2.d_moy as inv2_d_moy, - | inv2.mean as inv2_mean, - | inv2.cov as inv2_cov - | from inv inv1,inv inv2 - | where inv1.i_item_sk = inv2.i_item_sk - | and inv1.w_warehouse_sk = inv2.w_warehouse_sk - | and inv1.d_moy=1 - | and inv2.d_moy=1+1 - | order by inv1_w_warehouse_sk,inv1_i_item_sk,inv1_d_moy,inv1_mean,inv1_cov - | ,inv2_d_moy,inv2_mean, inv2_cov - |""".stripMargin), - Query("q39b", - """ - | with inv as - | (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - | 
,stdev,mean, case mean when 0 then null else stdev/mean end cov - | from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - | ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean - | from inventory, item, warehouse, date_dim - | where inv_item_sk = i_item_sk - | and inv_warehouse_sk = w_warehouse_sk - | and inv_date_sk = d_date_sk - | and d_year = 2001 - | group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo - | where case mean when 0 then 0 else stdev/mean end > 1) - | - | select inv1.w_warehouse_sk as inv1_w_warehouse_sk, - | inv1.i_item_sk as inv1_i_item_sk, - | inv1.d_moy as inv1_d_moy, - | inv1.mean as inv1_mean, - | inv1.cov as inv1_cov, - | inv2.w_warehouse_sk as inv2_w_warehouse_sk, - | inv2.i_item_sk as inv2_i_item_sk, - | inv2.d_moy as inv2_d_moy, - | inv2.mean as inv2_mean, - | inv2.cov as inv2_cov - | from inv inv1,inv inv2 - | where inv1.i_item_sk = inv2.i_item_sk - | and inv1.w_warehouse_sk = inv2.w_warehouse_sk - | and inv1.d_moy=1 - | and inv2.d_moy=1+1 - | and inv1.cov > 1.5 - | order by inv1_w_warehouse_sk,inv1_i_item_sk,inv1_d_moy,inv1_mean,inv1_cov - | ,inv2_d_moy, inv2_mean, inv2_cov - |""".stripMargin), - Query("q40", - """ - | select - | w_state - | ,i_item_id - | ,sum(case when (cast(d_date as date) < cast('2000-03-11' as date)) - | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before - | ,sum(case when (cast(d_date as date) >= cast('2000-03-11' as date)) - | then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after - | from - | catalog_sales left outer join catalog_returns on - | (cs_order_number = cr_order_number - | and cs_item_sk = cr_item_sk) - | ,warehouse, item, date_dim - | where - | i_current_price between 0.99 and 1.49 - | and i_item_sk = cs_item_sk - | and cs_warehouse_sk = w_warehouse_sk - | and cs_sold_date_sk = d_date_sk - | and d_date between (cast('2000-03-11' as date) - interval '30' day) - | and (cast('2000-03-11' as date) + interval '30' day) - | group by w_state,i_item_id - | order by w_state,i_item_id - | limit 100 - |""".stripMargin), - Query("q41", - """ - | select distinct(i_product_name) - | from item i1 - | where i_manufact_id between 738 and 738+40 - | and (select count(*) as item_cnt - | from item - | where (i_manufact = i1.i_manufact and - | ((i_category = 'Women' and - | (i_color = 'powder' or i_color = 'khaki') and - | (i_units = 'Ounce' or i_units = 'Oz') and - | (i_size = 'medium' or i_size = 'extra large') - | ) or - | (i_category = 'Women' and - | (i_color = 'brown' or i_color = 'honeydew') and - | (i_units = 'Bunch' or i_units = 'Ton') and - | (i_size = 'N/A' or i_size = 'small') - | ) or - | (i_category = 'Men' and - | (i_color = 'floral' or i_color = 'deep') and - | (i_units = 'N/A' or i_units = 'Dozen') and - | (i_size = 'petite' or i_size = 'large') - | ) or - | (i_category = 'Men' and - | (i_color = 'light' or i_color = 'cornflower') and - | (i_units = 'Box' or i_units = 'Pound') and - | (i_size = 'medium' or i_size = 'extra large') - | ))) or - | (i_manufact = i1.i_manufact and - | ((i_category = 'Women' and - | (i_color = 'midnight' or i_color = 'snow') and - | (i_units = 'Pallet' or i_units = 'Gross') and - | (i_size = 'medium' or i_size = 'extra large') - | ) or - | (i_category = 'Women' and - | (i_color = 'cyan' or i_color = 'papaya') and - | (i_units = 'Cup' or i_units = 'Dram') and - | (i_size = 'N/A' or i_size = 'small') - | ) or - | (i_category = 'Men' and - | (i_color = 'orange' or i_color = 'frosted') and - | (i_units = 'Each' or 
i_units = 'Tbl') and - | (i_size = 'petite' or i_size = 'large') - | ) or - | (i_category = 'Men' and - | (i_color = 'forest' or i_color = 'ghost') and - | (i_units = 'Lb' or i_units = 'Bundle') and - | (i_size = 'medium' or i_size = 'extra large') - | )))) > 0 - | order by i_product_name - | limit 100 - |""".stripMargin), - Query("q42", - """ - |select dt.d_year, item.i_category_id, item.i_category, sum(ss_ext_sales_price) - | from date_dim dt, store_sales, item - | where dt.d_date_sk = store_sales.ss_sold_date_sk - | and store_sales.ss_item_sk = item.i_item_sk - | and item.i_manager_id = 1 - | and dt.d_moy=11 - | and dt.d_year=2000 - | group by dt.d_year - | ,item.i_category_id - | ,item.i_category - | order by sum(ss_ext_sales_price) desc,dt.d_year - | ,item.i_category_id - | ,item.i_category - | limit 100 - |""".stripMargin), - Query("q43", - """ - |select s_store_name, s_store_id, - | sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, - | sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, - | sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, - | sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, - | sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, - | sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, - | sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales - | from date_dim, store_sales, store - | where d_date_sk = ss_sold_date_sk and - | s_store_sk = ss_store_sk and - | s_gmt_offset = -5 and - | d_year = 2000 - | group by s_store_name, s_store_id - | order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales, - | thu_sales,fri_sales,sat_sales - | limit 100 - |""".stripMargin), - Query("q44", - """ - |select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing - | from(select * - | from (select item_sk,rank() over (order by rank_col asc) rnk - | from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col - | from store_sales ss1 - | where ss_store_sk = 4 - | group by ss_item_sk - | having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col - | from store_sales - | where ss_store_sk = 4 - | and ss_addr_sk is null - | group by ss_store_sk))V1)V11 - | where rnk < 11) asceding, - | (select * - | from (select item_sk,rank() over (order by rank_col desc) rnk - | from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col - | from store_sales ss1 - | where ss_store_sk = 4 - | group by ss_item_sk - | having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col - | from store_sales - | where ss_store_sk = 4 - | and ss_addr_sk is null - | group by ss_store_sk))V2)V21 - | where rnk < 11) descending, - | item i1, item i2 - | where asceding.rnk = descending.rnk - | and i1.i_item_sk=asceding.item_sk - | and i2.i_item_sk=descending.item_sk - | order by asceding.rnk - | limit 100 - |""".stripMargin), - Query("q45", - """ - | select ca_zip, ca_city, sum(ws_sales_price) - | from web_sales, customer, customer_address, date_dim, item - | where ws_bill_customer_sk = c_customer_sk - | and c_current_addr_sk = ca_address_sk - | and ws_item_sk = i_item_sk - | and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') - | or - | i_item_id in (select i_item_id - | from item - | where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) - | ) - | ) - | and ws_sold_date_sk = d_date_sk - 
| and d_qoy = 2 and d_year = 2001 - | group by ca_zip, ca_city - | order by ca_zip, ca_city - | limit 100 - |""".stripMargin), - Query("q46", - """ - | select c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number, amt,profit - | from - | (select ss_ticket_number - | ,ss_customer_sk - | ,ca_city bought_city - | ,sum(ss_coupon_amt) amt - | ,sum(ss_net_profit) profit - | from store_sales, date_dim, store, household_demographics, customer_address - | where store_sales.ss_sold_date_sk = date_dim.d_date_sk - | and store_sales.ss_store_sk = store.s_store_sk - | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk - | and store_sales.ss_addr_sk = customer_address.ca_address_sk - | and (household_demographics.hd_dep_count = 4 or - | household_demographics.hd_vehicle_count= 3) - | and date_dim.d_dow in (6,0) - | and date_dim.d_year in (1999,1999+1,1999+2) - | and store.s_city in ('Fairview','Midway','Fairview','Fairview','Fairview') - | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr - | where ss_customer_sk = c_customer_sk - | and customer.c_current_addr_sk = current_addr.ca_address_sk - | and current_addr.ca_city <> bought_city - | order by c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number - | limit 100 - |""".stripMargin), - Query("q47", - """ - | with v1 as( - | select i_category, i_brand, - | s_store_name, s_company_name, - | d_year, d_moy, - | sum(ss_sales_price) sum_sales, - | avg(sum(ss_sales_price)) over - | (partition by i_category, i_brand, - | s_store_name, s_company_name, d_year) - | avg_monthly_sales, - | rank() over - | (partition by i_category, i_brand, - | s_store_name, s_company_name - | order by d_year, d_moy) rn - | from item, store_sales, date_dim, store - | where ss_item_sk = i_item_sk and - | ss_sold_date_sk = d_date_sk and - | ss_store_sk = s_store_sk and - | ( - | d_year = 1999 or - | ( d_year = 1999-1 and d_moy =12) or - | ( d_year = 1999+1 and d_moy =1) - | ) - | group by i_category, i_brand, - | s_store_name, s_company_name, - | d_year, d_moy), - | v2 as( - | select v1.i_category, v1.i_brand, v1.s_store_name, v1.s_company_name, v1.d_year, - | v1.d_moy, v1.avg_monthly_sales ,v1.sum_sales, v1_lag.sum_sales psum, - | v1_lead.sum_sales nsum - | from v1, v1 v1_lag, v1 v1_lead - | where v1.i_category = v1_lag.i_category and - | v1.i_category = v1_lead.i_category and - | v1.i_brand = v1_lag.i_brand and - | v1.i_brand = v1_lead.i_brand and - | v1.s_store_name = v1_lag.s_store_name and - | v1.s_store_name = v1_lead.s_store_name and - | v1.s_company_name = v1_lag.s_company_name and - | v1.s_company_name = v1_lead.s_company_name and - | v1.rn = v1_lag.rn + 1 and - | v1.rn = v1_lead.rn - 1) - | select * from v2 - | where d_year = 1999 and - | avg_monthly_sales > 0 and - | case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 - | order by sum_sales - avg_monthly_sales, 3 - | limit 100 - |""".stripMargin), - Query("q48", - """ - | select sum (ss_quantity) - | from store_sales, store, customer_demographics, customer_address, date_dim - | where s_store_sk = ss_store_sk - | and ss_sold_date_sk = d_date_sk and d_year = 2000 - | and - | ( - | ( - | cd_demo_sk = ss_cdemo_sk - | and - | cd_marital_status = 'M' - | and - | cd_education_status = '4 yr Degree' - | and - | ss_sales_price between 100.00 and 150.00 - | ) - | or - | ( - | cd_demo_sk = ss_cdemo_sk - | and - | cd_marital_status = 'D' - | and - | cd_education_status = '2 yr Degree' - | 
and - | ss_sales_price between 50.00 and 100.00 - | ) - | or - | ( - | cd_demo_sk = ss_cdemo_sk - | and - | cd_marital_status = 'S' - | and - | cd_education_status = 'College' - | and - | ss_sales_price between 150.00 and 200.00 - | ) - | ) - | and - | ( - | ( - | ss_addr_sk = ca_address_sk - | and - | ca_country = 'United States' - | and - | ca_state in ('CO', 'OH', 'TX') - | and ss_net_profit between 0 and 2000 - | ) - | or - | (ss_addr_sk = ca_address_sk - | and - | ca_country = 'United States' - | and - | ca_state in ('OR', 'MN', 'KY') - | and ss_net_profit between 150 and 3000 - | ) - | or - | (ss_addr_sk = ca_address_sk - | and - | ca_country = 'United States' - | and - | ca_state in ('VA', 'CA', 'MS') - | and ss_net_profit between 50 and 25000 - | ) - | ) - |""".stripMargin), - Query("q49", - """ - | select 'web' as channel, web.item, web.return_ratio, web.return_rank, web.currency_rank - | from ( - | select - | item, return_ratio, currency_ratio, - | rank() over (order by return_ratio) as return_rank, - | rank() over (order by currency_ratio) as currency_rank - | from - | ( select ws.ws_item_sk as item - | ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ - | cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio - | ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ - | cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio - | from - | web_sales ws left outer join web_returns wr - | on (ws.ws_order_number = wr.wr_order_number and - | ws.ws_item_sk = wr.wr_item_sk) - | ,date_dim - | where - | wr.wr_return_amt > 10000 - | and ws.ws_net_profit > 1 - | and ws.ws_net_paid > 0 - | and ws.ws_quantity > 0 - | and ws_sold_date_sk = d_date_sk - | and d_year = 2001 - | and d_moy = 12 - | group by ws.ws_item_sk - | ) in_web - | ) web - | where (web.return_rank <= 10 or web.currency_rank <= 10) - | union - | select - | 'catalog' as channel, catalog.item, catalog.return_ratio, - | catalog.return_rank, catalog.currency_rank - | from ( - | select - | item, return_ratio, currency_ratio, - | rank() over (order by return_ratio) as return_rank, - | rank() over (order by currency_ratio) as currency_rank - | from - | ( select - | cs.cs_item_sk as item - | ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ - | cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio - | ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ - | cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio - | from - | catalog_sales cs left outer join catalog_returns cr - | on (cs.cs_order_number = cr.cr_order_number and - | cs.cs_item_sk = cr.cr_item_sk) - | ,date_dim - | where - | cr.cr_return_amount > 10000 - | and cs.cs_net_profit > 1 - | and cs.cs_net_paid > 0 - | and cs.cs_quantity > 0 - | and cs_sold_date_sk = d_date_sk - | and d_year = 2001 - | and d_moy = 12 - | group by cs.cs_item_sk - | ) in_cat - | ) catalog - | where (catalog.return_rank <= 10 or catalog.currency_rank <=10) - | union - | select - | 'store' as channel, store.item, store.return_ratio, - | store.return_rank, store.currency_rank - | from ( - | select - | item, return_ratio, currency_ratio, - | rank() over (order by return_ratio) as return_rank, - | rank() over (order by currency_ratio) as currency_rank - | from - | ( select sts.ss_item_sk as item - | ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/ - | cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio - | 
,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/ - | cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio - | from - | store_sales sts left outer join store_returns sr - | on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) - | ,date_dim - | where - | sr.sr_return_amt > 10000 - | and sts.ss_net_profit > 1 - | and sts.ss_net_paid > 0 - | and sts.ss_quantity > 0 - | and ss_sold_date_sk = d_date_sk - | and d_year = 2001 - | and d_moy = 12 - | group by sts.ss_item_sk - | ) in_store - | ) store - | where (store.return_rank <= 10 or store.currency_rank <= 10) - | order by 1,4,5 - | limit 100 - |""".stripMargin), - Query("q50", - """ - |select - | s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, - | s_suite_number, s_city, s_county, s_state, s_zip - | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` - | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and - | (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` - | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and - | (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` - | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and - | (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` - | ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as `>120 days` - | from - | store_sales, store_returns, store, date_dim d1, date_dim d2 - | where - | d2.d_year = 2001 - | and d2.d_moy = 8 - | and ss_ticket_number = sr_ticket_number - | and ss_item_sk = sr_item_sk - | and ss_sold_date_sk = d1.d_date_sk - | and sr_returned_date_sk = d2.d_date_sk - | and ss_customer_sk = sr_customer_sk - | and ss_store_sk = s_store_sk - | group by - | s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, - | s_suite_number, s_city, s_county, s_state, s_zip - | order by - | s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, - | s_suite_number, s_city, s_county, s_state, s_zip - | limit 100 - |""".stripMargin), - Query("q51", - """ - |WITH web_v1 as ( - | select - | ws_item_sk item_sk, d_date, - | sum(sum(ws_sales_price)) - | over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales - | from web_sales, date_dim - | where ws_sold_date_sk=d_date_sk - | and d_month_seq between 1200 and 1200+11 - | and ws_item_sk is not NULL - | group by ws_item_sk, d_date), - | store_v1 as ( - | select - | ss_item_sk item_sk, d_date, - | sum(sum(ss_sales_price)) - | over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales - | from store_sales, date_dim - | where ss_sold_date_sk=d_date_sk - | and d_month_seq between 1200 and 1200+11 - | and ss_item_sk is not NULL - | group by ss_item_sk, d_date) - | select * - | from (select item_sk, d_date, web_sales, store_sales - | ,max(web_sales) - | over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative - | ,max(store_sales) - | over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative - | from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk - | ,case when web.d_date is not null then web.d_date else store.d_date end d_date - | ,web.cume_sales web_sales - | ,store.cume_sales store_sales - | from 
web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk - | and web.d_date = store.d_date) - | )x )y - | where web_cumulative > store_cumulative - | order by item_sk, d_date - | limit 100 - |""".stripMargin), - Query("q52", - """ - | select dt.d_year - | ,item.i_brand_id brand_id - | ,item.i_brand brand - | ,sum(ss_ext_sales_price) ext_price - | from date_dim dt, store_sales, item - | where dt.d_date_sk = store_sales.ss_sold_date_sk - | and store_sales.ss_item_sk = item.i_item_sk - | and item.i_manager_id = 1 - | and dt.d_moy=11 - | and dt.d_year=2000 - | group by dt.d_year, item.i_brand, item.i_brand_id - | order by dt.d_year, ext_price desc, brand_id - |limit 100 - |""".stripMargin), - Query("q53", - """ - | select * from - | (select i_manufact_id, - | sum(ss_sales_price) sum_sales, - | avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales - | from item, store_sales, date_dim, store - | where ss_item_sk = i_item_sk and - | ss_sold_date_sk = d_date_sk and - | ss_store_sk = s_store_sk and - | d_month_seq in (1200,1200+1,1200+2,1200+3,1200+4,1200+5,1200+6, - | 1200+7,1200+8,1200+9,1200+10,1200+11) and - | ((i_category in ('Books','Children','Electronics') and - | i_class in ('personal','portable','reference','self-help') and - | i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', - | 'exportiunivamalg #9','scholaramalgamalg #9')) - | or - | (i_category in ('Women','Music','Men') and - | i_class in ('accessories','classical','fragrances','pants') and - | i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', - | 'importoamalg #1'))) - | group by i_manufact_id, d_qoy ) tmp1 - | where case when avg_quarterly_sales > 0 - | then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales - | else null end > 0.1 - | order by avg_quarterly_sales, - | sum_sales, - | i_manufact_id - | limit 100 - |""".stripMargin), - Query("q54", - """ - | with my_customers as ( - | select distinct c_customer_sk - | , c_current_addr_sk - | from - | ( select cs_sold_date_sk sold_date_sk, - | cs_bill_customer_sk customer_sk, - | cs_item_sk item_sk - | from catalog_sales - | union all - | select ws_sold_date_sk sold_date_sk, - | ws_bill_customer_sk customer_sk, - | ws_item_sk item_sk - | from web_sales - | ) cs_or_ws_sales, - | item, - | date_dim, - | customer - | where sold_date_sk = d_date_sk - | and item_sk = i_item_sk - | and i_category = 'Women' - | and i_class = 'maternity' - | and c_customer_sk = cs_or_ws_sales.customer_sk - | and d_moy = 12 - | and d_year = 1998 - | ) - | , my_revenue as ( - | select c_customer_sk, - | sum(ss_ext_sales_price) as revenue - | from my_customers, - | store_sales, - | customer_address, - | store, - | date_dim - | where c_current_addr_sk = ca_address_sk - | and ca_county = s_county - | and ca_state = s_state - | and ss_sold_date_sk = d_date_sk - | and c_customer_sk = ss_customer_sk - | and d_month_seq between (select distinct d_month_seq+1 - | from date_dim where d_year = 1998 and d_moy = 12) - | and (select distinct d_month_seq+3 - | from date_dim where d_year = 1998 and d_moy = 12) - | group by c_customer_sk - | ) - | , segments as - | (select cast((revenue/50) as integer) as segment from my_revenue) - | select segment, count(*) as num_customers, segment*50 as segment_base - | from segments - | group by segment - | order by segment, num_customers - | limit 100 - |""".stripMargin), - Query("q55", - """ - |select i_brand_id brand_id, i_brand brand, - | sum(ss_ext_sales_price) ext_price - | from date_dim, store_sales, 
item - | where d_date_sk = ss_sold_date_sk - | and ss_item_sk = i_item_sk - | and i_manager_id=28 - | and d_moy=11 - | and d_year=1999 - | group by i_brand, i_brand_id - | order by ext_price desc, brand_id - | limit 100 - |""".stripMargin), - Query("q56", - """ - | with ss as ( - | select i_item_id,sum(ss_ext_sales_price) total_sales - | from - | store_sales, date_dim, customer_address, item - | where - | i_item_id in (select i_item_id from item where i_color in ('slate','blanched','burnished')) - | and ss_item_sk = i_item_sk - | and ss_sold_date_sk = d_date_sk - | and d_year = 2001 - | and d_moy = 2 - | and ss_addr_sk = ca_address_sk - | and ca_gmt_offset = -5 - | group by i_item_id), - | cs as ( - | select i_item_id,sum(cs_ext_sales_price) total_sales - | from - | catalog_sales, date_dim, customer_address, item - | where - | i_item_id in (select i_item_id from item where i_color in ('slate','blanched','burnished')) - | and cs_item_sk = i_item_sk - | and cs_sold_date_sk = d_date_sk - | and d_year = 2001 - | and d_moy = 2 - | and cs_bill_addr_sk = ca_address_sk - | and ca_gmt_offset = -5 - | group by i_item_id), - | ws as ( - | select i_item_id,sum(ws_ext_sales_price) total_sales - | from - | web_sales, date_dim, customer_address, item - | where - | i_item_id in (select i_item_id from item where i_color in ('slate','blanched','burnished')) - | and ws_item_sk = i_item_sk - | and ws_sold_date_sk = d_date_sk - | and d_year = 2001 - | and d_moy = 2 - | and ws_bill_addr_sk = ca_address_sk - | and ca_gmt_offset = -5 - | group by i_item_id) - | select i_item_id ,sum(total_sales) total_sales - | from (select * from ss - | union all - | select * from cs - | union all - | select * from ws) tmp1 - | group by i_item_id - | order by total_sales - | limit 100 - |""".stripMargin), - Query("q57", - """ - | with v1 as( - | select i_category, i_brand, - | cc_name, - | d_year, d_moy, - | sum(cs_sales_price) sum_sales, - | avg(sum(cs_sales_price)) over - | (partition by i_category, i_brand, cc_name, d_year) - | avg_monthly_sales, - | rank() over - | (partition by i_category, i_brand, cc_name - | order by d_year, d_moy) rn - | from item, catalog_sales, date_dim, call_center - | where cs_item_sk = i_item_sk and - | cs_sold_date_sk = d_date_sk and - | cc_call_center_sk= cs_call_center_sk and - | ( - | d_year = 1999 or - | ( d_year = 1999-1 and d_moy =12) or - | ( d_year = 1999+1 and d_moy =1) - | ) - | group by i_category, i_brand, - | cc_name , d_year, d_moy), - | v2 as( - | select v1.i_category, v1.i_brand, v1.cc_name, v1.d_year, v1.d_moy - | ,v1.avg_monthly_sales - | ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum - | from v1, v1 v1_lag, v1 v1_lead - | where v1.i_category = v1_lag.i_category and - | v1.i_category = v1_lead.i_category and - | v1.i_brand = v1_lag.i_brand and - | v1.i_brand = v1_lead.i_brand and - | v1. cc_name = v1_lag. cc_name and - | v1. cc_name = v1_lead. 
cc_name and - | v1.rn = v1_lag.rn + 1 and - | v1.rn = v1_lead.rn - 1) - | select * from v2 - | where d_year = 1999 and - | avg_monthly_sales > 0 and - | case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 - | order by sum_sales - avg_monthly_sales, 3 - | limit 100 - |""".stripMargin), - Query("q58", - """ - | with ss_items as - | (select i_item_id item_id, sum(ss_ext_sales_price) ss_item_rev - | from store_sales, item, date_dim - | where ss_item_sk = i_item_sk - | and d_date in (select d_date - | from date_dim - | where d_week_seq = (select d_week_seq - | from date_dim - | where d_date = cast('2000-01-03' as date))) - | and ss_sold_date_sk = d_date_sk - | group by i_item_id), - | cs_items as - | (select i_item_id item_id - | ,sum(cs_ext_sales_price) cs_item_rev - | from catalog_sales, item, date_dim - | where cs_item_sk = i_item_sk - | and d_date in (select d_date - | from date_dim - | where d_week_seq = (select d_week_seq - | from date_dim - | where d_date = cast('2000-01-03' as date))) - | and cs_sold_date_sk = d_date_sk - | group by i_item_id), - | ws_items as - | (select i_item_id item_id, sum(ws_ext_sales_price) ws_item_rev - | from web_sales, item, date_dim - | where ws_item_sk = i_item_sk - | and d_date in (select d_date - | from date_dim - | where d_week_seq =(select d_week_seq - | from date_dim - | where d_date = cast('2000-01-03' as date))) - | and ws_sold_date_sk = d_date_sk - | group by i_item_id) - | select ss_items.item_id - | ,ss_item_rev - | ,ss_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ss_dev - | ,cs_item_rev - | ,cs_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 cs_dev - | ,ws_item_rev - | ,ws_item_rev/(ss_item_rev+cs_item_rev+ws_item_rev)/3 * 100 ws_dev - | ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average - | from ss_items,cs_items,ws_items - | where ss_items.item_id=cs_items.item_id - | and ss_items.item_id=ws_items.item_id - | and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev - | and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev - | and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev - | and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev - | and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev - | and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev - | order by ss_items.item_id, ss_item_rev - | limit 100 - |""".stripMargin), - Query("q59", - """ - |with wss as - | (select d_week_seq, - | ss_store_sk, - | sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, - | sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, - | sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, - | sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, - | sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, - | sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, - | sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales - | from store_sales,date_dim - | where d_date_sk = ss_sold_date_sk - | group by d_week_seq,ss_store_sk - | ) - | select s_store_name1,s_store_id1,d_week_seq1 - | ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 - | ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 - | ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 - | from - | (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 - | ,s_store_id 
s_store_id1,sun_sales sun_sales1 - | ,mon_sales mon_sales1,tue_sales tue_sales1 - | ,wed_sales wed_sales1,thu_sales thu_sales1 - | ,fri_sales fri_sales1,sat_sales sat_sales1 - | from wss,store,date_dim d - | where d.d_week_seq = wss.d_week_seq and - | ss_store_sk = s_store_sk and - | d_month_seq between 1212 and 1212 + 11) y, - | (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 - | ,s_store_id s_store_id2,sun_sales sun_sales2 - | ,mon_sales mon_sales2,tue_sales tue_sales2 - | ,wed_sales wed_sales2,thu_sales thu_sales2 - | ,fri_sales fri_sales2,sat_sales sat_sales2 - | from wss,store,date_dim d - | where d.d_week_seq = wss.d_week_seq and - | ss_store_sk = s_store_sk and - | d_month_seq between 1212+ 12 and 1212 + 23) x - | where s_store_id1=s_store_id2 - | and d_week_seq1=d_week_seq2-52 - | order by s_store_name1,s_store_id1,d_week_seq1 - | limit 100 - |""".stripMargin), - Query("q60", - """ - |with ss as ( - | select i_item_id,sum(ss_ext_sales_price) total_sales - | from store_sales, date_dim, customer_address, item - | where - | i_item_id in (select i_item_id from item where i_category in ('Music')) - | and ss_item_sk = i_item_sk - | and ss_sold_date_sk = d_date_sk - | and d_year = 1998 - | and d_moy = 9 - | and ss_addr_sk = ca_address_sk - | and ca_gmt_offset = -5 - | group by i_item_id), - | cs as ( - | select i_item_id,sum(cs_ext_sales_price) total_sales - | from catalog_sales, date_dim, customer_address, item - | where - | i_item_id in (select i_item_id from item where i_category in ('Music')) - | and cs_item_sk = i_item_sk - | and cs_sold_date_sk = d_date_sk - | and d_year = 1998 - | and d_moy = 9 - | and cs_bill_addr_sk = ca_address_sk - | and ca_gmt_offset = -5 - | group by i_item_id), - | ws as ( - | select i_item_id,sum(ws_ext_sales_price) total_sales - | from web_sales, date_dim, customer_address, item - | where - | i_item_id in (select i_item_id from item where i_category in ('Music')) - | and ws_item_sk = i_item_sk - | and ws_sold_date_sk = d_date_sk - | and d_year = 1998 - | and d_moy = 9 - | and ws_bill_addr_sk = ca_address_sk - | and ca_gmt_offset = -5 - | group by i_item_id) - | select i_item_id, sum(total_sales) total_sales - | from (select * from ss - | union all - | select * from cs - | union all - | select * from ws) tmp1 - | group by i_item_id - | order by i_item_id, total_sales - | limit 100 - |""".stripMargin), - Query("q61", - """ - |select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 - | from - | (select sum(ss_ext_sales_price) promotions - | from store_sales, store, promotion, date_dim, customer, customer_address, item - | where ss_sold_date_sk = d_date_sk - | and ss_store_sk = s_store_sk - | and ss_promo_sk = p_promo_sk - | and ss_customer_sk= c_customer_sk - | and ca_address_sk = c_current_addr_sk - | and ss_item_sk = i_item_sk - | and ca_gmt_offset = -5 - | and i_category = 'Jewelry' - | and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') - | and s_gmt_offset = -5 - | and d_year = 1998 - | and d_moy = 11) promotional_sales cross join - | (select sum(ss_ext_sales_price) total - | from store_sales, store, date_dim, customer, customer_address, item - | where ss_sold_date_sk = d_date_sk - | and ss_store_sk = s_store_sk - | and ss_customer_sk= c_customer_sk - | and ca_address_sk = c_current_addr_sk - | and ss_item_sk = i_item_sk - | and ca_gmt_offset = -5 - | and i_category = 'Jewelry' - | and s_gmt_offset = -5 - | and d_year = 1998 - | and d_moy = 11) all_sales - | order by promotions, 
total - | limit 100 - |""".stripMargin), - Query("q62", - """ - | select - | substr(w_warehouse_name,1,20) - | ,sm_type - | ,web_name - | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` - | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and - | (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` - | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and - | (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` - | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and - | (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` - | ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as `>120 days` - | from - | web_sales, warehouse, ship_mode, web_site, date_dim - | where - | d_month_seq between 1200 and 1200 + 11 - | and ws_ship_date_sk = d_date_sk - | and ws_warehouse_sk = w_warehouse_sk - | and ws_ship_mode_sk = sm_ship_mode_sk - | and ws_web_site_sk = web_site_sk - | group by - | substr(w_warehouse_name,1,20), sm_type, web_name - | order by - | substr(w_warehouse_name,1,20), sm_type, web_name - | limit 100 - |""".stripMargin), - Query("q63", - """ - | select * - | from (select i_manager_id - | ,sum(ss_sales_price) sum_sales - | ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales - | from item - | ,store_sales - | ,date_dim - | ,store - | where ss_item_sk = i_item_sk - | and ss_sold_date_sk = d_date_sk - | and ss_store_sk = s_store_sk - | and d_month_seq in (1200,1200+1,1200+2,1200+3,1200+4,1200+5,1200+6,1200+7, - | 1200+8,1200+9,1200+10,1200+11) - | and (( i_category in ('Books','Children','Electronics') - | and i_class in ('personal','portable','reference','self-help') - | and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', - | 'exportiunivamalg #9','scholaramalgamalg #9')) - | or( i_category in ('Women','Music','Men') - | and i_class in ('accessories','classical','fragrances','pants') - | and i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', - | 'importoamalg #1'))) - | group by i_manager_id, d_moy) tmp1 - | where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 - | order by i_manager_id - | ,avg_monthly_sales - | ,sum_sales - | limit 100 - |""".stripMargin), - Query("q64", - """ - | with cs_ui as - | (select cs_item_sk - | ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund - | from catalog_sales - | ,catalog_returns - | where cs_item_sk = cr_item_sk - | and cs_order_number = cr_order_number - | group by cs_item_sk - | having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), - | cross_sales as - | (select i_product_name product_name, i_item_sk item_sk, s_store_name store_name, s_zip store_zip, - | ad1.ca_street_number b_street_number, ad1.ca_street_name b_streen_name, ad1.ca_city b_city, - | ad1.ca_zip b_zip, ad2.ca_street_number c_street_number, ad2.ca_street_name c_street_name, - | ad2.ca_city c_city, ad2.ca_zip c_zip, d1.d_year as syear, d2.d_year as fsyear, d3.d_year s2year, - | count(*) cnt, sum(ss_wholesale_cost) s1, sum(ss_list_price) s2, sum(ss_coupon_amt) s3 - | FROM store_sales, store_returns, cs_ui, date_dim d1, date_dim d2, date_dim d3, - | store, customer, customer_demographics cd1, customer_demographics cd2, - | promotion, household_demographics hd1, household_demographics hd2, - | customer_address ad1, 
customer_address ad2, income_band ib1, income_band ib2, item - | WHERE ss_store_sk = s_store_sk AND - | ss_sold_date_sk = d1.d_date_sk AND - | ss_customer_sk = c_customer_sk AND - | ss_cdemo_sk= cd1.cd_demo_sk AND - | ss_hdemo_sk = hd1.hd_demo_sk AND - | ss_addr_sk = ad1.ca_address_sk and - | ss_item_sk = i_item_sk and - | ss_item_sk = sr_item_sk and - | ss_ticket_number = sr_ticket_number and - | ss_item_sk = cs_ui.cs_item_sk and - | c_current_cdemo_sk = cd2.cd_demo_sk AND - | c_current_hdemo_sk = hd2.hd_demo_sk AND - | c_current_addr_sk = ad2.ca_address_sk and - | c_first_sales_date_sk = d2.d_date_sk and - | c_first_shipto_date_sk = d3.d_date_sk and - | ss_promo_sk = p_promo_sk and - | hd1.hd_income_band_sk = ib1.ib_income_band_sk and - | hd2.hd_income_band_sk = ib2.ib_income_band_sk and - | cd1.cd_marital_status <> cd2.cd_marital_status and - | i_color in ('purple','burlywood','indian','spring','floral','medium') and - | i_current_price between 64 and 64 + 10 and - | i_current_price between 64 + 1 and 64 + 15 - | group by i_product_name, i_item_sk, s_store_name, s_zip, ad1.ca_street_number, - | ad1.ca_street_name, ad1.ca_city, ad1.ca_zip, ad2.ca_street_number, - | ad2.ca_street_name, ad2.ca_city, ad2.ca_zip, d1.d_year, d2.d_year, d3.d_year - | ) - | select cs1.product_name, - | cs1.store_name, - | cs1.store_zip, - | cs1.b_street_number, - | cs1.b_streen_name, - | cs1.b_city, - | cs1.b_zip, - | cs1.c_street_number, - | cs1.c_street_name, - | cs1.c_city, - | cs1.c_zip, - | cs1.syear as cs1_syear, - | cs1.cnt as cs1_cnt, - | cs1.s1 as cs1_s1, - | cs1.s2 as cs1_s2, - | cs1.s3 as cs1_s3, - | cs2.s1 as cs2_s1, - | cs2.s2 as cs2_s2, - | cs2.s3 as cs2_s3, - | cs2.syear as cs2_syear, - | cs2.cnt as cs2_cnt - | from cross_sales cs1,cross_sales cs2 - | where cs1.item_sk=cs2.item_sk and - | cs1.syear = 1999 and - | cs2.syear = 1999 + 1 and - | cs2.cnt <= cs1.cnt and - | cs1.store_name = cs2.store_name and - | cs1.store_zip = cs2.store_zip - | order by cs1.product_name, cs1.store_name, cs2_cnt - |""".stripMargin), - Query("q65", - """ - | select - | s_store_name, i_item_desc, sc.revenue, i_current_price, i_wholesale_cost, i_brand - | from store, item, - | (select ss_store_sk, avg(revenue) as ave - | from - | (select ss_store_sk, ss_item_sk, - | sum(ss_sales_price) as revenue - | from store_sales, date_dim - | where ss_sold_date_sk = d_date_sk and d_month_seq between 1176 and 1176+11 - | group by ss_store_sk, ss_item_sk) sa - | group by ss_store_sk) sb, - | (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue - | from store_sales, date_dim - | where ss_sold_date_sk = d_date_sk and d_month_seq between 1176 and 1176+11 - | group by ss_store_sk, ss_item_sk) sc - | where sb.ss_store_sk = sc.ss_store_sk and - | sc.revenue <= 0.1 * sb.ave and - | s_store_sk = sc.ss_store_sk and - | i_item_sk = sc.ss_item_sk - | order by s_store_name, i_item_desc - | limit 100 - |""".stripMargin), - Query("q66", - """ - |select w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, - | ship_carriers, year - | ,sum(jan_sales) as jan_sales - | ,sum(feb_sales) as feb_sales - | ,sum(mar_sales) as mar_sales - | ,sum(apr_sales) as apr_sales - | ,sum(may_sales) as may_sales - | ,sum(jun_sales) as jun_sales - | ,sum(jul_sales) as jul_sales - | ,sum(aug_sales) as aug_sales - | ,sum(sep_sales) as sep_sales - | ,sum(oct_sales) as oct_sales - | ,sum(nov_sales) as nov_sales - | ,sum(dec_sales) as dec_sales - | ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot - | ,sum(feb_sales/w_warehouse_sq_ft) 
as feb_sales_per_sq_foot - | ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot - | ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot - | ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot - | ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot - | ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot - | ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot - | ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot - | ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot - | ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot - | ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot - | ,sum(jan_net) as jan_net - | ,sum(feb_net) as feb_net - | ,sum(mar_net) as mar_net - | ,sum(apr_net) as apr_net - | ,sum(may_net) as may_net - | ,sum(jun_net) as jun_net - | ,sum(jul_net) as jul_net - | ,sum(aug_net) as aug_net - | ,sum(sep_net) as sep_net - | ,sum(oct_net) as oct_net - | ,sum(nov_net) as nov_net - | ,sum(dec_net) as dec_net - | from ( - | (select - | w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country - | ,concat('DHL', ',', 'BARIAN') as ship_carriers - | ,d_year as year - | ,sum(case when d_moy = 1 then ws_ext_sales_price * ws_quantity else 0 end) as jan_sales - | ,sum(case when d_moy = 2 then ws_ext_sales_price * ws_quantity else 0 end) as feb_sales - | ,sum(case when d_moy = 3 then ws_ext_sales_price * ws_quantity else 0 end) as mar_sales - | ,sum(case when d_moy = 4 then ws_ext_sales_price * ws_quantity else 0 end) as apr_sales - | ,sum(case when d_moy = 5 then ws_ext_sales_price * ws_quantity else 0 end) as may_sales - | ,sum(case when d_moy = 6 then ws_ext_sales_price * ws_quantity else 0 end) as jun_sales - | ,sum(case when d_moy = 7 then ws_ext_sales_price * ws_quantity else 0 end) as jul_sales - | ,sum(case when d_moy = 8 then ws_ext_sales_price * ws_quantity else 0 end) as aug_sales - | ,sum(case when d_moy = 9 then ws_ext_sales_price * ws_quantity else 0 end) as sep_sales - | ,sum(case when d_moy = 10 then ws_ext_sales_price * ws_quantity else 0 end) as oct_sales - | ,sum(case when d_moy = 11 then ws_ext_sales_price * ws_quantity else 0 end) as nov_sales - | ,sum(case when d_moy = 12 then ws_ext_sales_price * ws_quantity else 0 end) as dec_sales - | ,sum(case when d_moy = 1 then ws_net_paid * ws_quantity else 0 end) as jan_net - | ,sum(case when d_moy = 2 then ws_net_paid * ws_quantity else 0 end) as feb_net - | ,sum(case when d_moy = 3 then ws_net_paid * ws_quantity else 0 end) as mar_net - | ,sum(case when d_moy = 4 then ws_net_paid * ws_quantity else 0 end) as apr_net - | ,sum(case when d_moy = 5 then ws_net_paid * ws_quantity else 0 end) as may_net - | ,sum(case when d_moy = 6 then ws_net_paid * ws_quantity else 0 end) as jun_net - | ,sum(case when d_moy = 7 then ws_net_paid * ws_quantity else 0 end) as jul_net - | ,sum(case when d_moy = 8 then ws_net_paid * ws_quantity else 0 end) as aug_net - | ,sum(case when d_moy = 9 then ws_net_paid * ws_quantity else 0 end) as sep_net - | ,sum(case when d_moy = 10 then ws_net_paid * ws_quantity else 0 end) as oct_net - | ,sum(case when d_moy = 11 then ws_net_paid * ws_quantity else 0 end) as nov_net - | ,sum(case when d_moy = 12 then ws_net_paid * ws_quantity else 0 end) as dec_net - | from - | web_sales, warehouse, date_dim, time_dim, ship_mode - | where - | ws_warehouse_sk = w_warehouse_sk - | and ws_sold_date_sk = d_date_sk - | and ws_sold_time_sk = t_time_sk - | and ws_ship_mode_sk = sm_ship_mode_sk - | and d_year = 2001 - | and t_time between 
30838 and 30838+28800 - | and sm_carrier in ('DHL','BARIAN') - | group by - | w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year) - | union all - | (select w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country - | ,concat('DHL', ',', 'BARIAN') as ship_carriers - | ,d_year as year - | ,sum(case when d_moy = 1 then cs_sales_price * cs_quantity else 0 end) as jan_sales - | ,sum(case when d_moy = 2 then cs_sales_price * cs_quantity else 0 end) as feb_sales - | ,sum(case when d_moy = 3 then cs_sales_price * cs_quantity else 0 end) as mar_sales - | ,sum(case when d_moy = 4 then cs_sales_price * cs_quantity else 0 end) as apr_sales - | ,sum(case when d_moy = 5 then cs_sales_price * cs_quantity else 0 end) as may_sales - | ,sum(case when d_moy = 6 then cs_sales_price * cs_quantity else 0 end) as jun_sales - | ,sum(case when d_moy = 7 then cs_sales_price * cs_quantity else 0 end) as jul_sales - | ,sum(case when d_moy = 8 then cs_sales_price * cs_quantity else 0 end) as aug_sales - | ,sum(case when d_moy = 9 then cs_sales_price * cs_quantity else 0 end) as sep_sales - | ,sum(case when d_moy = 10 then cs_sales_price * cs_quantity else 0 end) as oct_sales - | ,sum(case when d_moy = 11 then cs_sales_price * cs_quantity else 0 end) as nov_sales - | ,sum(case when d_moy = 12 then cs_sales_price * cs_quantity else 0 end) as dec_sales - | ,sum(case when d_moy = 1 then cs_net_paid_inc_tax * cs_quantity else 0 end) as jan_net - | ,sum(case when d_moy = 2 then cs_net_paid_inc_tax * cs_quantity else 0 end) as feb_net - | ,sum(case when d_moy = 3 then cs_net_paid_inc_tax * cs_quantity else 0 end) as mar_net - | ,sum(case when d_moy = 4 then cs_net_paid_inc_tax * cs_quantity else 0 end) as apr_net - | ,sum(case when d_moy = 5 then cs_net_paid_inc_tax * cs_quantity else 0 end) as may_net - | ,sum(case when d_moy = 6 then cs_net_paid_inc_tax * cs_quantity else 0 end) as jun_net - | ,sum(case when d_moy = 7 then cs_net_paid_inc_tax * cs_quantity else 0 end) as jul_net - | ,sum(case when d_moy = 8 then cs_net_paid_inc_tax * cs_quantity else 0 end) as aug_net - | ,sum(case when d_moy = 9 then cs_net_paid_inc_tax * cs_quantity else 0 end) as sep_net - | ,sum(case when d_moy = 10 then cs_net_paid_inc_tax * cs_quantity else 0 end) as oct_net - | ,sum(case when d_moy = 11 then cs_net_paid_inc_tax * cs_quantity else 0 end) as nov_net - | ,sum(case when d_moy = 12 then cs_net_paid_inc_tax * cs_quantity else 0 end) as dec_net - | from - | catalog_sales, warehouse, date_dim, time_dim, ship_mode - | where - | cs_warehouse_sk = w_warehouse_sk - | and cs_sold_date_sk = d_date_sk - | and cs_sold_time_sk = t_time_sk - | and cs_ship_mode_sk = sm_ship_mode_sk - | and d_year = 2001 - | and t_time between 30838 AND 30838+28800 - | and sm_carrier in ('DHL','BARIAN') - | group by - | w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, d_year - | ) - | ) x - | group by - | w_warehouse_name, w_warehouse_sq_ft, w_city, w_county, w_state, w_country, - | ship_carriers, year - | order by w_warehouse_name - | limit 100 - |""".stripMargin), - Query("q67", - """ - | select * from - | (select i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy, s_store_id, - | sumsales, rank() over (partition by i_category order by sumsales desc) rk - | from - | (select i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy, - | s_store_id, sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales - | from store_sales, date_dim, store, item - | where 
ss_sold_date_sk=d_date_sk - | and ss_item_sk=i_item_sk - | and ss_store_sk = s_store_sk - | and d_month_seq between 1200 and 1200+11 - | group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, - | d_moy,s_store_id))dw1) dw2 - | where rk <= 100 - | order by - | i_category, i_class, i_brand, i_product_name, d_year, - | d_qoy, d_moy, s_store_id, sumsales, rk - | limit 100 - |""".stripMargin), - Query("q68", - """ - | select - | c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number, extended_price, - | extended_tax, list_price - | from (select - | ss_ticket_number, ss_customer_sk, ca_city bought_city, - | sum(ss_ext_sales_price) extended_price, - | sum(ss_ext_list_price) list_price, - | sum(ss_ext_tax) extended_tax - | from store_sales, date_dim, store, household_demographics, customer_address - | where store_sales.ss_sold_date_sk = date_dim.d_date_sk - | and store_sales.ss_store_sk = store.s_store_sk - | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk - | and store_sales.ss_addr_sk = customer_address.ca_address_sk - | and date_dim.d_dom between 1 and 2 - | and (household_demographics.hd_dep_count = 4 or - | household_demographics.hd_vehicle_count = 3) - | and date_dim.d_year in (1999,1999+1,1999+2) - | and store.s_city in ('Midway','Fairview') - | group by ss_ticket_number, ss_customer_sk, ss_addr_sk,ca_city) dn, - | customer, - | customer_address current_addr - | where ss_customer_sk = c_customer_sk - | and customer.c_current_addr_sk = current_addr.ca_address_sk - | and current_addr.ca_city <> bought_city - | order by c_last_name, ss_ticket_number - | limit 100 - |""".stripMargin), - Query("q69", - """ - | select - | cd_gender, cd_marital_status, cd_education_status, count(*) cnt1, - | cd_purchase_estimate, count(*) cnt2, cd_credit_rating, count(*) cnt3 - | from - | customer c,customer_address ca,customer_demographics - | where - | c.c_current_addr_sk = ca.ca_address_sk and - | ca_state in ('KY', 'GA', 'NM') and - | cd_demo_sk = c.c_current_cdemo_sk and - | exists (select * from store_sales, date_dim - | where c.c_customer_sk = ss_customer_sk and - | ss_sold_date_sk = d_date_sk and - | d_year = 2001 and - | d_moy between 4 and 4+2) and - | (not exists (select * from web_sales, date_dim - | where c.c_customer_sk = ws_bill_customer_sk and - | ws_sold_date_sk = d_date_sk and - | d_year = 2001 and - | d_moy between 4 and 4+2) and - | not exists (select * from catalog_sales, date_dim - | where c.c_customer_sk = cs_ship_customer_sk and - | cs_sold_date_sk = d_date_sk and - | d_year = 2001 and - | d_moy between 4 and 4+2)) - | group by cd_gender, cd_marital_status, cd_education_status, - | cd_purchase_estimate, cd_credit_rating - | order by cd_gender, cd_marital_status, cd_education_status, - | cd_purchase_estimate, cd_credit_rating - | limit 100 - |""".stripMargin), - Query("q70", - """ - | select - | sum(ss_net_profit) as total_sum, s_state, s_county - | ,grouping(s_state)+grouping(s_county) as lochierarchy - | ,rank() over ( - | partition by grouping(s_state)+grouping(s_county), - | case when grouping(s_county) = 0 then s_state end - | order by sum(ss_net_profit) desc) as rank_within_parent - | from - | store_sales, date_dim d1, store - | where - | d1.d_month_seq between 1200 and 1200+11 - | and d1.d_date_sk = ss_sold_date_sk - | and s_store_sk = ss_store_sk - | and s_state in - | (select s_state from - | (select s_state as s_state, - | rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking - | from store_sales, store, 
date_dim - | where d_month_seq between 1200 and 1200+11 - | and d_date_sk = ss_sold_date_sk - | and s_store_sk = ss_store_sk - | group by s_state) tmp1 - | where ranking <= 5) - | group by rollup(s_state,s_county) - | order by - | lochierarchy desc - | ,case when lochierarchy = 0 then s_state end - | ,rank_within_parent - | limit 100 - |""".stripMargin), - Query("q71", - """ - | select i_brand_id brand_id, i_brand brand,t_hour,t_minute, - | sum(ext_price) ext_price - | from item, - | (select - | ws_ext_sales_price as ext_price, - | ws_sold_date_sk as sold_date_sk, - | ws_item_sk as sold_item_sk, - | ws_sold_time_sk as time_sk - | from web_sales, date_dim - | where d_date_sk = ws_sold_date_sk - | and d_moy=11 - | and d_year=1999 - | union all - | select - | cs_ext_sales_price as ext_price, - | cs_sold_date_sk as sold_date_sk, - | cs_item_sk as sold_item_sk, - | cs_sold_time_sk as time_sk - | from catalog_sales, date_dim - | where d_date_sk = cs_sold_date_sk - | and d_moy=11 - | and d_year=1999 - | union all - | select - | ss_ext_sales_price as ext_price, - | ss_sold_date_sk as sold_date_sk, - | ss_item_sk as sold_item_sk, - | ss_sold_time_sk as time_sk - | from store_sales,date_dim - | where d_date_sk = ss_sold_date_sk - | and d_moy=11 - | and d_year=1999 - | ) tmp, time_dim - | where - | sold_item_sk = i_item_sk - | and i_manager_id=1 - | and time_sk = t_time_sk - | and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') - | group by i_brand, i_brand_id,t_hour,t_minute - | order by ext_price desc, i_brand_id - |""".stripMargin), - Query("q72", - """ - | select i_item_desc - | ,w_warehouse_name - | ,d1.d_week_seq - | ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo - | ,sum(case when p_promo_sk is not null then 1 else 0 end) promo - | ,count(*) total_cnt - | from catalog_sales - | join inventory on (cs_item_sk = inv_item_sk) - | join warehouse on (w_warehouse_sk=inv_warehouse_sk) - | join item on (i_item_sk = cs_item_sk) - | join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) - | join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) - | join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) - | join date_dim d2 on (inv_date_sk = d2.d_date_sk) - | join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) - | left outer join promotion on (cs_promo_sk=p_promo_sk) - | left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) - | where d1.d_week_seq = d2.d_week_seq - | and inv_quantity_on_hand < cs_quantity - | and d3.d_date > (cast(d1.d_date AS DATE) + interval '5' day) - | and hd_buy_potential = '>10000' - | and d1.d_year = 1999 - | and cd_marital_status = 'D' - | group by i_item_desc,w_warehouse_name,d1.d_week_seq - | order by total_cnt desc, i_item_desc, w_warehouse_name, d1.d_week_seq - | limit 100 - |""".stripMargin), - Query("q73", - """ - | select - | c_last_name, c_first_name, c_salutation, c_preferred_cust_flag, - | ss_ticket_number, cnt from - | (select ss_ticket_number, ss_customer_sk, count(*) cnt - | from store_sales,date_dim,store,household_demographics - | where store_sales.ss_sold_date_sk = date_dim.d_date_sk - | and store_sales.ss_store_sk = store.s_store_sk - | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk - | and date_dim.d_dom between 1 and 2 - | and (household_demographics.hd_buy_potential = '>10000' or - | household_demographics.hd_buy_potential = 'unknown') - | and household_demographics.hd_vehicle_count > 0 - | and case when household_demographics.hd_vehicle_count > 0 then - | 
household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 - | and date_dim.d_year in (1999,1999+1,1999+2) - | and store.s_county in ('Williamson County','Franklin Parish','Bronx County','Orange County') - | group by ss_ticket_number,ss_customer_sk) dj,customer - | where ss_customer_sk = c_customer_sk - | and cnt between 1 and 5 - | order by cnt desc, c_last_name asc - |""".stripMargin), - Query("q74", - """ - |with year_total as ( - | select - | c_customer_id customer_id, c_first_name customer_first_name, - | c_last_name customer_last_name, d_year as year, - | sum(ss_net_paid) year_total, 's' sale_type - | from - | customer, store_sales, date_dim - | where c_customer_sk = ss_customer_sk - | and ss_sold_date_sk = d_date_sk - | and d_year in (2001,2001+1) - | group by - | c_customer_id, c_first_name, c_last_name, d_year - | union all - | select - | c_customer_id customer_id, c_first_name customer_first_name, - | c_last_name customer_last_name, d_year as year, - | sum(ws_net_paid) year_total, 'w' sale_type - | from - | customer, web_sales, date_dim - | where c_customer_sk = ws_bill_customer_sk - | and ws_sold_date_sk = d_date_sk - | and d_year in (2001,2001+1) - | group by - | c_customer_id, c_first_name, c_last_name, d_year) - | select - | t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name - | from - | year_total t_s_firstyear, year_total t_s_secyear, - | year_total t_w_firstyear, year_total t_w_secyear - | where t_s_secyear.customer_id = t_s_firstyear.customer_id - | and t_s_firstyear.customer_id = t_w_secyear.customer_id - | and t_s_firstyear.customer_id = t_w_firstyear.customer_id - | and t_s_firstyear.sale_type = 's' - | and t_w_firstyear.sale_type = 'w' - | and t_s_secyear.sale_type = 's' - | and t_w_secyear.sale_type = 'w' - | and t_s_firstyear.year = 2001 - | and t_s_secyear.year = 2001+1 - | and t_w_firstyear.year = 2001 - | and t_w_secyear.year = 2001+1 - | and t_s_firstyear.year_total > 0 - | and t_w_firstyear.year_total > 0 - | and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end - | > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end - | order by 1, 1, 1 - | limit 100 - |""".stripMargin), - Query("q75", - """ - |WITH all_sales AS ( - | SELECT - | d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, - | SUM(sales_cnt) AS sales_cnt, SUM(sales_amt) AS sales_amt - | FROM ( - | SELECT - | d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, - | cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt, - | cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt - | FROM catalog_sales - | JOIN item ON i_item_sk=cs_item_sk - | JOIN date_dim ON d_date_sk=cs_sold_date_sk - | LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number - | AND cs_item_sk=cr_item_sk) - | WHERE i_category='Books' - | UNION - | SELECT - | d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, - | ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt, - | ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt - | FROM store_sales - | JOIN item ON i_item_sk=ss_item_sk - | JOIN date_dim ON d_date_sk=ss_sold_date_sk - | LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number - | AND ss_item_sk=sr_item_sk) - | WHERE i_category='Books' - | UNION - | SELECT - | d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id, - | ws_quantity - COALESCE(wr_return_quantity,0) AS 
sales_cnt, - | ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt - | FROM web_sales - | JOIN item ON i_item_sk=ws_item_sk - | JOIN date_dim ON d_date_sk=ws_sold_date_sk - | LEFT JOIN web_returns ON (ws_order_number=wr_order_number - | AND ws_item_sk=wr_item_sk) - | WHERE i_category='Books') sales_detail - | GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) - | SELECT - | prev_yr.d_year AS prev_year, curr_yr.d_year AS year, curr_yr.i_brand_id, - | curr_yr.i_class_id, curr_yr.i_category_id, curr_yr.i_manufact_id, - | prev_yr.sales_cnt AS prev_yr_cnt, curr_yr.sales_cnt AS curr_yr_cnt, - | curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff, - | curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff - | FROM all_sales curr_yr, all_sales prev_yr - | WHERE curr_yr.i_brand_id=prev_yr.i_brand_id - | AND curr_yr.i_class_id=prev_yr.i_class_id - | AND curr_yr.i_category_id=prev_yr.i_category_id - | AND curr_yr.i_manufact_id=prev_yr.i_manufact_id - | AND curr_yr.d_year=2002 - | AND prev_yr.d_year=2002-1 - | AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 - | ORDER BY sales_cnt_diff - | LIMIT 100 - |""".stripMargin), - Query("q76", - """ - | SELECT - | channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, - | SUM(ext_sales_price) sales_amt - | FROM( - | SELECT - | 'store' as channel, ss_store_sk col_name, d_year, d_qoy, i_category, - | ss_ext_sales_price ext_sales_price - | FROM store_sales, item, date_dim - | WHERE ss_store_sk IS NULL - | AND ss_sold_date_sk=d_date_sk - | AND ss_item_sk=i_item_sk - | UNION ALL - | SELECT - | 'web' as channel, ws_ship_customer_sk col_name, d_year, d_qoy, i_category, - | ws_ext_sales_price ext_sales_price - | FROM web_sales, item, date_dim - | WHERE ws_ship_customer_sk IS NULL - | AND ws_sold_date_sk=d_date_sk - | AND ws_item_sk=i_item_sk - | UNION ALL - | SELECT - | 'catalog' as channel, cs_ship_addr_sk col_name, d_year, d_qoy, i_category, - | cs_ext_sales_price ext_sales_price - | FROM catalog_sales, item, date_dim - | WHERE cs_ship_addr_sk IS NULL - | AND cs_sold_date_sk=d_date_sk - | AND cs_item_sk=i_item_sk) foo - | GROUP BY channel, col_name, d_year, d_qoy, i_category - | ORDER BY channel, col_name, d_year, d_qoy, i_category - | limit 100 - |""".stripMargin), - Query("q77", - """ - | with ss as - | (select s_store_sk, sum(ss_ext_sales_price) as sales, sum(ss_net_profit) as profit - | from store_sales, date_dim, store - | where ss_sold_date_sk = d_date_sk - | and d_date between cast('2000-08-23' as date) and - | (cast('2000-08-23' as date) + interval '30' day) - | and ss_store_sk = s_store_sk - | group by s_store_sk), - | sr as - | (select s_store_sk, sum(sr_return_amt) as returns, sum(sr_net_loss) as profit_loss - | from store_returns, date_dim, store - | where sr_returned_date_sk = d_date_sk - | and d_date between cast('2000-08-23' as date) and - | (cast('2000-08-23' as date) + interval '30' day) - | and sr_store_sk = s_store_sk - | group by s_store_sk), - | cs as - | (select cs_call_center_sk, sum(cs_ext_sales_price) as sales, sum(cs_net_profit) as profit - | from catalog_sales, date_dim - | where cs_sold_date_sk = d_date_sk - | and d_date between cast('2000-08-23' as date) and - | (cast('2000-08-23' as date) + interval '30' day) - | group by cs_call_center_sk), - | cr as - | (select cr_call_center_sk, sum(cr_return_amount) as returns, sum(cr_net_loss) as profit_loss - | from catalog_returns, date_dim - | where cr_returned_date_sk = d_date_sk - | and d_date between 
cast('2000-08-23' as date) and - | (cast('2000-08-23' as date) + interval '30' day) - | group by cr_call_center_sk), - | ws as - | (select wp_web_page_sk, sum(ws_ext_sales_price) as sales, sum(ws_net_profit) as profit - | from web_sales, date_dim, web_page - | where ws_sold_date_sk = d_date_sk - | and d_date between cast('2000-08-23' as date) and - | (cast('2000-08-23' as date) + interval '30' day) - | and ws_web_page_sk = wp_web_page_sk - | group by wp_web_page_sk), - | wr as - | (select wp_web_page_sk, sum(wr_return_amt) as returns, sum(wr_net_loss) as profit_loss - | from web_returns, date_dim, web_page - | where wr_returned_date_sk = d_date_sk - | and d_date between cast('2000-08-23' as date) and - | (cast('2000-08-23' as date) + interval '30' day) - | and wr_web_page_sk = wp_web_page_sk - | group by wp_web_page_sk) - | select channel, id, sum(sales) as sales, sum(returns) as returns, sum(profit) as profit - | from - | (select - | 'store channel' as channel, ss.s_store_sk as id, sales, - | coalesce(returns, 0) as returns, (profit - coalesce(profit_loss,0)) as profit - | from ss left join sr - | on ss.s_store_sk = sr.s_store_sk - | union all - | select - | 'catalog channel' as channel, cs_call_center_sk as id, sales, - | returns, (profit - profit_loss) as profit - | from cs cross join cr - | union all - | select - | 'web channel' as channel, ws.wp_web_page_sk as id, sales, - | coalesce(returns, 0) returns, (profit - coalesce(profit_loss,0)) as profit - | from ws left join wr - | on ws.wp_web_page_sk = wr.wp_web_page_sk - | ) x - | group by rollup(channel, id) - | order by channel, id - | limit 100 - |""".stripMargin), - Query("q78", - """ - | with ws as - | (select d_year AS ws_sold_year, ws_item_sk, - | ws_bill_customer_sk ws_customer_sk, - | sum(ws_quantity) ws_qty, - | sum(ws_wholesale_cost) ws_wc, - | sum(ws_sales_price) ws_sp - | from web_sales - | left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk - | join date_dim on ws_sold_date_sk = d_date_sk - | where wr_order_number is null - | group by d_year, ws_item_sk, ws_bill_customer_sk - | ), - | cs as - | (select d_year AS cs_sold_year, cs_item_sk, - | cs_bill_customer_sk cs_customer_sk, - | sum(cs_quantity) cs_qty, - | sum(cs_wholesale_cost) cs_wc, - | sum(cs_sales_price) cs_sp - | from catalog_sales - | left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk - | join date_dim on cs_sold_date_sk = d_date_sk - | where cr_order_number is null - | group by d_year, cs_item_sk, cs_bill_customer_sk - | ), - | ss as - | (select d_year AS ss_sold_year, ss_item_sk, - | ss_customer_sk, - | sum(ss_quantity) ss_qty, - | sum(ss_wholesale_cost) ss_wc, - | sum(ss_sales_price) ss_sp - | from store_sales - | left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk - | join date_dim on ss_sold_date_sk = d_date_sk - | where sr_ticket_number is null - | group by d_year, ss_item_sk, ss_customer_sk - | ) - | select - | ss_sold_year, ss_item_sk, ss_customer_sk, - | round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, - | ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, - | coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, - | coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, - | coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price - | from ss - | left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) - | left join cs on (cs_sold_year=ss_sold_year and 
cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) - | where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2000 - | order by - | ss_sold_year, ss_item_sk, ss_customer_sk, - | ss_qty desc, ss_wc desc, ss_sp desc, - | other_chan_qty, - | other_chan_wholesale_cost, - | other_chan_sales_price, - | round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) - | limit 100 - |""".stripMargin), - Query("q79", - """ - |select - | c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit - | from - | (select ss_ticket_number - | ,ss_customer_sk - | ,store.s_city - | ,sum(ss_coupon_amt) amt - | ,sum(ss_net_profit) profit - | from store_sales,date_dim,store,household_demographics - | where store_sales.ss_sold_date_sk = date_dim.d_date_sk - | and store_sales.ss_store_sk = store.s_store_sk - | and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk - | and (household_demographics.hd_dep_count = 6 or - | household_demographics.hd_vehicle_count > 2) - | and date_dim.d_dow = 1 - | and date_dim.d_year in (1999,1999+1,1999+2) - | and store.s_number_employees between 200 and 295 - | group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer - | where ss_customer_sk = c_customer_sk - | order by c_last_name,c_first_name,substr(s_city,1,30), profit - | limit 100 - |""".stripMargin), - Query("q80", - """ - |with ssr as - | (select s_store_id as store_id, - | sum(ss_ext_sales_price) as sales, - | sum(coalesce(sr_return_amt, 0)) as returns, - | sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit - | from store_sales left outer join store_returns on - | (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), - | date_dim, store, item, promotion - | where ss_sold_date_sk = d_date_sk - | and d_date between cast('2000-08-23' as date) - | and (cast('2000-08-23' as date) + interval '30' day) - | and ss_store_sk = s_store_sk - | and ss_item_sk = i_item_sk - | and i_current_price > 50 - | and ss_promo_sk = p_promo_sk - | and p_channel_tv = 'N' - | group by s_store_id), - | csr as - | (select cp_catalog_page_id as catalog_page_id, - | sum(cs_ext_sales_price) as sales, - | sum(coalesce(cr_return_amount, 0)) as returns, - | sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit - | from catalog_sales left outer join catalog_returns on - | (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), - | date_dim, catalog_page, item, promotion - | where cs_sold_date_sk = d_date_sk - | and d_date between cast('2000-08-23' as date) - | and (cast('2000-08-23' as date) + interval '30' day) - | and cs_catalog_page_sk = cp_catalog_page_sk - | and cs_item_sk = i_item_sk - | and i_current_price > 50 - | and cs_promo_sk = p_promo_sk - | and p_channel_tv = 'N' - | group by cp_catalog_page_id), - | wsr as - | (select web_site_id, - | sum(ws_ext_sales_price) as sales, - | sum(coalesce(wr_return_amt, 0)) as returns, - | sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit - | from web_sales left outer join web_returns on - | (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), - | date_dim, web_site, item, promotion - | where ws_sold_date_sk = d_date_sk - | and d_date between cast('2000-08-23' as date) - | and (cast('2000-08-23' as date) + interval '30' day) - | and ws_web_site_sk = web_site_sk - | and ws_item_sk = i_item_sk - | and i_current_price > 50 - | and ws_promo_sk = p_promo_sk - | and p_channel_tv = 'N' - | group by web_site_id) - | select channel, id, sum(sales) as sales, sum(returns) as returns, sum(profit) as profit - | from (select - | 'store 
channel' as channel, concat('store', store_id) as id, sales, returns, profit - | from ssr - | union all - | select - | 'catalog channel' as channel, concat('catalog_page', catalog_page_id) as id, - | sales, returns, profit - | from csr - | union all - | select - | 'web channel' as channel, concat('web_site', web_site_id) as id, sales, returns, profit - | from wsr) x - | group by rollup (channel, id) - | order by channel, id - | limit 100 - |""".stripMargin), - Query("q81", - """ - | with customer_total_return as - | (select - | cr_returning_customer_sk as ctr_customer_sk, ca_state as ctr_state, - | sum(cr_return_amt_inc_tax) as ctr_total_return - | from catalog_returns, date_dim, customer_address - | where cr_returned_date_sk = d_date_sk - | and d_year = 2000 - | and cr_returning_addr_sk = ca_address_sk - | group by cr_returning_customer_sk, ca_state ) - | select - | c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name, - | ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country, - | ca_gmt_offset,ca_location_type,ctr_total_return - | from customer_total_return ctr1, customer_address, customer - | where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 - | from customer_total_return ctr2 - | where ctr1.ctr_state = ctr2.ctr_state) - | and ca_address_sk = c_current_addr_sk - | and ca_state = 'GA' - | and ctr1.ctr_customer_sk = c_customer_sk - | order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name - | ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset - | ,ca_location_type,ctr_total_return - | limit 100 - |""".stripMargin), - Query("q82", - """ - | select i_item_id, i_item_desc, i_current_price - | from item, inventory, date_dim, store_sales - | where i_current_price between 62 and 62+30 - | and inv_item_sk = i_item_sk - | and d_date_sk=inv_date_sk - | and d_date between cast('2000-05-25' as date) and (cast('2000-05-25' as date) + interval '60' day) - | and i_manufact_id in (129, 270, 821, 423) - | and inv_quantity_on_hand between 100 and 500 - | and ss_item_sk = i_item_sk - | group by i_item_id,i_item_desc,i_current_price - | order by i_item_id - | limit 100 - |""".stripMargin), - Query("q83", - """ - | with sr_items as - | (select i_item_id item_id, sum(sr_return_quantity) sr_item_qty - | from store_returns, item, date_dim - | where sr_item_sk = i_item_sk - | and d_date in (select d_date from date_dim where d_week_seq in - | (select d_week_seq from date_dim where d_date in (cast('2000-06-30' as date),cast('2000-09-27' as date),cast('2000-11-17' as date)))) - | and sr_returned_date_sk = d_date_sk - | group by i_item_id), - | cr_items as - | (select i_item_id item_id, sum(cr_return_quantity) cr_item_qty - | from catalog_returns, item, date_dim - | where cr_item_sk = i_item_sk - | and d_date in (select d_date from date_dim where d_week_seq in - | (select d_week_seq from date_dim where d_date in (cast('2000-06-30' as date),cast('2000-09-27' as date),cast('2000-11-17' as date)))) - | and cr_returned_date_sk = d_date_sk - | group by i_item_id), - | wr_items as - | (select i_item_id item_id, sum(wr_return_quantity) wr_item_qty - | from web_returns, item, date_dim - | where wr_item_sk = i_item_sk and d_date in - | (select d_date from date_dim where d_week_seq in - | (select d_week_seq from date_dim where d_date in (cast('2000-06-30' as date),cast('2000-09-27' as date),cast('2000-11-17' as date)))) - | and wr_returned_date_sk = d_date_sk - | group by i_item_id) - 
| select sr_items.item_id - | ,sr_item_qty - | ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev - | ,cr_item_qty - | ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev - | ,wr_item_qty - | ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev - | ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average - | from sr_items, cr_items, wr_items - | where sr_items.item_id=cr_items.item_id - | and sr_items.item_id=wr_items.item_id - | order by sr_items.item_id, sr_item_qty - | limit 100 - |""".stripMargin), - Query("q84", - """ - | - | select c_customer_id as customer_id - | ,coalesce(c_last_name,'') + ', ' + coalesce(c_first_name,'') as customername - | from customer - | ,customer_address - | ,customer_demographics - | ,household_demographics - | ,income_band - | ,store_returns - | where ca_city = 'Edgewood' - | and c_current_addr_sk = ca_address_sk - | and ib_lower_bound >= 38128 - | and ib_upper_bound <= 38128 + 50000 - | and ib_income_band_sk = hd_income_band_sk - | and cd_demo_sk = c_current_cdemo_sk - | and hd_demo_sk = c_current_hdemo_sk - | and sr_cdemo_sk = cd_demo_sk - | order by c_customer_id - | limit 100 - |""".stripMargin), - Query("q85", - """ - |select - | substr(r_reason_desc,1,20), avg(ws_quantity), avg(wr_refunded_cash), avg(wr_fee) - | from web_sales, web_returns, web_page, customer_demographics cd1, - | customer_demographics cd2, customer_address, date_dim, reason - | where ws_web_page_sk = wp_web_page_sk - | and ws_item_sk = wr_item_sk - | and ws_order_number = wr_order_number - | and ws_sold_date_sk = d_date_sk and d_year = 2000 - | and cd1.cd_demo_sk = wr_refunded_cdemo_sk - | and cd2.cd_demo_sk = wr_returning_cdemo_sk - | and ca_address_sk = wr_refunded_addr_sk - | and r_reason_sk = wr_reason_sk - | and - | ( - | ( - | cd1.cd_marital_status = 'M' - | and - | cd1.cd_marital_status = cd2.cd_marital_status - | and - | cd1.cd_education_status = 'Advanced Degree' - | and - | cd1.cd_education_status = cd2.cd_education_status - | and - | ws_sales_price between 100.00 and 150.00 - | ) - | or - | ( - | cd1.cd_marital_status = 'S' - | and - | cd1.cd_marital_status = cd2.cd_marital_status - | and - | cd1.cd_education_status = 'College' - | and - | cd1.cd_education_status = cd2.cd_education_status - | and - | ws_sales_price between 50.00 and 100.00 - | ) - | or - | ( - | cd1.cd_marital_status = 'W' - | and - | cd1.cd_marital_status = cd2.cd_marital_status - | and - | cd1.cd_education_status = '2 yr Degree' - | and - | cd1.cd_education_status = cd2.cd_education_status - | and - | ws_sales_price between 150.00 and 200.00 - | ) - | ) - | and - | ( - | ( - | ca_country = 'United States' - | and - | ca_state in ('IN', 'OH', 'NJ') - | and ws_net_profit between 100 and 200 - | ) - | or - | ( - | ca_country = 'United States' - | and - | ca_state in ('WI', 'CT', 'KY') - | and ws_net_profit between 150 and 300 - | ) - | or - | ( - | ca_country = 'United States' - | and - | ca_state in ('LA', 'IA', 'AR') - | and ws_net_profit between 50 and 250 - | ) - | ) - | group by r_reason_desc - | order by substr(r_reason_desc,1,20) - | ,avg(ws_quantity) - | ,avg(wr_refunded_cash) - | ,avg(wr_fee) - | limit 100 - |""".stripMargin), - Query("q86", - """ - | select sum(ws_net_paid) as total_sum, i_category, i_class, - | grouping(i_category)+grouping(i_class) as lochierarchy, - | rank() over ( - | partition by grouping(i_category)+grouping(i_class), - | case when grouping(i_class) = 0 then i_category end - | order by sum(ws_net_paid) desc) as rank_within_parent - | 
from - | web_sales, date_dim d1, item - | where - | d1.d_month_seq between 1200 and 1200+11 - | and d1.d_date_sk = ws_sold_date_sk - | and i_item_sk = ws_item_sk - | group by rollup(i_category,i_class) - | order by - | lochierarchy desc, - | case when lochierarchy = 0 then i_category end, - | rank_within_parent - | limit 100 - |""".stripMargin), - Query("q87", - """ - | select count(*) - | from ((select distinct c_last_name, c_first_name, d_date - | from store_sales, date_dim, customer - | where store_sales.ss_sold_date_sk = date_dim.d_date_sk - | and store_sales.ss_customer_sk = customer.c_customer_sk - | and d_month_seq between 1200 and 1200+11) - | except - | (select distinct c_last_name, c_first_name, d_date - | from catalog_sales, date_dim, customer - | where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk - | and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk - | and d_month_seq between 1200 and 1200+11) - | except - | (select distinct c_last_name, c_first_name, d_date - | from web_sales, date_dim, customer - | where web_sales.ws_sold_date_sk = date_dim.d_date_sk - | and web_sales.ws_bill_customer_sk = customer.c_customer_sk - | and d_month_seq between 1200 and 1200+11) - |) cool_cust - |""".stripMargin), - Query("q88", - """ - | select * - | from - | (select count(*) h8_30_to_9 - | from store_sales, household_demographics , time_dim, store - | where ss_sold_time_sk = time_dim.t_time_sk - | and ss_hdemo_sk = household_demographics.hd_demo_sk - | and ss_store_sk = s_store_sk - | and time_dim.t_hour = 8 - | and time_dim.t_minute >= 30 - | and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or - | (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or - | (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) - | and store.s_store_name = 'ese') s1 cross join - | (select count(*) h9_to_9_30 - | from store_sales, household_demographics , time_dim, store - | where ss_sold_time_sk = time_dim.t_time_sk - | and ss_hdemo_sk = household_demographics.hd_demo_sk - | and ss_store_sk = s_store_sk - | and time_dim.t_hour = 9 - | and time_dim.t_minute < 30 - | and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or - | (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or - | (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) - | and store.s_store_name = 'ese') s2 cross join - | (select count(*) h9_30_to_10 - | from store_sales, household_demographics , time_dim, store - | where ss_sold_time_sk = time_dim.t_time_sk - | and ss_hdemo_sk = household_demographics.hd_demo_sk - | and ss_store_sk = s_store_sk - | and time_dim.t_hour = 9 - | and time_dim.t_minute >= 30 - | and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or - | (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or - | (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) - | and store.s_store_name = 'ese') s3 cross join - | (select count(*) h10_to_10_30 - | from store_sales, household_demographics , time_dim, store - | where ss_sold_time_sk = time_dim.t_time_sk - | and ss_hdemo_sk = household_demographics.hd_demo_sk - | and ss_store_sk = s_store_sk - | and time_dim.t_hour = 10 - | and time_dim.t_minute < 30 - | and ((household_demographics.hd_dep_count = 4 and 
household_demographics.hd_vehicle_count<=4+2) or - | (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or - | (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) - | and store.s_store_name = 'ese') s4 cross join - | (select count(*) h10_30_to_11 - | from store_sales, household_demographics , time_dim, store - | where ss_sold_time_sk = time_dim.t_time_sk - | and ss_hdemo_sk = household_demographics.hd_demo_sk - | and ss_store_sk = s_store_sk - | and time_dim.t_hour = 10 - | and time_dim.t_minute >= 30 - | and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or - | (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or - | (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) - | and store.s_store_name = 'ese') s5 cross join - | (select count(*) h11_to_11_30 - | from store_sales, household_demographics , time_dim, store - | where ss_sold_time_sk = time_dim.t_time_sk - | and ss_hdemo_sk = household_demographics.hd_demo_sk - | and ss_store_sk = s_store_sk - | and time_dim.t_hour = 11 - | and time_dim.t_minute < 30 - | and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or - | (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or - | (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) - | and store.s_store_name = 'ese') s6 cross join - | (select count(*) h11_30_to_12 - | from store_sales, household_demographics , time_dim, store - | where ss_sold_time_sk = time_dim.t_time_sk - | and ss_hdemo_sk = household_demographics.hd_demo_sk - | and ss_store_sk = s_store_sk - | and time_dim.t_hour = 11 - | and time_dim.t_minute >= 30 - | and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or - | (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or - | (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) - | and store.s_store_name = 'ese') s7 cross join - | (select count(*) h12_to_12_30 - | from store_sales, household_demographics , time_dim, store - | where ss_sold_time_sk = time_dim.t_time_sk - | and ss_hdemo_sk = household_demographics.hd_demo_sk - | and ss_store_sk = s_store_sk - | and time_dim.t_hour = 12 - | and time_dim.t_minute < 30 - | and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or - | (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or - | (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) - | and store.s_store_name = 'ese') s8 - |""".stripMargin), - Query("q89", - """ - | - | select * - | from( - | select i_category, i_class, i_brand, - | s_store_name, s_company_name, - | d_moy, - | sum(ss_sales_price) sum_sales, - | avg(sum(ss_sales_price)) over - | (partition by i_category, i_brand, s_store_name, s_company_name) - | avg_monthly_sales - | from item, store_sales, date_dim, store - | where ss_item_sk = i_item_sk and - | ss_sold_date_sk = d_date_sk and - | ss_store_sk = s_store_sk and - | d_year in (1999) and - | ((i_category in ('Books','Electronics','Sports') and - | i_class in ('computers','stereo','football')) - | or (i_category in ('Men','Jewelry','Women') and - | i_class in ('shirts','birdal','dresses'))) - | group by 
i_category, i_class, i_brand, - | s_store_name, s_company_name, d_moy) tmp1 - | where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 - | order by sum_sales - avg_monthly_sales, s_store_name - | limit 100 - |""".stripMargin), - Query("q90", - """ - | select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio - | from ( select count(*) amc - | from web_sales, household_demographics , time_dim, web_page - | where ws_sold_time_sk = time_dim.t_time_sk - | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk - | and ws_web_page_sk = web_page.wp_web_page_sk - | and time_dim.t_hour between 8 and 8+1 - | and household_demographics.hd_dep_count = 6 - | and web_page.wp_char_count between 5000 and 5200) at cross join - | ( select count(*) pmc - | from web_sales, household_demographics , time_dim, web_page - | where ws_sold_time_sk = time_dim.t_time_sk - | and ws_ship_hdemo_sk = household_demographics.hd_demo_sk - | and ws_web_page_sk = web_page.wp_web_page_sk - | and time_dim.t_hour between 19 and 19+1 - | and household_demographics.hd_dep_count = 6 - | and web_page.wp_char_count between 5000 and 5200) pt - | order by am_pm_ratio - | limit 100 - |""".stripMargin), - Query("q91", - """ - | - | select - | cc_call_center_id Call_Center, cc_name Call_Center_Name, cc_manager Manager, - | sum(cr_net_loss) Returns_Loss - | from - | call_center, catalog_returns, date_dim, customer, customer_address, - | customer_demographics, household_demographics - | where - | cr_call_center_sk = cc_call_center_sk - | and cr_returned_date_sk = d_date_sk - | and cr_returning_customer_sk = c_customer_sk - | and cd_demo_sk = c_current_cdemo_sk - | and hd_demo_sk = c_current_hdemo_sk - | and ca_address_sk = c_current_addr_sk - | and d_year = 1998 - | and d_moy = 11 - | and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') - | or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) - | and hd_buy_potential like 'Unknown%' - | and ca_gmt_offset = -7 - | group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status - | order by sum(cr_net_loss) desc - |""".stripMargin), - Query("q92", - """ - | select sum(ws_ext_discount_amt) as `Excess Discount Amount` - | from web_sales, item, date_dim - | where i_manufact_id = 350 - | and i_item_sk = ws_item_sk - | and d_date between cast ('2000-01-27' as date) and (cast('2000-01-27' as date) + interval '90' day) - | and d_date_sk = ws_sold_date_sk - | and ws_ext_discount_amt > - | ( - | SELECT 1.3 * avg(ws_ext_discount_amt) - | FROM web_sales, date_dim - | WHERE ws_item_sk = i_item_sk - | and d_date between cast ('2000-01-27' as date) and (cast('2000-01-27' as date) + interval '90' day) - | and d_date_sk = ws_sold_date_sk - | ) - | order by sum(ws_ext_discount_amt) - | limit 100 - |""".stripMargin), - Query("q93", - """ - | select ss_customer_sk, sum(act_sales) sumsales - | from (select - | ss_item_sk, ss_ticket_number, ss_customer_sk, - | case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price - | else (ss_quantity*ss_sales_price) end act_sales - | from store_sales - | left outer join store_returns - | on (sr_item_sk = ss_item_sk and sr_ticket_number = ss_ticket_number), - | reason - | where sr_reason_sk = r_reason_sk and r_reason_desc = 'reason 28') t - | group by ss_customer_sk - | order by sumsales, ss_customer_sk - | limit 100 - |""".stripMargin), - Query("q94", - """ - | select - | count(distinct 
ws_order_number) as `order count` - | ,sum(ws_ext_ship_cost) as `total shipping cost` - | ,sum(ws_net_profit) as `total net profit` - | from - | web_sales ws1, date_dim, customer_address, web_site - | where - | d_date between cast('1999-02-01' as date) and - | (cast('1999-02-01' as date) + interval '60' day) - | and ws1.ws_ship_date_sk = d_date_sk - | and ws1.ws_ship_addr_sk = ca_address_sk - | and ca_state = 'IL' - | and ws1.ws_web_site_sk = web_site_sk - | and web_company_name = 'pri' - | and exists (select * - | from web_sales ws2 - | where ws1.ws_order_number = ws2.ws_order_number - | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) - | and not exists(select * - | from web_returns wr1 - | where ws1.ws_order_number = wr1.wr_order_number) - | order by count(distinct ws_order_number) - | limit 100 - |""".stripMargin), - Query("q95", - """ - | with ws_wh as - | (select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 - | from web_sales ws1,web_sales ws2 - | where ws1.ws_order_number = ws2.ws_order_number - | and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) - | select - | count(distinct ws_order_number) as `order count` - | ,sum(ws_ext_ship_cost) as `total shipping cost` - | ,sum(ws_net_profit) as `total net profit` - | from - | web_sales ws1, date_dim, customer_address, web_site - | where - | d_date between cast ('1999-02-01' as date) and - | (cast('1999-02-01' as date) + interval '60' day) - | and ws1.ws_ship_date_sk = d_date_sk - | and ws1.ws_ship_addr_sk = ca_address_sk - | and ca_state = 'IL' - | and ws1.ws_web_site_sk = web_site_sk - | and web_company_name = 'pri' - | and ws1.ws_order_number in (select ws_order_number - | from ws_wh) - | and ws1.ws_order_number in (select wr_order_number - | from web_returns,ws_wh - | where wr_order_number = ws_wh.ws_order_number) - | order by count(distinct ws_order_number) - | limit 100 - |""".stripMargin), - Query("q96", - """ - | select count(*) - | from store_sales, household_demographics, time_dim, store - | where ss_sold_time_sk = time_dim.t_time_sk - | and ss_hdemo_sk = household_demographics.hd_demo_sk - | and ss_store_sk = s_store_sk - | and time_dim.t_hour = 20 - | and time_dim.t_minute >= 30 - | and household_demographics.hd_dep_count = 7 - | and store.s_store_name = 'ese' - | order by count(*) - | limit 100 - |""".stripMargin), - Query("q97", - """ - | with ssci as ( - | select ss_customer_sk customer_sk, ss_item_sk item_sk - | from store_sales,date_dim - | where ss_sold_date_sk = d_date_sk - | and d_month_seq between 1200 and 1200 + 11 - | group by ss_customer_sk, ss_item_sk), - | csci as( - | select cs_bill_customer_sk customer_sk, cs_item_sk item_sk - | from catalog_sales,date_dim - | where cs_sold_date_sk = d_date_sk - | and d_month_seq between 1200 and 1200 + 11 - | group by cs_bill_customer_sk, cs_item_sk) - | select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only - | ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only - | ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog - | from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk - | and ssci.item_sk = csci.item_sk) - | limit 100 - |""".stripMargin), - Query("q98", - """ - |select i_item_desc, i_category, i_class, i_current_price - | ,sum(ss_ext_sales_price) as itemrevenue - | ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over - | (partition by i_class) as revenueratio - 
|from - | store_sales, item, date_dim - |where - | ss_item_sk = i_item_sk - | and i_category in ('Sports', 'Books', 'Home') - | and ss_sold_date_sk = d_date_sk - | and d_date between cast('1999-02-22' as date) - | and (cast('1999-02-22' as date) + interval '30' day) - |group by - | i_item_id, i_item_desc, i_category, i_class, i_current_price - |order by - | i_category, i_class, i_item_id, i_item_desc, revenueratio - |""".stripMargin), - Query("q99", - """ - | select - | substr(w_warehouse_name,1,20), sm_type, cc_name - | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days` - | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and - | (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days` - | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and - | (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days` - | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and - | (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days` - | ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days` - | from - | catalog_sales, warehouse, ship_mode, call_center, date_dim - | where - | d_month_seq between 1200 and 1200 + 11 - | and cs_ship_date_sk = d_date_sk - | and cs_warehouse_sk = w_warehouse_sk - | and cs_ship_mode_sk = sm_ship_mode_sk - | and cs_call_center_sk = cc_call_center_sk - | group by - | substr(w_warehouse_name,1,20), sm_type, cc_name - | order by substr(w_warehouse_name,1,20), sm_type, cc_name - | limit 100 - |""".stripMargin), - Query("ss_max", - """ - |select - | count(*) as total, - | count(ss_sold_date_sk) as not_null_total, - | count(distinct ss_sold_date_sk) as unique_days, - | max(ss_sold_date_sk) as max_ss_sold_date_sk, - | max(ss_sold_time_sk) as max_ss_sold_time_sk, - | max(ss_item_sk) as max_ss_item_sk, - | max(ss_customer_sk) as max_ss_customer_sk, - | max(ss_cdemo_sk) as max_ss_cdemo_sk, - | max(ss_hdemo_sk) as max_ss_hdemo_sk, - | max(ss_addr_sk) as max_ss_addr_sk, - | max(ss_store_sk) as max_ss_store_sk, - | max(ss_promo_sk) as max_ss_promo_sk - |from store_sales - |""".stripMargin), - Query("ss_maxb", - """ - |select - | count(*) as total, - | count(ss_sold_date_sk) as not_null_total, - | --count(distinct ss_sold_date_sk) as unique_days, - | max(ss_sold_date_sk) as max_ss_sold_date_sk, - | max(ss_sold_time_sk) as max_ss_sold_time_sk, - | max(ss_item_sk) as max_ss_item_sk, - | max(ss_customer_sk) as max_ss_customer_sk, - | max(ss_cdemo_sk) as max_ss_cdemo_sk, - | max(ss_hdemo_sk) as max_ss_hdemo_sk, - | max(ss_addr_sk) as max_ss_addr_sk, - | max(ss_store_sk) as max_ss_store_sk, - | max(ss_promo_sk) as max_ss_promo_sk - |from store_sales - |""".stripMargin) - ).map(q => (q.name, q)).toMap - - def query(name: String) = queries(name) - - def run(spark: SparkSession, name: String): DataFrame = - queries(name)(spark) -} - -// scalastyle:on line.size.limit - -object ConvertFiles { - /** - * Main method allows us to submit using spark-submit to perform conversions from CSV to - * Parquet or Orc. 
- */ - def main(arg: Array[String]): Unit = { - val conf = new FileConversionConf(arg) - val spark = SparkSession.builder.appName("TPC-DS Like File Conversion").getOrCreate() - conf.outputFormat() match { - case "parquet" => - csvToParquet( - spark = spark, - baseInput = conf.input(), - baseOutput = conf.output(), - coalesce = conf.coalesce, - repartition = conf.repartition, - writePartitioning = conf.withPartitioning(), - useDecimalType = conf.useDecimals()) - case "orc" => - csvToOrc( - spark = spark, - baseInput = conf.input(), - baseOutput = conf.output(), - coalesce = conf.coalesce, - repartition = conf.repartition, - writePartitioning = conf.withPartitioning(), - useDecimalType = conf.useDecimals()) - } - } -} - -class FileConversionConf(arguments: Seq[String]) extends ScallopConf(arguments) { - val input = opt[String](required = true) - val output = opt[String](required = true) - val outputFormat = opt[String](required = true) - val coalesce = propsLong[Int]("coalesce") - val repartition = propsLong[Int]("repartition") - val withPartitioning = opt[Boolean](default = Some(false)) - val useDecimals = opt[Boolean](default = Some(false)) - verify() - BenchUtils.validateCoalesceRepartition(coalesce, repartition) -} - diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeBench.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeBench.scala deleted file mode 100644 index 1795bdf6d77..00000000000 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeBench.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.nvidia.spark.rapids.tests.tpch - -import com.nvidia.spark.rapids.tests.common.BenchmarkSuite - -import org.apache.spark.sql.{DataFrame, SparkSession} - -class TpchLikeBench extends BenchmarkSuite { - override def name(): String = "TPC-H" - - override def shortName(): String = "tpch" - - override def setupAllParquet(spark: SparkSession, path: String): Unit = { - TpchLikeSpark.setupAllParquet(spark, path) - } - - override def setupAllCSV(spark: SparkSession, path: String): Unit = { - TpchLikeSpark.setupAllCSV(spark, path) - } - - override def setupAllOrc(spark: SparkSession, path: String): Unit = { - TpchLikeSpark.setupAllOrc(spark, path) - } - - override def createDataFrame(spark: SparkSession, query: String): DataFrame = { - getQuery(query)(spark) - } - - private def getQuery(query: String)(spark: SparkSession) = { - query match { - case "q1" => Q1Like(spark) - case "q2" => Q2Like(spark) - case "q3" => Q3Like(spark) - case "q4" => Q4Like(spark) - case "q5" => Q5Like(spark) - case "q6" => Q6Like(spark) - case "q7" => Q7Like(spark) - case "q8" => Q8Like(spark) - case "q9" => Q9Like(spark) - case "q10" => Q10Like(spark) - case "q11" => Q11Like(spark) - case "q12" => Q12Like(spark) - case "q13" => Q13Like(spark) - case "q14" => Q14Like(spark) - case "q15" => Q15Like(spark) - case "q16" => Q16Like(spark) - case "q17" => Q17Like(spark) - case "q18" => Q18Like(spark) - case "q19" => Q19Like(spark) - case "q20" => Q20Like(spark) - case "q21" => Q21Like(spark) - case "q22" => Q22Like(spark) - } - } -} diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeSpark.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeSpark.scala deleted file mode 100644 index 776f0ae14c6..00000000000 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeSpark.scala +++ /dev/null @@ -1,1190 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.nvidia.spark.rapids.tests.tpch - -import com.nvidia.spark.rapids.tests.common.BenchUtils -import com.nvidia.spark.rapids.tests.tpch.TpchLikeSpark.{csvToOrc, csvToParquet} -import org.rogach.scallop.ScallopConf - -import org.apache.spark.sql.{DataFrame, DataFrameWriter, Row, SparkSession} -import org.apache.spark.sql.types._ - -// scalastyle:off line.size.limit - -object TpchLikeSpark { - private def setupWrite( - df: DataFrame, - name: String, - coalesce: Map[String, Int], - repartition: Map[String, Int]): DataFrameWriter[Row] = { - val repart = BenchUtils.applyCoalesceRepartition(name, df, coalesce, repartition) - repart.write.mode("overwrite") - } - - def csvToParquet( - spark: SparkSession, - basePath: String, - baseOutput: String, - coalesce: Map[String, Int] = Map.empty, - repartition: Map[String, Int] = Map.empty): Unit = { - setupWrite(readOrdersCSV(spark, basePath + "/orders.tbl"), "orders", coalesce, repartition).parquet(baseOutput + "/orders.tbl") - setupWrite(readLineitemCSV(spark, basePath + "/lineitem.tbl"), "lineitem", coalesce, repartition).parquet(baseOutput + "/lineitem.tbl") - setupWrite(readCustomerCSV(spark, basePath + "/customer.tbl"), "customers", coalesce, repartition).parquet(baseOutput + "/customer.tbl") - setupWrite(readNationCSV(spark, basePath + "/nation.tbl"), "nation", coalesce, repartition).parquet(baseOutput + "/nation.tbl") - setupWrite(readPartCSV(spark, basePath + "/part.tbl"), "part", coalesce, repartition).parquet(baseOutput + "/part.tbl") - setupWrite(readPartsuppCSV(spark, basePath + "/partsupp.tbl"), "partsupp", coalesce, repartition).parquet(baseOutput + "/partsupp.tbl") - setupWrite(readRegionCSV(spark, basePath + "/region.tbl"), "region", coalesce, repartition).parquet(baseOutput + "/region.tbl") - setupWrite(readSupplierCSV(spark, basePath + "/supplier.tbl"), "supplier", coalesce, repartition).parquet(baseOutput + "/supplier.tbl") - } - - def csvToOrc( - spark: SparkSession, - basePath: String, - baseOutput: String, - coalesce: Map[String, Int] = Map.empty, - repartition: Map[String, Int] = Map.empty): Unit = { - setupWrite(readOrdersCSV(spark, basePath + "/orders.tbl"), "orders", coalesce, repartition).orc(baseOutput + "/orders.tbl") - setupWrite(readLineitemCSV(spark, basePath + "/lineitem.tbl"), "lineitem", coalesce, repartition).orc(baseOutput + "/lineitem.tbl") - setupWrite(readCustomerCSV(spark, basePath + "/customer.tbl"), "customers", coalesce, repartition).orc(baseOutput + "/customer.tbl") - setupWrite(readNationCSV(spark, basePath + "/nation.tbl"), "nation", coalesce, repartition).orc(baseOutput + "/nation.tbl") - setupWrite(readPartCSV(spark, basePath + "/part.tbl"), "part", coalesce, repartition).orc(baseOutput + "/part.tbl") - setupWrite(readPartsuppCSV(spark, basePath + "/partsupp.tbl"), "partsupp", coalesce, repartition).orc(baseOutput + "/partsupp.tbl") - setupWrite(readRegionCSV(spark, basePath + "/region.tbl"), "region", coalesce, repartition).orc(baseOutput + "/region.tbl") - setupWrite(readSupplierCSV(spark, basePath + "/supplier.tbl"), "supplier", coalesce, repartition).orc(baseOutput + "/supplier.tbl") - } - - def setupAllCSV(spark: SparkSession, basePath: String): Unit = { - setupOrdersCSV(spark, basePath + "/orders.tbl") - setupLineitemCSV(spark, basePath + "/lineitem.tbl") - setupCustomerCSV(spark, basePath + "/customer.tbl") - setupNationCSV(spark, basePath + "/nation.tbl") - setupPartCSV(spark, basePath + "/part.tbl") - setupPartsuppCSV(spark, basePath + "/partsupp.tbl") - setupRegionCSV(spark, 
basePath + "/region.tbl") - setupSupplierCSV(spark, basePath + "/supplier.tbl") - } - - def setupAllParquet(spark: SparkSession, basePath: String): Unit = { - setupOrdersParquet(spark, basePath + "/orders.tbl") - setupLineitemParquet(spark, basePath + "/lineitem.tbl") - setupCustomerParquet(spark, basePath + "/customer.tbl") - setupNationParquet(spark, basePath + "/nation.tbl") - setupPartParquet(spark, basePath + "/part.tbl") - setupPartsuppParquet(spark, basePath + "/partsupp.tbl") - setupRegionParquet(spark, basePath + "/region.tbl") - setupSupplierParquet(spark, basePath + "/supplier.tbl") - } - - def setupAllOrc(spark: SparkSession, basePath: String): Unit = { - setupOrdersOrc(spark, basePath + "/orders.tbl") - setupLineitemOrc(spark, basePath + "/lineitem.tbl") - setupCustomerOrc(spark, basePath + "/customer.tbl") - setupNationOrc(spark, basePath + "/nation.tbl") - setupPartOrc(spark, basePath + "/part.tbl") - setupPartsuppOrc(spark, basePath + "/partsupp.tbl") - setupRegionOrc(spark, basePath + "/region.tbl") - setupSupplierOrc(spark, basePath + "/supplier.tbl") - } - - // ORDERS - val ordersSchema = StructType(Array( - StructField("o_orderkey", LongType), - StructField("o_custkey", LongType), - StructField("o_orderstatus", StringType), - StructField("o_totalprice", DoubleType), - StructField("o_orderdate", DateType), - StructField("o_orderpriority", StringType), - StructField("o_clerk", StringType), - StructField("o_shippriority", LongType), - StructField("o_comment", StringType) - )) - - def readOrdersCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(ordersSchema).csv(path) - - def setupOrdersCSV(spark: SparkSession, path: String): Unit = - readOrdersCSV(spark, path).createOrReplaceTempView("orders") - - def setupOrdersParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("orders") - - def setupOrdersOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("orders") - - // LINEITEM - val lineitemSchema = StructType(Array( - StructField("l_orderkey", LongType), - StructField("l_partkey", LongType), - StructField("l_suppkey", LongType), - StructField("l_linenumber", LongType), - StructField("l_quantity", DoubleType), - StructField("l_extendedprice", DoubleType), - StructField("l_discount", DoubleType), - StructField("l_tax", DoubleType), - StructField("l_returnflag", StringType), - StructField("l_linestatus", StringType), - StructField("l_shipdate", DateType), - StructField("l_commitdate", DateType), - StructField("l_receiptdate", DateType), - StructField("l_shipinstruct", StringType), - StructField("l_shipmode", StringType), - StructField("l_comment", StringType) - )) - - def readLineitemCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(lineitemSchema).csv(path) - - def setupLineitemCSV(spark: SparkSession, path: String): Unit = - readLineitemCSV(spark, path).createOrReplaceTempView("lineitem") - - def setupLineitemParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("lineitem") - - def setupLineitemOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("lineitem") - - // CUSTOMER - val customerSchema = StructType(Array( - StructField("c_custkey", LongType), - StructField("c_name", StringType), - StructField("c_address", StringType), - StructField("c_nationkey", LongType), - StructField("c_phone", StringType), - 
StructField("c_acctbal", DoubleType), - StructField("c_mktsegment", StringType), - StructField("c_comment", StringType) - )) - - def readCustomerCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(customerSchema).csv(path) - - def setupCustomerCSV(spark: SparkSession, path: String): Unit = - readCustomerCSV(spark, path).createOrReplaceTempView("customer") - - def setupCustomerParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("customer") - - def setupCustomerOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("customer") - - // NATION - val nationSchema = StructType(Array( - StructField("n_nationkey", LongType), - StructField("n_name", StringType), - StructField("n_regionkey", LongType), - StructField("n_comment", StringType) - )) - - def readNationCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(nationSchema).csv(path) - - def setupNationCSV(spark: SparkSession, path: String): Unit = - readNationCSV(spark, path).createOrReplaceTempView("nation") - - def setupNationParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("nation") - - def setupNationOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("nation") - - // PART - val partSchema = StructType(Array( - StructField("p_partkey", LongType), - StructField("p_name", StringType), - StructField("p_mfgr", StringType), - StructField("p_brand", StringType), - StructField("p_type", StringType), - StructField("p_size", LongType), - StructField("p_container", StringType), - StructField("p_retailprice", DoubleType), - StructField("p_comment", StringType) - )) - - def readPartCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(partSchema).csv(path) - - def setupPartCSV(spark: SparkSession, path: String): Unit = - readPartCSV(spark, path).createOrReplaceTempView("part") - - def setupPartParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("part") - - def setupPartOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("part") - - // PARTSUPP - val partsuppSchema = StructType(Array( - StructField("ps_partkey", LongType), - StructField("ps_suppkey", LongType), - StructField("ps_availqty", LongType), - StructField("ps_supplycost", DoubleType), - StructField("ps_comment", StringType) - )) - - def readPartsuppCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(partsuppSchema).csv(path) - - def setupPartsuppCSV(spark: SparkSession, path: String): Unit = - readPartsuppCSV(spark, path).createOrReplaceTempView("partsupp") - - def setupPartsuppParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("partsupp") - - def setupPartsuppOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("partsupp") - - // REGION - val regionSchema = StructType(Array( - StructField("r_regionkey", LongType), - StructField("r_name", StringType), - StructField("r_comment", StringType) - )) - - def readRegionCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(regionSchema).csv(path) - - def setupRegionCSV(spark: SparkSession, path: String): Unit = - readRegionCSV(spark, path).createOrReplaceTempView("region") - 
- def setupRegionParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("region") - - def setupRegionOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("region") - - // SUPPLIER - val supplierSchema = StructType(Array( - StructField("s_suppkey", LongType), - StructField("s_name", StringType), - StructField("s_address", StringType), - StructField("s_nationkey", LongType), - StructField("s_phone", StringType), - StructField("s_acctbal", DoubleType), - StructField("s_comment", StringType) - )) - - def readSupplierCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(supplierSchema).csv(path) - - def setupSupplierCSV(spark: SparkSession, path: String): Unit = - readSupplierCSV(spark, path).createOrReplaceTempView("supplier") - - def setupSupplierParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("supplier") - - def setupSupplierOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("supplier") -} - -object Q1Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | l_returnflag, - | l_linestatus, - | sum(l_quantity) as sum_qty, - | sum(l_extendedprice) as sum_base_price, - | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, - | avg(l_quantity) as avg_qty, - | avg(l_extendedprice) as avg_price, - | avg(l_discount) as avg_disc, - | count(*) as count_order - |from - | lineitem - |where - | l_shipdate <= date '1998-12-01' - interval '90' day - |group by - | l_returnflag, - | l_linestatus - |order by - | l_returnflag, - | l_linestatus - | - |""".stripMargin) -} - -object Q2Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | s_acctbal, - | s_name, - | n_name, - | p_partkey, - | p_mfgr, - | s_address, - | s_phone, - | s_comment - |from - | part, - | supplier, - | partsupp, - | nation, - | region - |where - | p_partkey = ps_partkey - | and s_suppkey = ps_suppkey - | and p_size = 15 - | and p_type like '%BRASS' - | and s_nationkey = n_nationkey - | and n_regionkey = r_regionkey - | and r_name = 'EUROPE' - | and ps_supplycost = ( - | select - | min(ps_supplycost) - | from - | partsupp, - | supplier, - | nation, - | region - | where - | p_partkey = ps_partkey - | and s_suppkey = ps_suppkey - | and s_nationkey = n_nationkey - | and n_regionkey = r_regionkey - | and r_name = 'EUROPE' - | ) - |order by - | s_acctbal desc, - | n_name, - | s_name, - | p_partkey - |limit 100 - | - |""".stripMargin) -} - -object Q3Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | l_orderkey, - | sum(l_extendedprice * (1 - l_discount)) as revenue, - | o_orderdate, - | o_shippriority - |from - | customer, - | orders, - | lineitem - |where - | c_mktsegment = 'BUILDING' - | and c_custkey = o_custkey - | and l_orderkey = o_orderkey - | and o_orderdate < date '1995-03-15' - | and l_shipdate > date '1995-03-15' - |group by - | l_orderkey, - | o_orderdate, - | o_shippriority - |order by - | revenue desc, - | o_orderdate - |limit 10 - | - |""".stripMargin) -} - -object Q4Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | o_orderpriority, - | count(*) as order_count 
- |from - | orders - |where - | o_orderdate >= date '1993-07-01' - | and o_orderdate < date '1993-07-01' + interval '3' month - | and exists ( - | select - | * - | from - | lineitem - | where - | l_orderkey = o_orderkey - | and l_commitdate < l_receiptdate - | ) - |group by - | o_orderpriority - |order by - | o_orderpriority - | - |""".stripMargin) -} - -object Q5Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | n_name, - | sum(l_extendedprice * (1 - l_discount)) as revenue - |from - | customer, - | orders, - | lineitem, - | supplier, - | nation, - | region - |where - | c_custkey = o_custkey - | and l_orderkey = o_orderkey - | and l_suppkey = s_suppkey - | and c_nationkey = s_nationkey - | and s_nationkey = n_nationkey - | and n_regionkey = r_regionkey - | and r_name = 'ASIA' - | and o_orderdate >= date '1994-01-01' - | and o_orderdate < date '1994-01-01' + interval '1' year - |group by - | n_name - |order by - | revenue desc - | - |""".stripMargin) -} - -object Q6Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | sum(l_extendedprice * l_discount) as revenue - |from - | lineitem - |where - | l_shipdate >= date '1994-01-01' - | and l_shipdate < date '1994-01-01' + interval '1' year - | and l_discount between .06 - 0.01 and .06 + 0.01 - | and l_quantity < 24 - | - |""".stripMargin) -} - -object Q7Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | supp_nation, - | cust_nation, - | l_year, - | sum(volume) as revenue - |from - | ( - | select - | n1.n_name as supp_nation, - | n2.n_name as cust_nation, - | year(l_shipdate) as l_year, - | l_extendedprice * (1 - l_discount) as volume - | from - | supplier, - | lineitem, - | orders, - | customer, - | nation n1, - | nation n2 - | where - | s_suppkey = l_suppkey - | and o_orderkey = l_orderkey - | and c_custkey = o_custkey - | and s_nationkey = n1.n_nationkey - | and c_nationkey = n2.n_nationkey - | and ( - | (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY') - | or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE') - | ) - | and l_shipdate between date '1995-01-01' and date '1996-12-31' - | ) as shipping - |group by - | supp_nation, - | cust_nation, - | l_year - |order by - | supp_nation, - | cust_nation, - | l_year - | - |""".stripMargin) -} - -object Q8Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | o_year, - | sum(case - | when nation = 'BRAZIL' then volume - | else 0 - | end) / sum(volume) as mkt_share - |from - | ( - | select - | year(o_orderdate) as o_year, - | l_extendedprice * (1 - l_discount) as volume, - | n2.n_name as nation - | from - | part, - | supplier, - | lineitem, - | orders, - | customer, - | nation n1, - | nation n2, - | region - | where - | p_partkey = l_partkey - | and s_suppkey = l_suppkey - | and l_orderkey = o_orderkey - | and o_custkey = c_custkey - | and c_nationkey = n1.n_nationkey - | and n1.n_regionkey = r_regionkey - | and r_name = 'AMERICA' - | and s_nationkey = n2.n_nationkey - | and o_orderdate between date '1995-01-01' and date '1996-12-31' - | and p_type = 'ECONOMY ANODIZED STEEL' - | ) as all_nations - |group by - | o_year - |order by - | o_year - | - |""".stripMargin) -} - -object Q9Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | nation, - | o_year, - | 
sum(amount) as sum_profit - |from - | ( - | select - | n_name as nation, - | year(o_orderdate) as o_year, - | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount - | from - | part, - | supplier, - | lineitem, - | partsupp, - | orders, - | nation - | where - | s_suppkey = l_suppkey - | and ps_suppkey = l_suppkey - | and ps_partkey = l_partkey - | and p_partkey = l_partkey - | and o_orderkey = l_orderkey - | and s_nationkey = n_nationkey - | and p_name like '%green%' - | ) as profit - |group by - | nation, - | o_year - |order by - | nation, - | o_year desc - | - |""".stripMargin) -} - -object Q10Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | c_custkey, - | c_name, - | sum(l_extendedprice * (1 - l_discount)) as revenue, - | c_acctbal, - | n_name, - | c_address, - | c_phone, - | c_comment - |from - | customer, - | orders, - | lineitem, - | nation - |where - | c_custkey = o_custkey - | and l_orderkey = o_orderkey - | and o_orderdate >= date '1993-10-01' - | and o_orderdate < date '1993-10-01' + interval '3' month - | and l_returnflag = 'R' - | and c_nationkey = n_nationkey - |group by - | c_custkey, - | c_name, - | c_acctbal, - | c_phone, - | n_name, - | c_address, - | c_comment - |order by - | revenue desc - |limit 20 - | - |""".stripMargin) -} - -object Q11Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | ps_partkey, - | sum(ps_supplycost * ps_availqty) as value - |from - | partsupp, - | supplier, - | nation - |where - | ps_suppkey = s_suppkey - | and s_nationkey = n_nationkey - | and n_name = 'GERMANY' - |group by - | ps_partkey having - | sum(ps_supplycost * ps_availqty) > ( - | select - | sum(ps_supplycost * ps_availqty) * 0.0001000000 - | from - | partsupp, - | supplier, - | nation - | where - | ps_suppkey = s_suppkey - | and s_nationkey = n_nationkey - | and n_name = 'GERMANY' - | ) - |order by - | value desc - | - |""".stripMargin) -} - -object Q12Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | l_shipmode, - | sum(case - | when o_orderpriority = '1-URGENT' - | or o_orderpriority = '2-HIGH' - | then 1 - | else 0 - | end) as high_line_count, - | sum(case - | when o_orderpriority <> '1-URGENT' - | and o_orderpriority <> '2-HIGH' - | then 1 - | else 0 - | end) as low_line_count - |from - | orders, - | lineitem - |where - | o_orderkey = l_orderkey - | and l_shipmode in ('MAIL', 'SHIP') - | and l_commitdate < l_receiptdate - | and l_shipdate < l_commitdate - | and l_receiptdate >= date '1994-01-01' - | and l_receiptdate < date '1994-01-01' + interval '1' year - |group by - | l_shipmode - |order by - | l_shipmode - | - |""".stripMargin) -} - -object Q13Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | c_count, - | count(*) as custdist - |from - | ( - | select - | c_custkey, - | count(o_orderkey) as c_count - | from - | customer left outer join orders on - | c_custkey = o_custkey - | and o_comment not like '%special%requests%' - | group by - | c_custkey - | ) as c_orders - |group by - | c_count - |order by - | custdist desc, - | c_count desc - | - |""".stripMargin) -} - -object Q14Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | 100.00 * sum(case - | when p_type like 'PROMO%' - | then l_extendedprice * (1 - 
l_discount) - | else 0 - | end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue - |from - | lineitem, - | part - |where - | l_partkey = p_partkey - | and l_shipdate >= date '1995-09-01' - | and l_shipdate < date '1995-09-01' + interval '1' month - | - |""".stripMargin) -} - -object Q15Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |with revenue0 as - | (select - | l_suppkey as supplier_no, - | sum(l_extendedprice * (1 - l_discount)) as total_revenue - | from - | lineitem - | where - | l_shipdate >= date '1996-01-01' - | and l_shipdate < date '1996-01-01' + interval '3' month - | group by - | l_suppkey) - | - | - |select - | s_suppkey, - | s_name, - | s_address, - | s_phone, - | total_revenue - |from - | supplier, - | revenue0 - |where - | s_suppkey = supplier_no - | and total_revenue = ( - | select - | max(total_revenue) - | from - | revenue0 - | ) - |order by - | s_suppkey - | - |""".stripMargin) -} - -object Q16Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | p_brand, - | p_type, - | p_size, - | count(distinct ps_suppkey) as supplier_cnt - |from - | partsupp, - | part - |where - | p_partkey = ps_partkey - | and p_brand <> 'Brand#45' - | and p_type not like 'MEDIUM POLISHED%' - | and p_size in (49, 14, 23, 45, 19, 3, 36, 9) - | and ps_suppkey not in ( - | select - | s_suppkey - | from - | supplier - | where - | s_comment like '%Customer%Complaints%' - | ) - |group by - | p_brand, - | p_type, - | p_size - |order by - | supplier_cnt desc, - | p_brand, - | p_type, - | p_size - | - |""".stripMargin) -} - -object Q17Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | sum(l_extendedprice) / 7.0 as avg_yearly - |from - | lineitem, - | part - |where - | p_partkey = l_partkey - | and p_brand = 'Brand#23' - | and p_container = 'MED BOX' - | and l_quantity < ( - | select - | 0.2 * avg(l_quantity) - | from - | lineitem - | where - | l_partkey = p_partkey - | ) - | - |""".stripMargin) -} - -object Q18Debug { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | o_orderkey, - | sum(l_quantity) - |from - | orders, - | lineitem - |where - | o_orderkey in ( - | select - | l_orderkey - | from - | lineitem - | group by - | l_orderkey having - | sum(l_quantity) > 300 - | ) - | and o_orderkey = l_orderkey - |group by - | o_orderkey - |order by - | o_orderkey desc - |limit 100 - | - |""".stripMargin) -} - -object Q18Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | c_name, - | c_custkey, - | o_orderkey, - | o_orderdate, - | o_totalprice, - | sum(l_quantity) - |from - | customer, - | orders, - | lineitem - |where - | o_orderkey in ( - | select - | l_orderkey - | from - | lineitem - | group by - | l_orderkey having - | sum(l_quantity) > 300 - | ) - | and c_custkey = o_custkey - | and o_orderkey = l_orderkey - |group by - | c_name, - | c_custkey, - | o_orderkey, - | o_orderdate, - | o_totalprice - |order by - | o_totalprice desc, - | o_orderdate - |limit 100 - | - |""".stripMargin) -} - -object Q19Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | sum(l_extendedprice* (1 - l_discount)) as revenue - |from - | lineitem, - | part - |where - | ( - | p_partkey = l_partkey - | and p_brand = 'Brand#12' - | 
and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') - | and l_quantity >= 1 and l_quantity <= 1 + 10 - | and p_size between 1 and 5 - | and l_shipmode in ('AIR', 'AIR REG') - | and l_shipinstruct = 'DELIVER IN PERSON' - | ) - | or - | ( - | p_partkey = l_partkey - | and p_brand = 'Brand#23' - | and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') - | and l_quantity >= 10 and l_quantity <= 10 + 10 - | and p_size between 1 and 10 - | and l_shipmode in ('AIR', 'AIR REG') - | and l_shipinstruct = 'DELIVER IN PERSON' - | ) - | or - | ( - | p_partkey = l_partkey - | and p_brand = 'Brand#34' - | and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') - | and l_quantity >= 20 and l_quantity <= 20 + 10 - | and p_size between 1 and 15 - | and l_shipmode in ('AIR', 'AIR REG') - | and l_shipinstruct = 'DELIVER IN PERSON' - | ) - | - |""".stripMargin) -} - -object Q20Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | s_name, - | s_address - |from - | supplier, - | nation - |where - | s_suppkey in ( - | select - | ps_suppkey - | from - | partsupp - | where - | ps_partkey in ( - | select - | p_partkey - | from - | part - | where - | p_name like 'forest%' - | ) - | and ps_availqty > ( - | select - | 0.5 * sum(l_quantity) - | from - | lineitem - | where - | l_partkey = ps_partkey - | and l_suppkey = ps_suppkey - | and l_shipdate >= date '1994-01-01' - | and l_shipdate < date '1994-01-01' + interval '1' year - | ) - | ) - | and s_nationkey = n_nationkey - | and n_name = 'CANADA' - |order by - | s_name - | - |""".stripMargin) -} - -object Q21Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | s_name, - | count(*) as numwait - |from - | supplier, - | lineitem l1, - | orders, - | nation - |where - | s_suppkey = l1.l_suppkey - | and o_orderkey = l1.l_orderkey - | and o_orderstatus = 'F' - | and l1.l_receiptdate > l1.l_commitdate - | and exists ( - | select - | * - | from - | lineitem l2 - | where - | l2.l_orderkey = l1.l_orderkey - | and l2.l_suppkey <> l1.l_suppkey - | ) - | and not exists ( - | select - | * - | from - | lineitem l3 - | where - | l3.l_orderkey = l1.l_orderkey - | and l3.l_suppkey <> l1.l_suppkey - | and l3.l_receiptdate > l3.l_commitdate - | ) - | and s_nationkey = n_nationkey - | and n_name = 'SAUDI ARABIA' - |group by - | s_name - |order by - | numwait desc, - | s_name - |limit 100 - | - |""".stripMargin) -} - -object Q22Like { - def apply(spark: SparkSession): DataFrame = spark.sql( - """ - |-- using default substitutions - | - |select - | cntrycode, - | count(*) as numcust, - | sum(c_acctbal) as totacctbal - |from - | ( - | select - | substring(c_phone, 1, 2) as cntrycode, - | c_acctbal - | from - | customer - | where - | substring(c_phone, 1, 2) in - | ('13', '31', '23', '29', '30', '18', '17') - | and c_acctbal > ( - | select - | avg(c_acctbal) - | from - | customer - | where - | c_acctbal > 0.00 - | and substring(c_phone, 1, 2) in - | ('13', '31', '23', '29', '30', '18', '17') - | ) - | and not exists ( - | select - | * - | from - | orders - | where - | o_custkey = c_custkey - | ) - | ) as custsale - |group by - | cntrycode - |order by - | cntrycode - | - |""".stripMargin) -} - -object ConvertFiles { - /** - * Main method allows us to submit using spark-submit to perform conversions from CSV to - * Parquet or Orc. 
- */ - def main(arg: Array[String]): Unit = { - val conf = new FileConversionConf(arg) - val spark = SparkSession.builder.appName("TPC-H Like File Conversion").getOrCreate() - conf.outputFormat() match { - case "parquet" => - csvToParquet( - spark, - conf.input(), - conf.output(), - conf.coalesce, - conf.repartition) - case "orc" => - csvToOrc( - spark, - conf.input(), - conf.output(), - conf.coalesce, - conf.repartition) - } - } -} - -class FileConversionConf(arguments: Seq[String]) extends ScallopConf(arguments) { - val input = opt[String](required = true) - val output = opt[String](required = true) - val outputFormat = opt[String](required = true) - val coalesce = propsLong[Int]("coalesce") - val repartition = propsLong[Int]("repartition") - verify() - BenchUtils.validateCoalesceRepartition(coalesce, repartition) -} - -// scalastyle:on line.size.limit diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcxbb/TpcxbbLikeBench.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcxbb/TpcxbbLikeBench.scala deleted file mode 100644 index 8cea42f287f..00000000000 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcxbb/TpcxbbLikeBench.scala +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.nvidia.spark.rapids.tests.tpcxbb - -import com.nvidia.spark.rapids.tests.common.BenchmarkSuite - -import org.apache.spark.sql.{DataFrame, SparkSession} - -class TpcxbbLikeBench extends BenchmarkSuite { - override def name(): String = "TPCx-BB" - - override def shortName(): String = "Tpcxbb" - - override def setupAllParquet(spark: SparkSession, path: String): Unit = { - TpcxbbLikeSpark.setupAllParquet(spark, path) - } - - override def setupAllCSV(spark: SparkSession, path: String): Unit = { - TpcxbbLikeSpark.setupAllCSV(spark, path) - } - - override def setupAllOrc(spark: SparkSession, path: String): Unit = { - TpcxbbLikeSpark.setupAllOrc(spark, path) - } - - override def createDataFrame(spark: SparkSession, query: String): DataFrame = { - getQuery(query)(spark) - } - - def getQuery(query: String): SparkSession => DataFrame = { - - val queryIndex = if (query.startsWith("q")) { - query.substring(1).toInt - } else { - query.toInt - } - - queryIndex match { - case 1 => Q1Like.apply - case 2 => Q2Like.apply - case 3 => Q3Like.apply - case 4 => Q4Like.apply - case 5 => Q5Like.apply - case 6 => Q6Like.apply - case 7 => Q7Like.apply - case 8 => Q8Like.apply - case 9 => Q9Like.apply - case 10 => Q10Like.apply - case 11 => Q11Like.apply - case 12 => Q12Like.apply - case 13 => Q13Like.apply - case 14 => Q14Like.apply - case 15 => Q15Like.apply - case 16 => Q16Like.apply - case 17 => Q17Like.apply - case 18 => Q18Like.apply - case 19 => Q19Like.apply - case 20 => Q20Like.apply - case 21 => Q21Like.apply - case 22 => Q22Like.apply - case 23 => Q23Like.apply - case 24 => Q24Like.apply - case 25 => Q25Like.apply - case 26 => Q26Like.apply - case 27 => Q27Like.apply - case 28 => Q28Like.apply - case 29 => Q29Like.apply - case 30 => Q30Like.apply - case _ => throw new IllegalArgumentException(s"Unknown TPCx-BB query number: $queryIndex") - } - } -} diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcxbb/TpcxbbLikeSpark.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcxbb/TpcxbbLikeSpark.scala deleted file mode 100644 index c6047819387..00000000000 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/tpcxbb/TpcxbbLikeSpark.scala +++ /dev/null @@ -1,2132 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.nvidia.spark.rapids.tests.tpcxbb - -import com.nvidia.spark.rapids.tests.common.BenchUtils -import com.nvidia.spark.rapids.tests.tpcxbb.TpcxbbLikeSpark.{csvToOrc, csvToParquet} -import org.rogach.scallop.ScallopConf - -import org.apache.spark.sql.{DataFrame, DataFrameWriter, Row, SparkSession} -import org.apache.spark.sql.types._ - -// scalastyle:off line.size.limit - -// DecimalType to DoubleType, bigint to LongType -object TpcxbbLikeSpark { - private def setupWrite( - df: DataFrame, - name: String, - coalesce: Map[String, Int], - repartition: Map[String, Int]): DataFrameWriter[Row] = { - val repart = BenchUtils.applyCoalesceRepartition(name, df, coalesce, repartition) - repart.write.mode("overwrite") - } - - def csvToParquet( - spark: SparkSession, - basePath: String, - baseOutput: String, - coalesce: Map[String, Int] = Map.empty, - repartition: Map[String, Int] = Map.empty): Unit = { - setupWrite(readCustomerCSV(spark, basePath + "/customer/"), "customer", coalesce, repartition).parquet(baseOutput + "/customer/") - setupWrite(readCustomerAddressCSV(spark, basePath + "/customer_address/"), "customer_address", coalesce, repartition).parquet(baseOutput + "/customer_address/") - setupWrite(readItemCSV(spark, basePath + "/item/"), "item", coalesce, repartition).parquet(baseOutput + "/item/") - setupWrite(readStoreSalesCSV(spark, basePath + "/store_sales/"), "store_sales", coalesce, repartition).parquet(baseOutput + "/store_sales/") - setupWrite(readDateDimCSV(spark, basePath + "/date_dim/"), "date_dim", coalesce, repartition).parquet(baseOutput + "/date_dim/") - setupWrite(readStoreCSV(spark, basePath + "/store/"), "store", coalesce, repartition).parquet(baseOutput + "/store/") - setupWrite(readCustomerDemographicsCSV(spark, basePath + "/customer_demographics/"), "customer_demographics", coalesce, repartition).parquet(baseOutput + "/customer_demographics/") - setupWrite(readReviewsCSV(spark, basePath + "/product_reviews/"), "product_reviews", coalesce, repartition).parquet(baseOutput + "/product_reviews/") - setupWrite(readWebSalesCSV(spark, basePath + "/web_sales/"), "web_sales", coalesce, repartition).parquet(baseOutput + "/web_sales/") - setupWrite(readWebClickStreamsCSV(spark, basePath + "/web_clickstreams/"), "web_clickstreams", coalesce, repartition).parquet(baseOutput + "/web_clickstreams/") - setupWrite(readHouseholdDemographicsCSV(spark, basePath + "/household_demographics/"), "household_demographics", coalesce, repartition).parquet(baseOutput + "/household_demographics/") - setupWrite(readWebPageCSV(spark, basePath + "/web_page/"), "web_page", coalesce, repartition).parquet(baseOutput + "/web_page/") - setupWrite(readTimeDimCSV(spark, basePath + "/time_dim/"), "time_dim", coalesce, repartition).parquet(baseOutput + "/time_dim/") - setupWrite(readWebReturnsCSV(spark, basePath + "/web_returns/"), "web_returns", coalesce, repartition).parquet(baseOutput + "/web_returns/") - setupWrite(readWarehouseCSV(spark, basePath + "/warehouse/"), "warehouse", coalesce, repartition).parquet(baseOutput + "/warehouse/") - setupWrite(readPromotionCSV(spark, basePath + "/promotion/"), "promotion", coalesce, repartition).parquet(baseOutput + "/promotion/") - setupWrite(readStoreReturnsCSV(spark, basePath + "/store_returns/"), "store_returns", coalesce, repartition).parquet(baseOutput + "/store_returns/") - setupWrite(readInventoryCSV(spark, basePath + "/inventory/"), "inventory", coalesce, repartition).parquet(baseOutput + "/inventory/") - setupWrite(readMarketPricesCSV(spark, 
basePath + "/item_marketprices/"), "item_marketprices", coalesce, repartition).parquet(baseOutput + "/item_marketprices/") - } - - def csvToOrc( - spark: SparkSession, - basePath: String, - baseOutput: String, - coalesce: Map[String, Int] = Map.empty, - repartition: Map[String, Int] = Map.empty): Unit = { - setupWrite(readCustomerCSV(spark, basePath + "/customer/"), "customer", coalesce, repartition).orc(baseOutput + "/customer/") - setupWrite(readCustomerAddressCSV(spark, basePath + "/customer_address/"), "customer_address", coalesce, repartition).orc(baseOutput + "/customer_address/") - setupWrite(readItemCSV(spark, basePath + "/item/"), "item", coalesce, repartition).orc(baseOutput + "/item/") - setupWrite(readStoreSalesCSV(spark, basePath + "/store_sales/"), "store_sales", coalesce, repartition).orc(baseOutput + "/store_sales/") - setupWrite(readDateDimCSV(spark, basePath + "/date_dim/"), "date_dim", coalesce, repartition).orc(baseOutput + "/date_dim/") - setupWrite(readStoreCSV(spark, basePath + "/store/"), "store", coalesce, repartition).orc(baseOutput + "/store/") - setupWrite(readCustomerDemographicsCSV(spark, basePath + "/customer_demographics/"), "customer_demographics", coalesce, repartition).orc(baseOutput + "/customer_demographics/") - setupWrite(readReviewsCSV(spark, basePath + "/product_reviews/"), "product_reviews", coalesce, repartition).orc(baseOutput + "/product_reviews/") - setupWrite(readWebSalesCSV(spark, basePath + "/web_sales/"), "web_sales", coalesce, repartition).orc(baseOutput + "/web_sales/") - setupWrite(readWebClickStreamsCSV(spark, basePath + "/web_clickstreams/"), "web_clickstreams", coalesce, repartition).orc(baseOutput + "/web_clickstreams/") - setupWrite(readHouseholdDemographicsCSV(spark, basePath + "/household_demographics/"), "household_demographics", coalesce, repartition).orc(baseOutput + "/household_demographics/") - setupWrite(readWebPageCSV(spark, basePath + "/web_page/"), "web_page", coalesce, repartition).orc(baseOutput + "/web_page/") - setupWrite(readTimeDimCSV(spark, basePath + "/time_dim/"), "time_dim", coalesce, repartition).orc(baseOutput + "/time_dim/") - setupWrite(readWebReturnsCSV(spark, basePath + "/web_returns/"), "web_returns", coalesce, repartition).orc(baseOutput + "/web_returns/") - setupWrite(readWarehouseCSV(spark, basePath + "/warehouse/"), "warehouse", coalesce, repartition).orc(baseOutput + "/warehouse/") - setupWrite(readPromotionCSV(spark, basePath + "/promotion/"), "promotion", coalesce, repartition).orc(baseOutput + "/promotion/") - setupWrite(readStoreReturnsCSV(spark, basePath + "/store_returns/"), "store_returns", coalesce, repartition).orc(baseOutput + "/store_returns/") - setupWrite(readInventoryCSV(spark, basePath + "/inventory/"), "inventory", coalesce, repartition).orc(baseOutput + "/inventory/") - setupWrite(readMarketPricesCSV(spark, basePath + "/item_marketprices/"), "item_marketprices", coalesce, repartition).orc(baseOutput + "/item_marketprices/") - } - - def setupAllCSV(spark: SparkSession, basePath: String): Unit = { - setupCustomerCSV(spark, basePath + "/customer/") - setupCustomerAddressCSV(spark, basePath + "/customer_address/") - setupItemCSV(spark, basePath + "/item/") - setupStoreSalesCSV(spark, basePath + "/store_sales/") - setupDateDimCSV(spark, basePath + "/date_dim/") - setupStoreCSV(spark, basePath + "/store/") - setupCustomerDemographicsCSV(spark, basePath + "/customer_demographics/") - setupReviewsCSV(spark, basePath + "/product_reviews/") - setupWebSalesCSV(spark, basePath + "/web_sales/") - 
setupWebClickStreamsCSV(spark, basePath + "/web_clickstreams/") - setupHouseholdDemographicsCSV(spark, basePath + "/household_demographics/") - setupWebPageCSV(spark, basePath + "/web_page/") - setupTimeDimCSV(spark, basePath + "/time_dim/") - setupWebReturnsCSV(spark, basePath + "/web_returns/") - setupWarehouseCSV(spark, basePath + "/warehouse/") - setupPromotionCSV(spark, basePath + "/promotion/") - setupStoreReturnsCSV(spark, basePath + "/store_returns/") - setupInventoryCSV(spark, basePath + "/inventory/") - setupMarketPricesCSV(spark, basePath + "/item_marketprices/") - } - - def setupAllParquet(spark: SparkSession, basePath: String): Unit = { - setupCustomerParquet(spark, basePath + "/customer/") - setupCustomerAddressParquet(spark, basePath + "/customer_address/") - setupItemParquet(spark, basePath + "/item/") - setupStoreSalesParquet(spark, basePath + "/store_sales/") - setupDateDimParquet(spark, basePath + "/date_dim/") - setupStoreParquet(spark, basePath + "/store/") - setupCustomerDemographicsParquet(spark, basePath + "/customer_demographics/") - setupReviewsParquet(spark, basePath + "/product_reviews/") - setupWebSalesParquet(spark, basePath + "/web_sales/") - setupWebClickStreamsParquet(spark, basePath + "/web_clickstreams/") - setupHouseholdDemographicsParquet(spark, basePath + "/household_demographics/") - setupWebPageParquet(spark, basePath + "/web_page/") - setupTimeDimParquet(spark, basePath + "/time_dim/") - setupWebReturnsParquet(spark, basePath + "/web_returns/") - setupWarehouseParquet(spark, basePath + "/warehouse/") - setupPromotionParquet(spark, basePath + "/promotion/") - setupStoreReturnsParquet(spark, basePath + "/store_returns/") - setupInventoryParquet(spark, basePath + "/inventory/") - setupMarketPricesParquet(spark, basePath + "/item_marketprices/") - } - - def setupAllParquetWithMetastore(spark: SparkSession, basePath: String): Unit = { - setupParquetTableWithMetastore(spark, "customer", basePath + "/customer/") - setupParquetTableWithMetastore(spark, "customer_address", basePath + "/customer_address/") - setupParquetTableWithMetastore(spark, "item", basePath + "/item/") - setupParquetTableWithMetastore(spark, "store_sales", basePath + "/store_sales/") - setupParquetTableWithMetastore(spark, "date_dim", basePath + "/date_dim/") - setupParquetTableWithMetastore(spark, "store", basePath + "/store/") - setupParquetTableWithMetastore(spark, "customer_demographics", basePath + "/customer_demographics/") - setupParquetTableWithMetastore(spark, "product_reviews", basePath + "/product_reviews/") - setupParquetTableWithMetastore(spark, "web_sales", basePath + "/web_sales/") - setupParquetTableWithMetastore(spark, "web_clickstreams", basePath + "/web_clickstreams/") - setupParquetTableWithMetastore(spark, "household_demographics", basePath + "/household_demographics/") - setupParquetTableWithMetastore(spark, "web_page", basePath + "/web_page/") - setupParquetTableWithMetastore(spark, "time_dim", basePath + "/time_dim/") - setupParquetTableWithMetastore(spark, "web_returns", basePath + "/web_returns/") - setupParquetTableWithMetastore(spark, "warehouse", basePath + "/warehouse/") - setupParquetTableWithMetastore(spark, "promotion", basePath + "/promotion/") - setupParquetTableWithMetastore(spark, "store_returns", basePath + "/store_returns/") - setupParquetTableWithMetastore(spark, "inventory", basePath + "/inventory/") - setupParquetTableWithMetastore(spark, "item_marketprices", basePath + "/item_marketprices/") - - spark.sql("SHOW TABLES").show - } - - def 
setupParquetTableWithMetastore(spark: SparkSession, table: String, path: String): Unit = { - setupTableWithMetastore(spark, table, "PARQUET", path) - } - - def setupTableWithMetastore(spark: SparkSession, table: String, format: String, path: String): Unit = { - // Yes there are SQL injection vulnerabilities here, so don't exploit it, this is just test code - spark.catalog.dropTempView(table) - spark.sql(s"DROP TABLE IF EXISTS ${table}") - spark.sql(s"CREATE TABLE ${table} USING ${format} LOCATION '${path}'") - } - - def setupAllOrc(spark: SparkSession, basePath: String): Unit = { - setupCustomerOrc(spark, basePath + "/customer/") - setupCustomerAddressOrc(spark, basePath + "/customer_address/") - setupItemOrc(spark, basePath + "/item/") - setupStoreSalesOrc(spark, basePath + "/store_sales/") - setupDateDimOrc(spark, basePath + "/date_dim/") - setupStoreOrc(spark, basePath + "/store/") - setupCustomerDemographicsOrc(spark, basePath + "/customer_demographics/") - setupReviewsOrc(spark, basePath + "/product_reviews/") - setupWebSalesOrc(spark, basePath + "/web_sales/") - setupWebClickStreamsOrc(spark, basePath + "/web_clickstreams/") - setupHouseholdDemographicsOrc(spark, basePath + "/household_demographics/") - setupWebPageOrc(spark, basePath + "/web_page/") - setupTimeDimOrc(spark, basePath + "/time_dim/") - setupWebReturnsOrc(spark, basePath + "/web_returns/") - setupWarehouseOrc(spark, basePath + "/warehouse/") - setupPromotionOrc(spark, basePath + "/promotion/") - setupStoreReturnsOrc(spark, basePath + "/store_returns/") - setupInventoryOrc(spark, basePath + "/inventory/") - setupMarketPricesOrc(spark, basePath + "/item_marketprices/") - } - - // CUSTOMER - val customerSchema = StructType(Array( - StructField("c_customer_sk", LongType, false), - StructField("c_customer_id", StringType, false), - StructField("c_current_cdemo_sk", LongType), - StructField("c_current_hdemo_sk", LongType), - StructField("c_current_addr_sk", LongType), - StructField("c_first_shipto_date_sk", LongType), - StructField("c_first_sales_date_sk", LongType), - StructField("c_salutation", StringType), - StructField("c_first_name", StringType), - StructField("c_last_name", StringType), - StructField("c_preferred_cust_flag", StringType), - StructField("c_birth_day", IntegerType), - StructField("c_birth_month", IntegerType), - StructField("c_birth_year", IntegerType), - StructField("c_birth_country", StringType), - StructField("c_login", StringType), - StructField("c_email_address", StringType), - StructField("c_last_review_date", StringType) - )) - - def readCustomerCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(customerSchema).csv(path) - - def setupCustomerCSV(spark: SparkSession, path: String): Unit = - readCustomerCSV(spark, path).createOrReplaceTempView("customer") - - def setupCustomerParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("customer") - - def setupCustomerOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("customer") - - // CUSTOMER ADDRESS - val customerAddressSchema = StructType(Array( - StructField("ca_address_sk", LongType, false), - StructField("ca_address_id", StringType, false), - StructField("ca_street_number", StringType), - StructField("ca_street_name", StringType), - StructField("ca_street_type", StringType), - StructField("ca_suite_number", StringType), - StructField("ca_city", StringType), - StructField("ca_county", StringType), - 
StructField("ca_state", StringType), - StructField("ca_zip", StringType), - StructField("ca_country", StringType), - StructField("ca_gmt_offset", DoubleType), - StructField("ca_location_type", StringType) - )) - - def readCustomerAddressCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(customerAddressSchema).csv(path) - - def setupCustomerAddressCSV(spark: SparkSession, path: String): Unit = - readCustomerAddressCSV(spark, path).createOrReplaceTempView("customer_address") - - def setupCustomerAddressParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("customer_address") - - def setupCustomerAddressOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("customer_address") - - // STORE SALES - val storeSalesSchema = StructType(Array( - StructField("ss_sold_date_sk", LongType), - StructField("ss_sold_time_sk", LongType), - StructField("ss_item_sk", LongType, false), - StructField("ss_customer_sk", LongType), - StructField("ss_cdemo_sk", LongType), - StructField("ss_hdemo_sk", LongType), - StructField("ss_addr_sk", LongType), - StructField("ss_store_sk", LongType), - StructField("ss_promo_sk", LongType), - StructField("ss_ticket_number", LongType, false), - StructField("ss_quantity", IntegerType), - StructField("ss_wholesale_cost", DoubleType), - StructField("ss_list_price", DoubleType), - StructField("ss_sales_price", DoubleType), - StructField("ss_ext_discount_amt", DoubleType), - StructField("ss_ext_sales_price", DoubleType), - StructField("ss_ext_wholesale_cost", DoubleType), - StructField("ss_ext_list_price", DoubleType), - StructField("ss_ext_tax", DoubleType), - StructField("ss_coupon_amt", DoubleType), - StructField("ss_net_paid", DoubleType), - StructField("ss_net_paid_inc_tax", DoubleType), - StructField("ss_net_profit", DoubleType) - )) - - def readStoreSalesCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(storeSalesSchema).csv(path) - - def setupStoreSalesCSV(spark: SparkSession, path: String): Unit = - readStoreSalesCSV(spark, path).createOrReplaceTempView("store_sales") - - def setupStoreSalesParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("store_sales") - - def setupStoreSalesOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("store_sales") - - // ITEM - val itemSchema = StructType(Array( - StructField("i_item_sk", LongType, false), - StructField("i_item_id", StringType, false), - StructField("i_rec_start_date", StringType), - StructField("i_rec_end_date", StringType), - StructField("i_item_desc", StringType), - StructField("i_current_price", DoubleType), - StructField("i_wholesale_cost", DoubleType), - StructField("i_brand_id", IntegerType), - StructField("i_brand", StringType), - StructField("i_class_id", IntegerType), - StructField("i_class", StringType), - StructField("i_category_id", IntegerType), - StructField("i_category", StringType), - StructField("i_manufact_id", IntegerType), - StructField("i_manufact", StringType), - StructField("i_size", StringType), - StructField("i_formulation", StringType), - StructField("i_color", StringType), - StructField("i_units", StringType), - StructField("i_container", StringType), - StructField("i_manager_id", IntegerType), - StructField("i_product_name", StringType) - )) - - def readItemCSV(spark: SparkSession, path: String): DataFrame = - 
spark.read.option("delimiter", "|").schema(itemSchema).csv(path) - - def setupItemCSV(spark: SparkSession, path: String): Unit = - readItemCSV(spark, path).createOrReplaceTempView("item") - - def setupItemParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("item") - - def setupItemOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("item") - - // DATE DIM - val dateDimSchema = StructType(Array( - StructField("d_date_sk", LongType, false), - StructField("d_date_id", StringType, false), - StructField("d_date", StringType), - StructField("d_month_seq", IntegerType), - StructField("d_week_seq", IntegerType), - StructField("d_quarter_seq", IntegerType), - StructField("d_year", IntegerType), - StructField("d_dow", IntegerType), - StructField("d_moy", IntegerType), - StructField("d_dom", IntegerType), - StructField("d_qoy", IntegerType), - StructField("d_fy_year", IntegerType), - StructField("d_fy_quarter_seq", IntegerType), - StructField("d_fy_week_seq", IntegerType), - StructField("d_day_name", StringType), - StructField("d_quarter_name", StringType), - StructField("d_holiday", StringType), - StructField("d_weekend", StringType), - StructField("d_following_holiday", StringType), - StructField("d_first_dom", IntegerType), - StructField("d_last_dom", IntegerType), - StructField("d_same_day_ly", IntegerType), - StructField("d_same_day_lq", IntegerType), - StructField("d_current_day", StringType), - StructField("d_current_week", StringType), - StructField("d_current_month", StringType), - StructField("d_current_quarter", StringType), - StructField("d_current_year", StringType) - )) - - def readDateDimCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(dateDimSchema).csv(path) - - def setupDateDimCSV(spark: SparkSession, path: String): Unit = - readDateDimCSV(spark, path).createOrReplaceTempView("date_dim") - - def setupDateDimParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("date_dim") - - def setupDateDimOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("date_dim") - - // STORE - val storeSchema = StructType(Array( - StructField("s_store_sk", LongType, false), - StructField("s_store_id", StringType, false), - StructField("s_rec_start_date", StringType), - StructField("s_rec_end_date", StringType), - StructField("s_closed_date_sk", LongType), - StructField("s_store_name", StringType), - StructField("s_number_employees", IntegerType), - StructField("s_floor_space", IntegerType), - StructField("s_hours", StringType), - StructField("s_manager", StringType), - StructField("s_market_id", IntegerType), - StructField("s_geography_class", StringType), - StructField("s_market_desc", StringType), - StructField("s_market_manager", StringType), - StructField("s_division_id", IntegerType), - StructField("s_division_name", StringType), - StructField("s_company_id", IntegerType), - StructField("s_company_name", StringType), - StructField("s_street_number", StringType), - StructField("s_street_name", StringType), - StructField("s_street_type", StringType), - StructField("s_suite_number", StringType), - StructField("s_city", StringType), - StructField("s_county", StringType), - StructField("s_state", StringType), - StructField("s_zip", StringType), - StructField("s_country", StringType), - StructField("s_gmt_offset", DoubleType), - StructField("s_tax_precentage", DoubleType) - )) - - 
def readStoreCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(storeSchema).csv(path) - - def setupStoreCSV(spark: SparkSession, path: String): Unit = - readStoreCSV(spark, path).createOrReplaceTempView("store") - - def setupStoreParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("store") - - def setupStoreOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("store") - - // CUSTOMER DEMOGRAPHICS - val customerDemoSchema = StructType(Array( - StructField("cd_demo_sk", LongType, false), - StructField("cd_gender", StringType), - StructField("cd_marital_status", StringType), - StructField("cd_education_status", StringType), - StructField("cd_purchase_estimate", IntegerType), - StructField("cd_credit_rating", StringType), - StructField("cd_dep_count", IntegerType), - StructField("cd_dep_employed_count", IntegerType), - StructField("cd_dep_college_count", IntegerType) - )) - - def readCustomerDemographicsCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(customerDemoSchema).csv(path) - - def setupCustomerDemographicsCSV(spark: SparkSession, path: String): Unit = - readCustomerDemographicsCSV(spark, path).createOrReplaceTempView("customer_demographics") - - def setupCustomerDemographicsParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("customer_demographics") - - def setupCustomerDemographicsOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("customer_demographics") - - // PRODUCT REVIEWS - val reviewsSchema = StructType(Array( - StructField("pr_review_sk", LongType, false), - StructField("pr_review_date", StringType), - StructField("pr_review_time", StringType), - StructField("pr_review_rating", IntegerType, false), - StructField("pr_item_sk", LongType, false), - StructField("pr_user_sk", LongType), - StructField("pr_order_sk", LongType), - StructField("pr_review_content", StringType, false) - )) - - def readReviewsCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(reviewsSchema).csv(path) - - def setupReviewsCSV(spark: SparkSession, path: String): Unit = - readReviewsCSV(spark, path).createOrReplaceTempView("product_reviews") - - def setupReviewsParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("product_reviews") - - def setupReviewsOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("product_reviews") - - // WEB SALES - val webSalesSchema = StructType(Array( - StructField("ws_sold_date_sk", LongType), - StructField("ws_sold_time_sk", LongType), - StructField("ws_ship_date_sk", LongType), - StructField("ws_item_sk", LongType, false), - StructField("ws_bill_customer_sk", LongType), - StructField("ws_bill_cdemo_sk", LongType), - StructField("ws_bill_hdemo_sk", LongType), - StructField("ws_bill_addr_sk", LongType), - StructField("ws_ship_customer_sk", LongType), - StructField("ws_ship_cdemo_sk", LongType), - StructField("ws_ship_hdemo_sk", LongType), - StructField("ws_ship_addr_sk", LongType), - StructField("ws_web_page_sk", LongType), - StructField("ws_web_site_sk", LongType), - StructField("ws_ship_mode_sk", LongType), - StructField("ws_warehouse_sk", LongType), - StructField("ws_promo_sk", LongType), - StructField("ws_order_number", LongType, false), - StructField("ws_quantity", 
IntegerType), - StructField("ws_wholesale_cost", DoubleType), - StructField("ws_list_price", DoubleType), - StructField("ws_sales_price", DoubleType), - StructField("ws_ext_discount_amt", DoubleType), - StructField("ws_ext_sales_price", DoubleType), - StructField("ws_ext_wholesale_cost", DoubleType), - StructField("ws_ext_list_price", DoubleType), - StructField("ws_ext_tax", DoubleType), - StructField("ws_coupon_amt", DoubleType), - StructField("ws_ext_ship_cost", DoubleType), - StructField("ws_net_paid", DoubleType), - StructField("ws_net_paid_inc_tax", DoubleType), - StructField("ws_net_paid_inc_ship", DoubleType), - StructField("ws_net_paid_inc_ship_tax", DoubleType), - StructField("ws_net_profit", DoubleType) - )) - - def readWebSalesCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(webSalesSchema).csv(path) - - def setupWebSalesCSV(spark: SparkSession, path: String): Unit = - readWebSalesCSV(spark, path).createOrReplaceTempView("web_sales") - - def setupWebSalesParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("web_sales") - - def setupWebSalesOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("web_sales") - - // CLICK STREAMS - val clickStreamsSchema = StructType(Array( - StructField("wcs_click_date_sk", LongType), - StructField("wcs_click_time_sk", LongType), - StructField("wcs_sales_sk", LongType), - StructField("wcs_item_sk", LongType), - StructField("wcs_web_page_sk", LongType), - StructField("wcs_user_sk", LongType) - )) - - def readWebClickStreamsCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(clickStreamsSchema).csv(path) - - def setupWebClickStreamsCSV(spark: SparkSession, path: String): Unit = - readWebClickStreamsCSV(spark, path).createOrReplaceTempView("web_clickstreams") - - def setupWebClickStreamsParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("web_clickstreams") - - def setupWebClickStreamsOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("web_clickstreams") - - // HOUSEHOLD DEMOGRAPHICS - val houseDemoSchema = StructType(Array( - StructField("hd_demo_sk", LongType, false), - StructField("hd_income_band_sk", LongType), - StructField("hd_buy_potential", StringType), - StructField("hd_dep_count", IntegerType), - StructField("hd_vehicle_count", IntegerType) - )) - - def readHouseholdDemographicsCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(houseDemoSchema).csv(path) - - def setupHouseholdDemographicsCSV(spark: SparkSession, path: String): Unit = - readHouseholdDemographicsCSV(spark, path).createOrReplaceTempView("household_demographics") - - def setupHouseholdDemographicsParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("household_demographics") - - def setupHouseholdDemographicsOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("household_demographics") - - // WEB PAGE - val webPageSchema = StructType(Array( - StructField("wp_web_page_sk", LongType, false), - StructField("wp_web_page_id", StringType, false), - StructField("wp_rec_start_date", StringType), - StructField("wp_rec_end_date", StringType), - StructField("wp_creation_date_sk", LongType), - StructField("wp_access_date_sk", LongType), - StructField("wp_autogen_flag", StringType), - 
StructField("wp_customer_sk", LongType), - StructField("wp_url", StringType), - StructField("wp_type", StringType), - StructField("wp_char_count", IntegerType), - StructField("wp_link_count", IntegerType), - StructField("wp_image_count", IntegerType), - StructField("wp_max_ad_count", IntegerType) - )) - - def readWebPageCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(webPageSchema).csv(path) - - def setupWebPageCSV(spark: SparkSession, path: String): Unit = - readWebPageCSV(spark, path).createOrReplaceTempView("web_page") - - def setupWebPageParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("web_page") - - def setupWebPageOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("web_page") - - // TIME DIM - val timeDimSchema = StructType(Array( - StructField("t_time_sk", LongType, false), - StructField("t_time_id", StringType, false), - StructField("t_time", IntegerType), - StructField("t_hour", IntegerType), - StructField("t_minute", IntegerType), - StructField("t_second", IntegerType), - StructField("t_am_pm", StringType), - StructField("t_shift", StringType), - StructField("t_sub_shift", StringType), - StructField("t_meal_time", StringType) - )) - - def readTimeDimCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(timeDimSchema).csv(path) - - def setupTimeDimCSV(spark: SparkSession, path: String): Unit = - readTimeDimCSV(spark, path).createOrReplaceTempView("time_dim") - - def setupTimeDimParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("time_dim") - - def setupTimeDimOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("time_dim") - - // WEB RETURNS - val webReturnsSchema = StructType(Array( - StructField("wr_returned_date_sk", LongType), - StructField("wr_returned_time_sk", LongType), - StructField("wr_item_sk", LongType, false), - StructField("wr_refunded_customer_sk", LongType), - StructField("wr_refunded_cdemo_sk", LongType), - StructField("wr_refunded_hdemo_sk", LongType), - StructField("wr_refunded_addr_sk", LongType), - StructField("wr_returning_customer_sk", LongType), - StructField("wr_returning_cdemo_sk", LongType), - StructField("wr_returning_hdemo_sk", LongType), - StructField("wr_returning_addr_sk", LongType), - StructField("wr_web_page_sk", LongType), - StructField("wr_reason_sk", LongType), - StructField("wr_order_number", LongType, false), - StructField("wr_return_quantity", IntegerType), - StructField("wr_return_amt", DoubleType), - StructField("wr_return_tax", DoubleType), - StructField("wr_return_amt_inc_tax", DoubleType), - StructField("wr_fee", DoubleType), - StructField("wr_return_ship_cost", DoubleType), - StructField("wr_refunded_cash", DoubleType), - StructField("wr_reversed_charge", DoubleType), - StructField("wr_account_credit", DoubleType), - StructField("wr_net_loss", DoubleType) - )) - - def readWebReturnsCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(webReturnsSchema).csv(path) - - def setupWebReturnsCSV(spark: SparkSession, path: String): Unit = - readWebReturnsCSV(spark, path).createOrReplaceTempView("web_returns") - - def setupWebReturnsParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("web_returns") - - def setupWebReturnsOrc(spark: SparkSession, path: String): Unit = - 
spark.read.orc(path).createOrReplaceTempView("web_returns") - - // WAREHOUSE - val warehouseSchema = StructType(Array( - StructField("w_warehouse_sk", LongType, false), - StructField("w_warehouse_id", StringType, false), - StructField("w_warehouse_name", StringType), - StructField("w_warehouse_sq_ft", IntegerType), - StructField("w_street_number", StringType), - StructField("w_street_name", StringType), - StructField("w_street_type", StringType), - StructField("w_suite_number", StringType), - StructField("w_city", StringType), - StructField("w_county", StringType), - StructField("w_state", StringType), - StructField("w_zip", StringType), - StructField("w_country", StringType), - StructField("w_gmt_offset", DoubleType) - )) - - def readWarehouseCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(warehouseSchema).csv(path) - - def setupWarehouseCSV(spark: SparkSession, path: String): Unit = - readWarehouseCSV(spark, path).createOrReplaceTempView("warehouse") - - def setupWarehouseParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("warehouse") - - def setupWarehouseOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("warehouse") - - // PROMOTION - val promotionSchema = StructType(Array( - StructField("p_promo_sk", LongType, false), - StructField("p_promo_id", StringType, false), - StructField("p_start_date_sk", LongType), - StructField("p_end_date_sk", LongType), - StructField("p_item_sk", LongType), - StructField("p_cost", DoubleType), - StructField("p_response_target", IntegerType), - StructField("p_promo_name", StringType), - StructField("p_channel_dmail", StringType), - StructField("p_channel_email", StringType), - StructField("p_channel_catalog", StringType), - StructField("p_channel_tv", StringType), - StructField("p_channel_radio", StringType), - StructField("p_channel_press", StringType), - StructField("p_channel_event", StringType), - StructField("p_channel_demo", StringType), - StructField("p_channel_details", StringType), - StructField("p_purpose", StringType), - StructField("p_discount_active", StringType) - )) - - def readPromotionCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(promotionSchema).csv(path) - - def setupPromotionCSV(spark: SparkSession, path: String): Unit = - readPromotionCSV(spark, path).createOrReplaceTempView("promotion") - - def setupPromotionParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("promotion") - - def setupPromotionOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("promotion") - - // STORE RETURNS - val storeReturnsSchema = StructType(Array( - StructField("sr_returned_date_sk", LongType), - StructField("sr_return_time_sk", LongType), - StructField("sr_item_sk", LongType, false), - StructField("sr_customer_sk", LongType), - StructField("sr_cdemo_sk", LongType), - StructField("sr_hdemo_sk", LongType), - StructField("sr_addr_sk", LongType), - StructField("sr_store_sk", LongType), - StructField("sr_reason_sk", LongType), - StructField("sr_ticket_number", LongType, false), - StructField("sr_return_quantity", IntegerType), - StructField("sr_return_amt", DoubleType), - StructField("sr_return_tax", DoubleType), - StructField("sr_return_amt_inc_tax", DoubleType), - StructField("sr_fee", DoubleType), - StructField("sr_return_ship_cost", DoubleType), - StructField("sr_refunded_cash", 
DoubleType), - StructField("sr_reversed_charge", DoubleType), - StructField("sr_store_credit", DoubleType), - StructField("sr_net_loss", DoubleType) - )) - - def readStoreReturnsCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(storeReturnsSchema).csv(path) - - def setupStoreReturnsCSV(spark: SparkSession, path: String): Unit = - readStoreReturnsCSV(spark, path).createOrReplaceTempView("store_returns") - - def setupStoreReturnsParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("store_returns") - - def setupStoreReturnsOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("store_returns") - - // INVENTORY - val inventorySchema = StructType(Array( - StructField("inv_date_sk", LongType, false), - StructField("inv_item_sk", LongType, false), - StructField("inv_warehouse_sk", LongType, false), - StructField("inv_quantity_on_hand", IntegerType) - )) - - def readInventoryCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(inventorySchema).csv(path) - - def setupInventoryCSV(spark: SparkSession, path: String): Unit = - readInventoryCSV(spark, path).createOrReplaceTempView("inventory") - - def setupInventoryParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("inventory") - - def setupInventoryOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("inventory") - - // MARKET PRICES - val marketPricesSchema = StructType(Array( - StructField("imp_sk", LongType, false), - StructField("imp_item_sk", LongType, false), - StructField("imp_competitor", StringType), - StructField("imp_competitor_price", DoubleType), - StructField("imp_start_date", LongType), - StructField("imp_end_date", LongType) - )) - - // The market prices directory has .dat files and separate audit .csv files, - // so filter the path to only read the .dat files. - def readMarketPricesCSV(spark: SparkSession, path: String): DataFrame = - spark.read.option("delimiter", "|").schema(marketPricesSchema).csv(path + "/*.dat") - - def setupMarketPricesCSV(spark: SparkSession, path: String): Unit = - readMarketPricesCSV(spark, path).createOrReplaceTempView("item_marketprices") - - def setupMarketPricesParquet(spark: SparkSession, path: String): Unit = - spark.read.parquet(path).createOrReplaceTempView("item_marketprices") - - def setupMarketPricesOrc(spark: SparkSession, path: String): Unit = - spark.read.orc(path).createOrReplaceTempView("item_marketprices") - -} - -object Q1Like { - def apply(spark: SparkSession): DataFrame = { - throw new UnsupportedOperationException("Q1 uses UDTF") - } -} - -object Q2Like { - def apply(spark: SparkSession): DataFrame = { - throw new UnsupportedOperationException("Q2 uses UDTF") - } -} - -object Q3Like { - def apply(spark: SparkSession): DataFrame = { - throw new UnsupportedOperationException("Q3 calls python") - } -} - -object Q4Like { - def apply(spark: SparkSession): DataFrame = { - throw new UnsupportedOperationException("Q4 calls python") - } -} - -object Q5Like { - def apply(spark: SparkSession): DataFrame = { - spark.sql( - """ - |-- TASK: - |-- Build a model using logistic regression for a visitor to an online store: based on existing users online - |-- activities (interest in items of different categories) and demographics. - |-- This model will be used to predict if the visitor is interested in a given item category. 
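As context for the schema and reader helpers defined above (readWarehouseCSV, readMarketPricesCSV and friends), this is roughly how such a helper is wired up end to end: read the pipe-delimited CSV with the explicit schema, register the temp view, and query it. Illustrative sketch only, not part of the patch; the path is a placeholder.

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder.appName("tpcxbb-setup-sketch").getOrCreate()
    val warehousePath = "/data/tpcxbb/warehouse"  // hypothetical location
    spark.read
      .option("delimiter", "|")
      .schema(warehouseSchema)  // schema defined earlier in this file
      .csv(warehousePath)
      .createOrReplaceTempView("warehouse")
    spark.sql("SELECT COUNT(*) FROM warehouse").show()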
- |-- Output the precision, accuracy and confusion matrix of model. - |-- Note: no need to actually classify existing users, as it will be later used to predict interests of unknown visitors. - | - |-- input vectors to the machine learning algorithm are: - |-- clicks_in_category BIGINT, -- used as label - number of clicks in specified category "q05_i_category" - |-- college_education BIGINT, -- has college education [0,1] - |-- male BIGINT, -- isMale [0,1] - |-- clicks_in_1 BIGINT, -- number of clicks in category id 1 - |-- clicks_in_2 BIGINT, -- number of clicks in category id 2 - |-- clicks_in_3 BIGINT, -- number of clicks in category id 3 - |-- clicks_in_4 BIGINT, -- number of clicks in category id 4 - |-- clicks_in_5 BIGINT, -- number of clicks in category id 5 - |-- clicks_in_6 BIGINT -- number of clicks in category id 6 - |-- clicks_in_7 BIGINT -- number of clicks in category id 7 - | - |SELECT - | --wcs_user_sk, - | clicks_in_category, - | CASE WHEN cd_education_status IN ('Advanced Degree', 'College', '4 yr Degree', '2 yr Degree') THEN 1 ELSE 0 END AS college_education, - | CASE WHEN cd_gender = 'M' THEN 1 ELSE 0 END AS male, - | clicks_in_1, - | clicks_in_2, - | clicks_in_3, - | clicks_in_4, - | clicks_in_5, - | clicks_in_6, - | clicks_in_7 - |FROM( - | SELECT - | wcs_user_sk, - | SUM( CASE WHEN i_category = 'Books' THEN 1 ELSE 0 END) AS clicks_in_category, - | SUM( CASE WHEN i_category_id = 1 THEN 1 ELSE 0 END) AS clicks_in_1, - | SUM( CASE WHEN i_category_id = 2 THEN 1 ELSE 0 END) AS clicks_in_2, - | SUM( CASE WHEN i_category_id = 3 THEN 1 ELSE 0 END) AS clicks_in_3, - | SUM( CASE WHEN i_category_id = 4 THEN 1 ELSE 0 END) AS clicks_in_4, - | SUM( CASE WHEN i_category_id = 5 THEN 1 ELSE 0 END) AS clicks_in_5, - | SUM( CASE WHEN i_category_id = 6 THEN 1 ELSE 0 END) AS clicks_in_6, - | SUM( CASE WHEN i_category_id = 7 THEN 1 ELSE 0 END) AS clicks_in_7 - | FROM web_clickstreams - | INNER JOIN item it ON (wcs_item_sk = i_item_sk - | AND wcs_user_sk IS NOT NULL) - | GROUP BY wcs_user_sk - |)q05_user_clicks_in_cat - |INNER JOIN customer ct ON wcs_user_sk = c_customer_sk - |INNER JOIN customer_demographics ON c_current_cdemo_sk = cd_demo_sk - | - """.stripMargin) - } -} - -// use temporary view here -object Q6Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql("DROP VIEW IF EXISTS q6_temp_table1") - spark.sql("DROP VIEW IF EXISTS q6_temp_table2") - - spark.sql( - """ - |-- TASK: - |-- Identifies customers shifting their purchase habit from store to web sales. - |-- Find customers who spend in relation more money in the second year following a given year in the web_sales channel then in the store sales channel. - |-- Hint: web second_year_total/first_year_total > store second_year_total/first_year_total - |-- Report customers details: first name, last name, their country of origin, login name and email address) and identify if they are preferred customer, for the top 100 customers with the highest increase in their second year web purchase ratio. - |-- Implementation notice: - |-- loosely based on implementation of tpc-ds q4 - Query description in tpcds_1.1.0.pdf does NOT match implementation in tpc-ds qgen\query_templates\query4.tpl - |-- This version: - |-- * does not have the catalog_sales table (there is none in our dataset). Web_sales plays the role of catalog_sales. 
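The Q5 comments above describe these per-user click counts and demographics as the input vectors for a logistic regression with clicks_in_category as the label. Purely as an illustration of that downstream step (it is not part of this benchmark code), a Spark ML sketch; the binarization threshold and iteration count are arbitrary choices, and a SparkSession named spark is assumed to be in scope:

    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.ml.feature.VectorAssembler
    import org.apache.spark.sql.functions.col

    // Label: did the user click in the target category at all?
    val q05 = Q5Like(spark)
      .withColumn("label", (col("clicks_in_category") > 0).cast("double"))
    val assembler = new VectorAssembler()
      .setInputCols(Array("college_education", "male", "clicks_in_1", "clicks_in_2",
        "clicks_in_3", "clicks_in_4", "clicks_in_5", "clicks_in_6", "clicks_in_7"))
      .setOutputCol("features")
    val model = new LogisticRegression().setMaxIter(50).fit(assembler.transform(q05))
    println(s"intercept = ${model.intercept}")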
- |-- * avoids the 4 self joins and replaces them with only one by creating two distinct views with better pre-filters and aggregations for store/web-sales first and second year - |-- * introduces a more logical sorting by reporting the top 100 customers ranked by their web_sales increase instead of just reporting random 100 customers - | - |CREATE TEMPORARY VIEW q6_temp_table1 AS - |SELECT ss_customer_sk AS customer_sk, - | sum( case when (d_year = 2001) THEN (((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) ELSE 0 END) first_year_total, - | sum( case when (d_year = 2001+1) THEN (((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) ELSE 0 END) second_year_total - |FROM store_sales - | ,date_dim - |WHERE ss_sold_date_sk = d_date_sk - |AND d_year BETWEEN 2001 AND 2001 +1 - |GROUP BY ss_customer_sk - |HAVING first_year_total > 0 -- required to avoid division by 0, because later we will divide by this value - | - """.stripMargin) - - spark.sql( - """ - |-- customer web sales - |CREATE TEMPORARY VIEW q6_temp_table2 AS - |SELECT ws_bill_customer_sk AS customer_sk , - | sum( case when (d_year = 2001) THEN (((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ELSE 0 END) first_year_total, - | sum( case when (d_year = 2001+1) THEN (((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ELSE 0 END) second_year_total - |FROM web_sales - | ,date_dim - |WHERE ws_sold_date_sk = d_date_sk - |AND d_year BETWEEN 2001 AND 2001 +1 - |GROUP BY ws_bill_customer_sk - |HAVING first_year_total > 0 -- required to avoid division by 0, because later we will divide by this value - | - """.stripMargin) - - spark.sql( - """ - |SELECT - | (web.second_year_total / web.first_year_total) AS web_sales_increase_ratio, - | c_customer_sk, - | c_first_name, - | c_last_name, - | c_preferred_cust_flag, - | c_birth_country, - | c_login, - | c_email_address - |FROM q6_temp_table1 store, - | q6_temp_table2 web, - | customer c - |WHERE store.customer_sk = web.customer_sk - |AND web.customer_sk = c_customer_sk - |-- if customer has sales in first year for both store and websales, select him only if web second_year_total/first_year_total ratio is bigger then his store second_year_total/first_year_total ratio. - |AND (web.second_year_total / web.first_year_total) > (store.second_year_total / store.first_year_total) - |ORDER BY - | web_sales_increase_ratio DESC, - | c_customer_sk, - | c_first_name, - | c_last_name, - | c_preferred_cust_flag, - | c_birth_country, - | c_login - |LIMIT 100 - | - """.stripMargin) - } -} - -object Q7Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql("DROP TABLE IF EXISTS q7_temp_table") - - // -- helper table: items with 20% higher then avg prices of product from same category - spark.sql( - """ - |-- TASK: (Based, but not equal to tpc-ds q6) - |-- List top 10 states in descending order with at least 10 customers who during - |-- a given month bought products with the price tag at least 20% higher than the - |-- average price of products in the same category. - | - |CREATE TABLE q7_temp_table USING parquet as - |-- "price tag at least 20% higher than the average price of products in the same category." 
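The q7_temp_table statement here selects items priced at least 20% above their category average; the same filter expressed with the DataFrame API, as a rough equivalent sketch (assumes the item temp view is registered and a SparkSession named spark is in scope):

    import org.apache.spark.sql.functions.{avg, col}

    val item = spark.table("item")
    val avgCategoryPrice = item.groupBy("i_category")
      .agg((avg("i_current_price") * 1.2).as("avg_price"))
    val highPriceItems = item.join(avgCategoryPrice, "i_category")
      .where(col("i_current_price") > col("avg_price"))
      .select("i_item_sk")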
- | - |SELECT - | k.i_item_sk - |FROM - | item k, - |( - | SELECT - | i_category, - | AVG(j.i_current_price) * 1.2 AS avg_price - | FROM item j - | GROUP BY j.i_category - |) avgCategoryPrice - |WHERE - | avgCategoryPrice.i_category = k.i_category - | AND k.i_current_price > avgCategoryPrice.avg_price - | - |""".stripMargin) - - - spark.sql( - """ - | - |SELECT - | ca_state, - | COUNT(*) AS cnt - |FROM - | customer_address a, - | customer c, - | store_sales s, - | q7_temp_table highPriceItems - |WHERE a.ca_address_sk = c.c_current_addr_sk - |AND c.c_customer_sk = s.ss_customer_sk - |AND ca_state IS NOT NULL - |AND ss_item_sk = highPriceItems.i_item_sk --cannot use "ss_item_sk IN ()". Hive only supports a single "IN" subquery per SQL statement. - |AND s.ss_sold_date_sk - |IN - |( --during a given month - | SELECT d_date_sk - | FROM date_dim - | WHERE d_year = 2004 - | AND d_moy = 7 - |) - |GROUP BY ca_state - |HAVING cnt >= 10 --at least 10 customers - |ORDER BY cnt DESC, ca_state --top 10 states in descending order - |LIMIT 10 - | - |""".stripMargin) - } -} - -object Q8Like { - def apply(spark: SparkSession): DataFrame = { - throw new UnsupportedOperationException("Q8 calls python") - } -} - -object Q9Like { - def apply(spark: SparkSession): DataFrame = { - spark.sql( - """ - |-- Aggregate total amount of sold items over different given types of combinations of customers based on selected groups of - |-- marital status, education status, sales price and different combinations of state and sales profit. - | - |SELECT SUM(ss1.ss_quantity) - |FROM store_sales ss1, date_dim dd,customer_address ca1, store s, customer_demographics cd - |-- select date range - |WHERE ss1.ss_sold_date_sk = dd.d_date_sk - |AND dd.d_year=2001 - |AND ss1.ss_addr_sk = ca1.ca_address_sk - |AND s.s_store_sk = ss1.ss_store_sk - |AND cd.cd_demo_sk = ss1.ss_cdemo_sk - |AND - |( - | ( - | cd.cd_marital_status = 'M' - | AND cd.cd_education_status = '4 yr Degree' - | AND 100 <= ss1.ss_sales_price - | AND ss1.ss_sales_price <= 150 - | ) - | OR - | ( - | cd.cd_marital_status = 'M' - | AND cd.cd_education_status = '4 yr Degree' - | AND 50 <= ss1.ss_sales_price - | AND ss1.ss_sales_price <= 200 - | ) - | OR - | ( - | cd.cd_marital_status = 'M' - | AND cd.cd_education_status = '4 yr Degree' - | AND 150 <= ss1.ss_sales_price - | AND ss1.ss_sales_price <= 200 - | ) - |) - |AND - |( - | ( - | ca1.ca_country = 'United States' - | AND ca1.ca_state IN ('KY', 'GA', 'NM') - | AND 0 <= ss1.ss_net_profit - | AND ss1.ss_net_profit <= 2000 - | ) - | OR - | ( - | ca1.ca_country = 'United States' - | AND ca1.ca_state IN ('MT', 'OR', 'IN') - | AND 150 <= ss1.ss_net_profit - | AND ss1.ss_net_profit <= 3000 - | ) - | OR - | ( - | ca1.ca_country = 'United States' - | AND ca1.ca_state IN ('WI', 'MO', 'WV') - | AND 50 <= ss1.ss_net_profit - | AND ss1.ss_net_profit <= 25000 - | ) - |) - |""".stripMargin) - } -} - - -// -// Query 10 sets the following hive optimization configs that we need to investigate more: -// -- This query requires parallel order by for fast and deterministic global ordering of final result -// set hive.optimize.sampling.orderby=${hiveconf:bigbench.spark.sql.optimize.sampling.orderby}; -// set hive.optimize.sampling.orderby.number=${hiveconf:bigbench.spark.sql.optimize.sampling.orderby.number}; -// set hive.optimize.sampling.orderby.percent=${hiveconf:bigbench.spark.sql.optimize.sampling.orderby.percent}; -// --debug print -// set hive.optimize.sampling.orderby; -// set hive.optimize.sampling.orderby.number; -// set 
hive.optimize.sampling.orderby.percent; -object Q10Like { - def apply(spark: SparkSession): DataFrame = { - throw new UnsupportedOperationException("Q10 uses UDF") - } -} - -object Q11Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql( - """ - |-- For a given product, measure the correlation of sentiments, including - |-- the number of reviews and average review ratings, on product monthly revenues - |-- within a given time frame. - | - |SELECT corr(reviews_count,avg_rating) - |FROM ( - | SELECT - | p.pr_item_sk AS pid, - | p.r_count AS reviews_count, - | p.avg_rating AS avg_rating, - | s.revenue AS m_revenue - | FROM ( - | SELECT - | pr_item_sk, - | count(*) AS r_count, - | avg(pr_review_rating) AS avg_rating - | FROM product_reviews - | WHERE pr_item_sk IS NOT NULL - | --this is GROUP BY 1 in original::same as pr_item_sk here::hive complains anyhow - | GROUP BY pr_item_sk - | ) p - | INNER JOIN ( - | SELECT - | ws_item_sk, - | SUM(ws_net_paid) AS revenue - | FROM web_sales ws - | -- Select date range of interest - | LEFT SEMI JOIN ( - | SELECT d_date_sk - | FROM date_dim d - | WHERE d.d_date >= '2003-01-02' - | AND d.d_date <= '2003-02-02' - | ) dd ON ( ws.ws_sold_date_sk = dd.d_date_sk ) - | WHERE ws_item_sk IS NOT null - | --this is GROUP BY 1 in original::same as ws_item_sk here::hive complains anyhow - | GROUP BY ws_item_sk - | ) s - | ON p.pr_item_sk = s.ws_item_sk - |) q11_review_stats - |""".stripMargin) - } -} - -// query had the following set but we aren't using Hive so shouldn't matter -// -// -- This query requires parallel order by for fast and deterministic global ordering of final result -// set hive.optimize.sampling.orderby=${hiveconf:bigbench.spark.sql.optimize.sampling.orderby}; -// set hive.optimize.sampling.orderby.number=${hiveconf:bigbench.spark.sql.optimize.sampling.orderby.number}; -// set hive.optimize.sampling.orderby.percent=${hiveconf:bigbench.spark.sql.optimize.sampling.orderby.percent}; -// --debug print -// set hive.optimize.sampling.orderby; -// set hive.optimize.sampling.orderby.number; -// set hive.optimize.sampling.orderby.percent; -object Q12Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql( - """ - |-- Find all customers who viewed items of a given category on the web - |-- in a given month and year that was followed by an in-store purchase of an item from the same category in the three - |-- consecutive months. - | - |SELECT DISTINCT wcs_user_sk -- Find all customers - |-- TODO check if 37134 is first day of the month - |FROM - |( -- web_clicks viewed items in date range with items from specified categories - | SELECT - | wcs_user_sk, - | wcs_click_date_sk - | FROM web_clickstreams, item - | WHERE wcs_click_date_sk BETWEEN 37134 AND (37134 + 30) -- in a given month and year - | AND i_category IN ('Books', 'Electronics') -- filter given category - | AND wcs_item_sk = i_item_sk - | AND wcs_user_sk IS NOT NULL - | AND wcs_sales_sk IS NULL --only views, not purchases - |) webInRange, - |( -- store sales in date range with items from specified categories - | SELECT - | ss_customer_sk, - | ss_sold_date_sk - | FROM store_sales, item - | WHERE ss_sold_date_sk BETWEEN 37134 AND (37134 + 90) -- in the three consecutive months. 
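Q11 above reduces to a single Pearson correlation computed by the SQL corr() aggregate. The same statistic can be cross-checked with DataFrameStatFunctions; a small sketch over just the review side of the join (illustrative only, SparkSession named spark assumed):

    val reviewStats = spark.sql(
      """SELECT pr_item_sk, COUNT(*) AS r_count, AVG(pr_review_rating) AS avg_rating
        |FROM product_reviews
        |WHERE pr_item_sk IS NOT NULL
        |GROUP BY pr_item_sk""".stripMargin)
    // Pearson correlation between review volume and average rating
    val c = reviewStats.stat.corr("r_count", "avg_rating")
    println(s"corr(r_count, avg_rating) = $c")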
- | AND i_category IN ('Books', 'Electronics') -- filter given category - | AND ss_item_sk = i_item_sk - | AND ss_customer_sk IS NOT NULL - |) storeInRange - |-- join web and store - |WHERE wcs_user_sk = ss_customer_sk - |AND wcs_click_date_sk < ss_sold_date_sk -- buy AFTER viewed on website - |ORDER BY wcs_user_sk - | - |""".stripMargin) - } -} - -object Q13Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql("DROP VIEW IF EXISTS q13_temp_table1") - spark.sql("DROP VIEW IF EXISTS q13_temp_table2") - - // used temporary view here instead of permanent view - spark.sql( - """ - |-- based on tpc-ds q74 - |-- Display customers with both store and web sales in - |-- consecutive years for whom the increase in web sales exceeds the increase in - |-- store sales for a specified year. - | - |-- Implementation notice: - |-- loosely based on implementation of tpc-ds q74 - Query description in tpcds_1.1.0.pdf does NOT match implementation in tpc-ds qgen\query_templates\query74.tpl - |-- This version: - |-- * avoids union of 2 sub-queries followed by 4 self joins and replaces them with only one join by creating two distinct views with better pre-filters and aggregations for store/web-sales first and second year - |-- * introduces a more logical sorting by reporting the top 100 customers ranked by their web_sales increase ratio instead of just reporting random 100 customers - | - |CREATE TEMPORARY VIEW q13_temp_table1 AS - |SELECT - | ss.ss_customer_sk AS customer_sk, - | sum( case when (d_year = 2001) THEN ss_net_paid ELSE 0 END) first_year_total, - | sum( case when (d_year = 2001+1) THEN ss_net_paid ELSE 0 END) second_year_total - |FROM store_sales ss - |JOIN ( - | SELECT d_date_sk, d_year - | FROM date_dim d - | WHERE d.d_year in (2001, (2001 + 1)) - |) dd on ( ss.ss_sold_date_sk = dd.d_date_sk ) - |GROUP BY ss.ss_customer_sk - |HAVING first_year_total > 0 - | - """.stripMargin) - - // used temporary view here instead of permanent view - spark.sql( - """ - |CREATE TEMPORARY VIEW q13_temp_table2 AS - |SELECT - | ws.ws_bill_customer_sk AS customer_sk, - | sum( case when (d_year = 2001) THEN ws_net_paid ELSE 0 END) first_year_total, - | sum( case when (d_year = 2001+1) THEN ws_net_paid ELSE 0 END) second_year_total - |FROM web_sales ws - |JOIN ( - | SELECT d_date_sk, d_year - | FROM date_dim d - | WHERE d.d_year in (2001, (2001 + 1) ) - |) dd ON ( ws.ws_sold_date_sk = dd.d_date_sk ) - |GROUP BY ws.ws_bill_customer_sk - |HAVING first_year_total > 0 - | - """.stripMargin) - - spark.sql( - """ - |SELECT - | c_customer_sk, - | c_first_name, - | c_last_name, - | (store.second_year_total / store.first_year_total) AS storeSalesIncreaseRatio , - | (web.second_year_total / web.first_year_total) AS webSalesIncreaseRatio - |FROM q13_temp_table1 store , - | q13_temp_table2 web , - | customer c - |WHERE store.customer_sk = web.customer_sk - |AND web.customer_sk = c_customer_sk - |-- if customer has sales in first year for both store and websales, select him only if web second_year_total/first_year_total ratio is bigger then his store second_year_total/first_year_total ratio. 
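On the `TODO check if 37134 is first day of the month` note in Q12 above: the date surrogate keys can be resolved against date_dim directly, so the assumption is easy to verify with a throwaway query (sketch only):

    val dates = spark.sql(
      "SELECT d_date_sk, d_date FROM date_dim WHERE d_date_sk IN (37134, 37134 + 30, 37134 + 90)")
    dates.show()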
- |AND (web.second_year_total / web.first_year_total) > (store.second_year_total / store.first_year_total) - |ORDER BY - | webSalesIncreaseRatio DESC, - | c_customer_sk, - | c_first_name, - | c_last_name - |LIMIT 100 - | - |""".stripMargin) - } -} - -object Q14Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql( - """ - |-- based on tpc-ds q90 - |-- What is the ratio between the number of items sold over - |-- the internet in the morning (7 to 8am) to the number of items sold in the evening - |-- (7 to 8pm) of customers with a specified number of dependents. Consider only - |-- websites with a high amount of content. - | - |SELECT CASE WHEN pmc > 0 THEN amc/pmc ELSE -1.00 END AS am_pm_ratio - | FROM ( - | SELECT SUM(amc1) AS amc, SUM(pmc1) AS pmc - | FROM( - | SELECT - | CASE WHEN t_hour BETWEEN 7 AND 8 THEN COUNT(1) ELSE 0 END AS amc1, - | CASE WHEN t_hour BETWEEN 19 AND 20 THEN COUNT(1) ELSE 0 END AS pmc1 - | FROM web_sales ws - | JOIN household_demographics hd ON (hd.hd_demo_sk = ws.ws_ship_hdemo_sk and hd.hd_dep_count = 5) - | JOIN web_page wp ON (wp.wp_web_page_sk = ws.ws_web_page_sk and wp.wp_char_count BETWEEN 5000 AND 6000 ) - | JOIN time_dim td ON (td.t_time_sk = ws.ws_sold_time_sk and td.t_hour IN (7,8,19,20)) - | GROUP BY t_hour) cnt_am_pm - | ) sum_am_pm - | - |""".stripMargin) - } -} - -object Q15Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql( - """ - |-- Find the categories with flat or declining sales for in store purchases - |-- during a given year for a given store. - | - |SELECT * - |FROM ( - | SELECT - | cat, - | --input: - | --SUM(x)as sumX, - | --SUM(y)as sumY, - | --SUM(xy)as sumXY, - | --SUM(xx)as sumXSquared, - | --count(x) as N, - | - | --formula stage1 (logical): - | --N * sumXY - sumX * sumY AS numerator, - | --N * sumXSquared - sumX*sumX AS denom - | --numerator / denom as slope, - | --(sumY - slope * sumX) / N as intercept - | -- - | --formula stage2(inserted hive aggregations): - | --(count(x) * SUM(xy) - SUM(x) * SUM(y)) AS numerator, - | --(count(x) * SUM(xx) - SUM(x) * SUM(x)) AS denom - | --numerator / denom as slope, - | --(sum(y) - slope * sum(x)) / count(X) as intercept - | -- - | --Formula stage 3: (insert numerator and denom into slope and intercept function) - | ((count(x) * SUM(xy) - SUM(x) * SUM(y)) / (count(x) * SUM(xx) - SUM(x) * SUM(x)) ) AS slope, - | (SUM(y) - ((count(x) * SUM(xy) - SUM(x) * SUM(y)) / (count(x) * SUM(xx) - SUM(x)*SUM(x)) ) * SUM(x)) / count(x) AS intercept - | FROM ( - | SELECT - | i.i_category_id AS cat, -- ranges from 1 to 10 - | s.ss_sold_date_sk AS x, - | SUM(s.ss_net_paid) AS y, - | s.ss_sold_date_sk * SUM(s.ss_net_paid) AS xy, - | s.ss_sold_date_sk * s.ss_sold_date_sk AS xx - | FROM store_sales s - | -- select date range - | LEFT SEMI JOIN ( - | SELECT d_date_sk - | FROM date_dim d - | WHERE d.d_date >= '2001-09-02' - | AND d.d_date <= '2002-09-02' - | ) dd ON ( s.ss_sold_date_sk=dd.d_date_sk ) - | INNER JOIN item i ON s.ss_item_sk = i.i_item_sk - | WHERE i.i_category_id IS NOT NULL - | AND s.ss_store_sk = 10 -- for a given store ranges from 1 to 12 - | GROUP BY i.i_category_id, s.ss_sold_date_sk - | ) temp - | GROUP BY cat - |) regression - |WHERE slope <= 0 - |ORDER BY cat - |-- limit not required, number of categories is known to be small and of fixed size across scalefactors - | - |""".stripMargin) - } -} - -object Q16Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql( - """ - |-- based on tpc-ds q40 - |-- Compute the impact of an item price change on the - |-- 
store sales by computing the total sales for items in a 30 day period before and - |-- after the price change. Group the items by location of warehouse where they - |-- were delivered from. - | - |SELECT w_state, i_item_id, - | SUM( - | CASE WHEN (unix_timestamp(d_date,'yyyy-MM-dd') < unix_timestamp('2001-03-16','yyyy-MM-dd')) - | THEN ws_sales_price - COALESCE(wr_refunded_cash,0) - | ELSE 0.0 END - | ) AS sales_before, - | SUM( - | CASE WHEN (unix_timestamp(d_date,'yyyy-MM-dd') >= unix_timestamp('2001-03-16','yyyy-MM-dd')) - | THEN ws_sales_price - COALESCE(wr_refunded_cash,0) - | ELSE 0.0 END - | ) AS sales_after - |FROM ( - | SELECT * - | FROM web_sales ws - | LEFT OUTER JOIN web_returns wr ON (ws.ws_order_number = wr.wr_order_number - | AND ws.ws_item_sk = wr.wr_item_sk) - |) a1 - |JOIN item i ON a1.ws_item_sk = i.i_item_sk - |JOIN warehouse w ON a1.ws_warehouse_sk = w.w_warehouse_sk - |JOIN date_dim d ON a1.ws_sold_date_sk = d.d_date_sk - |AND unix_timestamp(d.d_date, 'yyyy-MM-dd') >= unix_timestamp('2001-03-16', 'yyyy-MM-dd') - 30*24*60*60 --subtract 30 days in seconds - |AND unix_timestamp(d.d_date, 'yyyy-MM-dd') <= unix_timestamp('2001-03-16', 'yyyy-MM-dd') + 30*24*60*60 --add 30 days in seconds - |GROUP BY w_state,i_item_id - |--original was ORDER BY w_state,i_item_id , but CLUSTER BY is hives cluster scale counter part - |ORDER BY w_state,i_item_id - |LIMIT 100 - | - |""".stripMargin) - } -} - -object Q17Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql( - """ - |-- based on tpc-ds q61 - |-- Find the ratio of items sold with and without promotions - |-- in a given month and year. Only items in certain categories sold to customers - |-- living in a specific time zone are considered. - | - |SELECT sum(promotional) as promotional, sum(total) as total, - | CASE WHEN sum(total) > 0 THEN 100*sum(promotional)/sum(total) - | ELSE 0.0 END as promo_percent - |FROM( - |SELECT p_channel_email, p_channel_dmail, p_channel_tv, - |CASE WHEN (p_channel_dmail = 'Y' OR p_channel_email = 'Y' OR p_channel_tv = 'Y') - |THEN SUM(ss_ext_sales_price) ELSE 0 END as promotional, - |SUM(ss_ext_sales_price) total - | FROM store_sales ss - | LEFT SEMI JOIN date_dim dd ON ss.ss_sold_date_sk = dd.d_date_sk AND dd.d_year = 2001 AND dd.d_moy = 12 - | LEFT SEMI JOIN item i ON ss.ss_item_sk = i.i_item_sk AND i.i_category IN ('Books', 'Music') - | LEFT SEMI JOIN store s ON ss.ss_store_sk = s.s_store_sk AND s.s_gmt_offset = -5 - | LEFT SEMI JOIN ( SELECT c.c_customer_sk FROM customer c LEFT SEMI JOIN customer_address ca - | ON c.c_current_addr_sk = ca.ca_address_sk AND ca.ca_gmt_offset = -5 - | ) sub_c ON ss.ss_customer_sk = sub_c.c_customer_sk - | JOIN promotion p ON ss.ss_promo_sk = p.p_promo_sk - | GROUP BY p_channel_email, p_channel_dmail, p_channel_tv - | ) sum_promotional - |-- we don't need a 'ON' join condition. result is just two numbers. - |ORDER by promotional, total - |LIMIT 100 -- kinda useless, result is one line with two numbers, but original tpc-ds query has it too. 
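The commented derivation in Q15 above is the ordinary least-squares fit: slope = (N*sum(xy) - sum(x)*sum(y)) / (N*sum(xx) - sum(x)^2) and intercept = (sum(y) - slope*sum(x)) / N. A small Scala helper that mirrors the final "stage 3" expressions, useful for sanity-checking the SQL on a handful of rows:

    // x = ss_sold_date_sk, y = per-day net paid for the category (as in the Q15 sub-query)
    def slopeAndIntercept(n: Long, sumX: Double, sumY: Double,
        sumXY: Double, sumXX: Double): (Double, Double) = {
      val slope = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX)
      val intercept = (sumY - slope * sumX) / n
      (slope, intercept)
    }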
- | - |""".stripMargin) - } -} - -object Q18Like { - def apply(spark: SparkSession): DataFrame = { - throw new UnsupportedOperationException("Q18 uses UDF") - } -} - -object Q19Like { - def apply(spark: SparkSession): DataFrame = { - throw new UnsupportedOperationException("Q19 uses UDF") - } -} - - -/* - * query had the following set but we aren't using Hive so shouldn't matter - * - * -- This query requires parallel order by for fast and deterministic global ordering of final result - * set hive.optimize.sampling.orderby=${hiveconf:bigbench.spark.sql.optimize.sampling.orderby}; - * set hive.optimize.sampling.orderby.number=${hiveconf:bigbench.spark.sql.optimize.sampling.orderby.number}; - * set hive.optimize.sampling.orderby.percent=${hiveconf:bigbench.spark.sql.optimize.sampling.orderby.percent}; - * --debug print - * set hive.optimize.sampling.orderby; - * set hive.optimize.sampling.orderby.number; - * set hive.optimize.sampling.orderby.percent; - */ -object Q20Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql( - """ - |-- TASK: - |-- Customer segmentation for return analysis: Customers are separated - |-- along the following dimensions: return frequency, return order ratio (total - |-- number of orders partially or fully returned versus the total number of orders), - |-- return item ratio (total number of items returned versus the number of items - |-- purchased), return amount ration (total monetary amount of items returned versus - |-- the amount purchased), return order ratio. Consider the store returns during - |-- a given year for the computation. - | - |-- IMPLEMENTATION NOTICE: - |-- hive provides the input for the clustering program - |-- The input format for the clustering is: - |-- user surrogate key, - |-- order ratio (number of returns / number of orders), - |-- item ratio (number of returned items / number of ordered items), - |-- money ratio (returned money / payed money), - |-- number of returns - | - |SELECT - | ss_customer_sk AS user_sk, - | round(CASE WHEN ((returns_count IS NULL) OR (orders_count IS NULL) OR ((returns_count / orders_count) IS NULL) ) THEN 0.0 ELSE (returns_count / orders_count) END, 7) AS orderRatio, - | round(CASE WHEN ((returns_items IS NULL) OR (orders_items IS NULL) OR ((returns_items / orders_items) IS NULL) ) THEN 0.0 ELSE (returns_items / orders_items) END, 7) AS itemsRatio, - | round(CASE WHEN ((returns_money IS NULL) OR (orders_money IS NULL) OR ((returns_money / orders_money) IS NULL) ) THEN 0.0 ELSE (returns_money / orders_money) END, 7) AS monetaryRatio, - | round(CASE WHEN ( returns_count IS NULL ) THEN 0.0 ELSE returns_count END, 0) AS frequency - |FROM - | ( - | SELECT - | ss_customer_sk, - | -- return order ratio - | COUNT(distinct(ss_ticket_number)) AS orders_count, - | -- return ss_item_sk ratio - | COUNT(ss_item_sk) AS orders_items, - | -- return monetary amount ratio - | SUM( ss_net_paid ) AS orders_money - | FROM store_sales s - | GROUP BY ss_customer_sk - | ) orders - | LEFT OUTER JOIN - | ( - | SELECT - | sr_customer_sk, - | -- return order ratio - | count(distinct(sr_ticket_number)) as returns_count, - | -- return ss_item_sk ratio - | COUNT(sr_item_sk) as returns_items, - | -- return monetary amount ratio - | SUM( sr_return_amt ) AS returns_money - | FROM store_returns - | GROUP BY sr_customer_sk - | ) returned ON ss_customer_sk=sr_customer_sk - |ORDER BY user_sk - | - | - |""".stripMargin) - } -} - -object Q21Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql( - """ - |-- based on tpc-ds 
q29 - |-- Get all items that were sold in stores in a given month - |-- and year and which were returned in the next 6 months and re-purchased by - |-- the returning customer afterwards through the web sales channel in the following - |-- three years. For those items, compute the total quantity sold through the - |-- store, the quantity returned and the quantity purchased through the web. Group - |-- this information by item and store. - | - |SELECT - | part_i.i_item_id AS i_item_id, - | part_i.i_item_desc AS i_item_desc, - | part_s.s_store_id AS s_store_id, - | part_s.s_store_name AS s_store_name, - | SUM(part_ss.ss_quantity) AS store_sales_quantity, - | SUM(part_sr.sr_return_quantity) AS store_returns_quantity, - | SUM(part_ws.ws_quantity) AS web_sales_quantity - |FROM ( - | SELECT - | sr_item_sk, - | sr_customer_sk, - | sr_ticket_number, - | sr_return_quantity - | FROM - | store_returns sr, - | date_dim d2 - | WHERE d2.d_year = 2003 - | AND d2.d_moy BETWEEN 1 AND 1 + 6 --which were returned in the next six months - | AND sr.sr_returned_date_sk = d2.d_date_sk - |) part_sr - |INNER JOIN ( - | SELECT - | ws_item_sk, - | ws_bill_customer_sk, - | ws_quantity - | FROM - | web_sales ws, - | date_dim d3 - | WHERE d3.d_year BETWEEN 2003 AND 2003 + 2 -- in the following three years (re-purchased by the returning customer afterwards through the web sales channel) - | AND ws.ws_sold_date_sk = d3.d_date_sk - |) part_ws ON ( - | part_sr.sr_item_sk = part_ws.ws_item_sk - | AND part_sr.sr_customer_sk = part_ws.ws_bill_customer_sk - |) - |INNER JOIN ( - | SELECT - | ss_item_sk, - | ss_store_sk, - | ss_customer_sk, - | ss_ticket_number, - | ss_quantity - | FROM - | store_sales ss, - | date_dim d1 - | WHERE d1.d_year = 2003 - | AND d1.d_moy = 1 - | AND ss.ss_sold_date_sk = d1.d_date_sk - |) part_ss ON ( - | part_ss.ss_ticket_number = part_sr.sr_ticket_number - | AND part_ss.ss_item_sk = part_sr.sr_item_sk - | AND part_ss.ss_customer_sk = part_sr.sr_customer_sk - |) - |INNER JOIN store part_s ON ( - | part_s.s_store_sk = part_ss.ss_store_sk - |) - |INNER JOIN item part_i ON ( - | part_i.i_item_sk = part_ss.ss_item_sk - |) - |GROUP BY - | part_i.i_item_id, - | part_i.i_item_desc, - | part_s.s_store_id, - | part_s.s_store_name - |ORDER BY - | part_i.i_item_id, - | part_i.i_item_desc, - | part_s.s_store_id, - | part_s.s_store_name - |LIMIT 100 - | - |""".stripMargin) - } -} - -object Q22Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql( - """ - |-- based on tpc-ds q21 - |-- For all items whose price was changed on a given date, - |-- compute the percentage change in inventory between the 30-day period BEFORE - |-- the price change and the 30-day period AFTER the change. Group this - |-- information by warehouse. 
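The Q22 query body that follows brackets inventory around the price-change date with datediff(d_date, '2001-05-08') in [-30, 30]. A rough DataFrame-API sketch of the same before/after bucketing, simplified to group by item only (assumes the inventory and date_dim temp views and a SparkSession named spark):

    import org.apache.spark.sql.functions.{col, datediff, lit, sum, to_date, when}

    val windowed = spark.table("inventory")
      .join(spark.table("date_dim"), col("inv_date_sk") === col("d_date_sk"))
      .withColumn("delta", datediff(col("d_date"), to_date(lit("2001-05-08"))))
      .where(col("delta").between(-30, 30))
    val beforeAfter = windowed.groupBy("inv_item_sk").agg(
      sum(when(col("delta") < 0, col("inv_quantity_on_hand")).otherwise(0)).as("inv_before"),
      sum(when(col("delta") >= 0, col("inv_quantity_on_hand")).otherwise(0)).as("inv_after"))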
- |SELECT - | w_warehouse_name, - | i_item_id, - | SUM( CASE WHEN datediff(d_date, '2001-05-08') < 0 - | THEN inv_quantity_on_hand - | ELSE 0 END - | ) AS inv_before, - | SUM( CASE WHEN datediff(d_date, '2001-05-08') >= 0 - | THEN inv_quantity_on_hand - | ELSE 0 END - | ) AS inv_after - |FROM inventory inv, - | item i, - | warehouse w, - | date_dim d - |WHERE i_current_price BETWEEN 0.98 AND 1.5 - |AND i_item_sk = inv_item_sk - |AND inv_warehouse_sk = w_warehouse_sk - |AND inv_date_sk = d_date_sk - |AND datediff(d_date, '2001-05-08') >= -30 - |AND datediff(d_date, '2001-05-08') <= 30 - | - |GROUP BY w_warehouse_name, i_item_id - |HAVING inv_before > 0 - |AND inv_after / inv_before >= 2.0 / 3.0 - |AND inv_after / inv_before <= 3.0 / 2.0 - |ORDER BY w_warehouse_name, i_item_id - |LIMIT 100 - | - |""".stripMargin) - } -} - -/* - * query had the following set but we aren't using Hive so shouldn't matter - * - * -- This query requires parallel order by for fast and deterministic global ordering of final result - * set hive.optimize.sampling.orderby=true; - * set hive.optimize.sampling.orderby.number=20000; - * set hive.optimize.sampling.orderby.percent=0.1; - * --debug print - * set hive.optimize.sampling.orderby; - * set hive.optimize.sampling.orderby.number; - * set hive.optimize.sampling.orderby.percent; - */ -object Q23Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql("DROP TABLE IF EXISTS q23_temp_table") - - spark.sql( - """ - |-- based on tpc-ds q39 - |-- This query contains multiple, related iterations: - |-- Iteration 1: Calculate the coefficient of variation and mean of every item - |-- and warehouse of the given and the consecutive month - |-- Iteration 2: Find items that had a coefficient of variation of 1.3 or larger - |-- in the given and the consecutive month - | - |CREATE TABLE q23_temp_table USING parquet AS - |SELECT - | inv_warehouse_sk, - | -- w_warehouse_name, - | inv_item_sk, - | d_moy, - | cast( ( stdev / mean ) as decimal(15,5)) cov - |FROM ( - | --Iteration 1: Calculate the coefficient of variation and mean of every item - | -- and warehouse of the given and the consecutive month - | SELECT - | inv_warehouse_sk, - | inv_item_sk, - | d_moy, - | -- implicit group by d_moy using CASE filters inside the stddev_samp() and avg() UDF's. This saves us from requiring a self join for correlation of d_moy and d_moy+1 later on. 
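The cov column built just above is the coefficient of variation, stddev_samp / mean, per warehouse, item and month. The same aggregate written with the DataFrame API, as a sketch (column names taken from the query; a SparkSession named spark is assumed):

    import org.apache.spark.sql.functions.{avg, col, stddev_samp}

    val cov = spark.table("inventory")
      .join(spark.table("date_dim"), col("inv_date_sk") === col("d_date_sk"))
      .where(col("d_year") === 2001 && col("d_moy").between(1, 2))
      .groupBy("inv_warehouse_sk", "inv_item_sk", "d_moy")
      .agg((stddev_samp("inv_quantity_on_hand") / avg("inv_quantity_on_hand")).as("cov"))
      .where(col("cov") >= 1.3)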
- | cast( stddev_samp( inv_quantity_on_hand ) as decimal(15,5)) stdev, - | cast( avg( inv_quantity_on_hand ) as decimal(15,5)) mean - | - | FROM inventory inv - | JOIN date_dim d - | ON (inv.inv_date_sk = d.d_date_sk - | AND d.d_year = 2001 - | AND d_moy between 1 AND (1 + 1) - | ) - | GROUP BY - | inv_warehouse_sk, - | inv_item_sk, - | d_moy - |) q23_tmp_inv_part - |--JOIN warehouse w ON inv_warehouse_sk = w.w_warehouse_sk - |WHERE mean > 0 --avoid "div by 0" - | AND stdev/mean >= 1.3 - | - """.stripMargin) - - spark.sql( - """ - |-- Begin: the real query part - |-- Iteration 2: Find items that had a coefficient of variation of 1.5 or larger - |-- in the given and the consecutive month - |SELECT - | inv1.inv_warehouse_sk, - | inv1.inv_item_sk, - | inv1.d_moy, - | inv1.cov, - | inv2.d_moy, - | inv2.cov - |FROM q23_temp_table inv1 - |JOIN q23_temp_table inv2 - | ON( inv1.inv_warehouse_sk=inv2.inv_warehouse_sk - | AND inv1.inv_item_sk = inv2.inv_item_sk - | AND inv1.d_moy = 1 - | AND inv2.d_moy = 1 + 1 - | ) - |ORDER BY - | inv1.inv_warehouse_sk, - | inv1.inv_item_sk - | - |""".stripMargin) - } -} - -object Q24Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql("DROP TABLE IF EXISTS q24_temp_table") - - spark.sql( - """ - |--For a given product, measure the effect of competitor's prices on - |--products' in-store and online sales. (Compute the cross-price elasticity of demand - |--for a given product.) - |-- Step1 : - |--Calculating the Percentage Change in Quantity Demanded of Good X : [QDemand(NEW) - QDemand(OLD)] / QDemand(OLD) - |--Step 2: - |-- Calculating the Percentage Change in Price of Good Y: [Price(NEW) - Price(OLD)] / Price(OLD) - |-- Step 3 final: - |--Cross-Price Elasticity of Demand (CPEoD) is given by: CPEoD = (% Change in Quantity Demand for Good X)/(% Change in Price for Good Y)) - | - |-- compute the price change % for the competitor items - |-- will give a list of competitor prices changes - | - |CREATE TABLE q24_temp_table USING parquet AS - |SELECT - | i_item_sk, - | imp_sk, - | --imp_competitor, - | (imp_competitor_price - i_current_price)/i_current_price AS price_change, - | imp_start_date, - | (imp_end_date - imp_start_date) AS no_days_comp_price - |FROM item i ,item_marketprices imp - |WHERE i.i_item_sk = imp.imp_item_sk - |AND i.i_item_sk = 10000 - |-- AND imp.imp_competitor_price < i.i_current_price --consider all price changes not just where competitor is cheaper - |ORDER BY i_item_sk, - | imp_sk, - | --imp_competitor, --add to compute cross_price_elasticity per competitor is instead of a single number - | imp_start_date - | - | - |""".stripMargin) - - spark.sql( - """ - |SELECT ws_item_sk, - | --ws.imp_competitor, --add to compute cross_price_elasticity per competitor is instead of a single number - | avg ( (current_ss_quant + current_ws_quant - prev_ss_quant - prev_ws_quant) / ((prev_ss_quant + prev_ws_quant) * ws.price_change)) AS cross_price_elasticity - |FROM - | ( --websales items sold quantity before and after competitor price change - | SELECT - | ws_item_sk, - | imp_sk, - | --imp_competitor, --add to compute cross_price_elasticity per competitor is instead of a single number - | price_change, - | SUM( CASE WHEN ( (ws_sold_date_sk >= c.imp_start_date) AND (ws_sold_date_sk < (c.imp_start_date + c.no_days_comp_price))) THEN ws_quantity ELSE 0 END ) AS current_ws_quant, - | SUM( CASE WHEN ( (ws_sold_date_sk >= (c.imp_start_date - c.no_days_comp_price)) AND (ws_sold_date_sk < c.imp_start_date)) THEN ws_quantity ELSE 0 END ) AS prev_ws_quant - 
| FROM web_sales ws - | JOIN q24_temp_table c ON ws.ws_item_sk = c.i_item_sk - | GROUP BY ws_item_sk, - | imp_sk, - | --imp_competitor, - | price_change - | ) ws - |JOIN - | (--storesales items sold quantity before and after competitor price change - | SELECT - | ss_item_sk, - | imp_sk, - | --imp_competitor, --add to compute cross_price_elasticity per competitor is instead of a single number - | price_change, - | SUM( CASE WHEN ((ss_sold_date_sk >= c.imp_start_date) AND (ss_sold_date_sk < (c.imp_start_date + c.no_days_comp_price))) THEN ss_quantity ELSE 0 END) AS current_ss_quant, - | SUM( CASE WHEN ((ss_sold_date_sk >= (c.imp_start_date - c.no_days_comp_price)) AND (ss_sold_date_sk < c.imp_start_date)) THEN ss_quantity ELSE 0 END) AS prev_ss_quant - | FROM store_sales ss - | JOIN q24_temp_table c ON c.i_item_sk = ss.ss_item_sk - | GROUP BY ss_item_sk, - | imp_sk, - | --imp_competitor, --add to compute cross_price_elasticity per competitor is instead of a single number - | price_change - | ) ss - | ON (ws.ws_item_sk = ss.ss_item_sk and ws.imp_sk = ss.imp_sk) - |GROUP BY ws.ws_item_sk - |--uncomment below to compute cross_price_elasticity per competitor is instead of a single number (requires ordering) - | --,ws.imp_competitor - |--ORDER BY ws.ws_item_sk, - |-- ws.imp_competitor - | - """.stripMargin) - } -} - -/* - * query had the following set but we aren't using Hive so shouldn't matter - * - * -- This query requires parallel order by for fast and deterministic global ordering of final result - * set hive.optimize.sampling.orderby=true; - * set hive.optimize.sampling.orderby.number=20000; - * set hive.optimize.sampling.orderby.percent=0.1; - * --debug print - * set hive.optimize.sampling.orderby; - * set hive.optimize.sampling.orderby.number; - * set hive.optimize.sampling.orderby.percent; - */ -object Q25Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql("DROP TABLE IF EXISTS q25_temp_table") - - spark.sql( - """ - |-- TASK: - |-- Customer segmentation analysis: Customers are separated along the - |-- following key shopping dimensions: recency of last visit, frequency of visits and - |-- monetary amount. Use the store and online purchase data during a given year - |-- to compute. 
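The three commented steps in Q24 above amount to the textbook cross-price elasticity: CPEoD = (% change in quantity demanded of good X) / (% change in price of good Y). A tiny helper stating that formula explicitly, for cross-checking individual items:

    def crossPriceElasticity(oldQuantity: Double, newQuantity: Double,
        oldPrice: Double, newPrice: Double): Double = {
      val quantityChangePct = (newQuantity - oldQuantity) / oldQuantity
      val priceChangePct = (newPrice - oldPrice) / oldPrice
      quantityChangePct / priceChangePct
    }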
After model of separation is build, - |-- report for the analysed customers to which "group" they where assigned - | - |-- IMPLEMENTATION NOTICE: - |-- hive provides the input for the clustering program - |-- The input format for the clustering is: - |-- customer ID, - |-- flag if customer bought something within the last 60 days (integer 0 or 1), - |-- number of orders, - |-- total amount spent - |CREATE TABLE q25_temp_table ( - | cid BIGINT, - | frequency BIGINT, - | most_recent_date BIGINT, - | amount decimal(15,2) - |) USING parquet - | - |""".stripMargin) - - spark.sql( - """ - |-- Add store sales data - |INSERT INTO TABLE q25_temp_table - |SELECT - | ss_customer_sk AS cid, - | count(distinct ss_ticket_number) AS frequency, - | max(ss_sold_date_sk) AS most_recent_date, - | SUM(ss_net_paid) AS amount - |FROM store_sales ss - |JOIN date_dim d ON ss.ss_sold_date_sk = d.d_date_sk - |WHERE d.d_date > '2002-01-02' - |AND ss_customer_sk IS NOT NULL - |GROUP BY ss_customer_sk - | - """.stripMargin) - - spark.sql( - """ - |-- Add web sales data - |INSERT INTO TABLE q25_temp_table - |SELECT - | ws_bill_customer_sk AS cid, - | count(distinct ws_order_number) AS frequency, - | max(ws_sold_date_sk) AS most_recent_date, - | SUM(ws_net_paid) AS amount - |FROM web_sales ws - |JOIN date_dim d ON ws.ws_sold_date_sk = d.d_date_sk - |WHERE d.d_date > '2002-01-02' - |AND ws_bill_customer_sk IS NOT NULL - |GROUP BY ws_bill_customer_sk - | - """.stripMargin) - - spark.sql( - """ - |SELECT - | -- rounding of values not necessary - | cid AS cid, - | CASE WHEN 37621 - max(most_recent_date) < 60 THEN 1.0 ELSE 0.0 END - | AS recency, -- 37621 == 2003-01-02 - | SUM(frequency) AS frequency, --total frequency - | SUM(amount) AS totalspend --total amount - |FROM q25_temp_table - |GROUP BY cid - |--CLUSTER BY cid --cluster by preceded by group by is silently ignored by hive but fails in spark - |--no total ordering with ORDER BY required, further processed by clustering algorithm - |ORDER BY cid - | - | - """.stripMargin) - } -} - -object Q26Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql( - """ - |-- TASK: - |-- Cluster customers into book buddies/club groups based on their in - |-- store book purchasing histories. 
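Q25 above (and Q26 here) only prepare the per-customer feature rows; the clustering itself happens in a separate program that is not part of this file. Purely as an illustration of that external step, a Spark ML K-means sketch over the Q25 output; the cast to double, k = 8 and the seed are arbitrary assumptions:

    import org.apache.spark.ml.clustering.KMeans
    import org.apache.spark.ml.feature.VectorAssembler

    val rfm = Q25Like(spark).selectExpr("cid",
      "CAST(recency AS DOUBLE) AS recency",
      "CAST(frequency AS DOUBLE) AS frequency",
      "CAST(totalspend AS DOUBLE) AS totalspend")
    val assembled = new VectorAssembler()
      .setInputCols(Array("recency", "frequency", "totalspend"))
      .setOutputCol("features")
      .transform(rfm)
    val model = new KMeans().setK(8).setSeed(1L).fit(assembled)
    model.transform(assembled).select("cid", "prediction").show(10)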
After model of separation is build, - |-- report for the analysed customers to which "group" they where assigned - | - |-- IMPLEMENTATION NOTICE: - |-- hive provides the input for the clustering program - |-- The input format for the clustering is: - |-- customer ID, - |-- sum of store sales in the item class ids [1,15] - | - |SELECT - | ss.ss_customer_sk AS cid, - | count(CASE WHEN i.i_class_id=1 THEN 1 ELSE NULL END) AS id1, - | count(CASE WHEN i.i_class_id=2 THEN 1 ELSE NULL END) AS id2, - | count(CASE WHEN i.i_class_id=3 THEN 1 ELSE NULL END) AS id3, - | count(CASE WHEN i.i_class_id=4 THEN 1 ELSE NULL END) AS id4, - | count(CASE WHEN i.i_class_id=5 THEN 1 ELSE NULL END) AS id5, - | count(CASE WHEN i.i_class_id=6 THEN 1 ELSE NULL END) AS id6, - | count(CASE WHEN i.i_class_id=7 THEN 1 ELSE NULL END) AS id7, - | count(CASE WHEN i.i_class_id=8 THEN 1 ELSE NULL END) AS id8, - | count(CASE WHEN i.i_class_id=9 THEN 1 ELSE NULL END) AS id9, - | count(CASE WHEN i.i_class_id=10 THEN 1 ELSE NULL END) AS id10, - | count(CASE WHEN i.i_class_id=11 THEN 1 ELSE NULL END) AS id11, - | count(CASE WHEN i.i_class_id=12 THEN 1 ELSE NULL END) AS id12, - | count(CASE WHEN i.i_class_id=13 THEN 1 ELSE NULL END) AS id13, - | count(CASE WHEN i.i_class_id=14 THEN 1 ELSE NULL END) AS id14, - | count(CASE WHEN i.i_class_id=15 THEN 1 ELSE NULL END) AS id15 - |FROM store_sales ss - |INNER JOIN item i - | ON (ss.ss_item_sk = i.i_item_sk - | AND i.i_category IN ('Books') - | AND ss.ss_customer_sk IS NOT NULL - |) - |GROUP BY ss.ss_customer_sk - |HAVING count(ss.ss_item_sk) > 5 - |--CLUSTER BY cid --cluster by preceded by group by is silently ignored by hive but fails in spark - |ORDER BY cid - | - |""".stripMargin) - } -} - -object Q27Like { - def apply(spark: SparkSession): DataFrame = { - throw new UnsupportedOperationException("Q27 uses UDF") - } -} - -// Not setting hive.optimize.sample.orderby settings -// -// NOTE - this was inserting into 2 different tables, need to figure out what we want to do here -// -// This currently fails on the GPU due to inserts -object Q28Like { - def apply(spark: SparkSession): DataFrame = { - - spark.sql("DROP TABLE IF EXISTS q28_temp_table1") - - spark.sql( - """ - |CREATE TABLE q28_temp_table1 ( - | pr_review_sk BIGINT, - | pr_rating INT, - | pr_review_content STRING - |) USING parquet - """.stripMargin) - - spark.sql("DROP TABLE IF EXISTS q28_temp_table2") - - spark.sql( - """ - |CREATE TABLE q28_temp_table2 ( - | pr_review_sk BIGINT, - | pr_rating INT, - | pr_review_content STRING - |) USING parquet - """.stripMargin) - - spark.sql( - """ - |-- TASK - |-- Build text classifier for online review sentiment classification (Positive, - |-- Negative, Neutral), using 90% of available reviews for training and the remaining - |-- 40% for testing. 
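The multi-insert statement that follows splits product_reviews roughly 90/10 on pmod(pr_review_sk, 10). The same split in the DataFrame API looks like this sketch (illustrative only; it does not write the q28 temp tables):

    import org.apache.spark.sql.functions.{col, lit, pmod}

    val reviews = spark.table("product_reviews")
      .select("pr_review_sk", "pr_review_rating", "pr_review_content")
    val training = reviews.where(pmod(col("pr_review_sk"), lit(10)) =!= 0)  // ~90%
    val testing  = reviews.where(pmod(col("pr_review_sk"), lit(10)) === 0)  // ~10%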
Display classifier accuracy on testing data
-      |-- and classification result for the 10% testing data: ,,
-      |
-      |--Split reviews table into training and testing
-      |FROM (
-      |  SELECT
-      |    pr_review_sk,
-      |    pr_review_rating,
-      |    pr_review_content
-      |  FROM product_reviews
-      |  ORDER BY pr_review_sk
-      |)p
-      |INSERT OVERWRITE TABLE q28_temp_table1
-      |  SELECT *
-      |  WHERE pmod(pr_review_sk, 10) IN (1,2,3,4,5,6,7,8,9) -- 90% are training
-      |
-      |INSERT OVERWRITE TABLE q28_temp_table2
-      |  SELECT *
-      |  WHERE pmod(pr_review_sk, 10) IN (0) -- 10% are testing
-      |
-      |
-      |""".stripMargin)
-  }
-}
-
-object Q29Like {
-  def apply(spark: SparkSession): DataFrame = {
-    throw new UnsupportedOperationException("Q29 uses UDTF")
-  }
-}
-
-object Q30Like {
-  def apply(spark: SparkSession): DataFrame = {
-    throw new UnsupportedOperationException("Q30 uses UDTF")
-  }
-}
-
-object ConvertFiles {
-  /**
-   * Main method allows us to submit using spark-submit to perform conversions from CSV to
-   * Parquet or Orc.
-   */
-  def main(arg: Array[String]): Unit = {
-    val conf = new FileConversionConf(arg)
-    val spark = SparkSession.builder.appName("TPC-xBB Like File Conversion").getOrCreate()
-    conf.outputFormat() match {
-      case "parquet" =>
-        csvToParquet(
-          spark,
-          conf.input(),
-          conf.output(),
-          conf.coalesce,
-          conf.repartition)
-      case "orc" =>
-        csvToOrc(
-          spark,
-          conf.input(),
-          conf.output(),
-          conf.coalesce,
-          conf.repartition)
-    }
-  }
-
-}
-
-class FileConversionConf(arguments: Seq[String]) extends ScallopConf(arguments) {
-  val input = opt[String](required = true)
-  val output = opt[String](required = true)
-  val outputFormat = opt[String](required = true)
-  val coalesce = propsLong[Int]("coalesce")
-  val repartition = propsLong[Int]("repartition")
-  verify()
-  BenchUtils.validateCoalesceRepartition(coalesce, repartition)
-}
-
-// scalastyle:on line.size.limit
diff --git a/integration_tests/src/test/resources/tpch/customer.tbl/part-00000-5e3bf4cd-c6e9-488b-81f2-00813dd4bfbe-c000.snappy.parquet b/integration_tests/src/test/resources/tpch/customer.tbl/part-00000-5e3bf4cd-c6e9-488b-81f2-00813dd4bfbe-c000.snappy.parquet
deleted file mode 100644
index d62aa31fcc8e4bbaee47e95dde17fdf7d7683a92..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001
zNVC2=%#R^)i?e*Z*3W=8iTl?eaGUCddswhUaUeZv7)2;TS{jNMyW&LO-U`pUtliAD zyTy5;*fAKh&ga~}HivjWf!tbaAf}~YHXJr9F<@Yq z?BezVr!wl89x|n($N{rwsHd!9om8S$jq=^JdTfLh@sMqBcA@J2l@@I&X+fTXV;m4n zSIuN{i36J-(1Ia82-_4u!w&?ZJXcq>82nqITupl&?qKa6l|+tw7Wn7N=Z%oZ*Vykx*)?vr=iA3t79SW6j{4 zcp(s_A!jVcgXySM%mQIiNArTthqr%?nZb5nkjSDKH#Oux_kX8vchE+=5vmQx-7srI zUOmwP!0DhLCFZmi%yg}+oYvd_qif7}6Ldb4?@V#qA37Ze9oHdn9p@0-s<9|O&SG$U zpS5M1EeIx+b{{!C%Im-l>s0&Ul3ofc)j5bk7etEy)G6rXIQ+&N06iv!$@(ZXY0&yG zU7O|kE)Z!Q9EzlRNc690N~9DbK+d|K!c#^XpN*SDaYMe?sZPS6(Hn$|1z4dgb^s~R z3E;2~+JkIYOfU>t2FI?v$VUVk@?T`f1nu)yxwiejBeg-)GACgOUpDXMd)WkV@8cOJ z3P6BTYUA>ondrYciH9#p9Tjpml~#p`)$?tF5p8itE)l5qkuIL&OKnFnvu@e!YU7H{ zfgsnUB9_8@r$!FSkTrpH=(+xQph^9tDaF#{%6(_|U z-eU8qZW#j*YT69Duej%ujQ zgixRZ#(|F(m;Eaz6PcMg6zF`6)kh!^(~F8pn2cIGHp?5ShFRefRo#%uF!JY!kx8bV z#Hb6r@M11R30R{*NfqD64x4#9q7>t^s+<7@fL+!w>qgsM9b8L)Jv@_TY&s zpN7go8dO4sF!DLiscZ_bI2P87pviHj!t!X7&a}r^cIyA#3BY9#Agyd9qR_b3Sa$C} zVRdTpWCr9wi-EX8h*R(<)B<$n5&l1vH()JFQf(l}R*I9ViB7ZIVb~dI_XZ zpp@jG+I3=(8z*# zQJozHeZRbUXaYk~wq>W1%J*|fQRV*!C6nPQ|$ZEaB0Ri#H4q9hs+O z(wKy+*-4+SC+EG5%RXId1hibEk{`CoYyqCa(9491ar!h0Q@ep4^ZKGm|tyTw6HB`%boaX5|;Hr3#zXK+B+Wk5M zU`x3y=m^_MRmad=Tbyay(EU0diV9*Ej}IatE>`4q_v`2^xYjY5E%xIb&Zuwx7yz4M zP%`orKr*0O&6>q*JH2Dt8^e4nQ43 zC-Jd}w3aTA0yh!3OjWORvs8CXCfZIUlM6(9cp%v=Srhnu{h0svFKM_Qod#zWV0euh z@W6(=kerO_%{T~wDQUAKgr@~5TdNW~MG=m+1Idsc)q+Yw=}Dk8IpuR9O&E@f!)in< zhDXR}JYBx23qd*;WsPXgbULLO5R~g`#fVoR3BC2`5emoW(Rf5?R;>q5j4I)T-b&UI z9WCX|Dv6Dt9cjPaY$_Beb3&&r5`Ag=2F8-6wo^tm>u+e{~lq%_gS`O7=OI@0S;z-Qt)dU+&5@s$ntWT|BDbWij3b{o@ zLqJ-bHkS@-22Wnf!6Q~OtxGSR>jS!sbVm93NUQeMk|x;>Q|ObacAm#3;1UIK1tAl> z>I`r@QcE)OBr9n%f23=v1X&ztDVxaCwmKzaWhS7(YLW^@8?``*-BB(beye}r>Iy@t$-zZF_DQZN;mru9K<;K(k zxC>JvpgJ?jauzm1YiO>N!3aGv-}w0%2IGmmtbxc~lIEh2@a99N4dXeV85CzUjkfj2 zMpN!G92p7OeT||U0W=RueQCpWk;WA}*RX9XS4+&`kvhRjro(4-(9AEQvGOoN*0Wf` z(5#!L1%OpdV!d&Wn^D13o5e|{I{^rPTCNO2r09O+bjGO$N*L(3I@&NHX02g62iia~ zg!u6(-pV&OPvcpk#HW&cF3L?5Tm0)Y)2J;r*>DQ-^B_nt4_6amlPbkp(}E1Jn|`c! z=~}HkLYbgQhYwqt_{s!ll6m%0-(0wR0IFZ7IHsT_(0 z@Tm-V^FS>f0@Emy1vE&cDx*5m04rq0iq26{g>v5rk0cr=SS_#c0>o`Jo}Xh}JmSCOv^<_?rZUBJYSCe) zZ8f!!E;QtUG%rktAY89omz~QD`c5^~D+JoDqFFQJN!BSgJ3SWfg#(#7O*SUx_VTd1 zGjxq6o5#2mYq51uy65der`i<~x;+Nv)niIruw=%p*2tdtjuW6AAWf5_ic*{uLz5hi z{CxmR%Bnt*GI>x_;C*|pq$^bsm=d~>OrdN~a6j{Td?dy|=|&1*li`3)rDeLTX**dS zsqBym$Ijv*i2}XwN+cZ*Yk9-o7~&}%7*uIUxwB<^bo?2U3dHkKB{Pc^p*l(9DW#9{ zdb()6vDuA|phi#x{jEYUlE+7dddR^$Nh?g4&CMrn9tBayuF}OGuhzM0Z5m;sqjWpN z`>rDn*-;`ipAgm6x8I6qiw+SFRSYNu!t-u^RJy>hQ8ZGQP{<}%i)J$-(oq#vOCxkt ztqzi%u@g(QFy9>qFANHGyPTu7w4g*g-Bxo<#Za@@E|#gPnGhn4&eRyA{x6?~=NY8L zK?|svj6M$Apx^0CQq64I?iq#2fU3B^(VW%$sXA14=D-JIjC?;E3j(_g?FN&viT~|q zcqST?(xgl#)sQs{2|{PyZSYLD8&+6Hs74g`|8=SoPR4>#2%c3jpTK!GNnlDV4o)cK zE;hl>1(jB%*H4b97}o)s{CupEVpkW3vUM1TJRvctr6;+jHfkJLIB?q_bcy#GmHQ8biaiIR~+yhI_UX- z-#qB~zdz)1t-6l=|NIu-{q4dp=a*d$|M}ZJ?p<#{&xvmnjy-tt&4rJ?@~s~4*Y_@* zz3dh5J@*IKzUpc9sy99K*Na~+5a%Y%BVTIYWK%N{?Ns*e)rF2s@%Z7NuHT@z(6DRakD`xzf22mA*w}OG zO>Xzw-*v@jiSE+UBR5`HdHD-(Q(b@g__c|xcYbx@$J4`4eR$!|AN}fUm-by*9A0*{ zf8!UA9Qi2TAWnTpOx^Pc^IY;rvE)e}-Su|n5tF|5fcE40F1lNgFKWkE`_l4L;SU7A zTsl;{?K{nRJkmj5B)eejL{>Q;{0 z_ka4jkE+%q#x0k9<*)XwefA&ke#aZ7tuH?E*}E^JUv=}@^xZFhIrNUf{(oHilgqyI z+?VcidEYmAJV)sA%R_D{4c=Sm5sq7;^ zJRkW&_nMFXt9m@PhCcB{*O%g7IQ(}9J8%BVg*X2AZSKE$=h9WyJ0FgqpE#wBub6yo z2=@EC4_|iRTbJDb$MY#EdL16Ev<^?%TPJanI=F|x*9lx$rzw1$VJRL=_H*xa+y9U0 z?!cr-W?kTgb&e+1NrC5g?W3u65Y1a>d2XFzS;oC@g&@{B!hPL-ex0INde=UjUMEO? 
zoejF)#v&+}A z7nfLKopF~I1kU>qMy&HR18)iI6z{#bKvBDXagc#_aV+kAh{o4Bo>*sLJSYO^c4_X% zQ5-r-!nJwtK4=5MFzX}%7odn;57E#^N?>6y;e|81&hiv=g5lOl7-gQLNw~m2AA&wZ zXL*W|j__~=25t{6IZG21+=N`GIdYvRY42?~1V6Ll)vi7i zbHP^Fb)JWt(%$E23a$=w&awhb8@u;jc?_!(@6 zX1R5iS$+Yd;2#(_7*$?~UGM%7+?oT6f;j4^gu`!X0_Gl?Mic5lyygh}1Xj0B^1@MR z0i=k3xu0ag&M5D7*4hHWSSOdj!iinixx*+-1w4=i1|f4JMnj`XjsPQMS--R&%$~t{ zQC(VRz^0#cUp#_?Nkd0ynmn3yZ!9rjzwYlYkmRn1+%Kav%pmlMCe_c;Bn&anLIY^9 zZozpP19JdV=Ajb|)!L2GF!Io14qD8`VqiePjDgGI?i3w+#y2M&*`xyal zE*$^fM)MH#kAq3U&mUNVD+}%?{GVB1_~VNQsdWZ=#EFL%NPO2m7bV`m0^<(c&GymL zOJKI7#VmonFtK~~6W|SS>g*jJ8X9)mMT#pP=$-r3E;{D+fZNzyS%G^n?46c1f~5%y z1;&Cp`hJClzVa};42_rThu}Z3p4SQWTi)mV77E5o!LkAicxaP^`tCZ?vKYa*hhN8jWY`R_04UzSLRf&lOn!WS$hXimMU`Dw36_=pycQFqr3p^u! z?zn@)w@5H30#*}8oR2S1xc~D9VZyc>MU{=H32puYe(v#NL+=vhZ#2Pu7QyUVQ}C3l9z90n_=tUHkn1y#!`RZj@b& z_aT&o$r0dUjJtaLdtj31|L*K-U6gui2|7mkpK;+^t1hlAufW{VZ21WfPjg2X#h8oP z{H}`^_aFV4A75f%eEerz{Dar{|5f~=i?Np=y@F1-E>kVsxq?KR8 zfO&5d-?9av;IpHR11{G2KJ@%2f4cP^mtgH#0;8bs9In8EW{4kr-9xkF(W^HbE^6zX zOW3&A`@#aVe%9J}Y=ME_!@}FyOuWB!5$o^YxXeXua2{A}>h=4w=iTw-Uz;D`o z$t5VK7AU&>_~w#_VsPai7rE0dSjvUs@wen7{{Aezz@^Zu8S`6(izvf zwPUO5J>p;mhJh3|Ss0kx_JhGPII-)t1)9t3U*W(qY5!ddEOY+z+g~{ZLkP=)X4GH1 zA36k^7zM6~cytMbnZIKlcCqV^`KJpM3xBP%4}a=`yItJYUoWha5C4OU^vC`GvBZMI zdUy$$EFz-E3cI-DcYzr&thHleAM##wQJ3CxkOR*|k)#@P@eib31mf|mB^Eb6?-urh zd9f^Y)Y%V{%n95B_j_25J&t2wb+ED-*qp%|N-(hW+D|<&qpLT0T*USxuy`8_lyDs1 z{<#PCU*y%E1(ILewFn<%HZT{p9enn37w-G92mAwA4W~YDom+xN2wR_kNA~UZzz*QQ z+C}^SsUpkYh~bfj^4Ed4v)JtzT;3hVb^sYfhUgs zOuPsAYcKM)MR=;9dbUIYJy8R7BO^Pa8I<{Sc^^sbk|Yx~Z4 zJuJa({LDqV1wW2l+`R(h#`DL2CVq7Lf4TTg@P}uI$Q@qsbIAT>7xkctguEWu|F=>L zq_BF$(h7_o%ZU@@k3G!yj&Zw)5G)GF26^|=!a4)pk9`zDyvUQ#23Xe6eHV?a?S|DaAj0mo8$2*QtosI- zQRJDWqo=`H@krH!WERzXcD7Fryc7ky4NnM;y!|2!GZZkNz8t#iS%GbXq|e`Lt@*yX z408w`V{rwdCV@jPN0y+)I0RKM`1b4uzrpfrlST0@FuVi@Z+nm4|F#B_Q2}m7#_KPdDoLLASeYs3mi5>-uZVfcH`3r;gaB&i97GzynE|buzP`j_$3cG zQ~w<+Fgi55RzY^GgJ-w@-hG>LJ?zu#$U1_2duf5@)#sJZ!RA3DH^O)ymk`6Vz_Gs9 zEkani`V;t@ID7AY7NRJYUVY_~i}YJ1*> z6CM}6dejTvs+@CC$cuZxj0gg0d0h~K!y-QFdoNh*IvCzYaA958xZ1^>KZcZ-{MiLq zJ_q)AA96pp1n~yo+g@zKcPYlb2WIp9Wul6F00FCDcEesl;l6W-i-rA(d3I}O6~fAO zu?VtsezQL*IQ%#cBmWI#vw=K;y!fZqpFs4aTO;KjV8?B~ z5tjacD8JtPrbR3WIPyVn`Dyn*Z$GdCF#yeYFK!i)qkEwx;`2xE@=Gopi7g`cuRQ%R zaL=o_cL}^HyY`X2TPMJxU%g^|KlZ#!@ZGWpNw0uA0HfMI2=**_Jun8b>tU8i>jUs? 
z1()F54%7BN7vX~p1W#~aQP(anfwLvuzlW~l?(2}hK(=&{j&%E<0)M)IGyY$B1w=>S z8XPp>x!=GegE`05ai^+;5XA3w3I4mhn(u>N zi2DfS%PY%Iif;kKUj2LDqm7NX!bZsQ;5*>>1itXSJMBBO*R_72kE9UfBYVN&QfpuF z`4^G)?iFx|yx{CczP0E-`;c@L6U^uOQ5`!3t*WHzd5sT7mu7Z-9Kzp+Uqar!2S$*^k;)UG?mPMz zY)1cu{Bjv05|+3NdQ8G}ZFgZQy~B%qe>Y4Tvb5qNS07zM8p|$rs{(^avYF=*+qXba zu)5Ue%a=nuK;it>3FQZ{!TVn2+X+VCUS}_Z)+^U;{lx-9t-Z#FtnWsiUj!e+v3DLo zeuQkl#>H$Ty&Bw<-9}d!h$yKCuW^y$^Wa{<2g9O_F!WE*?Vx)fa@ihe z&KE&i88-OUFYH^OskI+_kSn~~P2`aSKUo(40lxJ3&bg82@dv)@f2(h>h`hWT%mrF0 z&^!Xo0}s5qhWI52ARhQD2siGDZ+#4TY`=?KzYCc!yU3kjoD`luf&A8k!qJ0-d1Ptz z9`{2q}5i;LEqUh_mk&-v=AaPl5<} z0QupSE@A8Vaat|PbuY({$T-iA(6RW!xt*4RlmA=m{um1Ux z_(sHBc5&NBVlkK^0!Pm6h2=%F&g00VUiTRJ0=P==b=%Ym9Fqwed1>V*dE~#Bwl_EL zanUdS4ICj55Fy~L@SQ*0`VI2_{TuK9gPk5+`ZL(RZ5O@vUyI<)IONoU1)4_Q<8|uD zm%J`!^&|rC(cT4medDtp4xHP;#a%DEbN+wYh`ZJ&aXgDiRM;+xr`92$kM?TNFfpI>gyrMN1oab7IPPPdzyLhQ|{{)7{WJL zKK$7Jt)ocRyA#6+tCznLq1IrKkAfZ0Yrpj%;oZnLJ-$!8a_#GTHV*8BfYkQ8z5DzQ zknfQ;?2q0TkbV2k{ty~d*nj+Lqp2RAA}pR)LHUA&dLvFcpf#iHAys)SWUgVDb z`(f|l1<1+yKC}{>kkdGSW%^GmB=J-F-1wa*>cFTm-a5#X2r zzdp6N0x1(#fFXx#a$vwz`RNyfVAp?hCFBdJt&{3U|2J9h0@cLXwGGeUq}~&fn`B6a zWPp$WLBb)DK!8|LqDDz83M!tetr3GnIUf`}RHCAwQpF=!wM0b01A<3T!34FHRv)#s zRjX~KRx7o(+SY1qZNHs9@Av-ST7TE73vn_t_dZDMhf?tgeS;mlGysCpaCuPzz=clH z$P#gmPuLEIOY+B``GN7E5Fx~BMYvP!5cB>dVgRtAnZad#s-Gak-(&Cr5tOHWxX8Eq zP~S#CkZBD3afRU-!C-hHz+g65|{WUxo7eI2KO2Hh?(;)y>#H4aodRiPgw` zEW?{<3CJ}1M1x}qVBFVz0Z^HsJqMCvKIlA&Yh`feCnD~=02@VMc`Oofxd8wQ2}aWp zPjS_0c7yfxxIPfBu-fN-j;$Q3OPqTm5lch4CA$Tn+Zh!9h#XQXF&dgfeXR=!K4By` zV}ZZsI35z{-Uf`WKL;ZbWz=C`f2@#;i{P@8MEIdEk+{VRmj&Um-kwe_0=Wm^(?Zo= zBC+Om89)=rdIsp^!M@I~DEKbdVpyML2%xh7l+SqJeR}#G$Q%5FpVLCbYi_GX;w+3m zSAZRXINf7}oCnU0YCDsRfm_B|A*whTA0l^c5E0`z^e;~flwgYxE^qI}!QOb02*7Hh zynh*41PyUwA|5Y>AOISj;JAW|mB5cvfv-cQd{+dFb%1p##Qz-yPEw@4fI_6l!M<3? 
z2lB?}83`Txjf9AAc~|8HNf^N3d=WblJEw{uAQR(&HgFI9@kc&@$%CPr_wXUrNU{i0 zqJyiEIzPw7fjBpa%NIfk#V3W}73h7JK&&L-ASK=+gW8da@EEb<6@+rS&`he<;|3A_ zO$qYJWPF;B&kH<6nL8w;Lc!k|^#cDsY7iAIU*04+i?PTnSMWz!0&$vvGkB{0u$Z62F>o=MGR`42e2C z)n5%;xB)Wyu5fTX920|u@E$&IHVu)_trJ1Vy3EZG;=eKQR2|s>$y0}KiNIvAAdyfQ zM#1GOXb%lu9n4k);Nb$&hlsVpbAy1BTP!4D)RZASx@aSMbHt0G580rPk(Higs0byj* z1Ic~hhQ)zcp^CnZw+4Aoum-56VjyN_2AO9OiQFU+`3_}-p5IRZil`n607Tp)m8VAc zD}nkf^|WkwCI?6*Bo6?LSVsX9a$hIn$RfrjfbjZ21r-=iVJ&A>x1Ce*Bj`6QuyM=> zvH;RKigo#6Zx&jGO|;@%Al}H~X~DQERKs_?j=+8H0g6O1s5uE(qBIR*f1w>Su|SCj zfOj}}>HJs(E%lfH!oWoa;(Ky($G@o0nMB|iBjaF+M8%>Ip!Es(lrMgRBMiClVpXvt z0Zd#hQ;)U-uM%EsV>9uC3wCEXT$yQmumJK8fT~>IvxZu{vx&A znJED6KGv}s5G9?DCj=3Rs_BmQV8G&hDehB(Q=*T<J!=ZgZgqz2SKKSz747*xJbt&hC*k{cKh0N9@60yM2`e^ z=m_^1Jdq3bcH}_Ml)!Wa0CZwsr1}Lv{`A=Eu-5W^gt;Ym)%$%gl$1!p~GR~I#oM( z`nfN=laG59z`}-lxKez^ApC|OJ`hGF#;}YP>riFmAkhzCLAYCaD?pN}WXv_?bUHB;{1h3#GKdiwxH5{ag^%A)BpCQm)f$R%5n<>DbhT|tP%R#tgXQ+oJ z>kX{IyRW0N1y7a`36KX`YLVK`Z)8EHPWE;RsF(!uK;>tSKk#;eXO)ScyBXl8G0Yu}<|k|k#Hvt; zdExWE#lVJNh63yyw{?{Jz{-4;%^F^y?V6hC-QY;y~E_MbfmYIvmz; z*&iA&kQ-}P6Y+qQIzVOis+jD?EME-R)_Aw)@~s0Z=S7QU5K*-zlKE0y@cLGx!b* zgsoW!BM_{PLSP8ZWDLsKm;n42)Ft3UrsXP3{s}Ao8$@(`SHOukaJ1Y+g15HbvYGGOQfjfdXF-vac)BvTK<3ImvP0bA?X1Rv{7 zuwH`}|03wEa@I1BH26X&D6_xNI7))Q0aeLUbLUI(WC8G6(ANfH!qE2&?^DqSm^pBh z;i{3*d<;x{9)^k@m%t5!o}alV2L^3W_=n(+)RubPdi*BFv*oI*+_q3WP=oJakEW!% zJOJ2MEM`LSdyL|9e>}|(0(l@yL`6VV;9GGVD4vc(_auH(_%P-H>PCRD;Ten zgAj1V2e*pxNWtSKM-Dqz1YH-qm*#y3&0iuT3$#yhK5?wYkD?%5M=AwbwDJTus10H~f3_E8v!`*bnX#%+?OuRN_$emHYd zMAwlUpkyV>@tcF7*jjb?tkBbY#tNTgS05{0+T@3?S z@T~y4S%|xZQoA=Z%?B%e@IJ+%sW=eEAY>YAmzZiDe-LqfIhsI*LjRKS8A8Ptp<@O1 zml271~Vf9HS4XjflLEF_Dk=7UFn5$ zCFDO)IlBfVUfe+`*mHOQb6%h*m75O2@AD%)1vds7uER+6 z3h(ygp8G-%kE=zVp(lG!4A3_-f-v$j)qG|NTodRAK(^`!X9MO1hCv#~rp^LD$25nh3plX>s=jIo_S12IID$xF5*2G;$ID^RVZ=AF z`z1)~9u6K84TTTS3P<|HMA!(RSHFUP-#Zu-fBO`WK$hwlxM)J^r}*zU=3nJ89_)7}vVmoQ)$3V^|h1nILf`f3(+!sVe=hI0xQvmVYeDH1oS86c`0+n3;+0YNfLii_2}!#FVkMwUjH z;zwJ3Agc0}qyhnv!QV&}VPPKcpOxZuDy}XJ@9+n95#zoNtXt%!}zh3 z1Cv1y8wV5-YF>ikuA-?2d+DLH7-^VmCg8tyL^RYFV5QH3LDm4~dn68aDfGvhLQL^) zw_@ZV*u~WieMv)g+wAv^21%B=eR51<3IU z9vTID6cEG8aQwN*Jsz(LCt^V*U4{23p{#xpNW|D0nF0`%bc{Dntp_Xyte@mh7z$y8 zjB}%KTmnuCv)uvSO(i5Udp*6zSt-P#0gsPz-UltCz{hI98G$w7rWw{FctIGn+MsDf z1p!KJ3k5zV3ljDBad*D57KF_*O!#}0yadI>IOxgo zMjvY<2cqoPaLrdkz)gA}@G3c+q+bp7+>pTG08=p=01bu?>JvT#EEX>?f{Mb8lrSX{ z2J|>XJQV2Tq4=B{1~x44C5zA$DM$d~$7!NK?fhO0bQU!291ML@Ji~-Shtd*s|7-k8 zf?GHcn}NOn3PeF0^mrMr4~O-3fPU%0I3AywqQIk~I11r?{=hc3LD}I1GF6P}{{1sB z4}i|_j1OMm4Gr2#v9rb_c*R0P#Z8QJzt1cQ0FK!ra0peG@B+{XgH^N>nTCO1jlNa| z{!qq@!Z08+AH+XKLNCC=Mf@Nru(vnfp*v*8Ey4KiP^@Hx&p~dD!xsaHc=w%D682Ih z#Os+DJrqs197rArYTedf*?8goFF{T>9SmFHkwzv5Fxn7X;{|9Z7AV~ZRf8)QTzylc--W6o358k20k%?G=LH>e41oR?0P+=m~ zxN|V`nupQCKko2`rf$1xq;(%Sa6}^e5J7v5pL%n>v9`d?E$uatHBHGPC z6xgR&f5t)|khr*$k=V(=CJUaB&;^5Ln{aqDIh>F|{(bg2wAukD?TfL4543f>MJAL$=1TD|{SY9^kXy0rjT$x@Y2TbpGQA z2UP9^u?ye_ykQg;dD^DEV%Q62FdjOIV?qVc-NxfzhaTU9N5HtxfNMj*Wu|%}wyyxN zAG(}DR*fm0c{9Pj!6d8o3Je3&F0zU5WXEtE`x5`u5 z_3pi%X^)>t(JW+IhccIjz``}Vu^f5~xFVh?j$VSNc*@qy2;esAK#uZ<4OTkU5>JZ& zGf$9-I54jsVq105JzfC26LpYVpp`@K9dG&)xWqpqKp{wdMsm3duhTHA1^CS&{YuZy z9#8^3S*>S5vU$O{2RiI9d&q#vXdDEoX(vF^D7J63r`j$Qwg{}(-4i_))p18K>+*o= zYXXpQM&SuVpl5~W?O^=1mIlRv=~l241h`QF2Vet8p%xNGOrEm-mJXVWk@%4}`;`o= zUG0KVw|ga=>6ak-x#Iy&je$K1gh6PuD%@A^huRxq>}Z_`0&1@t62?88E%W4bU6^VK z%+x_siiPnPv>U)N9I){K_IH93nh3>mtQN*cdiJM5l@A!u9exj0-*(=@%c3z8z%>i4 z^FSCegITzjavUVZgfGJlHxB|CV6YnhEr$pNnwTlWXZ-M+0o)d$X$T}{i<*F~i})R1 z1RJ*TdunKimJBkHSRaWOtGO<*^#rbpgDi*-iN0v{hML5x@Hu7ENe?sg)q2L8y)5u| 
z&wK`U25^wQ48}hR@GS&HgQ<>$4MlLuph(YHCoy^x>{xu^kUDHQXxP821RpL`w?HwC zwR?l6pU8wpK%rF372*4Vc<&&_K1g#LSixV3_y7zzG{-Tp??p2FH3OCCCnj0POdkT1 z3%kPcOr?Tx8KDQwWuyfA`Qtx)s!OoMGw2F4!pPPb&z1#QIa7#dBgczj>KIu2m(eC3 zfHHIpPh^hQL70R+q`RS6`!5Fu6uvlw%=65&V(dB)I`9xaej_<-xEavYR|jEVJ&rje zz$ZolV$=^~zV%V9ceH@1=13oZ?P-GS0nX*%J(e#~m3hV)<(No>86}7DMFDn7(Mfyl{9R#Go7^eQt>ey4Cv$VEg@9$UaV-1!k2yT(|Eve*hXlCfPQZz{xp(^ zQ7?hLFXIGQH2b-8cK7#sa5t5nUJjmK^ z1!x4^!2H zE%Wi+QP`=*$VBL%(v0#2DF&x2L7)im!8IW`caTDBfhq+#aM%b@d4k2875J1l=tmqA z>P+hs0=Erg#Ml~y!ypm4wvkp2f9G#1_NW#Bn3E#GyaU9FQ##L+2C%n451Yc^N!)mN zOyj+!QrFZd=~W;2`M-}DNm|GBed#!tEJGcN%17z!dY9)hqyPVJ;n|FjhTZ(1CpA3( zylCjEwBIx+x8~^9u7Z!R+a2IW$hPT$z*u=9|wDq`8OI1 zx*AXM!UbrAE14XFBrBTe6hs%g$f-FmtM(_S^evLK2Dc_(zI5r**p0`M;rJweZ&Nbq zhXz`X@RCvZ3^{x{Z^7|oGEp?$?oQ4kNx{*sWP&6^)3+w?fx~YsTTUmBLi>|7a1WpR z_9Vx`e@DMM&6|Y;!;aD6g25+}lSpS(`TFD^;jYa+UCE1KJ&w4Z?e=bnYn z@CiDQ%$tfDC%VX+=+=iW`1x7PCOVN?e3Y&TZC&SrrG^LecJLC>__7XO8H)XCGmWU9 z3+P#tw;L`NGW8s<0zKJuhKEqwvU9vTw5{L-?*$s8DWs=T&Ox9F22Q&@T{3be$BI@8u zeLI%(3eW~)kLDpVtaPb5sLj!I2Wp&}M>kUs&ccP>zHkc8qd2}@Ekcz8j?xIpSGdeF zbZCJ~^#Q7Iwb2`hItzj10jJC&G^y}}`fB*1$oG?jz_Bmxo~Cb7KkWx1yMJ4 zeULl>JRA9LPck_bEg#;=>!6Ou)8S}L>3$8OuD6&8wB}?7?;Z>nveA%J zXL#W#B;*L3sB!T|T1g3=yap7oy=}$ghxj^;J{}O8{h_qI9%pt^z?AD=?Qw8 zc8-=&+AjE{bhXP|haRkHqNk$Fa?d&;!bezLjE~*1DZkZ2kT_5J`3lJJ=_4s_-vV*zDhBz z<_g3cdCZ)KI{w#ugfg2I<9v?mj)nunCz$*V$1Ys{K9q@Lok%D}~~ zyc8<&W6Mo+Z)7_JN5?puCn$TH)Kig5ri}qDg7pu#kWe=jd#z?~ta0de8;VxU}#bq~mKx z)R1XB)e9)oE50@kJD4wRQO{XT|i{jh){Bs%bkM8G%bXy)a~wpn@=&F1M6J-+zoNEUR@9Q z-uJfKj8JTxCvG=YxgY}Hb|;f{Xk49J(@Z7ru*j(Er@$%V4W1*?-`oyi(ox8ZL9H1M zSnr~thr##mcU+KDn^v1+sBP$wiz4Rmo(LM_4kW8Y=mL8jV)@=_h^#3so%9p1K3MdI zZ3XO$-P{O187n#j_UP!bBWjzACqq$LZn_R7=epca(3AOl)dXtp=z#@(c7RSHZaq$; z9`A$@3whBDfHZ!yhMGf>ZBSl1^0I#{JR^Lnt0;0ItIngt<%V;@QrYV=gklj z4>T@w1x46f0FkP(<59XDBIMGRT6F~)IJ+Gz6TWUa&qa0Q zq)tV7Z@Ju6s6+NXtR?UEF-V&i^B{^-cR;;&#INwswiO_m0HWIqb?@d>(GZp5BPN()ecATs#;o7 zsFqT7GSw3gM$H_WtEBEXLyUEGnhQ|h^eo#$bmiVjaO#cYyWy6`^p?UUKf2xC+f035 z1Hka=YqxL~u>;)H)pHVTbRn$U9D`m~x6>Di4{JK8xN1nsjk^JxB_Hpk5fP}lh#nYD zfmg=7v+}}SG?R0gXQ9Rv8(6))!2AN;n{ZZrQ?UN+au@EPsnX-UBGfJDfh6xQU}N~y zi^FOqLW^CRGV1jsyb9_)|K1I#i#b!d0nMmAZk~$j2RfQ6kYwa8$gM@^bK$sSXLus& z<(>Ww=+b@%jHm6X(p0qqfzk0hNRDRqGSkNkUgM}7N z8Vy)B#r=!aBpw`MAJWnC{K=4b#Rhn+HCMTw`5Lv9tXZlepRaT{y|5==e;{tEbx zyNjq*O}q~DrOi`w=P&h?k;=7eEKex?ilz-nt8cViq=?+gS*WybKO|Lb$}US4G8Vhs z$;fZK8;&09K4P9qMQqhPK_R{eA^x7Facwm9pxxYzh^(!!^vG4<$^WFec{kDO?lYPf z)Xu4Jtvr33*@dQ*Hh}qNs82y9JoSXTN>lH2K^{NpZUdBB)4;m;)Y~l@0J{N);p664 zZEz1`?r(%B;O#hRt3VGrPiU&BcPk)W?)Yoc&=_GmolQhq5L*9uBivovyl%LqtR$|E zPdQK1$&^>2tpKh6WeV>qYSrw7NOqk7_)A{X4c@*aYX{>OI4pH&yHAh00!8+9140nz zoc3^HBGkP=>E$x&#a?v^Rcc|I5%o2)H=xS2ZgmwJvA4kzLp6q+st>-l?i8ILpIebLoc6p!8Ogt&q9b4?}H1^Z|<$5v`g7C8XfKL zgw)Y$PiREM@7398%B53arh5r)bsF07=mf7p&|?_s!r4O9_svEy=Jl*&ybb8;k8XP# zu|HKtm8@hE`BaY3o{iov18_mtT+MKR>8&2~LrMmFH2GBHN16uY<9`rP`@tj{KcbYOBJzxraQcEHI{?zg&SPT`we}#u<3-0tV|C}Wo84F$bDn>s9x$H62>3NlR(X%yG5R}`#a?uUc ziB8^Cs%#q&kt<2PiRju-XEn2^ASW=i&ssGp#D$g@sIj*NT$J_sPVhzRnl0*ZlRc;;%Cj$P00duL zJprXQMBf6`sNo2dq4__1ILw%Hys4D2)6$04Cb`tJP}_$mAQDTz=z!(DUdU{KX~rAX zko+fG;XVp{RskklNlism#12TMk8jwO=+V1Jd3UL6Uvqc8so%Fl{9i~s4ofWS1QXV~ z_d-z&+z%+QId>1l-9JSafG~!&14;PynfpQ-3LmzacaeJZrezjV^`4}gi9GHhpGapL z5F(~E142mlpM((GF&U7zZ!$z<)|qNV{ZyiPNQ43*sy)wM6rqbY7q5+~%%j^-B05Wp zhy%jA)ZLv>C7wKqmQlZNRd=8VVY|WR$u#iJ`NLZ*$yB3JB=M@Z}l=)Um7!Z76n9i&})%`5d(9bw{L7>uOpn%R_&;og zPC|P0q<+AwA~^1@5EajJRYHHzcNXf&qvy@^RTK?!UWE^}^noY69;`cT2KLq9w7o!z zRR`&CVkY~7kMi2uEQ!Q3i;Qylv2BP_T;VR#)S!3oC!@W;oN6M_orwn_7(!|oC8A!M 
z*ccxa)_4NKuA`Y3P6bS6>oBUn(hc5DT-B;hL-z_!&{f2kmI|u1=W-e`oh=Zf3qKx( z*pe?l2qle}yayO*$Gh)VR#5!nTtvL;zf0K!_B!fGUG+m0Ii*YOLYMY9HBYFJqjrL7 z+(*9v#iD?Zyy8wk`9GQrT^TWmQ;MnnTDac)Qzz+%)CaTR$D~r=Hq_`sD1z5g8^Ds| zbB=&5V!PHgJw)r)o&mCZa0A@=FWYSh6 z_`W%rn$*K6zm|ND%-1Fl;H)JA|h7UpP-*6oU*x4S3(P9 z+OM;_0qiaVLN{J}oJ}1*1~9#28+%niIsXIR{%Ke42C8?9xeN`#?Y0f*SwSw{M*Ve0 zoklIZ%4TEqYnuyhdd4>&(1==gT%)8`AB8M_61N|KJan6;fx0sd$cFD>`YxLBW+6|B z4DFrp=h&G}8$p>rV=sE6CzXeI1?WcSS!fXoc5rP{%2IvdAzJOz4!HcmY_Pwe^#~xv z(8ECewk@`#5r5abpk{SKP%Ye}mQl^W018$Ht!1nD)Nf~Ovncmz=n2+7c0mF0`QZ?h z&R9Vs(2bGoO+I>evP=DhD*FKN28hE|6kFwD(uBlg4){c;hXq)gcxh<8WVQMU+I(Uk zxG-t&QQ%uwa#J@@L#ovk)Tvi2;Y~Tq*#>VkB=3kihT1ZiOOX&?0~hoArwy`ax_1kK zBySgah~H&sY!|InBj{Aw!lPl~) zj9wi(3zaVW^&Tj(SNmrnZR7y}l0~byLQb`=@Zc3RbUNR*1K2)lTE?__qt@kH%vFf@Q)t3z#+YCFLGOI-tyn-kjs$hR$JFZxlXF)iUJ@uz)I z+ayJ0Y=JM;>by`!wAXZ?^eM-n?_RY9;>EA{BplFK;05lUFLA~ zQ|ED5$Bo+MOp276ebrJ%dDXKb7ES5hZzl-N<0sVQ!$35Z!pKNs~>N1 zczoC_qV~SSJ@g?q_C7?dgYEX4RAGf0-hs3HJt&8}N*k&Zi6?9`rZ#ij6PEI;*AT=l zn8hGs{V96~cE~Yahi1U%I5=iM^qB&uPkS=7yt(f~YMcvW>u4%#fu#baXHR5bFx2S= z2&6?9>osN6KT9F=vR+|DvO*fI6Dh|B#yR1XD__1k;A4U~2jE2B}xkXER)v7pR`(pj7hRO3en?c_N~gFFtgYI+715Ib)w%L`Cx1sRAS@?E<=xxsXkyQTsxGmL0frtdcYhCaXt>vV2CA>t7?|F%qsm}A}su3BMv7;>G#(2Jv&6F`|eCqfS zma%3U{+4PQnPM4do-mCRPD&Y{F%iD#b?hk5Ka9?pFwQb5V}cn(>gBIV*S+?yVEy0s zV#O2T^#6WP#{d7$2%mEd_^pEcABB3rvXo;~z-#Zh!&3z*f?F=$ij9QiWL5nrUVZ#~ zX=n|)s2jCM;|{NwP*HW)l_u-4-)1*#I-ios3Nx3FWfE4cq&7BOjFLH%r`L(rRE!FU zsZA${YGfUIBt3oax^HFHYn#)i?@5{CMNRFoUkekQ)0+ij!hDaX&j8-YMt$pAr+RG; zA+Nm{pE_qx)QE~~_g3L`7kN?1KkrIYx~%Uf3pUG4A!G2DZ51o)qBmEi`-)cXXb80i zk8t%=FMZ!79x!T_*VgFs$C9s)3KbJ;WGi`;+3REd>^;iVH&W|JV_s z^_I*IJH6U*ae2(0I4UJv)U!2o&ECT8Oo4k;7$W&2cIT2KL~GiTueIe$LekD{6e}gf zo99*6nnh_S>h1{*<;4$)5iB1OdahxPq$=t=kubQif+W;RqkvYw7tonaRfqE1MK^;h zCdJv$ZO9I8NFmROm5hGY=%Su*GVQ#0WpMCF*{FaKBsi`404@EkeZc=SX8^ZND~H!0 z=9k!twMu(Yex5xuN?Dp$T3l+)QRd~B}!}oU_PQSZXUK4@x}t$%&Hw7H*$ovlheYZMFj1 zpH33!;jqAzu?y`vIm&G70-F*{P@1>Uo;OD;tde9}=jG?vc=1$GwraZY5CXf%?}}#U z=NFgIp@KQ~yt(9P)iU~5l`K?QV9lIwoeO@|t^y6Nq*y#F&>db{WX~@x9tRh(s#Lr; zH2a7sLuk@+csL~^@&ECN(wdu3zGvd2vVuZ#f>ccJNJZK-LZ+M#HuIeHbbJGxWbL};HIcG{qWoMtyc66`?ht>cOAL%c;11K8+(pJ{bFEA{yU1#vQ#`X*JVIJ4fS}Mu zKh)14Ei;Hh;coRz!EDdP-WPr-&lPw=Tb&=H%*@FzwkfmVyCjOWfhu!N)cIIDA#NaEe0Da^RtvCX)1~Oiaa>FQT_<1PWo8kBdg58 zn2$QVsGM@?S2BB^8BQ^GNH$@5Pxe1an>(0_j+Xx}f*Y`BI%@RK2uc}G?o1zN*&$w} zy~Jz(Mc&gCCxUd!pA#r`I_xsJruv}u6|7FHoC2pS+(K!U;x;i;`;Yn=p_O-y2qy%d zgirNtQOY3EW8k3r$AQD>8!dlUBk6QW4*a5AEC1CJDJv>1$vO+cHxUXVbncX)oC1j%D&F;}X?;s@UH5vv?Dc2Yc+m$+ek;ws-o|=+2vkpBTDpHfyJohtADBU^fJ= zg%HY9hRvllr-cc^bY3ixjHe=EF0SAcxokXtS{oggJ44Zh*bF`bRK9hWP~t0emrZwG z1UZ~HS3L!my-1f({s*0=IUp?R6x4Lr3{X#UluZUkD&&TG+j4}XrbWLhpS;BD!sp^d zT;c7Or{`$jBGT{D=N~(XLe`43t)%^eW`yqR(SrzU4^bq|hKwl!l%Hb@$$dmbsmDS( zkeaaf`SGulIF|n`naFj0H8L}wDneQ{5+D-ihZKRohkGkb4Rr)FaRE*Gd-sAN|{Jn zeT-0jenmVnW4BV7YX#(o{}t7IoroA=&2T0@8!kL+m`%>3a%_Qjd9~s}GJwy4rc(CIz#rWMO?PzPh~E#@DvL{O)|`^;a?{k{#aQ1? 
zz!BCWX{`MbnW~8;MHy5?Bq|itzF_mc3A=kMQHoP=&0_f);Tf{g5+w}~Zzwz^o_OmZ zg3`cMVEj@@{zQ&*T5Z4P#uzmAYXZQt1`4{sLkY2D!)`sVy)1~Sq4L2{3dn^I5~eBsgsM>Sbl_{ z{X~35D;l775^uX*xYtK9leU*gpyp@U%e1wGW@CGqwrjwa;LoAF5QWvxkkU4WZ0CKh zX%aWq?yDabY!wP~r6&~QXw^(bF#T1ouIx(`G};MiPKVx>&~vo}%fY-Ngl=|J<}+S9M4pQ3~GLkT-8KNlZK&^wqjzBTp6 zJEF|#kn0(J=~09=;LokB&7K`PJK5+o?j$zLcRBU+eWnlq<%q@mU})A3p9r+5iUzil0zI;LF|-3(QM zz9jam>DRDr@tTKx|E^Fn87?Q}xP2NOiyw0oe5A|xW za8Y)CQL$>NIV5bepZ!pnQMQ!h4KW=R{8JGI(kQ-V%)ITVq6XO6TU6auY<-%Pp;{ zlT8;jtBm{P|ERL{Wdnqfq|BtSU%{(rwwe(D%gw|Me080!xE_fa;b=uXa?Am3T%8da zR*K{&)Cbj4RBTUGeaBnL%jv%)m~AW1=lX=SnAXc%#SU7gNynLdfmT_hAUkl3G|bEz zYt6so!4h?~JT3j>+VM-}%g<5Lp$Isv>J*{X45ZU@4h479VZJJbFD*zIn?tX)L-(Q*m`Ilq2EeJF@8o}S}X%X$v zmU~H*#d-Ppi@IDuGH{zPw?)0Y1&ZtON=u?+ars4JwgR?q{pF`t<7>#&!oBTZde@nU zlO(D+Dl>0v`VM0+eW)^(81nQNQ~;6ql6~pV+qE=O_mV8*c!64vB zy)M`!H)PN^sH}WE$gl8Bv|XA|*&LkY6Bg;MFDK}&c$VV+PuwCToTVS?#T&nrk*-rk zXm{|q-}vM(BHy|3>F3i%(R{obg=I*h)iXzsQ*{~Q&(*h>UfOoZ^s6ID?T&wswi%XRn21MVGE(WTp=XqJ*oVFXyDJ^oeJL`?BgX8$lRg$7yYo09+ z_Sjt$hvt^%l-LU(7>Zz5xMG?78kUw-zLH)iT-_Ud~U$lCf?Hp_TGURb`UnwDu+0`8VvH9VDM+T8S3~v_G`m z3n+%~K(Q6&*)m4kMhB*t!lcVNzIQOizLJnT1FNI`iBLQZcAqnZHi^OSs^IzbjFD7v z4!IK-GSYdPs>!tj=~L-P)BiE@GBSB*F1XYQY`jQwfE(Wrr>j;=&_2`e4$|rnOl;vME;BPocTPFWSXzIr3k(%6>c(qMjJSIx@C{A z)Za8m^{6x>JqanZ?eNX5(iJ-is=$^TCv9i#NaauE7vpXwA}es`!(k5y^`+Wd4ormO z8bQP(KlN(uk5pyWh2f&&I1lzj1@gYeq<8QLvG@WpoHrTU{f*PDzrv9|V#qoXB_Fxqk$A!FJEE zYtgz9JO!8Ir(QuO+Qu@z%0E6WFXoNJIkY32$kE=TcpUVf+Mh}7F>ljn`lViq5q$T1 zrPFjd+5uk7m_VW=@VII_zBp)DB2PNnXb`4xpU^YcDEj!)DV5$-7I6FkJSN<>mME_| zW#?&vYtpFMZ#YhoXEgtz19d+TT9rlZ(EV0ALMtKrWSVu>IL@3_an5O1cFu@YiLcf<1NZMcUrSH z3&^H)k|GL|bK>8lwaa*>gEOtycA6ewCS0>NtXQSpPN33)eEi(c-bZrA55(&XySd$% z96wg!0@G#|f=~M0V?2_%>r(53s(?OZ3%wa({82Ry&Z~dijASm)R|2ZV1v7FsTNF5USAxgd6Zl@Mn@eP0N zrHFAI*f|lztykKGJX>4kB;nfhmC-cmk3}-+XRwEx7-H1lKc+u50LPE(cmh)(;XbwK3RP2oqrxN91&B4!DrUGfp28fRmC8k_dtSrTp6nkrR z19dYDf0gSPKfWb|Zsk5z8Xt>iN>ghZgFg~|{t*%7Y!MG;*YKw~wccLQk4%Nymjm>F zkp@!JDc+ztQ~T$IW%^QYrovx0H7qck2*Zz+C^NrYwNm$jXO>U%yr&d&fO&W@15n?0 z``X{kHL>{k1bCsqItlrPqkF`jfm({xlvruI@|u5GlU)4ah0tm9h-vtoFS9IiND0=y zONqzH%}k7X+Myv8Uif>FDh1yOm6q{X@;J9r;OYEt5Y}Kzz~iGCZ$-SsYU|`$Tzdm`p*9?*GHHpvdMqTP!7e1V<~!QXR4N%@zA zRx<|Fncqpe{quLoQ$z%wF4BGyrWoiW&m#wMv!tp;noL6qcM%g=I=gKt&I*fuLmxZP zAZz+c{0;GX7!%L>7*sUeQu)jHlmX-s&KX(r8WE^^jVRUr5?0US^*By50fDCc_($H- z)3u>wh^AK_pnjg3i@)cy%9zyTK;6?bx(b7)lM=_L?!!xh+x>XUouARiqZg_og3qI{ zd|H2-)NbV)qsatoygrg(&*A6>l`L#Tto9gPT~r!ix!1H>dSCsKe1?-EiWN%}E+5jx z+57~c!A8N;4$i~r_d&lY0nX}p8!s57=%RJW#2nM^>LJ=f{oA*QzQu}Kcy*_*#YHYL zHsfosZ(m)YmBv+?b?5Q)@S4jJa=P}->c(NO@$5lxGvcB6SYY%5?Jh5AK9?d**Z;90 zbs_UCh{#twjj}A}{v#5{6|Se};=|quno~v|%1!qvT+ZkuwmIta?c~zug}Y3hcmhuU zj)aJgso7)v8&|T!np9M=27A-IY>k`@w!S57$?fJwbJh#YP-P)J(Tz=Nyd}f~VZpxr zR|L?lmB(k1(sMN%BkH8WgPc(y)<9nsR=R=RDZoD~3ERxa^h!fEH+oQdNcui;#)YH6 zoVX_OwLeR1!y+OvD>A${q>}92AvKuK^k2lIV>IMAe_o;Vnv!_EyfS!$Pw-e;{!Zb3 z!!MGOqH++AaTu20WOjLH%nh3>i6-$msiIs8yH&p9!oCui%&MGDKBKo0Ek`w}rRUQB`9D;>dqC6X z`~IImqeLnWEzPFmyPX zGKY$%sqeAP^Lg_y=X7I^IZxZ%ber3pzlXg)pYQkk`%@5v+C0yFU-xytE^NPycBY0{ zju8Re-4CvTS|g-F(CYN`muf)j9@E<-nE@Cr;`c4PGT!nHLhQ>y*pf(eJ?sn}YVxJ| zJKJy!hoc9u!VFGYO{cq1w{+_bt89-@G*ymOyCcadIMlKAZ?&aDZHGOLlh=%|(p}6@ zjSiwlb}qs8B#L*42fHA>%{|&4JD<~o`UG>FCd*pH+6jsG??mebI?Dcp@>@PcZSA3| zigaHn^`IpLO9o#tCqA_NA#n4xa0a3W-VRJ^Jlf>C;p$@6!h6zo3592TZXz|i`~fx_ zzJLB9&yi3|r6)4%rY=ljQv&z$_TR{)*6twJlKCovmy}P_b@%P3gFsiw@Z=?C=G?Ez zIcH|QN2|lAxx!Bb>4g?v5@h2b|0>T+&HCn%~_8QS>|Ha-;uB2ixn{fu77wik>f$Ce{PKp5u(nbB` zBE+#ogB=M%s+K9MPUS9-gk4%id?}bJHMk!y2G0fgh6M=jWRugF%+KK-v&Iwn^VwBhAgCU0LnY~e(k!!}KhTEIX6B{p@-q5tHAxD81 
zas0EcWOv;V{koi8NJ>te(bbgeOfi=519{@{=0(ExETLKpOetcYEMKJiG5>Xs@(dwX zn>hvH?24`9erk)g910iT(7VT#U!^VDV`WHyFLS1u)wYQ^C{zny#F=Gz7iy-BYr%Pr z+R)fFFcVwMHzvZ8f&Li>M>%CJTZ5;jQe4wkYj|M`Xr|d9h1CeJAosgnUiq?G2UY&6NE(y$7H^5F@fpz#5`t)|MXoB{-PYHSoelqBm zz#OoetE=T1A!v%T-f`BQ^SQMe)Ulpt@)C?JWhP!Qa2Yt9d4NS!>m_X=JOI<;2=}q# z61X5gT?5D@Rl#IZSnub?k@ExDwNJghul-V!;#v)s#wb(ViNyYEIWnaXEVcdv zs&sP=;&7=B@5kP%zYMFNXE80&9@pcukiFQuJdJ4v_hRi@v2&6*Lu+&~ndTdKUU><) zq2cV=|AfFYhkk2-HB%lJVjDq#TAmxsDAY-29=H=pe-_Pq3E+zauNX}g{VnrD*Pr8t z6Hd0=1UmqN(+)Xj2J2r-37ZCykECxYxQFYWA6n1A>M1QdOzgE1t1 zGRnG}OsO*=eozo?iPnHEU62BL0Lysw8zIF*9NfFMhOFTeqfLWxPan_N6JMvT3Tcc1 zleNW*+-+DAY|~wDLly2wFKSGx)$oE^4c>Pn`6eXM)dgxy?=b9-T-9yE??@Td<5--x zs!nv?w#ur&47#BovCNtS??lSYY_{|bsX8U!M8Qe{DUhmSEM}x~M($Jt(Eq3uxWjNb zAO;JfvLRXfTcf9=d+&neOGyJfEq>pL(%N}otw}X)3UuC4&4eg;$!BJEGBEx{7pe&j zx50B5nM_X!XEShF9}cGqis7bkiGb=w8aU;)W-67bo5=jM^C9*G=Xo}cYW-e(NGjcV z9|iv*a5R}tPDgsX1oslt`%pbX%H(F0Jw(7Wbarg*OX;ye8K%r$pQPkcdvE_OEWjO` zkn&R9?;xl(eZv{INR3b?Q;EmnYM8osqi!};#~ci735WU=w4^0O$nBv>K-$`an0ci| zKf!h=E+fyrTaVZu@S=C=!Lq%Wn}N-R&1=jd-2aiCfhH|&=bD2pF_BEg18`A z)}Z!99rF2(Nd6sC_Nlmsjt3#8blE6S8znC-k4NZLYO)zr(I6#)J|f6{L$K@KYit58 zPqJZg>?O9U6x>kP$Cl4PDK;vWXi6hP*$*R;JZTj^@S@dQemm94fkknqEJlec-at$z zI=TG*2Bg?u;O%?AO-$B@!=tJ!pI6aHp8vYM77@qe<1$gCl%A+>A4*Js1PfjnT5U0H zCK(r)%h0Qma%NJG2^Ui4X(c`ep&9&f$vghSr031RI4@T9;e@rt5h} zjW;`1_nu)6Qfdm#u4k4Z!2^NG zU`x1OYnqXDWjl4k9xG2$nKxOcAc(`V4(DwE9uLE`ynE=2WvMYup}_!oJg@`&LC_gE z5i8l;*hyrvWg0>x`BCtG3^+5AprO){s-ccrzpBX)awz>;;g@h?Cv>vUOSa5}=V))9tf{xvNz<~-);=6LrxsvQuN zYqRNLPo%UhSydPS$K2*BW}&#sO@%93r!Di7k~f zgaTFoGvG78SNr&hp!_=HG_8xID+su>8>cpWZ1?kr0sZlC;h!-73a3mzh=k_~jFb(U ziU-hRI75c?5U>4HUkV1NQ;S5a7V0DsXtgeI7#UkxK<|CUe+ZlmL;Hitqqyrzrt+1% z9LwAnQWfb-_6NXKwR|r`e;b@g&Y6Ql!v-~z8{2x`@P+j+Ym|AXF&&H+*`dSzak24% z(njcqOgE}Qh>dT)!{~ii=%ub>c8({@Rnvp|?ic3D*AXgU97WTyMl!fmKi(3B zF*k^V?3+IHC8PyHr30y43-#r{alsd_e?YSd;99)wB#5FXLGG>kdU2wbYML8t4OpKF z!_?%nVgtBN2b&5FkGq)4VhD@WE#13CPDZKCUN2Pb?8~VwV6YlI;t_e^_N>5Jfs??i z3Dwo>;~qb1S~YhSfSu~Mf$7HX{G|aztL~8JC0`ZW!q79-mA`_?5;xNaZvg$DK_8oD z$EH<9$s1!&_}D_(vO5%$+xo#{WV9V@XUMOa*@(X7O6@>=l7CuS-_BW+&^m+uZP05V z{hr%&rnfUHb7Ut7)k+?VES`_f39O|y2{@jvK zx*=4P{5INrRZ3AC6|Ap`8wxf3X|Qevf#K~dH>XMOX~5YuB;e?qLH%J2S8fQQIQ6hL z$A1uNFu-Ks`;ccUX;d?CAzGUWkLtaB>$aiig@@UczfmUV-!J5sYCtwA?@%3%(l4XO zXynRJ;s{u-X2zW1bOzYS_48cyOmC#b%aOW#{sM8Wtl==~H7^1q6WKSz_%Kg#pUh~K z_<+2}UXL?gvg@e2+SiPv*sLx84UTY2KzvrkE5x6=fxhnWXpywkx)-k&|4zGlw#||E zb|SWMo2(&}O=*LTMH%F3z}kXaPR}HuAP^fuwXuhi?gzO^9b+3?6f%jOmI+(b%rd0b zDeoC+998!Q9JG|p3p|oDfBZ1;<{Z#A`{r_QDu0r5tzV*_mNVQm z{~MsBr!ZzR9U((Sdf;Ugq&jVPRIZGQCgLLH@O=n9^YFrINQ7niLOoWC&KFjj*~`F0 zkZj2a=ToE%X5}y!7=j9>t__~ITg56GJqoqpn(Ef&d^Tn8V|Y(7Pg z-qEnuC0ibaK#Ru;q%efpcO@syk=QnfeyE{;i>GPhmEC@vP9TCFW)M0Zd`0kmDI}l$ zSqHhKzv|(SNz4F8vgu@5{yesLSzQCj%`zbXM&hZ}F5a38 zP6Pf*95KO5(@P>2Et=mZFr#YLBY9VT7(G#eE>P%S0yDMX_ayUeWfePuF!6+8n6nRv zOQd^6uAlEXk9gL2RfvQ9*m%Wr5vpBZ52@)*M-a|tTZP70gw0~^7^sSOnD1*{`Urpy zB#f{!nPb>5wBY1&%CmW5eKb32sOx6n?_iMob^RG8kok8A)>OGm?1ZNUGR&ko15M_~ ztC<{f2UBsO7T6Liem=H~y96dhg78=6=+LTwLre?JYGRy0Q7yCUgKe{2Zxhe2cg$^DeLWBWBy+QowteQ1x#Y7?jg zF)xuACa0pF=g45Xd&;r3zW%P;3K;i&Xj(}pNQj5fYiF(Zh_w&7$Z&yvD^VQD!8UXn zs8wIs<&Ug9N?qc$;rwN~GKIeh^`g|gKo9$3veesb@>`s1kSXE=8YwQ0Fnl9iqL4tL z;&DPGQqJ3?g}xuF2xL+tSNmr1I?6z<3hb~Yp{*Hw*BI~&q(S7!eYazCNX~sS$GQh( zY3M^ZQe=3wiKdovg!`$oGIzV-AL1Pl*Uf%GKfh@NSO@e6h4;d|&+LbSuM+`^b#E4~ zb=9>i4W_$cIep22a7GX2NZ>xjcfyT(#D8>LH>6m9#TxF}$$mz(B@aBnNr&mQc$m^+ zV3>EE-?3rgXdAehLHuN?)R42ee$5=eh%Q`=OtO3$f{r$SR+Wgz;E0<2n@-)%3=}4t z%-`Fw3cumq$}Ym|G30pV&l4Y2^xXUw$B^CiabTIcri{s~UQ50J#YtD*XUe2dID$lG 
zf>(=R{i~-g!)+INfEpjL#NG=Y3V2-5uCWKXb=;>V6ObTxZ0 zZF#gYDb?pHqaUk<1Ji=l>}i5{+dIPbH(`M-)(ysR4B}#)Ch|>476JDJ_$V8*+05w! zvpl98pRt7)Y0LpLH4|4MwP&!ekFBu^#dDAn`c;;FnMjP|Ux@(OdQ(76yk6f;HLc7t z139sWzcGe1&#NY4FUnB7%scE}Uy4p$|3PNtQ6D4*pulw_bi z?EMoeDO>#_hVK^+7AM`Ficf@AjossjZSIvitNa{ww)+86p}zzyQT*PBmI(QukU$^n z%YnE`R^g2V-4h4T)$V*(1M`qb;PcdAb-3fQ8=YQ!+FMX=hEMkLA^cm>@(#kBYB_>P z%Zx~#5T};9b;at+9qjQ~MgkXyAq9?G9MW4EE`Ni1x3_kw?n~U{M%Sb=W9M#l7qdH} z{NK=vb^1;4dN*~GT%CirkaTi^^eJyyh!Inj6P!W5i1H)CTn7Dl|63q69Lw_+%2!cy zZ9ovWQ_3k%J}B=2cF&aF)UIDNa3df>Q1>paDh)OW5W#N zu_;ibe5`i9LXJ5zrbGLwAJVw`N$J2fp@+XFdex@L`&CpOeZ-)@NF3+)=zRzY=j?4YdMND*LOyi94(8*?bbpR=hw01y#Mfrk9XAdDw{`TTc*`M& zcWCVcd3PwkGE2E$d{Y%)LhpVRnH=0ddO=L_9_vhg{&>p6RG@2(26kAu>#_Vslu@$# zsl8^pW#hjx$&+|uuz#a;Ka5IYYG^%fH~PLYe{a72>@?CF>V33ItZJpd8kbobf)qKI zdG`+tAmCyj-T<3vT|9Z2c?bOkFPR){OOn3Gmu`l6=R@CH_b?|f1qi}Jo|?M@^h;>r z&wK<9hgE#T<`UlH)?EE7y4}Qe)WOzIsEj)NGkT=a%S^qnhT_4vNXJ^L%9(YLwJ)^U zLUCxi1Z)_;N1gjQ%IkZ9Mi%}Qmkd|+SwssQx4QabM&e!Lh4z(}+|GP5&YV2l9eq+b z3Mr!x0tkjZ#sy1}vYgIPZd=n+dVn)I=G|O(dQ&u=Wfool<3;%jtB}Q0Xs&=6Qs-i??!@pAcs6_T?m^dydWa%kM`BIu;;@-t# zE6q(ob{uK?OG7LYR%$El;3hCxGuf?L>|M)H?84&0I`3{MJb!A3O>>E)ErcSe@L?)n zOT*iczOA;NMAF@Am$7!KzWuwVW=jEK73A919O>r72A9?$)vEQJiS;h;n>H$>`u z6>qsIx0_GFsqa!RA$eem2`K=)#4?5MQqD-ukOn0X-teOGDfwj}T1Sqye(IW4S;ekk z3YRwbzSf7ADfrtIgN+e6Ij+yZDWhR4`bEpRVq;od{LOcLACoQUcz)g+&>b`lJi~=l zeZ}ikiMv+Ggdv?9qtK}zV88Oqx^z?!dI1)r0iT9ctZ0-SK^rxdU~bMJjj=K}>_!$qpLBAWv-L{AXctvgRd}1#$2^Ha4rqVFQdV=+- znHtOid(o2niS!zam<-MwG=%GN&Cp61$sK?PI(;`4oi4x;UC5I_VMx^lbRqSjGQg zZd)R5%BP-pcn-_*i>ByT`?nMI&=n0vM05GduY4}$yXnhyn3Eu)T8m%M;*%OFo z-Y%p`RVq7wXTLIvGu5n@FYIe=zMouIW*Q*EC#7>Y{jMjNwHMns1R8OQ3h1|HfpiE< zj3x&YD;>8G^HL(#m`pENDy)fBV*L$J({;R!*t>IoqGy2!kZG{Rl~;DAhAX?-mTsPB zet`!RRd8hkLp^z76_DP(7$l$mERxPG=T>1Ql@`$yW{%_%#5uUJlXMNpCqn~Syw_Jd zfar$9bs4ZnPusc&-nHgS{UVt?WOrCafKD`f5Rg&&{zy>w)J4G}V^aMIP@$2Avd%if zrf}p9@Ox~~V$P*qk@W6# z?@QA?ZPOwgZtQ@r1GoyH!yWw*PUl`P2ax-{Ik8w(6A3103RjW!CoZ5}KtV>6P;(${ z9ZFxa*xnmex64pae$oDYaXfsUp5{~EbE7Ma9WuLrJwL-vm-L|d?v5d2;OKasmOO=4 zfc?1X?^tmKAwAUSUU4gEg>Wu^%NL$C!&zHIH341|mfNH1X3;E?% zF_Y#LdqKN{8s(1{KM$i}9?1<;_nxw+bIr-}Jo-?!sx)6WrLot*ULl?t{1{E{Yvw^@ zJeZ+FiktqzvH}%OU*&O6uiAH%u54yx+A>^>9|(rRcsZN>xlVne+SdmrpwE*(L>+>V zHNA6H6_^msr;1#j=e;ZA4d?9*AuC4lJ? 
zFpn_3W%C^O^}IQn`CW*>sm&KIV#8Xk4g1JUU=9~T!>r|3jMQ;OPaahod_x%r9Fy#; z0Pl2VdO)E?y+v*ISoSD>=s*3HX44PVGnVhH-Ilx1583SkiPJaAQG!0zZBQIZWOcuInq03wND%+Up5#0ql!wrSM`o zRt1v1phmQf*TXU7fruQ9G+N8;_QnabW8_tcK}XF2nSF%*vi(~u!J6S&VN`s}xUS|W zzGciNYB4$(nGU-6;X7kS9qyk!8-y=_w*N`AkI@~fZBQ&=Un$#VoLqo%v$00X?0S=X z&G}({n-(5$dieDhszT`_^N1T%1^YZ5kDulTX?Xb+IUO|zaJXXzZt`Gd2hu+cSN@n@8P}pJOd; zc20grMXeEGyHx6Y!LdOb_Ibtc1Ve2=PoR1HN;ByAze6TAeC402}*v%eZ#J^ z(_fdEw`~4hpU#gWY`sDZcD^9Oh2rsrK^V)}8anqi(-wthVERp*87rSz7r5SozNVMj5UY=ssp5T#i%?A~z0(g$wJX03h{V zW4^nWZ93xZ0j?4z*BJA0yG>k$H}nGc3GU5Qhsbk$ItcV08Xw^;^K249l;m4Q*TKL) z7*a_f<+di2Nt;_yGfmmz9gzv+v}V#vqARWZ14%j)UTcQ(hCB|18BGO9fWBKm%W;{x z2pcUPz7^PIXa+A0FzVy8(YoKq|oQbVwVuJq=;j!9Y~7 zn__-GmLSfJq;ix;0trPjZ(66_+T0_?an3LEK)}A?^xAh0d zt>B#$(_(_&iOZu|{c~ZA&NUT>JFLLPTq#WbRGt})_RRSu^GWEx7fEb{_=vW?Cp~5l zZ&6}w`6$CWPrwKNbSC(S-#pVOQU_gL)2p`4x9N&Fqp6ZrQpIm6K9Na{19||qL{!$% zSK@4UH+wy)ae<+NNlmSWFPfe-wTh-xJE-j;c_hk0oeYj+qD&vep7?``x7pB&E3S$Y zuT`ooq~h?a6!M8ycHy2;fWhG%EdjdKLh59n+T4{opSDAV$j+mn3SzP7S8G5DXT4AL za3CnN#c?ArzIw zzDG}?oMm6AbDZ>93>t^_g0-F6Wcd+cZh)0)#qDQ!Po!r6uT^hW8D9+iLn;50{vt@1 zzlMEdkeP{SJyiE%QySM;<{_REbAP70_n|x_w4ns-E`_>7?IT2=JJ;Bg`HodM*UGjR zhfghjzlWJ0GLup#q<)cg4$crz`zrc}2q1Q$e#u#Q|Fq#c_@r-Tk#n@X42AZ1a97j~ z;bxJ>&5JK2ad(JwehyZhkCrJp!5ALZFh3wVkkD29$Li8DPQ%B*N?X21gg0ZUqxu^5 z2R5+JH-H_fR?1=dF%jga)4S5{d%$M}*Sd4xLT6Dc#~0a5^STx8y|SRyR{66E?!si6 zMQ#>qP(f>dY65)+m#{E?btGJ71c7+skX#BoD!drJd$EWA&hIy~7|D+uZ60s<#r;!g~pv;lSlE zA8FWYvB3cfslRRH?V_ajEjntmPXZkbXj{#n`Z~=N!+}*F?Yd>xp zHx%yU5c~A&!_S zUkK)%O1IlC%8}aBI}TeRW#HgRI53P6R&^!+%z0lJHvm4+RlUGdoSJ3wlR4+TES4uU z^Fo!@3#Uo!2ZLZ3nt2%7ES*pE+~RUz;n9^GPld^sL#Q-g7#v}tIwqz3cExo?rg}3) zX7a^zM6S4N`kWva1CIbsX_djolT0yK8x~k9o@XsnP_Q2`E#P^H_;Hv%24sr1od~El zdj>JDlg*S-|B~;@N{addSK*c)p|qLRR{+8Kf+fza|E2AUsA&YJQ;-ribsP2#{czry z_gV{FJ=l{5?yCGG6t(Jmu{YzWW2yD1UK*m!=}+%c`#E8FI<(aD2}~u}i$i0?6tHb9 z5&*5NRzQkW`p!Uqim*{+R3EKGgd_ zCS$0N&7(kq#d0^K<&@<-(h!aow7s00Z0L(!^d5)Fg#1Uiw?4pvo|@*J@&R=8M~Exb zx}^EOcvB5Vgb#GQkNuX@%#vA^{j+1W#6-zUv^D}TMtUC2_th}3xXz%*m~}+1c++K{ z@110Sqva`kZ~~aB2BURw6gmj^Tr?d|5)Mc3w5H+(RS4$A)~zMZQ3V!X2*dF8TI^G} z9}Jpe^xt9^-7p&1`$#xIC}HkuY97CHkf$HWO5QvUh}puHZ7R0e`c<_)=XVT+H6uk#Fnl4E}&ON5@CiJwiY;d zgx9NGa@!?&Zv{3@H_tr(+D0#{{9~>{{Wv67I5GPQ??qy`9EAmTqI6b{3Vf9X{R-K* z;R=>!vTCW>!Tp>2h_73?S?l*zSHXx?@TJze&Ai?Vn)_j7c^ zP9?HsP{VI}Nca*rXq>e$GX@xBw#(Z@G8mBvTw&mA+&#!r7g~Q5TpKKnd&P1{K>gqw z_2v)wv~cdFCpC3p<`%VLGm?1xWItFN4+<$$l)5fNd`@=g3gOY!RLczmLXV2j9}b*{ zUYD&VfRACHMF#B-mazSTT1SD$day56o|$LZM`c<6O1s`EI~JFE0D8DKH(A4#3xke^dUW6|{KEO1UI zM8x?xp(ujip`oY0Mc1pjw)UqLR`;(2Ysyr6!=CH|KWnXtL|x8EcAJ)b<}E*ApS#_*a@-XU_eK9>%KDXd~MEWrb%Bg7AC1X_dkrUMS@u>IGHU#kMrADr3Z>|W0{ z907t^niu8&2+NT7BK9whYn#Ko-zmu*D7?nf_zSn`N0hkVhRffi8nOR!v z>e@k;pHyxg@hC8syhly~U!^s7rH=4TJ&1?cY|;$N3xh_yH8Jfe;zc}f*%$NHa6>va zoLt4{@qDjv5D2sH#ysfS!jnkAdWlHoJXRI6g6VErpwWK?9wo`2r7#ZX@3qfCEMfm5 zGal`*DmfRi*~i%F4*Ep6W!E}GfogI&47HL+tC_cvqWV1-YPi#uyC^KRrs|~;q&Ox{ zG7?QYS+fs`H*^I%H65=_RW~qbbur55us3vlrOOKTh3=2%lWRNIe(gOAE9rd0`PK~J z5cJ#R;t(ZD`93h5?~}NBlOg3;rQF;JEZHYaBcxER8!^hG9Y||G@sDuu5nlT1EV7O#;rZh!Q033tYWM@N8pd%AcNe}mno3fat^1JG~cR^o+;S*0M@ktN3Jf2PO)}~%}B*Sb< zw(0@-v$Z)DX8o?05W-Un&7NYz5Nf7ei(S|UlH);dHQh7PrbaYg+j#zOLJNjT6`z9@NJBr~J-rg937)E2?jGk?6GkHS9m z{^V(4yVsY9kXc7o|5%>_s0Nf^O(YvQ9qe zN#qx)i3!9VxEI+|V?xb2Gt2K#S3yuOZdIGFF*CNCNW;__B zreAdD5v_H+$m??+@mmJs4p?p@;lOu_$Sa_Z+LJ9QW(mE^rKj24!<}>9A9A@(uL{b zc}Kst!*WVHaJZVtZ?53l(Sxv6F%^?s3`ledRZGB}2x&ri`FQY9%_vu*m77_2t#kyf zPv<^>e49qne~*Y(4)M0W=1AeSdC1JF89uM~iFnE)M}``@876=uJ*n-?8PAu@dCzy` zu!bYTMMsBObXW%VSLP^`=qB|fu;Rxba)orEflAJON32nk%fYjFc+HNg;@YXM(p947 
zeQ!upFJKANf8qI)T)6&m@hY@N*C%+33GkPqEZ0@`L>!`wK>rle)=#|ius>C6o00i$pe#Kp9>h z%0IxR4T+8(xD<$1e5&|-a8$#;46lx}j^S<5_6_z^#;^{6Kp)EV7s-sMuM^j*!F3WH z0~4$%M_{GtHH|&P_W&{j=xckScgB(po)DHZA*_e=W^m5T*AS}JI5+i(c*O*>ob7Ok z@R)aRP>wji<{uJ#h?|HP#*{)+3LoLw1+8HFhY|Pvk35^H0a7CEZXw{ZKZ98$gpW>1qQ`!)fH5sKwrfvft zbIot-SF^j-q!EH=VBUGG<5&olOl>HPHJ$`|9i&9-i@+R8UV{ijlcc+ZxsaTO_~{2x z@Qt<_Aic@!Y10r}1O_V8>Fx%xFp)NqUBQ%y76=u%I_#1ZW;*4zEJCPrI?ewdtKqx=ZJ&uASszqZhVKdblD~^+2gZil^}dh zmb?%Fw>5RGvKx*~pgA-Om;VZ>gtS<)dixa$Zi52 zEq@qV$oiHWE?axacQJEaRm~pQO)Wo7sN_mYx7C{OHV2?ylew1$`8~ zOn-(6H=X2c`((;Ud?5WsiN8g){sdx70bTh&>n+AxHNBei*dvK>WNHaRrU~ofFq6Ci z_0-rRLy?K#pw9Y@uo8zInMFb#zv})IM0J?tZ0{jtGJh8rcEPB}Sb0DX-JhOV^EV(O zLE5~*AXg{AqNp3GGl)aNxJY8FDt;2ohjTzlI@sH-6+&ac{ur_s90-C#(~5EGbF4=H z7<+=0vd7N`-)(yv5Gmj_7%{g*DO5hOw)sCVe-!9X`u>Hp9FGt)BQ4uib@PG^6_(-+ z@=%oS9wxvzhYkaOZ)0&ZwIX+z?=K?Pi&4*A?wnF{blU^a62`qEzA75xp+Aj|Rm(qh zHkOF1!>N8S*I0=L+q4Fr8&2Nik4IbXgz{JLiZXiAA}a!|yybB4G8x8uTQdjVUL=2r z)P_K-KMd&mvkQCclrt5MDvQGcpWD}GjzB9O2lp680(BpIam77uq#;|DI&(3cx;DJ( z;FAgDliFqAh>aM)UK*aM!m(O0Qwy?L*!2Cqk*?>=uUlf#`bqGQI8GnNg;sl89``qq z<{kwqaZ&BB(Odt1VKqgo$(hRUh1)AN@?*lX>n%e!(n#m#*}I4n!^Eu@pH%xT*e-En zoF!^Ytrffv-%6=+I&LVZ66~vsZ;+pwz3_rFMtQsQqvOzzkQ88C361G;KBidML!TB7 z7tyZRaphObt0>cMd;&xP)msxy12xnxQ-cNqWs55}(Brjy;uzw?8f_&G8|-Pai2A~b zURF!x2A$AW;~Ht&8eKl2d8nlow}oqve6Ugk9R6bI`rJA-QI%@Au#nQr(Cz*O|A)oc zPiG4J21Ohia;I=;5=$ZVmdS|!wtpdSjjyYxCQ^-O7<>1^Q&Pnn#)nu1@hNyO;^y$o z2$bF6l){9dj-EY&o|P^CggEDMOP-`uhKi@6s{3_Z2y1l6H0#f@3*n#9lo6c%l5Cgm zYpEX{O)Al1H3t=`ZK9Y!qknM&jVd-D+Vt(I`f{WUO(N<)}Chms-H{2OL;Q=dVbvv_X{P0t>r-%96Q6z)89)Q8A>=T*(w-+4l+ssI+&dj z#{WmO&Za)(kB1uva=-erxk$IeP=Sq-@**J;!xS=)`B~;MZh#U3r+)uojfF}r==>?a zJ+7_1S>S(TrF|>SC9UrY-$t13@U)WFwr}oxz&4HP&B3Kvsc`DNH2Jpu4r&fYB1oa4WvZpT)nevh7=IJ#@T5seO z!31oyxEbSb^#!vy;m;hnME@bS+cFms6LBySN zMHVlqdyj4CW{jY8meq*+Z1Yy)8?3AHGd=z~9UG;aO@AK)>+By|uG9p@k!ov-A<|l@ znpi}CB+)OJ>TPg1HHm%aJ=(C%_z9#2!8>+ie;zsOisk&(1?!Kp-}Go3VpbHbd>K_V z#ZXa?m-fRSmH-+x^Fgs%2?dv+yO48apu8ozCfISYNbNW^^}>7HAQ;N4+|xC2pBcAX zT0+_KVbs>m`|0ak`4)N<`3!b<&?bI4BK4(QlkdH2A5ESt46{7z6#PiZs)+|zxG$VKQd zq^PZD&i8DoDDOiWYRHkm91D`IN(n~uc}?i0@AzG41%Y6z+w*_Ew#Jl321O9mFJO!Q8?4$Id80FkFm%!R-nLSgZM!b zw@iLr4X&`Dh&7j#pRQRdos9+?!^CZRcShxRW`X&kvInu`$uw{;UQARQn6x7ZKP=q$ zk|%H~X>&dGf{jU(;v%iLd=`+E5!~^ll{2jGO6(@qQ780~;tPgL=F;0=r(}4z{D&hLvct1_AB^j1^ZX5x0#t_sbyG)Rt1&Ia~v2q#5=e_ z;TD;&NPC56+P{XPPZ_!oPBf6b^aE}H#U)-ZQ42PX22IJvWzvUk*CrwXIO-smLswy~ zgJaPmu!RNhMuSgexwj@0=CLM0c+rC-vX#m6cV%?Jb@eA(#~PbCUD{lgu@;gHfjXUi z4sl?7n%2G5H$z^l@-n(OauzF5(p9yatM1DzQU=ye1nCLDM7GWpHsa`eU~rrzRR!KL zN$;uc_rMcQw@g^8Mx&@*hI4ec)T-MOH1WJ10u2uox+M8?yRq z^gewRnr^>Jbz|=E$7d1O{8xm%Lh<`@_2jBU;F-)^_v!4U@vyL(b%z<(Iuel%A*xkN z8u_)P{3MKj`3@8+Clsmz(}{V|>!T}c{;B0(Kv8N8zdHwkYqe~4>> zF(s!-*=N2hcMfImt1A*L=Wy;Inh8YGyHqjN`id{<+?BoLBPiO}a&@A23DpmF2Fu4& zzF@hqG(p+nJt5|5&0!p0F*{FmdHqy@wAw}b4Ct?^pJSQ!0_zh0UHsoCK@;P&`X5xWKhhyEyl8Q0#&S?9duQ8Kr zozTY@M@fb({)XCs8+Lf!hNHVvNvcUf$0*5kA%&hbzZ>#e`|Kc|;wWBE!n`?~=k zBt!EANn9`XtOnELs7m2!gnT!|lR|8E9W(X$ir(GTG$z*mp7CsPAEO2Q zk!sPaxdvh*mG1r$wo94c!L455Sq63Q@Crr=e5-$*ED@i8e;@44eb`44#G6PNoZ-~H zme~*W_{cxN7wWX_p<;Th<$4G|G8`1@#8q8|Xx#KE5N(da?dUjv@`W&EZEB7ys=Ytc zqBJWAIFQ1B+Q=@5%Dj&7xZ1FmUc-Ur?Qm@qOcCKy>mpVBMEaG1v@;d7MX|TKrp|)& ziV$OQ0_G2TY~h`AS`6Dj#i*(~=u6A26vD~0aluR;1un>p3`M#7{0m~F?<-{)#BW*8Q9{)m@asm_5kWuW zkBtLuu@z^1n+@5B|G9d^J7`;+wNe>-CadFnj5;fA?O)w49qua5Ogd2{omW#yN`<{U z^Q!+kIEqs-e5}^LlZn7a=m$Bvg-|1{?+4dq%{q==0(`jXcMW|(ZSXojm!om21@bAx zLLGZVi*7*=v&WnuZx|V8Ee9EDIx@2Gllx!5N7Q|QNhDuoH;KaU5lRy@!<4qYC67C4 zx8`{HA|miU+}B3KZmYo10U&{Ar{+{8+`i!YfpYTaG~C92o2dCo1J`!49F_Gxa3+zR 
zEZF*m{OdX1p}D!*XrW5y?lzVPrCR>uh~Pefia-Knn(`1Xg{;yCcJW=}Yx5!@tP9#n z+4kOrPAGp-AISNkfHSmB@@Cs+hBP4j-tfwm@V{^Uz~3Sb!%W^B`w(gtG>Lr$+ZEi% zGUa@aF6(2AI5x)K8?#DhQxvxSHEeIkCRt)rVDn1MWEqTk2f6E^xF_n=sld0G6f^q>I%}4SahElUx;8T4G>af z{J&sgG@e@A16uvyrddG;Qxo#Z(Vo~xz2g5R_xvB;IGGmyFHsvhX?p#aZ=CYKU~VQR zKORzU<2$g+9c*RDG=<;iUsyThsiN6bSIh56?EkXOxpHKpK4vVGw;`t+{=)wy26up4 zIu4^C`U)He|BKLubfw=BGx{p_CQ@SP3`=Qjn7Jk~{{KL?l{!^?A*!hI1I!eT|1V26 z-fyh#;CM`95)H3#uSNbBhl}O^m;8&4H>@^y8}R?)$tM=UMPNiGd1|>f9zxj7+aLw$ zB~}I-PjhYLe|g)~S>H&;6)*n_p@yhwW@m~Yei3Am{|gI;3BIxp23|z}50o0+WY}*V z>N%hrZ~f8Txr5T(fgd-N=)cNIP7D^n7d^fM?d`mW7tIdXhsXNM%KRN~0?&sKsW&b) zBuBs@>0Cbud)L}IvZP#spP~waJbYfrGWeMi!)+vwI2rDMA1^h2L}g?mXwG<>gvP?R z%sBW#ziXxSc=*3;$LxWq=cV;B>9I!UCU}j+bT4nQi<((0v9SAsogQlYAY{W@vtT#sU0gULvOiH0f~ z6%{esSW)qXBLqYU7(gvrRIXBs)wB=IsC0c{#mf4sR=iqQx3=nC+gevEyT!-0 zTJhDo*4sS;_`2==!~bqR@X^ft=FE>Xzt^1KIp2fbre0<4X2r8GTa}K$PNITF;Hx<| zM}Xj&)~~Dgql)ydFba4-3u9H^Gx=5Dc{ekcnZ+)*4Q99E{0;y&Ho`wNjMC4DIRILh z@`IPF_vv4N>H;Ia?j3BN$!=w1J7D2`m@VDTbT_g;2UqKKw|Z` zM+R9(`>uxzuqMprs6jRtenNvhQcNuPCdC&7H|=oXjye8Bsx>Llj!=)63DWW5qcvv( zZw1!k%bZ2HeKy=<5pKpmY+Bs6vm#$QbEU?fNoZ`-pi=HJDDw&+59XPw*)G_~Rzn;_ zlY&-?Dwq)WYUNTP1qDnoX|>!MVZLaItliubP6FN}yegK@CWo zsb9rxOt|TtUoptvH~rlsFK4;@=*ci;n4uXv@X;{mf zS80fH{7RgTJ3mI6Ho&oW7V6P75vRC~sRufVTcTuuc5>LmUk4jsS~_*It~-64!2ggr z4;XP5PK}*(zhSfc2C}M{>;{i;8N2Vi5(!JQu)b*x4GC{d>5cS{BwmFwu@CF!12!X2 zq}@s~SFur5dL;Ul!-gsY7q5N}FcT4LMH9nTM=|vaxyx_?g-#r2E-^{|v&>xl8^aXP zRZ%uJCH-F9LtJ&$@I;)bwpG0B5K{->+5^gl{_XI8$2IT8t}?$=Mw|da->dJ)hJe*E zH`zEf;6cmHUDWE)&;X>*HEje<)2oukIXz;}Fm?cU>x_W)0_*oi!Q{>HUb?dr?=CW( ztOY`|X{kagt1^V^Nlq=}@#%}I;BIEh~C)NAvJX=C9*S%$0(N|snA^WAFOW!+S2bUJ`%YohNFfikE4FvW=rqM9O(c6WI zO$6jcka4TplXQX5%>Ar+7&pbZb z1RN@C1WY`+f#TsHp@JRG7}PSOw`sWFg|B4k)UnEI!zCUj@qTK??YEJo;D{4jP#lDj zl0HpI-;k@Ro!VI>>L11mgcY;IBHWe73(<`@nH&p!20BmOekfR%yBAoS0jhEDcU+$_~(&Jio8-&i0b277dnJ&;Eu6c#f@tv#8)DnesaC~j`A-t@G#R8-&ZPfN}F$rU2xyqm>nP&_KHpSx5B9~Lsmdk-2t+7fj0O_+NwlQBWVz6ncwGwr6dIb?8^fS3&LK-wBZFILxPR{eeaCQ|5( ztlp8>mm_eD9~lGOU@n}@us%NowSR&S_7K8es5$w8=?l7)0p}E*GBh0QgMT5#9yI$) z;5v2)b0RTwn!SvDzU`HhSG!(X@sV>@6n!OTQ!#&yvGgLiX>3+am&O^^Mrotra#Jn6 zSc8v&q!zgnUV?A5H4sHc)u0ijYn zO4{dGW~it&bgs&~gdU=1rusJE(@lBR>#-@}QEH&)m4_$G!(V|}ubtTgiHVn$!i0u> zy6-7-q$<=PzrFjEbiY{aELIZA##0$q&X^GZ2DkVQVJT8Fd$HW>sfqgf+?X zlQxvgk0+4rTJL28nk82!!F)@xx4}C4)(jz7_#7yC1 zNt2S+$GH514Xk7G4!HXdP@v_q4PV>6&$m_tpg{P z^ceF+ipmb17k70ERfzs5{uLVkqFI1d9GQ!3sg)7jRAvYW8%U30`3BDMLq)IjThtEv zbhQ1~;K)4!8hL~XG*NNqPkU;#KXBc^ll39e*z&&GzQ)vo3XhFJgXt<=^$(T|HM840 zpo)Z*A`OmdrSvBR-rx+V2b^=kdb`-`}}4_M`?9TtF$#lnl|;J4qk};nDPeQfX`X zSJVYbO=nyyeOD_dqLkXJV9F~C&*nSR0Kh{%@lz=cs=#V&x>*-(txT*|(*4!JT`ng| zVYZNS*-v2%mGR(#c!{plrAho~eZk~WUt{FJyaa>c_IZQ%9M%OKz(|ONHa>uEYWaxp zEq9()NU!TO8qcrhw{(ryg%;LSb2KxeEw0|9Oq^g04P+7^9(-81R`;?>On?PVb9Q(k zKZq1hL@?c$${H1B4{ry>p3{R*`ZR1{)k12AXQA$|q!;JQc;6js7e9c{=_4xs4lsIx zr;ZtLqgYBihE|qKk_N~|@#SGPILu1KiK}FhSLggt0ck|YuPSb}g_)MqMyp{!A9UwS zYCV0NR0K+^Qk`8R!Mvbbq<8eU@26J6VxuQcRsBmFs}!e1PzPuB1}}&_x|s}3Kq+|2 zy+%g=;2Z9Zv~~Bk1?F(Z2P-zc7rue~()w`2v7flZ!?q~J0f_1m$j-cI%rjz{)^816 zmc5~Y6as-#M}mKUVxa~cS;ymtC_&z5JmG)0LFYIxL>dRG>=PZwXkZl0ji#2ETKACF z*!?R}2tNdU`D$Rdz{)EJeEuU%S>yi#8LzPy~p67 z9mVuQn$D#)@J=~R!seioNreMCef!+TfH{E3~uBtGuOz`cMM~14d zsQm0~@=#4-8iT)>o|$q(LnAX)z}5Q#Mz?|eS-V^-DPSbtT|18+WOQDS@-I}ELA;J` zwt>sHHu!6_^*k8ddMmSX!MmnWrCli)7d+=WJ$@*nm-BpN*P19+I<>lr1r^|(P8FC0 zrz)yO1#l}{EbpZtDI1-x2A5eeK-kxj9J50-A?*#0oGH1N+ z)G#rLCemki{ZTttmGzD#mjuSwYhodVOyqL40ah`_c>=_b>h9_@KQi@^wxi-z--?59 zaRh>77$rrsW)>McTdcP%t5h>b@os|slU7exx~T6g{dG)A=~0|(O;l-gt)GoFZS{4Yw2lHhU1_gZ=jH3Yxm{6hqpjqd@J 
zw1p6dAy)(U$+ETh$DwWJ`IKw7FveMmVHq?;9FwMN)}WkJX;EaAF*oq9+i__ECmAsYX1tSrQBRO2bg?BzCa}x zkkf-bx};N1_KayI#ckIp+eJdKdHGbSs9lCSCp%bJUKYrgmz7Tl&M66Y9MPSO=Hj@e z79x&Sa4sSWaZl8Xjeyg-H|FV%XIO|ndg6+lLJdB80&D*d(xGm@a};rLkQNTH5m3r< zIwOu%cZlxz6Wit9_&P#&d?cLj1L@+l13?w=o>7{RB!3+6tN&led<#6K`%2}+CH*6etQ&gHprEz0Cj*)sWChMaeNp?q)$KEqJ$?kF|_kODSo3}>A zvC79!vZFb)ONJ+TFv|i(pz^WXhBSXu^JILVv_K(Q>i?&_f={#? z-Qgv%&{^P7>-8n2`64jiptFl@{a&0EC@q6-tH+bYBryr;t-mQz7Ag{9hyd$gxd?U_ z5ni~ZT&Y~rZ&%DUmsiXPm{ZK{*#XTqb6!E9D7VaCx42FEF1z%k>>&)#D00yNE}F+hZZm`d74# zhL10Q_BwR*m&YPxY%!?s88rEL#Cly*;^kz4JVtWam_OAkXbiPR>noC%HWVS5l5=YA;W(q^W_axehD0G!CKP O{Xu7=5PH!&%JFXkU{opq diff --git a/integration_tests/src/test/resources/tpch/lineitem.tbl/part-00000-6de1e1da-3f9b-47ae-a8c6-b4665bea5c5d-c000.snappy.parquet b/integration_tests/src/test/resources/tpch/lineitem.tbl/part-00000-6de1e1da-3f9b-47ae-a8c6-b4665bea5c5d-c000.snappy.parquet deleted file mode 100644 index 90effe9c10b176a41b7d6629efb65f61a4d622b4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 49579 zcmd?RX;@QN_cwe3=fDn@6>&M<_@3q%n zd#$zC+9$~&z%$5MOpE)6i!%lO;=lC8G(`=V&>V`(q4c2zzdiNcmo>12bcsz0Fgix5DmltT3{Y9AJ73Qz&c<(kP4&$8-R^K z5l{@20DFN_pbRJn_5rm(9Z(OP0~&zyzy;tU&OU z&;SD%0EU1JumY@s5r7RK2W)|nfEw@wynu;-H=qH0fJs0E5D7#9(LfBK1?B0mVQGuoox=%7AiUA5aU_0rkK+paD1!TmUWtoj@1x5O@SU2A%-j zz*8Ps`kLY_e*b^#>W}nNoP)3bZ(IG?Zhh+0_4J=oOo<Cn12I|vN1h9BQrB| z6GX{^k~eSJvSll-?ChMJT!u4h+qP}napmF4hu}MQ6zs%;3JVLrV>p6cxOPL5@Au#; z;&ABV5^&xN&ZR&Z!?7tZFWwkP!Q}`} z;`#wxYEFUZH1zW0k3XI{^AjXK`!l+0YwPOjA?7*sHlRF@rC+#k;o`-MjbMCi3(f^v^DBisJ z71!Ih?{NJFj=!UQfXnfHkLv?_{qgbRpX~M5U)Wu`XmPu+I(e*R#7>3KGZ8=;*k2K^GY6&lZUaHR(P~&-OsvRQ| zc?v24^`Sf!b&w~Zj-&jFS3$kvNhxzc4vaRWvn=C&M5C5>f#*7filF zM=%Di0JF8YFXF}WM$%{!P^Wl}DD9|f)ZKW#)FG6Qc!|_olmC}0iX1>n3s)q0oJ4bWTL_7+XF;_Zyb0gVI8|6 zlML&Q0l)7+8|SEk9YQ~y85~g#1^rm8dlO1utbGh<^dVC|z(tz~yjMZqG|(RBrBhj; z{SmyQLAwTh`*6P+{pOHu5XL@2TRzJ5yma0qn)(_nX7PkLDum+5;S2K!s4qZL3vxS% zvIFc_x=eGTeM(Y6uGcmc2;k&E_IKo4kGlQ-~mIO<5L;ebEp z352O81Mv{e1Z^3B4s=hz=N#r{-QqaLUPd_;myNgPDA{9I6J(D@eI>^40?(CL=QXt3W4t$HF#<&B z@5A_ESmQ*P>U;E0MxAx>vw#9LUZ6h`T94xqb-vFuJ?3ezsek_vEt z0j3)cdqmpw9}hyIEBFQ^A$U2OEI078JQPdW>Q9KO^bM+#<>QNM#v9r%T!eiilC z7&C=80Q0Lpx%6G|DMH;9*bm<9$^9|bd| zo%ex$E$XJ=H3>2d1D1f!3FFi#9l-x1=DvjfcC1H=l6B8tQ3jx7BW?@oJ3yxdeI#Ug z0$u8(-2=QoVoeJ$cO9>tGQ<52w5NcU%_yuZpND%7gFHiV{{ix!#dwipELDhm2k>RL zVm!vFVSmpgz2TcR+gCVyB{15R6Z#WHij7JoaetJ;b%(J4N zVQ4(=UxK|7?idNuDG-faGP`S7u~&ng8CXq2`3Tc^0n>o-pl98TefUzM|9jB)q5cwe zcIm%^{#)?h0-mMdbI>sYk&{hwq0~wAXG0Bag58PwIEZ@`<0@g&kx;<_w2uK;*Utx^ z4OnIx<`Urk3!xqUehF;g)j&bjQ58k(VvSBxP zAQvE&6<}WkAVkF@Cb6N<7olH4&G=`EB}fIy7%~CumZIGQ_#J(=z!20&W6GPT=LlsO zS&SNc4rxRAC31k|DY1~`UU*yQpKxB+F)fk$_N z&uvIJ8f`}4FUGmV2Yh_MM~yy1D9jCgCg3>%_v|yd7WMm%>@&Uq=M=%Goc<8}4kO}> z13pLF6s*_{bjMI10>pr42Y8)>3>?g(fHsB+2`JcgK0)~w6hDIN8{A7V`*=`pL7fK- zLcam@a}TPQg6=ES{n1u}dp2ddqb(GYt;IODzJS*c$CF~_h)<>)0n-c7*NQO~psRJX z!|Dad0}7a0@hKj&Zm}2qe#8pCM&DIP_#CtaN0`q5n%{`B9=y82vlTQ)(H8{Gq=GIC_tSCz8Ora#ZzJZ*#r*@^zl3YD zac?Eo!p0Co&|il>?xF8JEL4ZN4&r`2+9N?%4!UH}Z9%;Pa6M$u0Z=?j0s1(3+6G$xE~=eLAe~r#h4z*ISaHr&}O1Ok0%@3ruX-A zdxPQMC7$xzO~nsQ|KlnD@;{wY@(=Z=r~LnJ1&in0x6kN*p7V{?Qnaj=qRk>|ImD)@ zR?pb2vsPc=Evuu=f+Ok}X-rX_f#r(MIzw5ite)6xkErL`l@`?-jXK_0KS?R#(%eU#ecyW`VxWj5s88DI*DsJ;Ds0 zllYpEMy5L(m}?7`8d$_t95EQOxaPHi<!~hw4}-5!10Go zpH(+lUmkV5HR`fs&Et~GPG{aey!?4>fG|sOPB1;|^9$1D#x9rajv2dNRsCx0cEi_| zr)&#d&T~I9kh|IaUi{6?qkqwvZc#m=J-0YL-X49;qo;K5wJ}un%`IQNsx{q8-dveD zZQO4i>!*!3P!(Sv|MBgk>)d})X3Z{oJnsO{G13(RFK*i2W-nu9aMnZy z&Fb0S0b`__b8>u!EN#u1G(07BqHn2sM2oLYUQD{*$l~p}h7JcCf~GkAM4JwCzT{^5 
zrSi@S)2SZ4$4#eIylvPxZ30KUJz%0RWP9KwtGw+&Q=HqjPk%9%5xWQaDa9eDgSBlT zq1v*x8M6~kG=$7uqZNflW+)|L+MNlK@VM0_x5Dj@wB^tGx=y@fcJ~#{ZG!~o$lG%g zl{q`+e)GC*$5(4P?miLg!W7cT1K&vZ?j4oscp)@;%Q%Cum}ozDb8TK&IY+o7 zGJhwpaPft(`QNWG2#+n<>{IAdc0TfMTt!*=-313ubl;tRxK8rjqGLB!`Yk>wUp8dP zsos-AzFzZ_(s}3CKXV4}ia#fuxohdgIqN%=m+XJATz=K#cgq#qyhqM)x*0fgx9i}r zRda^OBWC)qyu0|~?!fzPkM4*5k}~*vlZTrp&0W1_=c-{zy_G)<`{u=|--mtsy4dEI zS-;-67_#QK`y0PX{_x`BS8M-D`E~dt%D}GESYH%c#QR>FC|k#MsFAJT@7Yqc-lR!C zGF3E{2yD%g=Iebemdq>WwaiJ6++eYU7;N0OqNjMl@UwZt)2&+M57TY(V@n2&{IK5I z+P%EOKc^^L(Rp}gI%uqMsIe1kRH9|LT}B8q_MyCJld*`GA+%XC<-gh znPk1QG9~$8)vYgQogv^JYk58}b%M?kvlnwd{^|+l$rjJ_F(ERPdc z6s;G9R}`BKT`;6%xouTXNtSzG&)%(`7L}#js=7?d@`8<0%EIec+n4YBzOuJ`*WB8@ zhW8WtdiND=aOm5;H#gj2O!@bj3#zJusvY)wo$Tw||7E?!{sYHa!k-17eDGo3^qTIf zgsa~h#SfhuK07q(QSPimr=G4{zvOgd)sId;zBoPQ z!jFCRaZb(iT)`o8e`)3 z&AwYuF_nK~l+*5QcaQ09p1juAdZ9yGQR%pp{+&k;#%$d=()VDH#pqZ~#KAV3z09l0 zXUw=BEeGwlMY`6Gx}1Gwb2?XO|0Yqi?C#WT?-uXFPQsW$=NlKe9NPEC+c7}~_xQgu zt6d@nUv&*jIOlbxZp8XVpOJ=2VWyW^NT^TBx(>zct}j2^e0TC`dUFJ=uJ+q6EAm*p zb&~TIH>(f6b4r%mNw0-JXs8!1n>s6OzxBDqrb9bY4u7%Ur?5S1LtIj2=@0!{!=`fM zX-!T)cqffNp?UZ4!$HBteU85yy*w~xYGbuxOUCYPOAKwa;;&wb=tFbw&%>7GHKokg-;nR#DVmlhxBMc&NfyDiai|%d zX_OZKb8#rI_hVeZRPCS&g=*Fs&$io>csUh6w?1EWo);l-EceQOXE#0fkaSk&vfT0? zf|oz{-x|BBHqD}3eCNDJ?@-|^BmVOD$7d)yx-)+?JL?;^II`)ls4HK#Xa&wQCeJ?C zaL{vfgXhKgyYu+1vks(iHZ7TDSyiG;H_M7&vV1#t#q}lLXDk;g?j4@e{K0(ntS>K5 zpY7>6*FQzJ*!t|Q+|b)Dn$_l;5`=PQcER(>LE>4zuXbNY;xygivm=%xOycN=CkIh`32Ji$Wd8-AQ9 zLN%#D=aLtQ%s1^Q@R%LXt zCt8&pd_8Y!xfSvGEw;$!q<)^VaGhxLwl^Zvwwd-HLWahtNyxHG{XUM}zn-)la^|aY z?j}=SaNgwwzd8iackI_!MGh;@o;cmjAuw~_vy7ugBLnY#IkCp3`OtNv8_kb+%9Mxk z=4VI!7-esJx88Yh3(~%vdPM zKN^Y8!Nu|70Dm!`jj^i#{p;wl2LJ5je~zmZw);8*@h#>*w)?+3|8u+lcPn_M=(qo~ z+4a|^U(mY|D$^@D;^QW{A+WL-@mr9Isw!V)Ws5+Mf*0X6D_AhRCiJMi~H1RT##qefu3K*rh2 zS!?M%La;gA$D441+i#I|7$n59qJ>tFFWrZ}A>e_%kHCspqwbw3lC>C`-KOa zG1v^qW=uA_AhJ^v@%@I5O{2c3e99LTZrHrV<~ueg`El{RxiFrx;|l0ciER=)#FHgw z(-TrIFOfz@5>OM^4sW)D%|XboLSJMU_H(CC85K`aI$^s|iVP*3 zpJ4BcL@%XW*$JIh)F^gBHha5piJfke`fsOHsNs$MgTx;O{f9UH-RY;6lD}*H6WA|M z|GO0}1I*js_5SfkgSBdzv3I1UZ;A1so1?DjYg!`wTLbL!5+a$rxOxd_2C0*x^Bef-Qm?s_ggREGP?&wnnigPMyGfHjMGVqHPWg>Gq7s4)MELT=Zgc!F-Q`MfP(l z<>biYh#`q9^b#ZHHlN(H+G9#kc!!6q(fsI5iHB5lpkP&4g}c3($Du6s@v04Zo2nIe z3p{&zr`kR_bE7_R`n*K5{JN!djNC>OIYo7U$nnq_#nXQq<(VUn_*G)~tKNpm7U@+d zcbi?9_912AYrmq0dM*1Zy`L?fJ21~Ezi`nFH|oUDX8jG7Pbbd}^4q5N$~(0tnB%ah zzij<7>4iY!hWSJL3=Ztc6x-YS?pOOXb}g!Srkh1MZLqIv`k-6$lCj^XT(+af!eM4& zU$eKvrR;38jj;<3WX!8?8#1e+a`vMd$zkCc1KV9kmd)9IdtHp#+&k(c?S&`EO{=f6 z>n-;m>`bp1Bna{=*wCsowjVTm-sqC3%S9Z$B^SS2H}|Bmoq2ukf&`m|)zzu#U#FJQ zhC1`Mm>b7)LdGoJY_{7p<;Viz1yU%;U zV$M=A*Ll5u-4*w|2ewM@EbHhBg~_O+p5mnJ8!Ih5PHp=mFjVDj9kpy-S;M2atWEA) zn!BoG^MBf}aLH6A$&Z)LN|RMwu6X8I>SI>%JpQL3-(B4Ixr*CMkKBn59vQp8;DDbX z!0td^eNf$!earIijw%$5ThN~rGOMmCO_cJcqFilbI;1Sz&$pzjVq@D%&hsr>E}wB) z5EFCsjE#A$S7DOo_vErVI%Bn0@Wu`HQM(_1STlddDPMy#ju*4$T;Mx3PW`ehXs)Z} zgU_mJZ#mCw(qi*$StITb|Fn6ie|~+yZj<#&mXzX7w=vB>2s9 zCO~oL${BUqv%s1MB~R5soE)3^FVw%;fzA6;ZZ?f?$^%IL(>IKORNVT3=^;a3bTd*i zFY_ta*XBiy+84r|ZS7%VNk_#k#+MwO{0KzCQ}s+W;#?a@AA0q2)%2{hs8(p7fW+MQ zAyY(*(>g);b7nVJO4Hp;qn(X+x7rLczA1XbsEFY9YbKsp-hIT>l1)pyF<|pguNhyv zE52Ro()e*kBfokHR$=rTCYyNjA2PnQgFd&&*mHN6+UhXbe71$LBfVYFAQ=|b&!`2r zR~%8h;*0&ZOH2(Z|M)Y8#YldI*jaP#%j4-^AK3enh4tl_Cq4HKqoUV~?c+%p7^~=p zCVMq0x4S3rr+@CX7m)7bJs=BAyf3%&7TL9_heLU)zJqF5bzJ5l`*f17ZI{c)R*Oda zTDtPKyolIsE5tHZe|}Y-O-_Df7c+a%Jtmv(-^v{zAN-C&R_>f<8WjmKe<`PE?Iph= zlF{*!(UG7fcc6^y=!f!HGPdn9LrBAzUZ#M=TKyu2BsaQXmm1YpZUyx<77%vk8pezY zxB-T?>XS?b@lEQ5nmPiy!fquJ;aB66`lp8!1D%{G3WrJCLg!< 
[... base85 binary patch data omitted ...]

diff --git a/integration_tests/src/test/resources/tpch/orders.tbl/part-00000-eac6ea97-a06e-4550-994b-1a1553b26438-c000.snappy.parquet b/integration_tests/src/test/resources/tpch/orders.tbl/part-00000-eac6ea97-a06e-4550-994b-1a1553b26438-c000.snappy.parquet
deleted file mode 100644
index 113e45bfe3f44f3654e511b091399b8ceb02fe4a..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 46429
[... base85 binary patch data for the removed orders.tbl parquet test resource omitted ...]
zrc@s*^tW=&Law?3k{dsfD82Ri2crVd7~|D|S1)?@4;A5>P{2P~kY3G^{(*_FHET~^ zOKVCbt4AqxkLRAbF?M^u!kv#szVswhKe1xU7)x+L-aRMGb5F(WuF?eR>ukJrsws1y zg1USv<5Om0cBid(@r3uU<&OQtKDFPx6C}l`Fm6kz(6yGF{b}im+$F1+C7Zh>TliG} zlH7NW+vKL*r^!g^+gdF%|Nfj*<41M7w`xZ-!=~3vJ2!FG{_dpE+_>qw9|!UY`+{!`<|}t`I}>xE&KSJ6v_| z+)(d1(r|xkaQaF4SZ&(JWaRpfE4KAAG`3iDYwCN~$}gRd+w5LCw$0oxV}|~_lJ2VT zSu3-crwaQ{F|GSa-5It=+TL`j|J3tuEPeax*zESDqiZrRxw6!*^34qEn0G(>v6NPH-P1;eHJym_B=B<69=~@|3XT-+;%AS#xJFYLJ}zVIN8!WQq~3Er z(a=iGdO2g(((RFDAM~mT-)#ESGP3{o(TD)F^0;H1$(Av#X791F1;^^Ul4b-cPC8SUu68q zp3>Qxbbh)0bUrgZp->tme|Dg3f06XQV%6yWlSd`gob0H!>sIZQYd*agzW3$a9knfY z$D8JinKLW0Rwr+HxoT`zTVA%_HYGfE{OIK-Vo(R&v+CrU{NxGipQCsu6bhW zw)&E-H)iiDD$6*ZvA4;D;wh#S?3(H z+agSBiYwkw4}I?wgT1Y8vb}kKICb{G(w96b9ko?k!-nTbrhV5-TfR-)9;Rqrp)dq) zZz;~n%pJbR*V@sgxL-CN>nYtR#`Q}uHDzo?PeBm-E7Gmu_jV9_%dCyZZ#=RmP4Bf8 z6cxEX%^Sr2??-X&)bQXx*CiAo3?vEWCf@l)3P=NE!9>gVEm#iLfK6aK*sEfc2O86g5S0H>B#(k) z;1%!&xCA}|SHZUsz1f(i6`N>k1N+AuN{!=sU;>dK7NmeQ6k+U!w3lu~&e}K;>0B@m zlz<9Q3zma5U=w7wZ%C^ghRW?l`T%$o90kX~E8q=q348?Es~gh#qGm_GMf#>!Hyc^~ ztqGkP2lT)MB0(&KQ#PiZJ%fK~NRI^*K`xjFNg-ilNMZ)KJMROnXN!8OQq z{iclNx2m-T2e*UWU@y2I8~_i2N0I3ho6^RjF2;Pa00vn&VV<-Ti_Bhy}T*o z=q&|&iUU`{m*89QJ-7+}<-IpeXt#7z+7D<4#~B<{0yWSAJqQLS5Dp^ogXkuQ=}v{* ziY}dolcT{{Fdj@qQKvSg-CuzwlZ*5$Fb@=h5>N^%KsBgErc0U}6?ZD+R=mYcIJpIE z2fM*uw1E4Y(k8cFyHh2%LLVN*>F2;Pa00x7JkK;aj@_w`TQLYf!pTp;Rq&@e=S1RLQMKlsypqi$dsMH}||jlP$0y3Y=wu3TVJz)IpoK z^t^GWKK|N1Zfk?i@(SLej=wgK+p3`5Ucozb@z=(2TNCsiuizc3_-os^tp@t!mYy~L ze=Xy-8tAXQIbK68Z-Ae`uil2y!bbj84YaNWk_JeG01=ph1;l_jkOWfk>tS9&2?-m> z09oK4)Ii(4f)1Q41C_uH{y`0NqgSvACtJV{@Rt^GTMhIPui(=V_+OgC?G8e}-_mpR zR)A6jC8a(GUxV+!58!9;8!FOkD?66)-I~_@#x1vn*B#<@Pk7xBUe`n@@VYG`09+rm zbt^6&k3=#U42FY|ART0a31Bh=r+EcuATbBzgCZ~=l!Gcz0~SH>F0bI-NUQ>D!3NL_ zwt}7D9&jH7AMgr3h{VI-G4LdK2D|`{gO|Z+2)?$pXZx+du5aVOyWj)xF}MOg2VaBl zAoznf$Dfh-4T)acU^sn|@B!@)=hrh5f5k(dA` zgK1y}m;>@b5tt9ba<5<&5;b5ExC`73R)Mu(189ceR9>coIAV zJM_Y~H13-KdeL#DUk0baYv4`rHh33&06s>hSGM)Ma;GZ(vq@}+$@GP!A0)Z$xIB-9 z0E{3Mgnfqzf~eUDf0 zKAe02JO~~J|DXo?5n4; zAP@wBKAlIXTgi$C2$J73c=TR!BU<_;v#qt zdRIA!%TjG#5dqN_!0a9e#c3l zJus8Kk?=<%U=OkkL?Q_E0ewL~&>su{13>}=2YCgDATa`r0%O29kPW7Q>0l-V=XwPT zkSGQVfD^dDLa-Pt11li7+AFvYiH%@0*amiid%-@i9~@LMk?=-%T@hYi1Y8ymZ0@Do zx&AL9$(4&1A-K1Uc+$nDYQ5Sd=R4ii2oH`B(e5gT6F)xRKuZ4fRsg`*XJWa`H6O;u!Q ziD{v{0FlMzDtnQ;6vw=tRhJtP!)1odpz4T|vDYfdWRj`$#~DSfzQ9!EthH-NKqN|t z9~3#tY^ro4=5%QtnMpZGW3<)l&0QcD+g+vtB2sR)lEPp1gx34! 
zg{ew)Lh~MZBp4?VgB6jKeP~%pEcyyWWJmOLVwtCd7>bAbAz-%amR@96U0Fr7sMR7} zAr~m>Fd!z8wJ9b$0=b(iyuRdeM?tjWH04Kf+4BraDh>{=vRAqh7Tslz6B|-Yh)!Nz z3#phrSGRL50^4%^nF6jXZ z|5dxIs$EuVDn&cW#TXj(eNO3p|1s_@%Wg5sdVz@Fi;iG~DX&J&o#Y2SLboHxx`~{& zo}!fH)pkMh5%a^H3!y~Z@*2CSU=V7Zyr>ir|2yWo)>UCIaL8n!=3&aJSdcN7E+{1~ z@ouFHao?pVGF5t7EtjIvD5lUT_waB;e@8^^e7jt3?WRbVbdJtJ`-GkD&rDJMn+F32G zqwKmmsXXTk1_~$3#T}|0!LaJC?$Jkv~ zLkvP5k#u>8^^IN`1Ehy3QyD_SiQCJLX=czB4l!AN+@n<%Rokn~Iub8UQlqIBmm1!Y zQpk4A2qxN8q1d2G)4NI#?%Y%&IS^`^5@QK(zsVFkibQTa>ku-MF#iitRX`?3x_iLKgg-&C)H$6h8QRVVS4WF`o=t{TL&)z~=iAXyNJPNW+ zJY_#1((F{qePSVmVW*9kUIZwG2 z$4t1i>_QCuVrd>_a?B$)l_o^OCp&`?3>`ZRxJ=|rxi?flo3bl5w?4AtUb6adj>9Bv zG5Tw2*Hz^#=(N&>XV>eQz;%k&k7t&@E6WEn1+3rEBYQ&B&AnO#EA zXgqqrPrW^L6YVT)e1=&lyUfOu5vDT4?ym+W#eMZN8k*0p7H8)DY#ws*6EW@i=STom zttqEXGFgRg{dgocJ}}uU9My6~iAC@ryj88Dhl)1$H+QH$qD;2Qgi{C^Zpvz&A>R}8 zs1o`S*|ns&dlfs7E^xMfVw=*fN4(Yo@$qX{r~*X>j7BB{3uu;O`qHK<@h(zJ$ugJm z8wu<+a&&!=IB_c|2`5#eC>6a_J1p zBQxv;2ZQK2=8a@z>v6t-VZ1kHgLqz%jYz}2RKI0(*_N~QwAEx@MpX#)j2_YX?J%0e z!ybk%p$(18+rz&R_fXa3ydI}q&e9sehfx?kzsT948YaUJ-z>}J_D2zg8OBT6g&=fB zeEw#YTgV7i^YLskWL5oyB5S&j%T^$d{^p(sbJ7kYa^(tmSK_%2dq;Dob&nxl?R?vqyXAZ8B0y zWv8LXpmNsrgtB(cBENWk%earRnyM6gDO1B&9(};mmspG`Ggf`68E_cFl6jeQa}}UERK7gb`&;DCWHAO{JJVcUv@B&ZHw$LQYImk zak?3i%8}_H%?h>6#eHF+=Y&Nd)r=8V&f>Ly_RUJGPU9Tn& zwJS4bmZ_*J{$nLocjR5iHnW?mZ8;>rNdi^5=X}YOrsMChw7tf>*vKlW5sG1*27D(V zKh8g-v@Gq&+w(csU)d@v&RM3=CRbY|ZF2K*{!$LV)L*#QFZFG6h2ksPRAKwJUX)C% z163j~b}fWaxbT&|5g3=&1|H#Ei{o=H26iaeFZEJ4&Tx+(JJh_I$=* zK0zMk97^F?mdt0c-$u-*;za&orSO50vT8PQy;af_#Uyc_r61)GKjzEt12D5+(;+9Ya&*q93lJYb?ONYG8z?^*aY7ySIY->OQ%S3& z`i74qVa69=Ul1s`NHklU1Jp8#TWhApQ~U$Uh8YZ!;>HWEr01!^dI~qrHd2Z1Xeak^ zJ^^&K$#zC(p$b|p5&8hiZI>opwuH9k@tpy-BPVece95=TpR#jSmDCRPw4JIvOBIQ0 zIfI7#+>dgQ-&)UNlskfZQPh2ue_X{~^QWszY)=GZ4reu$JFDv#xGzgz*H4r-@Qc~D zU{SO50_}3tZ?UamFvT#+e)xd<-GI)AsB-DesQUv%%Cdpm#Zd*UJMMsZ&!IUz3(5VG zfnUqiSMfnC_cANy8}l@n7C3A&Wfh~snt8=zoj;O(J=^$)HC&upZOV5#U1C!ETsqp+ zeW7~@C5s~>CaWylz2m@hv-gPEt<$?d=1w5$D0y6Wj$5O`czGedpMpFnbSX6JI#=A>w!ZsO55j98z_X1Up%*P`{hA$1j+lRM$`n86 zM=>PIe7a#WCKHL$8K&4>N|KHo?fRs(%QAyCpB6Fc@*NMbO9R-7fC#tdl{1C_?yMILN9jV(VaxiyR$gS*w# zeTFKJFYeZH`FvjVFqpp!mz0*H(k}Fy7idiCzKSXdJh?12 zmyJSW`;Y)|5hX3cxO7= z%`Vk(Iht?x69Zx8vYwMZMA0gZxh&D+D`u30Og8e2Mkl0e#B}>mcD}Syyx9H=ab^XH zW4VEvkgHrFN84Qr*NzWTmq|l+tROoSzq}tUFBVtv`#2&;Jgib}R1}cUsnU$6xp6FI zXFrmc+Xuy{5r$!Ce)?c>WupdXyFo#M@FfkQBFei|G z%6xmdoj=NQzxpAVj77vC#eL(iNMf)CkUFVqPNU@tST30nhk67)Sx~~}Rgf_5YCvcu zE-Yq~Q6~q`rgFuXc7Z&X(L%dj{9Yx6VctB_tUv>q6cfSURGxE@i=;OGZK~~aQu5X3 zD9>>2n;^B*T~K0Ybv7b?XN(b~-onKIGQ~5KsVH#>EMw7AbwUM8$~yXTt97vbPQS*& zq#c-w(3rQBSSEWa%(hS}x|#X!s5P~>i~5R>fz;};tKJWILG zerBFPIfSp2ij|Hv?kD&VKVqQlo}{n~T299>Sg3k_>`LG#`cW~mt$l;Iy)p&U05jhh z$X{d#7nkO3+?K4zI%}?NC*_`fm`@4!D4#ABzr=(}E?r12k$S4MYn0MiT2VqYSq9;P zUhdy*HLmD$=uyuO(WJP{ea0yVo9wkxm}a{w^6Uwc-s6*T89}7Tv)l+({kPJBMxWM^ z^$!rXJ{CrZkTlWU*+EuM`rP;c+xZ-+!{Z{CEBeS>X@S9ty7Uc+S*;l%MQ8N37W5`- zU=*^&Q^LkTLx$o-nGXF@Z%t!l8b;_^2QhEtUkb*jo{aRM`I7jUxwo_;B(vjK*u#R@ z!6$LgcZo)(&gA44YelDFuPu?}lFMf~CAL$lAc2@M^G~po9D+_b{HWNAT&j!^eEb}1 z$VNLkwC! 
z^PJ~+f8vh%j%^q!>UBF$`bafYiLrM8=V8QE)ZmzhHPgMf$e9|_bTg$tMa)I zc(K_qy6J@YrP$o|wj#x|m#bB^rO>7VUSLA=SWI9G#Lu}A+@TJtlz&K1BDn;HV|lT1 zN1OdbhYgLdy?#C-Ys*hz51I+V{8t)PkZ!Qz5%x84k^LFW;=0}-a>q(hZ9d+3LmX5; zUJd7qyP8V~AW>p%%+bROglMJt+$3J{9eSF~>K@A*jo1Nvwn4LpOc!6s;57X)(0lTX zE0m%n)`z`pBCD}v<2P`jb&O}1tv@^g)i73%_Mu7*Z+jQS=h!|y?HKJ(8TTv~4vNxT zb6;^6Z|@Tt%ZM(-W&J6MxX1?b8adsvFRV?2E<(a5O(v_7CL_GT2UHdU2=iK{C|n-#63%Ph6YZ z^BOlKfUh!fD?_O=If^_Zlm|=iTAIn)h`J!|6NbFZu|d$y4$AgOSFGwXnc(?Yj-X1~ zb{Q9AhRqH1qh!mAs!&oYEvD_oBw4XiSzxcI=0^*Bo1ginq%&UeJ6YQLgolbk7zuq4 zVwL^Ik-J2V@C#!sZlUU7T%-5NG&eZ!gph;aB{!OZhA=Wc3Rzn4mx zYS#$GWTm4(E)_b=l-*V%HgIxq)Quo~{ysM}^j}IEv)K{JNj@DZe2}R=vIPqW%6?%3 zR<#Q?bWDt@*5>4YQ^!_s^*JAvHrB0_%&L$kE}lYbJxA{497X*hX~3~ z1(WgIum}czE-nt%Te#1^>?uWIYZnw$KMbMOxG@oEbo>iEVHzJ5uggueT*$?!yXSKm zjQ#PG2NgAR_)sFdA08qnNAe$Gyzc#cqBdD|^Bg^GcJe#7TC^3Y15B8q39; zC%7Jg!Pa&1OI(P`{5_e(-Gz5eTA#x`DdRgnq>CK9j+fq~3Q2nX$s~eSKu%L7&kv`n zunv*=-~Ejbsf@n{VD58CNjn$DjgiXXnc%gS&;rc-OL#|rK0SbTnfS;+-i5Vn)(h=3 zDL0?u-}<=X8O0}T4Y|*}zT-a6_4*o-IXRZ9km7`sTzehrLwKrI{K-vz&tamj;&uu6_adpHr9p4nQ;Ak>guz|ea5d;irw6+{#1c6 zk5M~|i|vK{TnzfUOhpp6AVQkcxLQ6z!_lB?*oL{>Az^-BG01aVKuGvEk`5W?v$~}UH`=xvFKGFTCZ^m}DDy3YxceiurZRVFwWISH-5fME{(~kMlTV8`&9_djta4SeVJ=OMn@{KM{i1#BV&vQysKGvO&U_r`l(|9U$79m6B z9bGDg4vi@AKZNEbtB#RZQ=_7;Ye+)U3aV0gMJFWaNXw)R#1JcN~97$ci9qsZ#bS2A*R1&G_N~;XWnFKpAB{zP^kP_TfMCgNe|MG;i$K zsqk}@!h`i?s){{hUaZ8DSMD}&IhL4W%AZ>oEnN2FUS&w6Jfl*5Bwq68o{#4DM`Mn+ zMN1wMcry%IJWG{?(n^J{?K1XOiWZ9-K8&g5Q}Jx3e$~<+;$EYyc3o&~fU%c9dD6DS zkx8C3p3~_Lo3j+{T!24ec{9tE`-f60(Nk}X(2FF8T#yB->g0y{Vg^fH{-~w$L6H;l z6vK&%JqPdW$xj*&o^3HIViX6Zo{^sfpmOlxb26)Ndg`$peyTN9QRJyqyidN0W6ZOS z9u*caoDZ+6CDRpy<R{q>ezy+!+>l7bxJ9KnGf%$ZMBKIJcg5(?XcwsUN z^_tr%3n|lA7@CBSIqs%Ew@n`!tUCFr`7AqMyg}7*+x*1@tGlnjDd|!tKiL$vM-XpX zhKh~C85O*Ge5VnUgYn!*f1`yL8>H{dI<~t#o*d%_>RFF8P#jMN^3VEt?RmOZuE+8# zWjLaF)T<@ry)83v{5GJib zC!weS?(x`&-&pK@DAw|G7=F~iQ22qbhYO$f+c8(M+B4RS$SPs|`iYf7FzfKKXW6E< zjbN}BE9VL^aw@8X-_29jh~Nle6(e5G*{XS;WMl~Z8ly3S<4?s=1!o5`JfeM1V{L_O zkF<=cI;0b38;xItdIF5kvt*$0sQ@uaZfea?U6z{c&v?>_s$1p`1(FAIwoVF_R@t*v zv!Q@;RukIw8eL;|{7l~J`JGSTY$?X?wYmxXKL2)y5@TyV*%j9oYU5f5VMf~a9=D5O z9QG=!)dv>)1~2y8T4ueULEA`j`n1_C-E<`Vroo+ zoRrr?$@2G9RjY^W7e}fxNsl1=wLKbkT}yWH69PSrJ>QeS)-@r4(l^2twPlde;x8=_ zWVK*YazzX|RL>t8K_)~UXAtn11j^-_@wDvX4{Ep*s*JeSm%TUM-ABMa9x2pdVj^xX zTd5g9dXnCtthN=`UTA-WY>@hgmIcK0#_EL|s!@chOQYeQC!e-XR$b-i1W<*7BN~di zAZidJ$sABtfE~d2%NE5{PelBM-txPxCxqkvlg=8?su9;~ubL!9bOd|5Kr_P+Q#;IL zG*!}?sqmv*=B=XCFeRsrjOjXs-E~qpUo9q`>jStwK~YoGBt*D7M$-4pr;1sfd`|Rt z|1Lfz>@v!)7*Dg^K4C3kWa(E7+a#^BeTMCiUUzhMg`aH3Dq1Gte4*Zx)Y;c|Mu=ef zj|PNgvH~5bGLqGyML|z%yK4ld3FFbnX(e1uoSe@v~c3E571-o5gqR=Z!rPTq>h@M4HDvgT<=wRDaHs z$Vq+SuWXyfO~*(`-fSDf&+-?Ou3^WMQeI~Rama#-Zt>v zMyz3$bca#-e4QWHVN_OOd*ZD7#VHIPb&{{b&g;TH)_D2|XLWGm=kuctZC7on{F_1M zd+mEj1nqQ3d}1MKWKQQQ(Z#!!Hrlv7KzM}58oSMC*UAeidxVr^TnPucm}}pL!p2yH zpZ(PN_Bv;|@Jv{1g5^T2)KQ}10=Jn@{!}Zgo-b$c$X425{92W|##{ON zSj%@*Auj~kKlF~ec5aTqPmF3v(ND#O!7hhv?9E})^L&7CED#g7!T*^T45G~1%}Pi%1by~vZBl5&v{I(v^b3GWX%0UT?FgL58yCU43w&1?mrp$ zyp5{nGeXXOW!%S7l|0QznU;3x8X0PPKVt;OJ+dU?-ej+^OtwX%+lcLGC61A=3DT6VG z<*PZW&@S_vDmYgI=m zncEzXz0fj+(+uw`oPEp@!tYnxHsZO!%j6rCfr{pS@#lSFEVX4@yuLgB32lg)q;iji zgem+w6yn!HYKpQ9TRyj`h4&LFSG_XpS3FtaOLUa|@O+0Iexn84Ikxdw&d$zn;B?aY z8LHZz?MZa>Id`A9rY%&Q-LXjgQs^6T_!XhXFXK5XUw)T=j13EcC)V80azFO%4Q1=( z?@m7Lol`xcqD!iUIu+bpaik7=_9&K;*A!2PPjl0?Vg7-nziusenicOh_t1q-+ZXaU zZq6{QGw<$y@=FG93UF6clvVI$a#|$~UFB$J@rc)jAu);@+qbJbY>N_iu2tZFgI#WI zu)ISWI{V`>6ko11_8TrwCc)eS6XN}m1knZq_~XE+R!iuyK;rJwi@x0Yft@!`j_f$& zc&+t{-GmheJ4ba&Znu9HHc3nPuI9>t*yr&{)$C>ZCl)I^4!;ru+5QH79>29(yXu?}~)h 
[binary patch data omitted]

diff --git a/integration_tests/src/test/resources/tpch/partsupp.tbl/part-00000-458bd63a-12e2-462b-a69a-2a356b8d9f37-c000.snappy.parquet b/integration_tests/src/test/resources/tpch/partsupp.tbl/part-00000-458bd63a-12e2-462b-a69a-2a356b8d9f37-c000.snappy.parquet
deleted file mode 100644
index e22707d0678cbd62d86d5a7494123b76df0f2d01..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 56736
[binary patch data omitted]
diff --git a/integration_tests/src/test/resources/tpch/region.tbl/part-00000-03defe09-3d7c-4a7c-b9df-259eba52db22-c000.snappy.parquet b/integration_tests/src/test/resources/tpch/region.tbl/part-00000-03defe09-3d7c-4a7c-b9df-259eba52db22-c000.snappy.parquet
deleted file mode 100644
index ab8001abd98b6981285b9e0a55f9fd9bc17d1588..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1502
[binary patch data omitted]
zt*T!Gi#br8;|(K-1-;{jtRg(XL(;bL7Q-cKuqtIZ6(JPAR^0*4f&>QzvM@_)$)m|D&^hdyCl8_nJ-_|q!HIE&!Re__{4N4r zEw@a-l$FnJzgjinJmQM7`A{^$js+~@-n(Bb9z z*AGsBMte?p|DbBG(#(S3&Oco{5mj7@{o+dp(8`w6FFa6%wQSz?i0T66&;#htu(zMM z1I=OUzI{MNC~qsGIF=i}2mLyRe(^@tnDWcFsJ^cJ`b{v~H(q%_^#q2)vpI*|vGGCl zx7g6`{i+G&84sftZ11|q&@oRFxn%+l4D`Jp+%e$+EzCo=j99Si{&JrRQ=ah{ieWF^ zbPswKmYMnvdZqf3KUOD3DoVq?@d4#&yJ6bVj~+xnP`eNQ^MR2u3$WiT)GVXnL4uZU!_4O-vss;JamVuvgNP$ zPJsK6i3<;a>L-8sQS<<|=bcB;fw6Nk53ZWte&b!B(7E2ds73wqhQa~G+e(^KeSp0) zzZczyWx~7AG3?Rnzl`S8S6Q70T!`}f>G6HojprY5Ei3An?PRZ-oLuh{u1lE@~V-&*yGlH;~r&sA3A~U3EYGd*qFMdswl7BjrL%-T5nc` zw>*FQ#AkpXmOt(v!7$yS`&E{)J$FuIq|YC2odEj&Nz~tL`UHL98-j^;p^azqB{(e$ zsZ_aHYSeLnLK+upBA$XO(<~I)W;|19W-@u`oUfHj`4pZn)f=^Dsxc6PfwgkIjyG~5 zo@v(dPGAk^?mL1>5t5yZBHMG+U5#1ftsE9H8=Q8(k&WT@}MF}spkt~sR0^Rn_?Xl6EY34wkq{wHPRYBm&%LK1V*SAHpA$T*Cj1r z_*DIsL(o?`KbbX`@A}(X^QA@^Pl|#U8vfp>W?VP2EnO~Da-b#Ntc$pi6%n(^QPX0o+y>oKFdll9TKQ5MSN&L7Xb+#Pn^%KlYKjF_2dU}bLuwu(0?6)}|;3T8y1Hp8M4>^3}EF4r4J2&wu?UrVUv z$~DEahR1qB&>~%dqxWW3kI#npw759zo9oPnfz;i3>dLT}6 z!VP|;yz@oXUS{tec~V*Q~?L%*)kJ)CB>g$ha zzT`T2q3ZOY8Ww2{b2w1 zU%fbSJy};ImHE=a^}V|=iPXhry`keZU(v+L1BbFxmzW>x!jk1P4F{j>Q+SVLt zrrtp`zqft1{}`b}dQA1Lp5%5AwecsI_Sv-;o@0)#K{Av7tC$)djC$+Z9ZRl@jmq1hUYf?axH5f z`D_1ARCm2~@tW9sQ$GevZ)Je1U9jZ|2&w$jnhAvgDff42ik@~8eO~|1=r#I>T<`Un zap~<;V2$Ej%6SZuVVhX8HgV=n-O8FsKHqz@3aR&AuTs_;-a6KkWT0#RAJ!JPY4>Wi z7iy98h92$sqn!H~AkdQGJN@73)}_#gdUIO+ODm!NV>RC2B6pnxoG~N^pDVs7;Y_h%{%M!$4%PeS z^N{ghjo!Xi4Nt%^?L*!DUsp%Yx$g`N9%|+hRGANJ52{qRPbsgxUtNO@#q0m3bGNbpxAuKZYuAm}bZ#ow zb!pNRDZXL7cKi|L%)Ib5m74q=VjlUg>aTFIo)RiTTC8up7wbQuzIfeWxcb~Cy~;2l zY@@15tlms&|Em4$w+8K|=e7T>HC#Sm#!K+jud&03F#6}79zg(2&e-_mkCEKyib~^C z<8?6L>zdv}I@3o31l^ol4lY{>MsbcUVpUT*!J;fovY)R=#+P<*)Yl-^5U zl3G2t-9-U zL&IO$dORXrGfJvY^sVcKq$i9nYGV6!w+$7Wg+?BJ>d~9(9XuPWH$=sQ@2(rEY46oi zcOldhh}onhUlwm4xmR@&cYy*eul!28Pp8uM?&%>1b(17-P9o#yX;o_N&vp5{ez&TK zzJi=G>QxQDsoHjv`_Z+zq6L?Ja>KZtJm@)voO`B$1uY$U=6!4*ue>3wV#Ku zx2a+K!3+D461BF=d}G%Ls%z=Kwc*b_2TyjLANwBZLK?-2_LOxe{-J+l)|GW7R?eU< z#!Q=INn|VA)o<;`V8>^(P2sw<7GEbGG&hfp?LfaadUv<>6|DE#^^fmXth~AxN%cC` zYx^<%bsOI3);^^%HLGUrO}*NFjrp1`b-hq-QC~(>(WgE7*vOSHZ_%#w>m%Ckfyg7{ z6?F@?n~D%hXs7k?s^=Ty+@*zT5&bjz8^xX6xeDYY?Qhjr9n!sR{x;%zy!Q~M#G9_y zuzFq?zfJWebrW{2{jCAhv7+H%H*&o8vYy^Ij5epDt<<3bd+fgrIkvvNk}H>1 z$8^gg;VjWe3WdMPNR^94*mHx7{wEZJLxVd;u_3*3kj2&@gP%}A-v!P6|E}pjPd$ol zJBpw14GzKa2GMsxWRCuedPuK6Ec)@|2959) zfD+6FHj@CX_GizL#eM8hd4lIR_Wxu6+NGua-(QS>Qt=-NL;CI`PCiTuSt>aBFWsX* z57X zIeSy1T@g2J-6S8=HyJl&@?s%f-?a6#vo}c*E;W4|`p|l~w{#eUs}dagg=7Ib!69EX zZu(g8rmbgx{`1C92w0W$x6v>cJPD3w{NdzsM1i9h@waSrM99&uVl{mo8GiIS9H#aW z5$SSMGT%{H@-Tysi1^2}IOFrkfcG#5OvM_0m^3(PCJ8f%k0qOV>1B?^n$B1< sIWYs@F41X%BRH$b5-DqvxAH_%Os2{4!5#3*<2hv(3YybcN diff --git a/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/common/BenchUtilsSuite.scala b/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/common/BenchUtilsSuite.scala deleted file mode 100644 index 4fafaab06f4..00000000000 --- a/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/common/BenchUtilsSuite.scala +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.nvidia.spark.rapids.tests.common - -import java.io.File - -import com.nvidia.spark.rapids.AdaptiveQueryExecSuite.TEST_FILES_ROOT -import com.nvidia.spark.rapids.TestUtils -import org.scalatest.{BeforeAndAfterEach, FunSuite} - -import org.apache.spark.sql.SparkSession - -object BenchUtilsSuite { - val TEST_FILES_ROOT: File = TestUtils.getTempDir(this.getClass.getSimpleName) -} - -class BenchUtilsSuite extends FunSuite with BeforeAndAfterEach { - - override def beforeEach(): Unit = { - TEST_FILES_ROOT.mkdirs() - } - - override def afterEach(): Unit = { - org.apache.commons.io.FileUtils.deleteDirectory(TEST_FILES_ROOT) - } - - test("round-trip serialize benchmark results") { - - val report = BenchmarkReport( - filename = "foo.bar", - startTime = 0, - env = Environment( - Map("foo" -> "bar"), - Map("spark.sql.adaptive.enabled" -> "true"), - "3.0.1"), - testConfiguration = TestConfiguration(gcBetweenRuns = false), - action = "csv", - writeOptions = Map("header" -> "true"), - query = "q1", - queryPlan = QueryPlan("logical", "physical"), - Seq.empty, - rowCounts = Seq(10, 10, 10), - queryTimes = Seq(99, 88, 77), - queryStatus = Seq("Completed", "Completed", "Completed"), - exceptions = Seq.empty) - - val filename = s"$TEST_FILES_ROOT/BenchUtilsSuite-${System.currentTimeMillis()}.json" - BenchUtils.writeReport(report, filename) - - val report2 = BenchUtils.readReport(new File(filename)) - assert(report == report2) - } - - test("validate coalesce/repartition arguments - no duplicates") { - BenchUtils.validateCoalesceRepartition(Map("a" -> 1, "b" -> 1), Map("c" -> 1, "d" -> 1)) - } - - test("validate coalesce/repartition arguments - with duplicates") { - assertThrows[IllegalArgumentException] { - BenchUtils.validateCoalesceRepartition(Map("a" -> 1, "b" -> 1), Map("c" -> 1, "b" -> 1)) - } - } - - // this test is to check that the following code (based on docs/benchmarks.md) compiles - ignore("test TPC-DS documented usage") { - val spark = SparkSession.builder().getOrCreate() - - import com.nvidia.spark.rapids.tests.tpcds._ - - // convert using minimal args - TpcdsLikeSpark.csvToParquet(spark, "/path/to/input", "/path/to/output") - - // convert with explicit partitioning - TpcdsLikeSpark.csvToParquet(spark, "/path/to/input", "/path/to/output", - coalesce=Map("customer_address" -> 1), repartition=Map("web_sales" -> 8)) - - // set up prior to running benchmarks - TpcdsLikeSpark.setupAllParquet(spark, "/path/to/tpcds") - - import com.nvidia.spark.rapids.tests._ - val benchmark = new BenchmarkRunner(new TpcdsLikeBench()) - - // run benchmarks - benchmark.collect(spark, "q5", iterations=3) - benchmark.writeParquet(spark, "q5", "/path/to/output", iterations=3) - } - - // this test is to check that the following code (based on docs/benchmarks.md) compiles - ignore("test TPC-H documented usage") { - val spark = SparkSession.builder().getOrCreate() - - import com.nvidia.spark.rapids.tests.tpch._ - - // convert using minimal args - TpchLikeSpark.csvToParquet(spark, "/path/to/input", "/path/to/output") - - // convert with explicit partitioning - 
TpchLikeSpark.csvToParquet(spark, "/path/to/input", "/path/to/output", - coalesce=Map("orders" -> 8), repartition=Map("lineitem" -> 8)) - - // set up prior to running benchmarks - import com.nvidia.spark.rapids.tests._ - val benchmark = new BenchmarkRunner(new TpchLikeBench()) - - // run benchmarks - benchmark.collect(spark, "q5", iterations=3) - benchmark.writeParquet(spark, "q5", "/path/to/output", iterations=3) - } - - // this test is to check that the following code (based on docs/benchmarks.md) compiles - ignore("test TPCx-BB documented usage") { - val spark = SparkSession.builder().getOrCreate() - - import com.nvidia.spark.rapids.tests.tpcxbb._ - - // convert using minimal args - TpcxbbLikeSpark.csvToParquet(spark, "/path/to/input", "/path/to/output") - - // convert with explicit partitioning - TpcxbbLikeSpark.csvToParquet(spark, "/path/to/input", "/path/to/output", - coalesce=Map("customer" -> 1), repartition=Map("item" -> 8)) - - // set up prior to running benchmarks - import com.nvidia.spark.rapids.tests._ - val benchmark = new BenchmarkRunner(new TpcxbbLikeBench()) - - // run benchmarks - benchmark.collect(spark, "q5", iterations=3) - benchmark.writeParquet(spark, "q5", "/path/to/output", iterations=3) - } - - // this test is to check that the following code (based on docs/benchmarks.md) compiles - ignore("test documented usage for comparing results") { - val spark = SparkSession.builder().getOrCreate() - - import com.nvidia.spark.rapids.tests.common._ - val cpu = spark.read.parquet("/data/tpcxbb/q5-cpu") - val gpu = spark.read.parquet("/data/tpcxbb/q5-gpu") - BenchUtils.compareResults(cpu, gpu, "parquet", ignoreOrdering=true, epsilon=0.0001) - } - -} diff --git a/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeAdaptiveSparkSuite.scala b/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeAdaptiveSparkSuite.scala deleted file mode 100644 index 1927a6e2629..00000000000 --- a/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeAdaptiveSparkSuite.scala +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.nvidia.spark.rapids.tests.tpch - -import org.scalatest.Ignore - -// we need the AQE suites to have unique names so that they don't overwrite -// surefire results from the original suites -class TpchLikeAdaptiveSparkSuite extends TpchLikeSparkSuite { - override def adaptiveQueryEnabled: Boolean = true -} diff --git a/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeSparkSuite.scala b/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeSparkSuite.scala deleted file mode 100644 index 21eeaaf0b70..00000000000 --- a/integration_tests/src/test/scala/com/nvidia/spark/rapids/tests/tpch/TpchLikeSparkSuite.scala +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.nvidia.spark.rapids.tests.tpch - -import com.nvidia.spark.rapids.{ColumnarRdd, ExecutionPlanCaptureCallback} -import com.nvidia.spark.rapids.ShimLoader -import org.scalatest.{BeforeAndAfterAll, FunSuite} - -import org.apache.spark.sql.{DataFrame, SparkSession} -import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec - -class TpchLikeSparkSuite extends FunSuite with BeforeAndAfterAll { - - /** - * This is intentionally a def rather than a val so that scalatest uses the correct value (from - * this class or the derived class) when registering tests. - */ - def adaptiveQueryEnabled = false - - lazy val session: SparkSession = { - var builder = SparkSession.builder - .master("local[2]") - .appName("TPCHLikeTest") - .config("spark.sql.join.preferSortMergeJoin", false) - .config("spark.sql.shuffle.partitions", 2) - .config("spark.sql.queryExecutionListeners", - classOf[ExecutionPlanCaptureCallback].getCanonicalName) - .config("spark.plugins", "com.nvidia.spark.SQLPlugin") - .config("spark.rapids.sql.test.enabled", false) - .config("spark.rapids.sql.explain", true) - .config("spark.rapids.sql.incompatibleOps.enabled", true) - .config("spark.rapids.sql.hasNans", false) - val rapidsShuffle = ShimLoader.getSparkShims.getRapidsShuffleManagerClass - val prop = System.getProperty("rapids.shuffle.manager.override", "false") - if (prop.equalsIgnoreCase("true")) { - println("RAPIDS SHUFFLE MANAGER ACTIVE") - // configure the cache-only shuffle manager (disable transport) - builder = builder - .config("spark.shuffle.manager", rapidsShuffle) - .config("spark.rapids.shuffle.transport.enabled", false) - } else { - println("RAPIDS SHUFFLE MANAGER INACTIVE") - } - - builder.getOrCreate() - } - - override def beforeAll(): Unit = { - super.beforeAll() - TpchLikeSpark.setupAllParquet(session, "src/test/resources/tpch/") - } - - test("GPU data export with conversion") { - val df = session.sql( - """ - | select l_orderkey, SUM(l_quantity), SUM(l_discount), SUM(l_tax) from lineitem - | group by l_orderkey - """.stripMargin) - val rdd = ColumnarRdd(df) - assert(rdd != null) - assert(255.0 == rdd.map(table => try { - table.getRowCount - } finally { - table.close - }).sum()) - // max order key - assert(999 == rdd.map(table => try { - table.getColumn(0).max().getLong - } finally { - table.close() - }).max()) - } - - test("zero copy GPU data export") { - val df = session.sql("""select l_orderkey, l_quantity, l_discount, l_tax from lineitem""") - val rdd = ColumnarRdd(df) - assert(rdd != null) - assert(1000.0 == rdd.map(table => try { - table.getRowCount - } finally { - table.close() - }).sum()) - - // Max order key - assert(999 == rdd.map(table => try { - table.getColumn(0).max().getLong - } finally { - table.close() - }).max()) - } - - private def testTpchLike( - name: String, - expectedRowCount: Int) - (fun: SparkSession => DataFrame): Unit = { - var qualifiedName = name - if (adaptiveQueryEnabled) { - qualifiedName += " AQE" - } - 
test(qualifiedName) { - session.conf.set("spark.sql.adaptive.enabled", adaptiveQueryEnabled) - ExecutionPlanCaptureCallback.startCapture() - val df = fun(session) - val c = df.count() - val plan = ExecutionPlanCaptureCallback.getResultWithTimeout() - assert(plan.isDefined) - assertResult(adaptiveQueryEnabled)(plan.get.isInstanceOf[AdaptiveSparkPlanExec]) - assert(expectedRowCount == c) - } - } - - testTpchLike("Something like TPCH Query 1", 4) { - session => Q1Like(session) - } - - testTpchLike("Something like TPCH Query 2", 1) { - session => Q2Like(session) - } - - testTpchLike("Something like TPCH Query 3", 3) { - session => Q3Like(session) - } - - testTpchLike("Something like TPCH Query 4", 5) { - session => Q4Like(session) - } - - testTpchLike("Something like TPCH Query 5", 1) { - session => Q5Like(session) - } - - testTpchLike("Something like TPCH Query 6", 1) { - session => Q6Like(session) - } - - testTpchLike("Something like TPCH Query 7", 0) { - session => Q7Like(session) - } - - testTpchLike("Something like TPCH Query 8", 0) { - session => Q8Like(session) - } - - testTpchLike("Something like TPCH Query 9", 5) { - session => Q9Like(session) - } - - testTpchLike("Something like TPCH Query 10", 4) { - session => Q10Like(session) - } - - testTpchLike("Something like TPCH Query 11", 47) { - session => Q11Like(session) - } - - testTpchLike("Something like TPCH Query 12", 2) { - session => Q12Like(session) - } - - testTpchLike("Something like TPCH Query 13", 6) { - session => Q13Like(session) - } - - testTpchLike("Something like TPCH Query 14", 1) { - session => Q14Like(session) - } - - testTpchLike("Something like TPCH Query 15", 1) { - session => Q15Like(session) - } - - testTpchLike("Something like TPCH Query 16", 42) { - session => Q16Like(session) - } - - testTpchLike("Something like TPCH Query 17", 1) { - session => Q17Like(session) - } - - testTpchLike("Something like TPCH Query 18", 0) { - session => Q18Like(session) - } - - testTpchLike("Something like TPCH Query 19", 1) { - session => Q19Like(session) - } - - testTpchLike("Something like TPCH Query 20", 0) { - session => Q20Like(session) - } - - testTpchLike("Something like TPCH Query 21", 0) { - session => Q21Like(session) - } - - testTpchLike("Something like TPCH Query 22", 7) { - session => Q22Like(session) - } -} diff --git a/tests/README.md b/tests/README.md index 8b503d1c685..2542dcbf8aa 100644 --- a/tests/README.md +++ b/tests/README.md @@ -6,36 +6,10 @@ parent: Developer Overview --- # RAPIDS Accelerator for Apache Spark Testing -We have several stand alone examples that you can run in the [integration tests](../integration_tests). - -One set is based off of the mortgage dataset you can download +We have a stand-alone example that you can run in the [integration tests](../integration_tests). +The example is based off of the mortgage dataset you can download [here](http://www.fanniemae.com/portal/funding-the-market/data/loan-performance-data.html) -and are in the `com.nvidia.spark.rapids.tests.mortgage` package. - -The other is based off of TPCH. You can use the TPCH `dbgen` tool to generate data for them. They -are in the `com.nvidia.spark.rapids.tests.tpch` package. `dbgen` has various options to -generate the data. Please refer to the documentation that comes with dbgen on how to use it, but -we typically run with the default options and only increase the scale factor depending on the test. 
-```shell -dbgen -b dists.dss -s 10 -``` - -You can include the test jar `rapids-4-spark-integration-tests_2.12-0.4.0.jar` with the -Spark --jars option to get the TPCH tests. To setup for the queries you can run -`TpchLikeSpark.setupAllCSV` for CSV formatted data or `TpchLikeSpark.setupAllParquet` -for parquet formatted data. Both of those take the Spark session, and a path to the dbgen -generated data. After that each query has its own object. - -So you can make a call like: -```scala -import com.nvidia.spark.rapids.tests.tpch._ -val pathTodbgenoutput = SPECIFY PATH -TpchLikeSpark.setupAllCSV(spark, pathTodbgenoutput) -Q1Like(spark).count() -``` - -They generally follow TPCH but are not guaranteed to be the same. -`Q1Like(spark)` will return a DataFrame that can be executed to run the corresponding query. +and the code is in the `com.nvidia.spark.rapids.tests.mortgage` package. ## Unit Tests diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala index c44500bd712..8878d9ed645 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala @@ -114,7 +114,12 @@ object SparkSessionHolder extends Logging { setAllConfs(origConf.toArray) val currentKeys = spark.conf.getAll.keys.toSet val toRemove = currentKeys -- origConfKeys - toRemove.foreach(spark.conf.unset) + if (toRemove.contains("spark.shuffle.manager")) { + // cannot unset the config so need to reinitialize + reinitSession() + } else { + toRemove.foreach(spark.conf.unset) + } } logDebug(s"RESET CONF TO: ${spark.conf.getAll}") } From 6483543636a99566a2f95046973d6298a519e28b Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 1 Mar 2021 14:19:35 -0600 Subject: [PATCH 06/28] Spark 3.0.2 shim no longer a snapshot shim (#1831) * Spark 3.0.2 shim no longer a snapshot shim Signed-off-by: Jason Lowe * Remove 3.0.2-SNAPSHOT support --- pom.xml | 2 +- shims/aggregator/pom.xml | 12 ++++++------ shims/pom.xml | 2 +- .../shims/spark302/SparkShimServiceProvider.scala | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pom.xml b/pom.xml index 5e4e4724467..37f851bd6d8 100644 --- a/pom.xml +++ b/pom.xml @@ -175,7 +175,7 @@ please update the snapshot-shims profile as well so it is accurate --> 3.0.1 3.0.1-databricks - 3.0.2-SNAPSHOT + 3.0.2 3.1.1-SNAPSHOT 3.6.0 4.3.0 diff --git a/shims/aggregator/pom.xml b/shims/aggregator/pom.xml index 7ed3bcf0aa9..14e69185026 100644 --- a/shims/aggregator/pom.xml +++ b/shims/aggregator/pom.xml @@ -68,12 +68,6 @@ ${project.version} compile - - com.nvidia - rapids-4-spark-shims-spark302_${scala.binary.version} - ${project.version} - compile - @@ -103,5 +97,11 @@ ${project.version} compile + + com.nvidia + rapids-4-spark-shims-spark302_${scala.binary.version} + ${project.version} + compile + diff --git a/shims/pom.xml b/shims/pom.xml index f849d7faa03..2d1df82e717 100644 --- a/shims/pom.xml +++ b/shims/pom.xml @@ -45,7 +45,6 @@ true - spark302 spark311 @@ -56,6 +55,7 @@ spark300emr spark301emr spark301 + spark302 aggregator diff --git a/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/SparkShimServiceProvider.scala b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/SparkShimServiceProvider.scala index 17c921ca381..8db3f9d460c 100644 --- a/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/SparkShimServiceProvider.scala +++ 
b/shims/spark302/src/main/scala/com/nvidia/spark/rapids/shims/spark302/SparkShimServiceProvider.scala @@ -20,7 +20,7 @@ import com.nvidia.spark.rapids.{SparkShims, SparkShimVersion} object SparkShimServiceProvider { val VERSION = SparkShimVersion(3, 0, 2) - val VERSIONNAMES = Seq(s"$VERSION", s"$VERSION-SNAPSHOT") + val VERSIONNAMES = Seq(s"$VERSION") } class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider { From 7e210c2e46762cfc384fde49c5d171893b3a5ed0 Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Mon, 1 Mar 2021 17:02:21 -0600 Subject: [PATCH 07/28] Make databricks build.sh more convenient for dev (#1838) Signed-off-by: Thomas Graves --- jenkins/databricks/build.sh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/jenkins/databricks/build.sh b/jenkins/databricks/build.sh index c6c9bdabdd1..e79c70e5de3 100755 --- a/jenkins/databricks/build.sh +++ b/jenkins/databricks/build.sh @@ -21,6 +21,8 @@ SPARKSRCTGZ=$1 # version of Apache Spark we are building against BASE_SPARK_VERSION=$2 BUILD_PROFILES=$3 +BUILD_PROFILES=${BUILD_PROFILES:-'databricks301,!snapshot-shims'} +BASE_SPARK_VERSION=${BASE_SPARK_VERSION:-'3.0.1'} echo "tgz is $SPARKSRCTGZ" echo "Base Spark version is $BASE_SPARK_VERSION" @@ -31,11 +33,14 @@ sudo apt install -y maven # this has to match the Databricks init script DB_JAR_LOC=/databricks/jars/ -rm -rf spark-rapids -mkdir spark-rapids -echo "tar -zxvf $SPARKSRCTGZ -C spark-rapids" -tar -zxvf $SPARKSRCTGZ -C spark-rapids -cd spark-rapids +if [[ -n $SPARKSRCTGZ ]] +then + rm -rf spark-rapids + mkdir spark-rapids + echo "tar -zxvf $SPARKSRCTGZ -C spark-rapids" + tar -zxvf $SPARKSRCTGZ -C spark-rapids + cd spark-rapids +fi export WORKSPACE=`pwd` SPARK_PLUGIN_JAR_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=project.version -DforceStdout` From 51049a676b61d029dad379c22f1990e7fda9082d Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Mon, 1 Mar 2021 18:17:18 -0800 Subject: [PATCH 08/28] Add a shim provider for Spark 3.2.0 development branch (#1704) Signed-off-by: Gera Shegalov Add a shim provider for Spark 3.2.0 development branch. 
Closes #1490 - fix overflows in aggregate buffer for GpuSum by wiring the explicit output column type - unit tests for the new shim - consolidate version profiles in the parent pom --- api_validation/pom.xml | 6 ++ .../rapids-shuffle.md | 1 + integration_tests/pom.xml | 30 ------ pom.xml | 30 ++++++ shims/aggregator/pom.xml | 6 ++ shims/pom.xml | 10 ++ .../rapids/shims/spark300/Spark300Shims.scala | 18 +++- shims/spark301db/pom.xml | 1 - shims/spark320/pom.xml | 92 +++++++++++++++++++ ...idia.spark.rapids.SparkShimServiceProvider | 1 + .../rapids/shims/spark320/Spark320Shims.scala | 53 +++++++++++ .../spark320/SparkShimServiceProvider.scala | 36 ++++++++ .../spark320/RapidsShuffleManager.scala | 26 ++++++ .../shims/spark320/Spark320ShimsSuite.scala | 33 +++++++ .../nvidia/spark/rapids/GpuColumnVector.java | 65 +++++++------ .../nvidia/spark/rapids/GpuOverrides.scala | 2 +- .../com/nvidia/spark/rapids/SparkShims.scala | 12 ++- .../spark/sql/rapids/AggregateFunctions.scala | 33 +++++-- .../execution/GpuShuffleExchangeExec.scala | 16 ++-- tests-spark310+/pom.xml | 4 - tests/pom.xml | 30 ------ .../spark/rapids/AdaptiveQueryExecSuite.scala | 7 +- 22 files changed, 396 insertions(+), 116 deletions(-) create mode 100644 shims/spark320/pom.xml create mode 100644 shims/spark320/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider create mode 100644 shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/Spark320Shims.scala create mode 100644 shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/SparkShimServiceProvider.scala create mode 100644 shims/spark320/src/main/scala/com/nvidia/spark/rapids/spark320/RapidsShuffleManager.scala create mode 100644 shims/spark320/src/test/scala/com/nvidia/spark/rapids/shims/spark320/Spark320ShimsSuite.scala diff --git a/api_validation/pom.xml b/api_validation/pom.xml index 0062e481c4e..47f9604ff76 100644 --- a/api_validation/pom.xml +++ b/api_validation/pom.xml @@ -46,6 +46,12 @@ ${spark311.version} + + spark320 + + ${spark320.version} + + diff --git a/docs/additional-functionality/rapids-shuffle.md b/docs/additional-functionality/rapids-shuffle.md index 41c8cdbd746..85cf8bc02e2 100644 --- a/docs/additional-functionality/rapids-shuffle.md +++ b/docs/additional-functionality/rapids-shuffle.md @@ -258,6 +258,7 @@ In this section, we are using a docker container built using the sample dockerfi | 3.0.1 EMR | com.nvidia.spark.rapids.spark301emr.RapidsShuffleManager | | 3.0.2 | com.nvidia.spark.rapids.spark302.RapidsShuffleManager | | 3.1.1 | com.nvidia.spark.rapids.spark311.RapidsShuffleManager | + | 3.2.0 | com.nvidia.spark.rapids.spark320.RapidsShuffleManager | 2. 
Recommended settings for UCX 1.9.0+ ```shell diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index 8550e38cd59..b7925a7b582 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -28,36 +28,6 @@ rapids-4-spark-integration-tests_2.12 0.5.0-SNAPSHOT - - ${spark300.version} - - - - spark301dbtests - - ${spark301db.version} - - - - spark301tests - - ${spark301.version} - - - - spark302tests - - ${spark302.version} - - - - spark311tests - - ${spark311.version} - - - - org.slf4j diff --git a/pom.xml b/pom.xml index 3c13e85fa2d..0f8139e4055 100644 --- a/pom.xml +++ b/pom.xml @@ -131,11 +131,39 @@ true + + + spark301dbtests + + ${spark301db.version} + + spark301tests + + ${spark301.version} + + + + spark302tests + + ${spark302.version} + spark311tests + + ${spark311.version} + + + tests-spark310+ + + + + spark320tests + + ${spark320.version} + tests-spark310+ @@ -146,6 +174,7 @@ 1.8 1.8 ${spark300.version} + ${spark300.version} cuda10-1 0.19-SNAPSHOT 2.12 @@ -177,6 +206,7 @@ 3.0.1-databricks 3.0.2 3.1.1-SNAPSHOT + 3.2.0-SNAPSHOT 3.6.0 4.3.0 3.2.0 diff --git a/shims/aggregator/pom.xml b/shims/aggregator/pom.xml index dd6565d115b..efd44b9a4c2 100644 --- a/shims/aggregator/pom.xml +++ b/shims/aggregator/pom.xml @@ -62,6 +62,12 @@ true + + com.nvidia + rapids-4-spark-shims-spark320_${scala.binary.version} + ${project.version} + compile + com.nvidia rapids-4-spark-shims-spark311_${scala.binary.version} diff --git a/shims/pom.xml b/shims/pom.xml index fec14f4966c..1dca15fdd09 100644 --- a/shims/pom.xml +++ b/shims/pom.xml @@ -46,6 +46,7 @@ spark311 + spark320 @@ -71,6 +72,11 @@ ${cuda.version} provided + + org.scalatest + scalatest_${scala.binary.version} + test + @@ -78,6 +84,10 @@ net.alchim31.maven scala-maven-plugin + + org.scalatest + scalatest-maven-plugin + diff --git a/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/Spark300Shims.scala b/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/Spark300Shims.scala index 180accf98ae..96cbbbebda4 100644 --- a/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/Spark300Shims.scala +++ b/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/Spark300Shims.scala @@ -32,11 +32,13 @@ import org.apache.spark.sql.{SparkSession, SparkSessionExtensions} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.errors.attachTree import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last} import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.physical.{BroadcastMode, Partitioning} import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.connector.read.Scan import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, BroadcastQueryStageExec, ShuffleQueryStageExec} @@ -44,7 +46,7 @@ import org.apache.spark.sql.execution.datasources.{FileIndex, FilePartition, Fil import org.apache.spark.sql.execution.datasources.rapids.GpuPartitioningUtils import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan -import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ShuffleExchangeExec} +import 
org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, HashJoin, SortMergeJoinExec} import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec import org.apache.spark.sql.execution.python.WindowInPandasExec @@ -584,4 +586,18 @@ class Spark300Shims extends SparkShims { } recurse(plan, predicate, new ListBuffer[SparkPlan]()) } + + override def reusedExchangeExecPfn: PartialFunction[SparkPlan, ReusedExchangeExec] = { + case ShuffleQueryStageExec(_, e: ReusedExchangeExec) => e + case BroadcastQueryStageExec(_, e: ReusedExchangeExec) => e + } + + /** dropped by SPARK-34234 */ + override def attachTreeIfSupported[TreeType <: TreeNode[_], A]( + tree: TreeType, + msg: String)( + f: => A + ): A = { + attachTree(tree, msg)(f) + } } diff --git a/shims/spark301db/pom.xml b/shims/spark301db/pom.xml index ef41d7e6f1d..2364123e5b7 100644 --- a/shims/spark301db/pom.xml +++ b/shims/spark301db/pom.xml @@ -73,7 +73,6 @@ 1.10.1 - 3.0.1-databricks 0.15.1 diff --git a/shims/spark320/pom.xml b/shims/spark320/pom.xml new file mode 100644 index 00000000000..fd893e82315 --- /dev/null +++ b/shims/spark320/pom.xml @@ -0,0 +1,92 @@ + + + + 4.0.0 + + + com.nvidia + rapids-4-spark-shims_2.12 + 0.5.0-SNAPSHOT + ../pom.xml + + com.nvidia + rapids-4-spark-shims-spark320_2.12 + RAPIDS Accelerator for Apache Spark SQL Plugin Spark 3.2.0 Shim + The RAPIDS SQL plugin for Apache Spark 3.2.0 Shim + 0.5.0-SNAPSHOT + + + + + + + maven-antrun-plugin + + + dependency + generate-resources + + + + + + + + + + + + + run + + + + + + org.scalastyle + scalastyle-maven-plugin + + + + + + + ${project.build.directory}/extra-resources + + + src/main/resources + + + + + + + com.nvidia + rapids-4-spark-shims-spark311_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark320.version} + provided + + + diff --git a/shims/spark320/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider b/shims/spark320/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider new file mode 100644 index 00000000000..f6e343b6bfe --- /dev/null +++ b/shims/spark320/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider @@ -0,0 +1 @@ +com.nvidia.spark.rapids.shims.spark320.SparkShimServiceProvider diff --git a/shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/Spark320Shims.scala b/shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/Spark320Shims.scala new file mode 100644 index 00000000000..ceddf82f741 --- /dev/null +++ b/shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/Spark320Shims.scala @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.shims.spark320 + +import com.nvidia.spark.rapids.ShimVersion +import com.nvidia.spark.rapids.shims.spark311.Spark311Shims +import com.nvidia.spark.rapids.spark320.RapidsShuffleManager + +import org.apache.spark.sql.catalyst.trees.TreeNode +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.adaptive.{BroadcastQueryStageExec, ShuffleQueryStageExec} +import org.apache.spark.sql.execution.exchange.ReusedExchangeExec + +class Spark320Shims extends Spark311Shims { + + override def getSparkShimVersion: ShimVersion = SparkShimServiceProvider.VERSION320 + + override def getRapidsShuffleManagerClass: String = { + classOf[RapidsShuffleManager].getCanonicalName + } + + /** + * Case class ShuffleQueryStageExec holds an additional field shuffleOrigin + * affecting the unapply method signature + */ + override def reusedExchangeExecPfn: PartialFunction[SparkPlan, ReusedExchangeExec] = { + case ShuffleQueryStageExec(_, e: ReusedExchangeExec, _) => e + case BroadcastQueryStageExec(_, e: ReusedExchangeExec, _) => e + } + + /** dropped by SPARK-34234 */ + override def attachTreeIfSupported[TreeType <: TreeNode[_], A]( + tree: TreeType, + msg: String)( + f: => A + ): A = { + identity(f) + } +} diff --git a/shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/SparkShimServiceProvider.scala b/shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/SparkShimServiceProvider.scala new file mode 100644 index 00000000000..f451f0e8679 --- /dev/null +++ b/shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/SparkShimServiceProvider.scala @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.shims.spark320 + +import com.nvidia.spark.rapids.{SparkShims, SparkShimVersion} + +object SparkShimServiceProvider { + val VERSION320 = SparkShimVersion(3, 2, 0) + val VERSIONNAMES: Seq[String] = Seq(VERSION320) + .flatMap(v => Seq(s"$v", s"$v-SNAPSHOT")) +} + +class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider { + + def matchesVersion(version: String): Boolean = { + SparkShimServiceProvider.VERSIONNAMES.contains(version) + } + + def buildShim: SparkShims = { + new Spark320Shims() + } +} diff --git a/shims/spark320/src/main/scala/com/nvidia/spark/rapids/spark320/RapidsShuffleManager.scala b/shims/spark320/src/main/scala/com/nvidia/spark/rapids/spark320/RapidsShuffleManager.scala new file mode 100644 index 00000000000..4c6b0551db0 --- /dev/null +++ b/shims/spark320/src/main/scala/com/nvidia/spark/rapids/spark320/RapidsShuffleManager.scala @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.spark320 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.rapids.shims.spark311.RapidsShuffleInternalManager + +/** A shuffle manager optimized for the RAPIDS Plugin for Apache Spark. */ +sealed class RapidsShuffleManager( + conf: SparkConf, + isDriver: Boolean) extends RapidsShuffleInternalManager(conf, isDriver) { +} diff --git a/shims/spark320/src/test/scala/com/nvidia/spark/rapids/shims/spark320/Spark320ShimsSuite.scala b/shims/spark320/src/test/scala/com/nvidia/spark/rapids/shims/spark320/Spark320ShimsSuite.scala new file mode 100644 index 00000000000..bdc363c986d --- /dev/null +++ b/shims/spark320/src/test/scala/com/nvidia/spark/rapids/shims/spark320/Spark320ShimsSuite.scala @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.shims.spark320; + +import com.nvidia.spark.rapids.{SparkShims, SparkShimVersion} + +import org.scalatest.FunSuite; + +class Spark320ShimsSuite extends FunSuite { + val sparkShims: SparkShims = new SparkShimServiceProvider().buildShim + test("spark shims version") { + assert(sparkShims.getSparkShimVersion === SparkShimVersion(3, 2, 0)) + } + + test("shuffle manager class") { + assert(sparkShims.getRapidsShuffleManagerClass === + classOf[com.nvidia.spark.rapids.spark320.RapidsShuffleManager].getCanonicalName) + } +} \ No newline at end of file diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java index 46fc9c794c4..ac48d7740a4 100644 --- a/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java +++ b/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java @@ -107,23 +107,17 @@ public static synchronized void debug(String name, HostColumnVector hostCol) { hexString(hostCol.getUTF8(i))); } } - } else if (DType.INT32.equals(type)) { - for (int i = 0; i < hostCol.getRowCount(); i++) { - if (hostCol.isNull(i)) { - System.err.println(i + " NULL"); - } else { - System.err.println(i + " " + hostCol.getInt(i)); - } - } - } else if (DType.INT8.equals(type)) { - for (int i = 0; i < hostCol.getRowCount(); i++) { - if (hostCol.isNull(i)) { - System.err.println(i + " NULL"); - } else { - System.err.println(i + " " + hostCol.getByte(i)); - } - } - } else if (DType.BOOL8.equals(type)) { + } else if (DType.INT32.equals(type) + || DType.INT8.equals(type) + || DType.INT16.equals(type) + || DType.INT64.equals(type) + || DType.TIMESTAMP_DAYS.equals(type) + || DType.TIMESTAMP_SECONDS.equals(type) + || DType.TIMESTAMP_MICROSECONDS.equals(type) + || DType.TIMESTAMP_MILLISECONDS.equals(type) + || DType.TIMESTAMP_NANOSECONDS.equals(type)) { + debugInteger(hostCol, type); + } else if (DType.BOOL8.equals(type)) { for (int i = 0; i < hostCol.getRowCount(); i++) { if (hostCol.isNull(i)) { System.err.println(i + " NULL"); @@ -131,20 +125,39 @@ public static synchronized void debug(String name, HostColumnVector hostCol) { System.err.println(i + " " + hostCol.getBoolean(i)); } } - } else if (DType.TIMESTAMP_MICROSECONDS.equals(type) || - DType.INT64.equals(type)) { - for (int i = 0; i < hostCol.getRowCount(); i++) { - if (hostCol.isNull(i)) { - System.err.println(i + " NULL"); - } else { - System.err.println(i + " " + hostCol.getLong(i)); - } - } } else { System.err.println("TYPE " + type + " NOT SUPPORTED FOR DEBUG PRINT"); } } + private static void debugInteger(HostColumnVector hostCol, DType intType) { + for (int i = 0; i < hostCol.getRowCount(); i++) { + if (hostCol.isNull(i)) { + System.err.println(i + " NULL"); + } else { + final int sizeInBytes = intType.getSizeInBytes(); + final Object value; + switch (sizeInBytes) { + case Byte.BYTES: + value = hostCol.getByte(i); + break; + case Short.BYTES: + value = hostCol.getShort(i); + break; + case Integer.BYTES: + value = hostCol.getInt(i); + break; + case Long.BYTES: + value = hostCol.getLong(i); + break; + default: + throw new IllegalArgumentException("INFEASIBLE: Unsupported integer-like type " + intType); + } + System.err.println(i + " " + value); + } + } + } + private static HostColumnVector.DataType convertFrom(DataType spark, boolean nullable) { if (spark instanceof ArrayType) { ArrayType arrayType = (ArrayType) spark; diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala 
b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 2e3501462e9..37a093c746b 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -1857,7 +1857,7 @@ object GpuOverrides { } } - override def convertToGpu(child: Expression): GpuExpression = GpuSum(child) + override def convertToGpu(child: Expression): GpuExpression = GpuSum(child, a.dataType) }), expr[Average]( "Average aggregate operator", diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SparkShims.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SparkShims.scala index 07ee8a42786..1ba4a055956 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SparkShims.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SparkShims.scala @@ -30,11 +30,12 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, ExprId, Nul import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.physical.{BroadcastMode, Partitioning} import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.connector.read.Scan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.adaptive.ShuffleQueryStageExec import org.apache.spark.sql.execution.datasources.{FileIndex, FilePartition, HadoopFsRelation, PartitionDirectory, PartitionedFile} -import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.exchange.{ReusedExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids.{GpuFileSourceScanExec, ShuffleManagerShimBase} @@ -191,4 +192,13 @@ trait SparkShims { def shouldFailDivByZero(): Boolean def findOperators(plan: SparkPlan, predicate: SparkPlan => Boolean): Seq[SparkPlan] + + def reusedExchangeExecPfn: PartialFunction[SparkPlan, ReusedExchangeExec] + + /** dropped by SPARK-34234 */ + def attachTreeIfSupported[TreeType <: TreeNode[_], A]( + tree: TreeType, + msg: String = "")( + f: => A + ): A } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/AggregateFunctions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/AggregateFunctions.scala index 32da9614133..0fcd3b4bfce 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/AggregateFunctions.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/AggregateFunctions.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.rapids import ai.rapids.cudf -import ai.rapids.cudf.{Aggregation, AggregationOnColumn, ColumnVector} +import ai.rapids.cudf.{Aggregation, AggregationOnColumn, ColumnVector, DType} import com.nvidia.spark.rapids._ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult @@ -192,10 +192,29 @@ class CudfCount(ref: Expression) extends CudfAggregate(ref) { } class CudfSum(ref: Expression) extends CudfAggregate(ref) { + // Up to 3.1.1, analyzed plan widened the input column type before applying + // aggregation. 
Thus even though we did not explicitly pass the output column type + // we did not run into integer overflow issues: + // + // == Analyzed Logical Plan == + // sum(shorts): bigint + // Aggregate [sum(cast(shorts#77 as bigint)) AS sum(shorts)#94L] + // + // In Spark's main branch (3.2.0-SNAPSHOT as of this comment), analyzed logical plan + // no longer applies the cast to the input column such that the output column type has to + // be passed explicitly into aggregation + // + // == Analyzed Logical Plan == + // sum(shorts): bigint + // Aggregate [sum(shorts#33) AS sum(shorts)#50L] + // + @transient val rapidsSumType: DType = GpuColumnVector.getNonNestedRapidsType(ref.dataType) + override val updateReductionAggregate: cudf.ColumnVector => cudf.Scalar = - (col: cudf.ColumnVector) => col.sum - override val mergeReductionAggregate: cudf.ColumnVector => cudf.Scalar = - (col: cudf.ColumnVector) => col.sum + (col: cudf.ColumnVector) => col.sum(rapidsSumType) + + override val mergeReductionAggregate: cudf.ColumnVector => cudf.Scalar = updateReductionAggregate + override lazy val updateAggregate: Aggregation = Aggregation.sum() override lazy val mergeAggregate: Aggregation = Aggregation.sum() override def toString(): String = "CudfSum" @@ -329,12 +348,8 @@ case class GpuMax(child: Expression) extends GpuDeclarativeAggregate Aggregation.max().onColumn(inputs.head._2) } -case class GpuSum(child: Expression) +case class GpuSum(child: Expression, resultType: DataType) extends GpuDeclarativeAggregate with ImplicitCastInputTypes with GpuAggregateWindowFunction { - private lazy val resultType = child.dataType match { - case _: DoubleType => DoubleType - case _ => LongType - } private lazy val cudfSum = AttributeReference("sum", resultType)() diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleExchangeExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleExchangeExec.scala index d43c433c577..d865d52d3b8 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleExchangeExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleExchangeExec.scala @@ -26,8 +26,7 @@ import org.apache.spark.{MapOutputStatistics, ShuffleDependency} import org.apache.spark.rdd.RDD import org.apache.spark.serializer.Serializer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.errors._ -import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, SortOrder} +import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute} import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.exchange.{Exchange, ShuffleExchangeExec} @@ -142,13 +141,14 @@ abstract class GpuShuffleExchangeExecBase( protected override def doExecute(): RDD[InternalRow] = throw new IllegalStateException(s"Row-based execution should not occur for $this") - override def doExecuteColumnar(): RDD[ColumnarBatch] = attachTree(this, "execute") { - // Returns the same ShuffleRowRDD if this plan is used by multiple plans. - if (cachedShuffleRDD == null) { - cachedShuffleRDD = new ShuffledBatchRDD(shuffleDependencyColumnar, metrics ++ readMetrics) + override def doExecuteColumnar(): RDD[ColumnarBatch] = ShimLoader.getSparkShims + .attachTreeIfSupported(this, "execute") { + // Returns the same ShuffleRowRDD if this plan is used by multiple plans. 
+ if (cachedShuffleRDD == null) { + cachedShuffleRDD = new ShuffledBatchRDD(shuffleDependencyColumnar, metrics ++ readMetrics) + } + cachedShuffleRDD } - cachedShuffleRDD - } } object GpuShuffleExchangeExec { diff --git a/tests-spark310+/pom.xml b/tests-spark310+/pom.xml index 82e651bb044..e05a532377f 100644 --- a/tests-spark310+/pom.xml +++ b/tests-spark310+/pom.xml @@ -28,10 +28,6 @@ rapids-4-spark-tests-next-spark_2.12 0.5.0-SNAPSHOT - - ${spark311.version} - - org.apache.spark diff --git a/tests/pom.xml b/tests/pom.xml index 43a836a0905..0bd1229e09a 100644 --- a/tests/pom.xml +++ b/tests/pom.xml @@ -30,36 +30,6 @@ RAPIDS plugin for Apache Spark integration tests 0.5.0-SNAPSHOT - - ${spark300.version} - - - - spark301dbtests - - ${spark301db.version} - - - - spark301tests - - ${spark301.version} - - - - spark302tests - - ${spark302.version} - - - - spark311tests - - ${spark311.version} - - - - org.slf4j diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala index 4e0a50b5849..5de8e891450 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala @@ -24,7 +24,7 @@ import org.scalatest.BeforeAndAfterEach import org.apache.spark.SparkConf import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession} import org.apache.spark.sql.execution.{PartialReducerPartitionSpec, SparkPlan} -import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveSparkPlanHelper, BroadcastQueryStageExec, ShuffleQueryStageExec} +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveSparkPlanHelper, ShuffleQueryStageExec} import org.apache.spark.sql.execution.command.DataWritingCommandExec import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec} import org.apache.spark.sql.execution.joins.SortMergeJoinExec @@ -94,10 +94,7 @@ class AdaptiveQueryExecSuite } private def findReusedExchange(plan: SparkPlan): Seq[ReusedExchangeExec] = { - collectWithSubqueries(plan) { - case ShuffleQueryStageExec(_, e: ReusedExchangeExec) => e - case BroadcastQueryStageExec(_, e: ReusedExchangeExec) => e - } + collectWithSubqueries(plan)(ShimLoader.getSparkShims.reusedExchangeExecPfn) } test("skewed inner join optimization") { From 28b00a7c9edebaafbd297b82c6be9af85c138cc2 Mon Sep 17 00:00:00 2001 From: NvTimLiu <50287591+NvTimLiu@users.noreply.github.com> Date: Tue, 2 Mar 2021 17:45:21 +0800 Subject: [PATCH 09/28] Cleanup unused Jenkins files and scripts (#1829) * Cleanup unused Jenkins files and scripts https://github.com/NVIDIA/spark-rapids/issues/1568 Move Databricks scripts to GitLab so we can use the common scripts for the nightly build job and integration tests job Remove unused Dockerfiles Signed-off-by: Tim Liu * rm Dockerfile.integration.ubuntu16 * Restore Databricks nightly scripts Signed-off-by: Tim Liu --- jenkins/Dockerfile.integration.ubuntu16 | 36 ----------------------- jenkins/Dockerfile.ubuntu16 | 39 ------------------------- 2 files changed, 75 deletions(-) delete mode 100644 jenkins/Dockerfile.integration.ubuntu16 delete mode 100644 jenkins/Dockerfile.ubuntu16 diff --git a/jenkins/Dockerfile.integration.ubuntu16 b/jenkins/Dockerfile.integration.ubuntu16 deleted file mode 100644 index 4027aa8fa77..00000000000 --- a/jenkins/Dockerfile.integration.ubuntu16 +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2019-2020, NVIDIA CORPORATION. 
All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -### -# -# Arguments: CUDA_VER=10.1 or 10.2 -# -### -ARG CUDA_VER=10.1 - -FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu16.04 - -#Install java-8, maven, docker image -RUN apt-get update -y && \ - apt-get install -y software-properties-common -RUN add-apt-repository ppa:deadsnakes/ppa && \ - apt-get update -y && \ - apt-get install -y maven \ - openjdk-8-jdk python3.8 python3.8-distutils python3-setuptools -RUN python3.8 -m easy_install pip - -RUN ln -s /usr/bin/python3.8 /usr/bin/python -RUN python -m pip install pytest sre_yield diff --git a/jenkins/Dockerfile.ubuntu16 b/jenkins/Dockerfile.ubuntu16 deleted file mode 100644 index b6915a9fd6f..00000000000 --- a/jenkins/Dockerfile.ubuntu16 +++ /dev/null @@ -1,39 +0,0 @@ -# -# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -### -# -# Build the image for rapids-plugin development environment -# -# Arguments: CUDA_VER=10.1 or 10.2 -# -### - -ARG CUDA_VER=10.1 - -FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu16.04 - -#Install java-8, maven, docker image -RUN apt-get update -y && \ - apt-get install -y software-properties-common -RUN add-apt-repository ppa:deadsnakes/ppa && \ - apt-get update -y && \ - apt-get install -y maven \ - openjdk-8-jdk python3.8 python3.8-distutils python3-setuptools tzdata git -RUN python3.8 -m easy_install pip - -RUN ln -s /usr/bin/python3.8 /usr/bin/python -RUN python -m pip install pytest sre_yield requests pandas pyarrow From e614ef4dc45695cdeb4eb359f99a5b96dfac259d Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 2 Mar 2021 10:16:36 -0600 Subject: [PATCH 10/28] Spark 3.1.1 shim no longer a snapshot shim (#1832) * Spark 3.1.1 shim no longer a snapshot shim Signed-off-by: Jason Lowe * Remove 3.1.0, 3.1.0-SNAPSHOT, and 3.1.1-SNAPSHOT support * Remove obsolete comment --- pom.xml | 2 +- shims/aggregator/pom.xml | 12 ++++++------ shims/pom.xml | 2 +- .../shims/spark311/SparkShimServiceProvider.scala | 4 +--- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/pom.xml b/pom.xml index 37f851bd6d8..149660a9dc3 100644 --- a/pom.xml +++ b/pom.xml @@ -176,7 +176,7 @@ 3.0.1 3.0.1-databricks 3.0.2 - 3.1.1-SNAPSHOT + 3.1.1 3.6.0 4.3.0 3.2.0 diff --git a/shims/aggregator/pom.xml b/shims/aggregator/pom.xml index 14e69185026..7204c55d7dd 100644 --- a/shims/aggregator/pom.xml +++ b/shims/aggregator/pom.xml @@ -62,12 +62,6 @@ true - - com.nvidia - rapids-4-spark-shims-spark311_${scala.binary.version} - ${project.version} - compile - @@ -103,5 +97,11 @@ ${project.version} compile + + com.nvidia + rapids-4-spark-shims-spark311_${scala.binary.version} + ${project.version} + compile + diff --git a/shims/pom.xml b/shims/pom.xml index 2d1df82e717..9b2025225cf 100644 --- a/shims/pom.xml +++ b/shims/pom.xml @@ -45,7 +45,6 @@ true - spark311 @@ -56,6 +55,7 @@ spark301emr spark301 spark302 + spark311 aggregator diff --git a/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/SparkShimServiceProvider.scala b/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/SparkShimServiceProvider.scala index 8ce22923520..3e7e694564a 100644 --- a/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/SparkShimServiceProvider.scala +++ b/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/SparkShimServiceProvider.scala @@ -19,10 +19,8 @@ package com.nvidia.spark.rapids.shims.spark311 import com.nvidia.spark.rapids.{SparkShims, SparkShimVersion} object SparkShimServiceProvider { - // keep 3.1.0 snapshot version for now until 3.1.1 is released - val VERSION310 = SparkShimVersion(3, 1, 0) val VERSION = SparkShimVersion(3, 1, 1) - val VERSIONNAMES = Seq(s"$VERSION310-SNAPSHOT", s"$VERSION", s"$VERSION-SNAPSHOT") + val VERSIONNAMES = Seq(s"$VERSION") } class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider { From fc9cecfa9fdd6a45cfbf2c75426a345ff8bd64e9 Mon Sep 17 00:00:00 2001 From: Sameer Raheja Date: Tue, 2 Mar 2021 08:38:12 -0800 Subject: [PATCH 11/28] Update to note support for 3.0.2 (#1842) * Update to note support for 3.0.2 Signed-off-by: Sameer Raheja * Update FAQ to reflect 3.0.2 and 3.1.1 support Signed-off-by: Sameer Raheja --- docs/FAQ.md | 8 ++++---- docs/download.md | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/FAQ.md b/docs/FAQ.md index 16b888e9da7..4e6498b6134 100644 --- 
a/docs/FAQ.md +++ b/docs/FAQ.md @@ -10,10 +10,10 @@ nav_order: 11 ### What versions of Apache Spark does the RAPIDS Accelerator for Apache Spark support? -The RAPIDS Accelerator for Apache Spark requires version 3.0.0 or 3.0.1 of Apache Spark. Because the -plugin replaces parts of the physical plan that Apache Spark considers to be internal the code for -those plans can change even between bug fix releases. As a part of our process, we try to stay on -top of these changes and release updates as quickly as possible. +The RAPIDS Accelerator for Apache Spark requires version 3.0.0, 3.0.1, 3.0.2 or 3.1.1 of Apache +Spark. Because the plugin replaces parts of the physical plan that Apache Spark considers to be +internal the code for those plans can change even between bug fix releases. As a part of our +process, we try to stay on top of these changes and release updates as quickly as possible. ### Which distributions are supported? diff --git a/docs/download.md b/docs/download.md index cae33496dae..f743eddfb38 100644 --- a/docs/download.md +++ b/docs/download.md @@ -21,8 +21,8 @@ This release includes additional performance improvements, including * Instructions on how to use [Alluxio caching](get-started/getting-started-alluxio.md) with Spark to leverage caching. -The release is supported on Apache Spark 3.0.0, 3.0.1, 3.1.1, Databricks 7.3 ML LTS and Google Cloud -Platform Dataproc 2.0. +The release is supported on Apache Spark 3.0.0, 3.0.1, 3.0.2, 3.1.1, Databricks 7.3 ML LTS and +Google Cloud Platform Dataproc 2.0. The list of all supported operations is provided [here](supported_ops.md). From 85bfacb2c811489a84dcd1ee848f65203f0fd16c Mon Sep 17 00:00:00 2001 From: NvTimLiu <50287591+NvTimLiu@users.noreply.github.com> Date: Wed, 3 Mar 2021 00:39:41 +0800 Subject: [PATCH 12/28] Fix fails on the mortgage ETL test (#1845) In the 'Map' of dataset-format, the function of 'Run.csv()/Run.orc()/Run.parquet' will be executed one by one, then it causes the dataset format error, because the dataset format in the current test is 'parquet' Change 'Run.csv()/Run.orc()/Run.parquet' into the lambda expressions, to avoid running the 'Run.xxx()' functions in the dataFrameFormatMap Signed-off-by: Tim Liu --- .../spark/rapids/tests/mortgage/MortgageSpark.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSpark.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSpark.scala index 04b9350ff05..de2edc10ea5 100644 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSpark.scala +++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/mortgage/MortgageSpark.scala @@ -436,16 +436,18 @@ object Main { // extend args to support csv/orc/parquet dataset val dataFrameFormatMap = Map( - "csv" -> Run.csv(session, perfPath, acqPath), - "orc" -> Run.orc(session, perfPath, acqPath), - "parquet" -> Run.parquet(session, perfPath, acqPath) + "csv" -> { () => Run.csv(session, perfPath, acqPath) }, + "orc" -> { () => Run.orc(session, perfPath, acqPath) }, + "parquet" -> { () => Run.parquet(session, perfPath, acqPath) } ) val format = args.lift(4).getOrElse("parquet") - if (!dataFrameFormatMap.contains(format)) { + val contains = dataFrameFormatMap.contains(format) + if (!contains) { System.err.println(s"Invalid input format $format, expected one of csv, orc, parquet") System.exit(1) } - 0.until(10).foreach( _ => 
dataFrameFormatMap(format).write.mode("overwrite").parquet(output)) + val runFunc = dataFrameFormatMap(format) + 0.until(10).foreach( _ => runFunc().write.mode("overwrite").parquet(output)) } } From 63a2e3df1255f09b550d56103525621cbf6bfe56 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Tue, 2 Mar 2021 11:19:57 -0600 Subject: [PATCH 13/28] Have most of range partitioning run on the GPU (#1796) Signed-off-by: Robert (Bobby) Evans --- .../src/main/python/sort_test.py | 14 + .../src/main/python/window_function_test.py | 1 + .../nvidia/spark/rapids/GpuColumnVector.java | 30 +++ .../spark/rapids/GpuColumnarToRowExec.scala | 53 ++-- .../nvidia/spark/rapids/GpuOverrides.scala | 21 +- .../spark/rapids/GpuRangePartitioner.scala | 192 ++++++------- .../spark/rapids/GpuRangePartitioning.scala | 106 +------- .../spark/rapids/GpuRowToColumnarExec.scala | 16 +- .../com/nvidia/spark/rapids/GpuSortExec.scala | 11 +- .../nvidia/spark/rapids/SamplingUtils.scala | 253 +++++++++++++++--- .../com/nvidia/spark/rapids/SortUtils.scala | 36 +-- .../spark/rapids/SpillableColumnarBatch.scala | 6 +- .../execution/GpuShuffleExchangeExec.scala | 8 +- .../sql/rapids/execution/TrampolineUtil.scala | 2 + 14 files changed, 412 insertions(+), 337 deletions(-) diff --git a/integration_tests/src/main/python/sort_test.py b/integration_tests/src/main/python/sort_test.py index 40874f10c2e..3c07aecaf12 100644 --- a/integration_tests/src/main/python/sort_test.py +++ b/integration_tests/src/main/python/sort_test.py @@ -81,3 +81,17 @@ def test_orderby_with_processing_and_limit(data_gen): assert_gpu_and_cpu_are_equal_collect( # avoid ambiguity in the order by statement for floating point by including a as a backup ordering column lambda spark : unary_op_df(spark, data_gen).orderBy(f.lit(100) - f.col('a'), f.col('a')).limit(100)) + +# We are not trying all possibilities, just doing a few with numbers so the query works. +@pytest.mark.parametrize('data_gen', [byte_gen, long_gen, float_gen], ids=idfn) +def test_single_orderby_with_skew(data_gen): + # When doing range partitioning the upstream data is sampled to try and get the bounds for cutoffs. + # If the data comes back with skewed partitions then those partitions will be resampled for more data. + # This is to try and trigger it to happen. + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, data_gen)\ + .selectExpr('a', 'random(1) > 0.5 as b')\ + .repartition(f.col('b'))\ + .orderBy(f.col('a'))\ + .selectExpr('a'), + conf = allow_negative_scale_of_decimal_conf) diff --git a/integration_tests/src/main/python/window_function_test.py b/integration_tests/src/main/python/window_function_test.py index e6e92909502..d3372f83e35 100644 --- a/integration_tests/src/main/python/window_function_test.py +++ b/integration_tests/src/main/python/window_function_test.py @@ -241,6 +241,7 @@ def _gen_data_for_collect(nullable=True): ['child_decimal', DecimalGen(precision=8, scale=3)]]))] + _collect_sql_string =\ ''' select diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java index ac48d7740a4..064cc437a30 100644 --- a/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java +++ b/sql-plugin/src/main/java/com/nvidia/spark/rapids/GpuColumnVector.java @@ -57,6 +57,20 @@ public static synchronized void debug(String name, Table table) { } } + /** + * Print to standard error the contents of a table. 
Note that this should never be + * called from production code, as it is very slow. Also note that this is not production + * code. You might need/want to update how the data shows up or add in support for more + * types as this really is just for debugging. + * @param name the name of the table to print out. + * @param cb the batch to print out. + */ + public static synchronized void debug(String name, ColumnarBatch cb) { + try (Table table = from(cb)) { + debug(name, table); + } + } + /** * Print to standard error the contents of a column. Note that this should never be * called from production code, as it is very slow. Also note that this is not production @@ -125,6 +139,22 @@ public static synchronized void debug(String name, HostColumnVector hostCol) { System.err.println(i + " " + hostCol.getBoolean(i)); } } + } else if (DType.FLOAT64.equals(type)) { + for (int i = 0; i < hostCol.getRowCount(); i++) { + if (hostCol.isNull(i)) { + System.err.println(i + " NULL"); + } else { + System.err.println(i + " " + hostCol.getDouble(i)); + } + } + } else if (DType.FLOAT32.equals(type)) { + for (int i = 0; i < hostCol.getRowCount(); i++) { + if (hostCol.isNull(i)) { + System.err.println(i + " NULL"); + } else { + System.err.println(i + " " + hostCol.getFloat(i)); + } + } } else { System.err.println("TYPE " + type + " NOT SUPPORTED FOR DEBUG PRINT"); } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuColumnarToRowExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuColumnarToRowExec.scala index 8028525e594..bb9858f70d2 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuColumnarToRowExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuColumnarToRowExec.scala @@ -19,6 +19,7 @@ package com.nvidia.spark.rapids import scala.collection.mutable.Queue import ai.rapids.cudf.{HostColumnVector, NvtxColor, Table} +import com.nvidia.spark.rapids.GpuColumnarToRowExecParent.makeIteratorFunc import org.apache.spark.TaskContext import org.apache.spark.rdd.RDD @@ -257,46 +258,50 @@ abstract class GpuColumnarToRowExecParent(child: SparkPlan, val exportColumnarRd val numInputBatches = gpuLongMetric(NUM_INPUT_BATCHES) val totalTime = gpuLongMetric(TOTAL_TIME) - // This avoids calling `output` in the RDD closure, so that we don't need to include the entire - // plan (this) in the closure. - val localOutput = this.output + val f = makeIteratorFunc(child.output, numOutputRows, numInputBatches, totalTime) - val f = if (CudfRowTransitions.areAllSupported(child.output) && + val cdata = child.executeColumnar() + if (exportColumnarRdd) { + // If we are exporting columnar rdd we need an easy way for the code that walks the + // RDDs to know where the columnar to row transition is happening. + GpuColumnToRowMapPartitionsRDD.mapPartitions(cdata, f) + } else { + cdata.mapPartitions(f) + } + } +} + +object GpuColumnarToRowExecParent { + def unapply(arg: GpuColumnarToRowExecParent): Option[(SparkPlan, Boolean)] = { + Option(Tuple2(arg.child, arg.exportColumnarRdd)) + } + + def makeIteratorFunc( + output: Seq[Attribute], + numOutputRows: GpuMetric, + numInputBatches: GpuMetric, + totalTime: GpuMetric): Iterator[ColumnarBatch] => Iterator[InternalRow] = { + if (CudfRowTransitions.areAllSupported(output) && // For a small number of columns it is still best to do it the original way - child.output.length > 4 && + output.length > 4 && // The cudf kernel only supports up to 1.5 KB per row which means at most 184 double/long // values. 
Spark by default limits codegen to 100 fields "spark.sql.codegen.maxFields". // So, we are going to be cautious and start with that until we have tested it more. - child.output.length < 100) { + output.length < 100) { (batches: Iterator[ColumnarBatch]) => { // UnsafeProjection is not serializable so do it on the executor side - val toUnsafe = UnsafeProjection.create(localOutput, localOutput) - new AcceleratedColumnarToRowIterator(localOutput, + val toUnsafe = UnsafeProjection.create(output, output) + new AcceleratedColumnarToRowIterator(output, batches, numInputBatches, numOutputRows, totalTime).map(toUnsafe) } } else { (batches: Iterator[ColumnarBatch]) => { // UnsafeProjection is not serializable so do it on the executor side - val toUnsafe = UnsafeProjection.create(localOutput, localOutput) + val toUnsafe = UnsafeProjection.create(output, output) new ColumnarToRowIterator(batches, numInputBatches, numOutputRows, totalTime).map(toUnsafe) } } - - val cdata = child.executeColumnar() - if (exportColumnarRdd) { - // If we are exporting columnar rdd we need an easy way for the code that walks the - // RDDs to know where the columnar to row transition is happening. - GpuColumnToRowMapPartitionsRDD.mapPartitions(cdata, f) - } else { - cdata.mapPartitions(f) - } - } -} - -object GpuColumnarToRowExecParent { - def unapply(arg: GpuColumnarToRowExecParent): Option[(SparkPlan, Boolean)] = { - Option(Tuple2(arg.child, arg.exportColumnarRdd)) } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 37a093c746b..3ae3b84d8cc 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -2387,29 +2387,10 @@ object GpuOverrides { override val childExprs: Seq[BaseExprMeta[_]] = rp.ordering.map(GpuOverrides.wrapExpr(_, conf, Some(this))) - override def tagPartForGpu(): Unit = { - def isSortOrderSimpleEnough(so: SortOrder): Boolean = so.child match { - case _: AttributeReference => true - case _ => false - } - // Once https://github.com/NVIDIA/spark-rapids/issues/1730 is fixed this check should be - // removed - if (!rp.ordering.forall(isSortOrderSimpleEnough)) { - willNotWorkOnGpu("computation is not supported for sort order in range partitioning") - } - } - override def convertToGpu(): GpuPartitioning = { if (rp.numPartitions > 1) { val gpuOrdering = childExprs.map(_.convertToGpu()).asInstanceOf[Seq[SortOrder]] - val tmp = gpuOrdering.flatMap { ord => - ord.child.references.map { field => - StructField(field.name, field.dataType) - } - } - val schema = new StructType(tmp.toArray) - - GpuRangePartitioning(gpuOrdering, rp.numPartitions, schema)(new GpuRangePartitioner) + GpuRangePartitioning(gpuOrdering, rp.numPartitions) } else { GpuSinglePartitioning(childExprs.map(_.convertToGpu())) } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRangePartitioner.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRangePartitioner.scala index a6b4efddd3f..e871fcb26b2 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRangePartitioner.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRangePartitioner.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
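For reference, a tiny standalone Scala sketch (hypothetical names, not plugin code) of the arithmetic and the column-count window behind the path selection above: 1536 bytes per row divided by 8-byte fixed-width values gives roughly 192 columns, the 184 figure quoted in the comment additionally allows for per-row overhead, and Spark's default codegen cap of 100 fields is taken as the conservative upper limit.

object RowPathHeuristicSketch {
  // Assumed figures from the comment above: ~1.5 KB per row in the cudf kernel,
  // 8 bytes per long/double value.
  val cudfRowLimitBytes = 1536
  val bytesPerFixedWidthValue = 8

  // Raw upper bound before any per-row overhead is considered (~192).
  def maxFixedWidthColumns: Int = cudfRowLimitBytes / bytesPerFixedWidthValue

  // Mirrors the shape of the decision: accelerated path only for "medium width" schemas.
  def useAcceleratedPath(numCols: Int, allTypesSupported: Boolean): Boolean =
    allTypesSupported && numCols > 4 && numCols < 100

  def main(args: Array[String]): Unit = {
    println(s"raw upper bound from 1.5 KB rows: $maxFixedWidthColumns fixed-width columns")
    println(s"5 supported columns   -> accelerated: ${useAcceleratedPath(5, allTypesSupported = true)}")
    println(s"120 supported columns -> accelerated: ${useAcceleratedPath(120, allTypesSupported = true)}")
  }
}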
@@ -17,21 +17,17 @@ package com.nvidia.spark.rapids import scala.collection.mutable.ArrayBuffer -import scala.reflect.ClassTag import scala.util.hashing.byteswap32 -import com.nvidia.spark.rapids.GpuColumnVector.GpuColumnarBatchBuilder - import org.apache.spark.rdd.{PartitionPruningRDD, RDD} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Attribute, BoundReference, SortOrder, UnsafeProjection} +import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.expressions.codegen.LazilyGeneratedOrdering -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.rapids.execution.TrampolineUtil +import org.apache.spark.sql.types.{DataType, IntegerType} import org.apache.spark.sql.vectorized.ColumnarBatch -import org.apache.spark.util.MutablePair -class GpuRangePartitioner extends Serializable { - var rangeBounds: Array[InternalRow] = _ +object GpuRangePartitioner { /** * Sketches the input RDD via reservoir sampling on each partition. * @@ -39,21 +35,37 @@ class GpuRangePartitioner extends Serializable { * @param sampleSizePerPartition max sample size per partition * @return (total number of items, an array of (partitionId, number of items, sample)) */ - def sketch[K: ClassTag]( - rdd: RDD[K], - sampleSizePerPartition: Int): (Long, Array[(Int, Long, Array[K])]) = { + private[this] def sketch( + rdd: RDD[ColumnarBatch], + sampleSizePerPartition: Int, + sorter: GpuSorter): (Long, Array[(Int, Long, Array[InternalRow])]) = { val shift = rdd.id + val toRowConverter = GpuColumnarToRowExecParent.makeIteratorFunc(sorter.projectedBatchSchema, + NoopMetric, NoopMetric, NoopMetric) val sketched = rdd.mapPartitionsWithIndex { (idx, iter) => - iter.map(unsafeRow => unsafeRow.asInstanceOf[InternalRow]) val seed = byteswap32(idx ^ (shift << 16)) val (sample, n) = SamplingUtils.reservoirSampleAndCount( - iter, sampleSizePerPartition, seed) + iter, sampleSizePerPartition, sorter, toRowConverter, seed) Iterator((idx, n, sample)) }.collect() val numItems = sketched.map(_._2).sum (numItems, sketched) } + private[this] def randomResample( + rdd: RDD[ColumnarBatch], + fraction: Double, + seed: Int, + sorter: GpuSorter): Array[InternalRow] = { + val toRowConverter = GpuColumnarToRowExecParent.makeIteratorFunc(sorter.projectedBatchSchema, + NoopMetric, NoopMetric, NoopMetric) + rdd.mapPartitions { iter => + val sample = SamplingUtils.randomResample( + iter, fraction, sorter, toRowConverter, seed) + Iterator(sample) + }.collect().flatten + } + /** * Determines the bounds for range partitioning from candidates with weights indicating how many * items each represents. Usually this is 1 over the probability used to sample this candidate. 
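As an aside, a simplified CPU-only sketch of the weighted cut-point selection that determineBounds performs, using Int keys in place of InternalRow and hypothetical names. Candidates carry weights such as n/sample.length for regular partitions or 1/fraction for re-sampled ones, as in the createRangeBounds code in this patch, and a bound is emitted each time the cumulative weight crosses a step of totalWeight/partitions.

object WeightedBoundsSketch {
  def determineBounds(candidates: Seq[(Int, Float)], partitions: Int): Array[Int] = {
    val ordered = candidates.sortBy(_._1)
    val sumWeights = ordered.map(_._2.toDouble).sum
    val step = sumWeights / partitions
    val bounds = scala.collection.mutable.ArrayBuffer.empty[Int]
    var cumWeight = 0.0
    var target = step
    var previous: Option[Int] = None
    for ((key, weight) <- ordered if bounds.size < partitions - 1) {
      cumWeight += weight
      // emit a cut point each time the cumulative weight passes a step,
      // skipping keys equal to the previous bound
      if (cumWeight >= target && !previous.contains(key)) {
        bounds += key
        target += step
        previous = Some(key)
      }
    }
    bounds.toArray
  }

  def main(args: Array[String]): Unit = {
    // samples weighted 2x stand for twice as many rows as the others
    val candidates = Seq(1 -> 1f, 3 -> 2f, 5 -> 1f, 7 -> 2f, 9 -> 1f, 11 -> 2f)
    println(determineBounds(candidates, 3).mkString(", ")) // 3, 7
  }
}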
@@ -62,19 +74,20 @@ class GpuRangePartitioner extends Serializable { * @param partitions number of partitions * @return selected bounds */ - def determineBounds[K: Ordering : ClassTag](candidates: ArrayBuffer[(K, Float)], - partitions: Int): Array[K] = { - val ordering = implicitly[Ordering[K]] - val ordered = candidates.sortBy(_._1) + private[this] def determineBounds( + candidates: ArrayBuffer[(InternalRow, Float)], + partitions: Int, + ordering: Ordering[InternalRow]): Array[InternalRow] = { + val ordered = candidates.sortBy(_._1)(ordering) val numCandidates = ordered.size val sumWeights = ordered.map(_._2.toDouble).sum val step = sumWeights / partitions var cumWeight = 0.0 var target = step - val bounds = ArrayBuffer.empty[K] + val bounds = ArrayBuffer.empty[InternalRow] var i = 0 var j = 0 - var previousBound = Option.empty[K] + var previousBound = Option.empty[InternalRow] while ((i < numCandidates) && (j < partitions - 1)) { val (key, weight) = ordered(i) cumWeight += weight @@ -92,32 +105,29 @@ class GpuRangePartitioner extends Serializable { bounds.toArray } - def createRangeBounds(partitions: Int, gpuOrdering: Seq[SortOrder], rdd: RDD[ColumnarBatch], - outputAttributes: Seq[Attribute], - samplePointsPerPartitionHint: Int): Unit = { - val sparkShims = ShimLoader.getSparkShims - val orderingAttributes = gpuOrdering.zipWithIndex.map { case (ord, i) => - sparkShims.copySortOrderWithNewChild(ord, BoundReference(i, ord.dataType, ord.nullable)) - } - implicit val ordering: LazilyGeneratedOrdering = new LazilyGeneratedOrdering(orderingAttributes) - val rowsRDD = rddForSampling(partitions, gpuOrdering, rdd, outputAttributes) + def createRangeBounds(partitions: Int, + sorter: GpuSorter, + rdd: RDD[ColumnarBatch], + samplePointsPerPartitionHint: Int): Array[InternalRow] = { // We allow partitions = 0, which happens when sorting an empty RDD under the default settings. require(partitions >= 0, s"Number of partitions cannot be negative but found $partitions.") require(samplePointsPerPartitionHint > 0, s"Sample points per partition must be greater than 0 but found $samplePointsPerPartitionHint") + implicit val ordering: LazilyGeneratedOrdering = new LazilyGeneratedOrdering(sorter.cpuOrdering) + // An array of upper bounds for the first (partitions - 1) partitions val rangeBounds : Array[InternalRow] = { if (partitions < 1) { - Array.empty[InternalRow] + Array.empty } else { // This is the sample size we need to have roughly balanced output partitions, capped at 1M. // Cast to double to avoid overflowing ints or longs val sampleSize = math.min(samplePointsPerPartitionHint.toDouble * partitions, 1e6) // Assume the input partitions are roughly balanced and over-sample a little bit. - val sampleSizePerPartition = math.ceil(3.0 * sampleSize / rowsRDD.partitions.length).toInt - val (numItems, sketched) = sketch(rowsRDD.map(_._1), sampleSizePerPartition) + val sampleSizePerPartition = math.ceil(3.0 * sampleSize / rdd.partitions.length).toInt + val (numItems, sketched) = sketch(rdd, sampleSizePerPartition, sorter) if (numItems == 0L) { Array.empty } else { @@ -140,98 +150,68 @@ class GpuRangePartitioner extends Serializable { } if (imbalancedPartitions.nonEmpty) { // Re-sample imbalanced partitions with the desired sampling probability. 
- val imbalanced = new PartitionPruningRDD(rowsRDD.map(_._1), - imbalancedPartitions.contains) - val seed = byteswap32(-rowsRDD.id - 1) - val reSampled = imbalanced.sample(withReplacement = false, fraction, seed).collect() + val imbalanced = new PartitionPruningRDD(rdd, imbalancedPartitions.contains) + val seed = byteswap32(-rdd.id - 1) + val reSampled = randomResample(imbalanced, fraction, seed, sorter) val weight = (1.0 / fraction).toFloat candidates ++= reSampled.map(x => (x, weight)) } - determineBounds(candidates, math.min(partitions, candidates.size)) + determineBounds(candidates, math.min(partitions, candidates.size), ordering) } } } - this.rangeBounds = rangeBounds.asInstanceOf[Array[InternalRow]] + rangeBounds.asInstanceOf[Array[InternalRow]] } +} - def rddForSampling(partitions: Int, gpuOrdering: Seq[SortOrder], rdd: RDD[ColumnarBatch], - outputAttributes: Seq[Attribute]) : RDD[MutablePair[InternalRow, Null]] = { +case class GpuRangePartitioner( + rangeBounds: Array[InternalRow], + sorter: GpuSorter) extends GpuExpression with GpuPartitioning { - val sortingExpressions = gpuOrdering - lazy val toUnsafe = UnsafeProjection.create( - sortingExpressions.map(_.child), - outputAttributes) - val rowsRDD = rdd.mapPartitions { - batches => { - new Iterator[InternalRow] { - @transient var cb: ColumnarBatch = _ - var it: java.util.Iterator[InternalRow] = _ + private lazy val converters = new GpuRowToColumnConverter( + TrampolineUtil.fromAttributes(sorter.projectedBatchSchema)) - private def closeCurrentBatch(): Unit = { - if (cb != null) { - cb.close() - cb = null - } - } - - def loadNextBatch(): Unit = { - closeCurrentBatch() - if (batches.hasNext) { - val devCb = batches.next() - cb = try { - new ColumnarBatch(GpuColumnVector.extractColumns(devCb).map(_.copyToHost()), - devCb.numRows()) - } finally { - devCb.close() - } - it = cb.rowIterator() - } - } + override def nullable: Boolean = false + override def dataType: DataType = IntegerType + override def children: Seq[Expression] = Seq.empty + override val numPartitions: Int = rangeBounds.length + 1 - override def hasNext: Boolean = { - val itHasNext = it != null && it.hasNext - if (!itHasNext) { - loadNextBatch() - it != null && it.hasNext - } else { - itHasNext - } + private[this] def computeBoundsAndClose(cb: ColumnarBatch): (Array[Int], ColumnarBatch) = { + withResource(cb) { cb => + withResource( + sorter.appendProjectedAndSort(cb, NoopMetric)) { sortedTbl => + val parts = withResource( + GpuColumnVector.from(sortedTbl, sorter.projectedBatchTypes)) { sorted => + val retCv = withResource(converters.convertBatch(rangeBounds, + TrampolineUtil.fromAttributes(sorter.projectedBatchSchema))) { ranges => + sorter.upperBound(sorted, ranges) } - - override def next(): InternalRow = { - if (it == null || !it.hasNext) { - loadNextBatch() - } - if (it == null) { - throw new NoSuchElementException() - } - val a = it.next() - a + withResource(retCv) { retCv => + // The first entry must always be 0, which upper bound is not doing + Array(0) ++ GpuColumnVector.toIntArray(retCv) } - }.map(toUnsafe) + } + (parts, sorter.removeProjectedColumns(sortedTbl)) } } - rowsRDD.mapPartitions(it => it.map(ir => MutablePair(ir, null))) } - def getRangesBatch(schema: StructType, rangeBounds: Array[InternalRow]) : ColumnarBatch = { - val rangeBoundsRowsIter = rangeBounds.toIterator - if (!rangeBoundsRowsIter.hasNext) { - throw new NoSuchElementException("Ranges columnar Batch is empty") - } - val builders = new GpuColumnarBatchBuilder(schema, rangeBounds.length, 
null) - val converters = new GpuRowToColumnConverter(schema) - try { - var rowCount = 0 - while (rangeBoundsRowsIter.hasNext) { - val row = rangeBoundsRowsIter.next() - converters.convert(row.asInstanceOf[InternalRow], builders) - rowCount += 1 + override def columnarEval(batch: ColumnarBatch): Any = { + if (rangeBounds.nonEmpty) { + val (parts, sortedBatch) = computeBoundsAndClose(batch) + withResource(sortedBatch) { sortedBatch => + val partitionColumns = GpuColumnVector.extractColumns(sortedBatch) + val slicedCb = sliceInternalGpuOrCpu(sortedBatch.numRows(), parts, partitionColumns) + slicedCb.zipWithIndex.filter(_._1 != null) + } + } else { + withResource(batch) { cb => + // Nothing needs to be sliced but a contiguous table is needed for GPU shuffle which + // slice will produce. + val sliced = sliceInternalGpuOrCpu(cb.numRows, Array(0), + GpuColumnVector.extractColumns(cb)) + sliced.zipWithIndex } - // The returned batch will be closed by the consumer of it - builders.build(rowCount) - } finally { - builders.close() } } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRangePartitioning.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRangePartitioning.scala index 5cad9ad01f1..ccd78aa9f87 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRangePartitioning.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRangePartitioning.scala @@ -16,15 +16,9 @@ package com.nvidia.spark.rapids -import scala.collection.mutable.ArrayBuffer - -import ai.rapids.cudf.{ColumnVector, Table} -import com.nvidia.spark.rapids.RapidsPluginImplicits._ - -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.SortOrder import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution, OrderedDistribution} -import org.apache.spark.sql.types.{DataType, IntegerType, StructType} +import org.apache.spark.sql.types.{DataType, IntegerType} import org.apache.spark.sql.vectorized.ColumnarBatch /** @@ -35,17 +29,12 @@ import org.apache.spark.sql.vectorized.ColumnarBatch * @note The actual number of partitions created might not be the same * as the `numPartitions` parameter, in the case where the number of sampled records is less than * the value of `partitions`. + * + * The GpuRangePartitioner is where all of the processing actually happens. 
*/ - case class GpuRangePartitioning( gpuOrdering: Seq[SortOrder], - numPartitions: Int, - schema: StructType)(val part: GpuRangePartitioner) - extends GpuExpression with GpuPartitioning { - - override def otherCopyArgs: Seq[AnyRef] = Seq(part) - - var rangeBounds: Array[InternalRow] = _ + numPartitions: Int) extends GpuExpression with GpuPartitioning { override def children: Seq[SortOrder] = gpuOrdering override def nullable: Boolean = false @@ -81,89 +70,6 @@ case class GpuRangePartitioning( } } - override def columnarEval(batch: ColumnarBatch): Any = { - var rangesBatch: ColumnarBatch = null - var rangesTbl: Table = null - var sortedTbl: Table = null - var slicedSortedTbl: Table = null - var finalSortedCb: ColumnarBatch = null - var retCv: ColumnVector = null - var inputCvs: Seq[GpuColumnVector] = null - var inputTbl: Table = null - var partitionColumns: Array[GpuColumnVector] = null - var parts: Array[Int] = Array(0) - var slicedCb: Array[ColumnarBatch] = null - val descFlags = new ArrayBuffer[Boolean]() - val nullFlags = new ArrayBuffer[Boolean]() - val numSortCols = gpuOrdering.length - - val orderByArgs: Seq[Table.OrderByArg] = gpuOrdering.zipWithIndex.map { case (order, index) => - val nullsSmallest = SortUtils.areNullsSmallest(order) - if (order.isAscending) { - descFlags += false - nullFlags += nullsSmallest - Table.asc(index, nullsSmallest) - } else { - descFlags += true - nullFlags += nullsSmallest - Table.desc(index, nullsSmallest) - } - } - - try { - //get Inputs table bound - inputCvs = SortUtils.evaluateForSort(batch, gpuOrdering) - inputTbl = new Table(inputCvs.map(_.getBase): _*) - //sort incoming batch to compare with ranges - sortedTbl = inputTbl.orderBy(orderByArgs: _*) - val sortColumns = (0 until numSortCols).map(sortedTbl.getColumn(_)) - //get the table for upper bound calculation - slicedSortedTbl = new Table(sortColumns: _*) - //get the final column batch, remove the sort order sortColumns - val outputTypes = GpuColumnVector.extractTypes(batch) - finalSortedCb = GpuColumnVector.from(sortedTbl, outputTypes, - numSortCols, sortedTbl.getNumberOfColumns) - val numRows = finalSortedCb.numRows - partitionColumns = GpuColumnVector.extractColumns(finalSortedCb) - // get the ranges table and get upper bounds if possible - // rangeBounds can be empty or of length < numPartitions in cases where the samples are less - // than numPartitions. The way Spark handles it is by allowing the returned partitions to be - // rangeBounds.length + 1 which is essentially what happens here when we do upperBound on the - // ranges table, or return one partition. 
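To illustrate the comment above, a self-contained CPU sketch with hypothetical names (the real path does this columnar on the GPU through the sorter's upperBound) showing how an upper-bound search against the sorted range bounds yields rangeBounds.length + 1 contiguous slices, with the leading 0 offset added by hand because upper bound never produces it.

object UpperBoundPartitionSketch {
  // index of the first element in sortedKeys that is strictly greater than bound
  private def upperBound(sortedKeys: Array[Int], bound: Int): Int = {
    var lo = 0
    var hi = sortedKeys.length
    while (lo < hi) {
      val mid = (lo + hi) >>> 1
      if (sortedKeys(mid) <= bound) lo = mid + 1 else hi = mid
    }
    lo
  }

  def splitOffsets(sortedKeys: Array[Int], rangeBounds: Array[Int]): Array[Int] =
    Array(0) ++ rangeBounds.map(upperBound(sortedKeys, _)) // first offset is always 0

  def main(args: Array[String]): Unit = {
    val sortedKeys = Array(1, 2, 4, 4, 7, 9, 12)
    val bounds = Array(3, 7)                      // 2 bounds -> 3 output partitions
    val offsets = splitOffsets(sortedKeys, bounds)
    println(offsets.mkString(", "))               // 0, 2, 5
    val slices = offsets.zip(offsets.drop(1) :+ sortedKeys.length)
      .map { case (s, e) => sortedKeys.slice(s, e).mkString("[", ",", "]") }
    println(slices.mkString(" "))                 // [1,2] [4,4,7] [9,12]
  }
}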
- if (part.rangeBounds.nonEmpty) { - rangesBatch = part.getRangesBatch(schema, part.rangeBounds) - rangesTbl = GpuColumnVector.from(rangesBatch) - retCv = slicedSortedTbl.upperBound(nullFlags.toArray, rangesTbl, descFlags.toArray) - parts = parts ++ GpuColumnVector.toIntArray(retCv) - } - slicedCb = sliceInternalGpuOrCpu(numRows, parts, partitionColumns) - } finally { - batch.close() - if (inputCvs != null) { - inputCvs.safeClose() - } - if (inputTbl != null) { - inputTbl.close() - } - if (sortedTbl != null) { - sortedTbl.close() - } - if (slicedSortedTbl != null) { - slicedSortedTbl.close() - } - if (rangesBatch != null) { - rangesBatch.close() - } - if (rangesTbl != null) { - rangesTbl.close() - } - if (retCv != null) { - retCv.close() - } - if (partitionColumns != null) { - partitionColumns.safeClose() - } - } - slicedCb.zipWithIndex.filter(_._1 != null) - } + override def columnarEval(batch: ColumnarBatch): Any = + throw new IllegalStateException("This cannot be executed") } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRowToColumnarExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRowToColumnarExec.scala index c4f7dba6ed6..cff73275ee4 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRowToColumnarExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRowToColumnarExec.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.rapids.execution.TrampolineUtil import org.apache.spark.sql.types._ import org.apache.spark.sql.vectorized.ColumnarBatch -private class GpuRowToColumnConverter(schema: StructType) extends Serializable { +private class GpuRowToColumnConverter(schema: StructType) extends Serializable with Arm { private val converters = schema.fields.map { f => GpuRowToColumnConverter.getConverterForType(f.dataType, f.nullable) } @@ -48,6 +48,20 @@ private class GpuRowToColumnConverter(schema: StructType) extends Serializable { } bytes } + + /** + * Convert an array of rows into a batch. Please note that this does not do bounds or size + * checking so keep the size of the batch small. + * @param rows the rows to convert. + * @param schema the schema of the rows. + * @return The batch on the GPU. + */ + final def convertBatch(rows: Array[InternalRow], schema: StructType): ColumnarBatch = { + val numRows = rows.length + val builders = new GpuColumnarBatchBuilder(schema, numRows, null) + rows.foreach(convert(_, builders)) + builders.build(numRows) + } } private object GpuRowToColumnConverter { diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuSortExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuSortExec.scala index 3bb9442547f..d5d530da93e 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuSortExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuSortExec.scala @@ -23,7 +23,6 @@ import scala.collection.mutable.ArrayBuffer import ai.rapids.cudf.{ColumnVector, ContiguousTable, NvtxColor, NvtxRange, Table} import com.nvidia.spark.rapids.GpuColumnVector.GpuColumnarBatchBuilder import com.nvidia.spark.rapids.GpuMetric._ -import com.nvidia.spark.rapids.StorageTier.StorageTier import org.apache.spark.TaskContext import org.apache.spark.rdd.RDD @@ -431,10 +430,8 @@ case class GpuOutOfCoreSortIterator( // The data is only fully sorted if there is nothing pending that is smaller than it // so get the next "smallest" row that is pending. 
val cutoff = pending.peek().firstRow - val builders = new GpuColumnarBatchBuilder( - TrampolineUtil.fromAttributes(sorter.projectedBatchSchema), 1, null) - converters.convert(cutoff, builders) - withResource(builders.build(1)) { cutoffCb => + withResource(converters.convertBatch(Array(cutoff), + TrampolineUtil.fromAttributes(sorter.projectedBatchSchema))) { cutoffCb => withResource(sorter.upperBound(mergedBatch, cutoffCb)) { result => withResource(result.copyToHost()) { hostResult => assert(hostResult.getRowCount == 1) @@ -448,7 +445,9 @@ case class GpuOutOfCoreSortIterator( pending.isEmpty)) { // This is a special case where we have everything we need to output already so why // bother with another contig split just to put it into the queue - return Some(GpuColumnVector.incRefCounts(mergedBatch)) + withResource(GpuColumnVector.from(mergedBatch)) { mergedTbl => + return Some(sorter.removeProjectedColumns(mergedTbl)) + } } withResource(GpuColumnVector.from(mergedBatch)) { mergedTbl => splitAfterSortAndSave(mergedTbl, sortSplitOffset) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SamplingUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SamplingUtils.scala index af982b79f41..6fb754468cc 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SamplingUtils.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SamplingUtils.scala @@ -1,4 +1,6 @@ /* + * Copyright (c) 2021, NVIDIA CORPORATION. + * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,62 +22,231 @@ package com.nvidia.spark.rapids import java.nio.ByteBuffer import java.util.{Random => JavaRandom} -import scala.reflect.ClassTag +import scala.collection.mutable import scala.util.Random import scala.util.hashing.MurmurHash3 -import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import ai.rapids.cudf.{ColumnVector, Table} -object SamplingUtils { +import org.apache.spark.TaskContext +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.vectorized.ColumnarBatch + +object SamplingUtils extends Arm { + private def selectWithoutReplacementFrom(count: Int, rand: Random, cb: ColumnarBatch): Table = { + val rows = cb.numRows() + assert(count <= rows) + if (rows == count) { + GpuColumnVector.from(cb) + } else if (count < rows/2) { + // Randomly select a gather map, without replacement so use a set + val selected = mutable.Set[Int]() + while (selected.size < count) { + selected += rand.nextInt(rows) + } + withResource(ColumnVector.fromInts(selected.toSeq: _*)) { gatherMap => + withResource(GpuColumnVector.from(cb)) { tab => + tab.gather(gatherMap) + } + } + } else { + // Randomly select rows to remove, without replacement so use a set + val toRemove = rows - count; + val notSelected = mutable.Set[Int]() + while (notSelected.size < toRemove) { + notSelected += rand.nextInt(rows) + } + val selected = (0 until rows).filter(notSelected.contains) + withResource(ColumnVector.fromInts(selected: _*)) { gatherMap => + withResource(GpuColumnVector.from(cb)) { tab => + tab.gather(gatherMap) + } + } + } + } /** - * Reservoir sampling implementation that also returns the input size. - * - * @param input input size - * @param k reservoir size - * @param seed random seed - * @return (samples, input size) + * Random sampling without replacement. + * @param input iterator to feed batches for sampling. 
+ * @param fraction the percentage of rows to randomly select + * @param sorter used to add rows needed for sorting on the CPU later. The sorter should be + * setup for the schema of the input data and the output sampled rows will have + * any needed rows added to them as the sorter needs to. + * @param converter used to convert a batch of data to rows. This should have been setup to + * convert to rows based of the expected output for the sorter. + * @param seed the seed to the random number generator + * @return the sampled rows */ - def reservoirSampleAndCount[T: ClassTag]( - input: Iterator[T], - k: Int, - seed: Long = Random.nextLong()) : (Array[T], Long) = { - val reservoir = new Array[T](k) - // Put the first k elements in the reservoir. - var i = 0 - while (i < k && input.hasNext) { - val copyRow = input.next().asInstanceOf[UnsafeRow].copy() - reservoir(i) = copyRow.asInstanceOf[T] - i += 1 + def randomResample( + input: Iterator[ColumnarBatch], + fraction: Double, + sorter: GpuSorter, + converter: Iterator[ColumnarBatch] => Iterator[InternalRow], + seed: Long = Random.nextLong()): Array[InternalRow] = { + val jRand = new XORShiftRandom(seed) + val rand = new Random(jRand) + var runningCb: SpillableColumnarBatch = null + var totalRowsSeen = 0L + var totalRowsCollected = 0L + while (input.hasNext) { + withResource(input.next()) { cb => + // For each batch we need to know how many rows to select from it + // and how many to throw away from the existing batch + val rowsInBatch = cb.numRows() + totalRowsSeen += rowsInBatch + val totalRowsWanted = (totalRowsSeen * fraction).toLong + val numRowsToSelectFromBatch = (totalRowsWanted - totalRowsCollected).toInt + withResource(selectWithoutReplacementFrom(numRowsToSelectFromBatch, rand, cb)) { selected => + totalRowsCollected += selected.getRowCount + if (runningCb == null) { + runningCb = SpillableColumnarBatch( + GpuColumnVector.from(selected, GpuColumnVector.extractTypes(cb)), + SpillPriorities.ACTIVE_ON_DECK_PRIORITY, + RapidsBuffer.defaultSpillCallback) + } else { + val concat = withResource(runningCb) { spb => + runningCb = null + withResource(spb.getColumnarBatch()) { cb => + withResource(GpuColumnVector.from(cb)) { table => + Table.concatenate(selected, table) + } + } + } + withResource(concat) { concat => + runningCb = SpillableColumnarBatch( + GpuColumnVector.from(concat, GpuColumnVector.extractTypes(cb)), + SpillPriorities.ACTIVE_ON_DECK_PRIORITY, + RapidsBuffer.defaultSpillCallback) + } + } + } + } + GpuSemaphore.releaseIfNecessary(TaskContext.get()) + } + if (runningCb == null) { + return Array.empty + } + // Getting a spilled batch will acquire the semaphore if needed + val cb = withResource(runningCb) { spb => + runningCb = null + spb.getColumnarBatch() } + val withSortColumns = withResource(cb) { cb => + sorter.appendProjectedColumns(cb) + } + // The reader will close withSortColumns, but if we want to really be paranoid we should + // add in a shutdown handler or something like that. + // Might even want to turn this into a utility function for collect etc. + val retIterator = converter(new Iterator[ColumnarBatch] { + var read = false + override def hasNext: Boolean = !read - // If we have consumed all the elements, return them. Otherwise do the replacement. - if (i < k) { - // If input size < k, trim the array to return only an array of input size. - val trimReservoir = new Array[T](i) - System.arraycopy(reservoir, 0, trimReservoir, 0, i) - (trimReservoir, i) - } else { - // If input size > k, continue the sampling process. 
- var l = i.toLong - val rand = new XORShiftRandom(seed) - while (input.hasNext) { - val item = input.next().asInstanceOf[UnsafeRow].copy() - l += 1 - // There are k elements in the reservoir, and the l-th element has been - // consumed. It should be chosen with probability k/l. The expression - // below is a random long chosen uniformly from [0,l) - val replacementIndex = (rand.nextDouble() * l).toLong - if (replacementIndex < k) { - reservoir(replacementIndex.toInt) = item.asInstanceOf[T] + override def next(): ColumnarBatch = { + read = true + withSortColumns + } + }).map(_.copy()) + retIterator.toArray + } + + /** + * Reservoir sampling implementation that also returns the input size. + * @param input iterator to feed batches for sampling. + * @param k the number of rows to randomly select. + * @param sorter used to add rows needed for sorting on the CPU later. The sorter should be + * setup for the schema of the input data and the output sampled rows will have + * any needed rows added to them as the sorter needs to. + * @param converter used to convert a batch of data to rows. This should have been setup to + * convert to rows based of the expected output for the sorter. + * @param seed the seed to the random number generator + * @return (samples, input size) + */ + def reservoirSampleAndCount( + input: Iterator[ColumnarBatch], + k: Int, + sorter: GpuSorter, + converter: Iterator[ColumnarBatch] => Iterator[InternalRow], + seed: Long = Random.nextLong()) : (Array[InternalRow], Long) = { + val jRand = new XORShiftRandom(seed) + val rand = new Random(jRand) + var runningCb: SpillableColumnarBatch = null + var numTotalRows = 0L + var rowsSaved = 0L + while (input.hasNext) { + withResource(input.next()) { cb => + // For each batch we need to know how many rows to select from it + // and how many to throw away from the existing batch + val rowsInBatch = cb.numRows() + val (numRowsToSelectFromBatch, rowsToDrop) = if (numTotalRows == 0) { + (Math.min(k, rowsInBatch), 0) + } else if (numTotalRows + rowsInBatch < k) { + (rowsInBatch, 0) + } else { + val v = (k * rowsInBatch.toDouble / (numTotalRows + rowsInBatch)).toInt + (v, v) + } + numTotalRows += rowsInBatch + withResource(selectWithoutReplacementFrom(numRowsToSelectFromBatch, rand, cb)) { selected => + if (runningCb == null) { + rowsSaved = selected.getRowCount + runningCb = SpillableColumnarBatch( + GpuColumnVector.from(selected, GpuColumnVector.extractTypes(cb)), + SpillPriorities.ACTIVE_ON_DECK_PRIORITY, + RapidsBuffer.defaultSpillCallback) + } else { + withResource(runningCb) { spb => + runningCb = null + withResource(spb.getColumnarBatch()) { cb => + val filtered = if (rowsToDrop > 0) { + selectWithoutReplacementFrom(cb.numRows() - rowsToDrop, rand, cb) + } else { + GpuColumnVector.from(cb) + } + val concat = withResource(filtered) { filtered => + Table.concatenate(selected, filtered) + } + withResource(concat) { concat => + rowsSaved = concat.getRowCount + runningCb = SpillableColumnarBatch( + GpuColumnVector.from(concat, GpuColumnVector.extractTypes(cb)), + SpillPriorities.ACTIVE_ON_DECK_PRIORITY, + RapidsBuffer.defaultSpillCallback) + } + } + } + } } } - (reservoir, l) + GpuSemaphore.releaseIfNecessary(TaskContext.get()) + } + if (runningCb == null) { + // Nothing to sort + return (Array.empty, numTotalRows) + } + // Getting a spilled batch will acquire the semaphore if needed + val cb = withResource(runningCb) { spb => + runningCb = null + spb.getColumnarBatch() } + val withSortColumns = withResource(cb) { cb => + 
sorter.appendProjectedColumns(cb) + } + // The reader will close withSortColumns, but if we want to really be paranoid we should + // add in a shutdown handler or something like that. + // Might even want to turn this into a utility function for collect etc. + val retIterator = converter(new Iterator[ColumnarBatch] { + var read = false + override def hasNext: Boolean = !read + + override def next(): ColumnarBatch = { + read = true + withSortColumns + } + }).map(_.copy()) + (retIterator.toArray, numTotalRows) } } - /** * This class implements a XORShift random number generator algorithm * Source: diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SortUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SortUtils.scala index cb6645db2fc..b4361d7631d 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SortUtils.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SortUtils.scala @@ -26,40 +26,6 @@ import org.apache.spark.sql.types.DataType import org.apache.spark.sql.vectorized.ColumnarBatch object SortUtils extends Arm { - private [this] def evaluateBoundExpressions[A <: GpuExpression](cb: ColumnarBatch, - boundExprs: Seq[A]): Seq[GpuColumnVector] = { - withResource(GpuProjectExec.project(cb, boundExprs)) { cb => - (0 until cb.numCols()).map(cb.column(_).asInstanceOf[GpuColumnVector].incRefCount()) - // because this processing has a side effect (inc ref count) we want to force - // the data to execute now, instead of lazily. To do this we first convert it - // to an array and then back to a sequence again. Seq does not have a force method - .toArray.toSeq - } - } - - /* - * This function takes the input batch and the bound sort order references and - * evaluates each column in case its an expression. It then appends the original columns - * after the sort key columns. The sort key columns will be dropped after sorting. 
- */ - def evaluateForSort(batch: ColumnarBatch, - boundInputReferences: Seq[SortOrder]): Seq[GpuColumnVector] = { - val sortCvs = new ArrayBuffer[GpuColumnVector](boundInputReferences.length) - val childExprs = boundInputReferences.map(_.child.asInstanceOf[GpuExpression]) - sortCvs ++= evaluateBoundExpressions(batch, childExprs) - val originalColumns = GpuColumnVector.extractColumns(batch) - originalColumns.foreach(_.incRefCount()) - sortCvs ++ originalColumns - } - - /* - * Return true if nulls are needed first and ordering is ascending and vice versa - */ - def areNullsSmallest(order: SortOrder): Boolean = { - (order.isAscending && order.nullOrdering == NullsFirst) || - (!order.isAscending && order.nullOrdering == NullsLast) - } - @scala.annotation.tailrec def extractReference(exp: Expression): Option[GpuBoundReference] = exp match { case r: GpuBoundReference => Some(r) @@ -97,7 +63,7 @@ object SortUtils extends Arm { * @param inputSchema The schema of the input data */ class GpuSorter( - sortOrder: Seq[SortOrder], + val sortOrder: Seq[SortOrder], inputSchema: Array[Attribute]) extends Arm with Serializable { /** diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SpillableColumnarBatch.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SpillableColumnarBatch.scala index 7f514fab491..c40b5c27b1e 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SpillableColumnarBatch.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SpillableColumnarBatch.scala @@ -16,6 +16,7 @@ package com.nvidia.spark.rapids +import org.apache.spark.TaskContext import org.apache.spark.sql.rapids.TempSpillBufferId import org.apache.spark.sql.types.DataType import org.apache.spark.sql.vectorized.ColumnarBatch @@ -54,8 +55,10 @@ trait SpillableColumnarBatch extends AutoCloseable { class JustRowsColumnarBatch(numRows: Int) extends SpillableColumnarBatch { override def numRows(): Int = numRows override def setSpillPriority(priority: Long): Unit = () // NOOP nothing to spill - override def getColumnarBatch(): ColumnarBatch = + override def getColumnarBatch(): ColumnarBatch = { + GpuSemaphore.acquireIfNecessary(TaskContext.get()) new ColumnarBatch(Array.empty, numRows) + } override def close(): Unit = () // NOOP nothing to close override val sizeInBytes: Long = 0L } @@ -106,6 +109,7 @@ class SpillableColumnarBatchImpl (id: TempSpillBufferId, */ override def getColumnarBatch(): ColumnarBatch = { withResource(RapidsBufferCatalog.acquireBuffer(id)) { rapidsBuffer => + GpuSemaphore.acquireIfNecessary(TaskContext.get()) rapidsBuffer.getColumnarBatch(sparkTypes) } } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleExchangeExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleExchangeExec.scala index d865d52d3b8..30f40f2b4cf 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleExchangeExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleExchangeExec.scala @@ -269,9 +269,11 @@ object GpuShuffleExchangeExec { case h: GpuHashPartitioning => GpuBindReferences.bindReference(h, outputAttributes) case r: GpuRangePartitioning => - r.part.createRangeBounds(r.numPartitions, r.gpuOrdering, rdd, outputAttributes, - SQLConf.get.rangeExchangeSampleSizePerPartition) - GpuBindReferences.bindReference(r, outputAttributes) + val sorter = new GpuSorter(r.gpuOrdering, outputAttributes) + val bounds = GpuRangePartitioner.createRangeBounds(r.numPartitions, sorter, + rdd, 
SQLConf.get.rangeExchangeSampleSizePerPartition) + // No need to bind arguments for the GpuRangePartitioner. The Sorter has already done it + new GpuRangePartitioner(bounds, sorter) case s: GpuSinglePartitioning => GpuBindReferences.bindReference(s, outputAttributes) case rrp: GpuRoundRobinPartitioning => diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala index 23034d4181f..93dcd6a56a5 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala @@ -43,6 +43,8 @@ object TrampolineUtil { def fromAttributes(attrs: Seq[Attribute]): StructType = StructType.fromAttributes(attrs) + def toAttributes(structType: StructType): Seq[Attribute] = structType.toAttributes + def jsonValue(dataType: DataType): JsonAST.JValue = dataType.jsonValue /** Get a human-readable string, e.g.: "4.0 MiB", for a value in bytes. */ From e06c22664f7300cbee8894407590fc22f0250cae Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Tue, 2 Mar 2021 11:02:03 -0800 Subject: [PATCH 14/28] Fix NullPointerException on null partition insert (#1744) Port https://github.com/apache/spark/pull/31320 to close #1735 Signed-off-by: Gera Shegalov --- ...GpuInsertIntoHadoopFsRelationCommand.scala | 11 ++---- .../rapids/InsertPartition311Suite.scala | 39 +++++++++++++++++++ 2 files changed, 42 insertions(+), 8 deletions(-) create mode 100644 tests-spark310+/src/test/scala/com/nvidia/spark/rapids/InsertPartition311Suite.scala diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala index 7cd815179e9..39d6cfb6442 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
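For context on the fix in this patch, a standalone sketch of the intended behavior, not Spark's implementation: the escaping below is only a stand-in, and the default directory name is assumed to follow Hive's __HIVE_DEFAULT_PARTITION__ convention. The point is that a null static partition value maps to a default name instead of being escaped directly, which is where the NullPointerException came from.

object NullPartitionPathSketch {
  // Assumption: Hive's conventional placeholder for null/empty partition values.
  val defaultPartitionName = "__HIVE_DEFAULT_PARTITION__"

  // Stand-in for path escaping; the real code defers to ExternalCatalogUtils.
  private def escape(s: String): String = java.net.URLEncoder.encode(s, "UTF-8")

  def partitionPathString(col: String, value: String): String = {
    val v = if (value == null || value.isEmpty) defaultPartitionName else escape(value)
    s"${escape(col)}=$v"
  }

  def main(args: Array[String]): Unit = {
    println(partitionPathString("c", "us/east")) // c=us%2Feast
    println(partitionPathString("c", null))      // c=__HIVE_DEFAULT_PARTITION__
  }
}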
@@ -25,7 +25,7 @@ import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogTablePartition} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.escapePathName +import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.getPartitionPathString import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap @@ -204,12 +204,7 @@ case class GpuInsertIntoHadoopFsRelationCommand( committer: FileCommitProtocol): Unit = { val staticPartitionPrefix = if (staticPartitions.nonEmpty) { "/" + partitionColumns.flatMap { p => - staticPartitions.get(p.name) match { - case Some(value) => - Some(escapePathName(p.name) + "=" + escapePathName(value)) - case None => - None - } + staticPartitions.get(p.name).map(getPartitionPathString(p.name, _)) }.mkString("/") } else { "" diff --git a/tests-spark310+/src/test/scala/com/nvidia/spark/rapids/InsertPartition311Suite.scala b/tests-spark310+/src/test/scala/com/nvidia/spark/rapids/InsertPartition311Suite.scala new file mode 100644 index 00000000000..4865c9aa910 --- /dev/null +++ b/tests-spark310+/src/test/scala/com/nvidia/spark/rapids/InsertPartition311Suite.scala @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids + +import org.scalatest.BeforeAndAfterEach + +class InsertPartition311Suite extends SparkQueryCompareTestSuite with BeforeAndAfterEach { + var tableNr = 0 + + override def afterEach(): Unit = { + List(1, 2).foreach { tnr => + SparkSessionHolder.sparkSession.sql(s"DROP TABLE IF EXISTS t$tnr") + } + } + + testSparkResultsAreEqual( + testName ="Insert null-value partition ", + spark => { + tableNr += 1 + spark.sql(s"""CREATE TABLE t${tableNr}(i STRING, c STRING) + |USING PARQUET PARTITIONED BY (c)""".stripMargin) + spark.sql(s"""INSERT OVERWRITE t${tableNr} PARTITION (c=null) + |VALUES ('1')""".stripMargin)})( + _.sparkSession.sql(s"SELECT * FROM t$tableNr")) +} From dea867a7abd4e5ee54cbc721340780e78e24e601 Mon Sep 17 00:00:00 2001 From: Sameer Raheja Date: Tue, 2 Mar 2021 13:56:10 -0800 Subject: [PATCH 15/28] Update changelog for 0.4 (#1849) * Update changelog for 0.4 Signed-off-by: Sameer Raheja * Update generate-changelog script Signed-off-by: Sameer Raheja --- CHANGELOG.md | 283 ++++++++++++++++++++++++++++++++++++- scripts/generate-changelog | 10 +- 2 files changed, 287 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a94e699af1a..7cf8994f744 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,286 @@ # Change log -Generated on 2021-02-08 +Generated on 2021-03-02 + +## Release 0.4 + +### Features +||| +|:---|:---| +|[#1773](https://github.com/NVIDIA/spark-rapids/issues/1773)|[FEA] Spark 3.0.2 release support| +|[#80](https://github.com/NVIDIA/spark-rapids/issues/80)|[FEA] Support the struct SQL function| +|[#76](https://github.com/NVIDIA/spark-rapids/issues/76)|[FEA] Support CreateArray| +|[#1635](https://github.com/NVIDIA/spark-rapids/issues/1635)|[FEA] RAPIDS accelerated Java UDF| +|[#1333](https://github.com/NVIDIA/spark-rapids/issues/1333)|[FEA] Support window operations on Decimal| +|[#1419](https://github.com/NVIDIA/spark-rapids/issues/1419)|[FEA] Support GPU accelerated UDF alternative for higher order function "aggregate" over window| +|[#1580](https://github.com/NVIDIA/spark-rapids/issues/1580)|[FEA] Support Decimal for ParquetCachedBatchSerializer| +|[#1600](https://github.com/NVIDIA/spark-rapids/issues/1600)|[FEA] Support ScalarSubquery| +|[#1072](https://github.com/NVIDIA/spark-rapids/issues/1072)|[FEA] Support for a custom DataSource V2 which supplies Arrow data| +|[#906](https://github.com/NVIDIA/spark-rapids/issues/906)|[FEA] Clarify query explanation to directly state what will run on GPU| +|[#1335](https://github.com/NVIDIA/spark-rapids/issues/1335)|[FEA] Support CollectLimitExec for decimal| +|[#1485](https://github.com/NVIDIA/spark-rapids/issues/1485)|[FEA] Decimal Support for Parquet Write| +|[#1329](https://github.com/NVIDIA/spark-rapids/issues/1329)|[FEA] Decimal support for multiply int div, add, subtract and null safe equals| +|[#1351](https://github.com/NVIDIA/spark-rapids/issues/1351)|[FEA] Execute UDFs that provide a RAPIDS execution path| +|[#1330](https://github.com/NVIDIA/spark-rapids/issues/1330)|[FEA] Support Decimal Casts| +|[#1353](https://github.com/NVIDIA/spark-rapids/issues/1353)|[FEA] Example of RAPIDS UDF using custom GPU code| +|[#1487](https://github.com/NVIDIA/spark-rapids/issues/1487)|[FEA] Change spark 3.1.0 to 3.1.1| +|[#1334](https://github.com/NVIDIA/spark-rapids/issues/1334)|[FEA] Add support for count aggregate on decimal| +|[#1325](https://github.com/NVIDIA/spark-rapids/issues/1325)|[FEA] Add in join support for decimal| 
+|[#1326](https://github.com/NVIDIA/spark-rapids/issues/1326)|[FEA] Add in Broadcast support for decimal values| +|[#37](https://github.com/NVIDIA/spark-rapids/issues/37)|[FEA] round and bround SQL functions| +|[#78](https://github.com/NVIDIA/spark-rapids/issues/78)|[FEA] Support CreateNamedStruct function| +|[#1331](https://github.com/NVIDIA/spark-rapids/issues/1331)|[FEA] UnionExec and ExpandExec support for decimal| +|[#1332](https://github.com/NVIDIA/spark-rapids/issues/1332)|[FEA] Support CaseWhen, Coalesce and IfElse for decimal| +|[#937](https://github.com/NVIDIA/spark-rapids/issues/937)|[FEA] have murmur3 hash function that matches exactly with spark| +|[#1324](https://github.com/NVIDIA/spark-rapids/issues/1324)|[FEA] Support Parquet Read of Decimal FIXED_LENGTH_BYTE_ARRAY| +|[#1428](https://github.com/NVIDIA/spark-rapids/issues/1428)|[FEA] Add support for unary decimal operations abs, floor, ceil, unary - and unary +| +|[#1375](https://github.com/NVIDIA/spark-rapids/issues/1375)|[FEA] Add log statement for what the concurrentGpuTasks tasks is set to on executor startup| +|[#1352](https://github.com/NVIDIA/spark-rapids/issues/1352)|[FEA] Example of RAPIDS UDF using cudf Java APIs| +|[#1328](https://github.com/NVIDIA/spark-rapids/issues/1328)|[FEA] Support sorting and shuffle of decimal| +|[#1316](https://github.com/NVIDIA/spark-rapids/issues/1316)|[FEA] Support simple DECIMAL aggregates| + +### Performance +||| +|:---|:---| +|[#1435](https://github.com/NVIDIA/spark-rapids/issues/1435)|[FEA]Improve the file reading by using local file caching| +|[#1738](https://github.com/NVIDIA/spark-rapids/issues/1738)|[FEA] Reduce regex usage in CAST string to date/timestamp| +|[#987](https://github.com/NVIDIA/spark-rapids/issues/987)|[FEA] Optimize CAST from string to temporal types by using cuDF is_timestamp function| +|[#1594](https://github.com/NVIDIA/spark-rapids/issues/1594)|[FEA] RAPIDS accelerated ScalaUDF| +|[#103](https://github.com/NVIDIA/spark-rapids/issues/103)|[FEA] GPU version of TakeOrderedAndProject| +|[#1024](https://github.com/NVIDIA/spark-rapids/issues/1024)|Cleanup RAPIDS transport calls to `receive`| +|[#1366](https://github.com/NVIDIA/spark-rapids/issues/1366)|Seeing performance differences of multi-threaded/coalesce/perfile Parquet reader type for a single file| +|[#1200](https://github.com/NVIDIA/spark-rapids/issues/1200)|[FEA] Accelerate the scan speed for coalescing parquet reader when reading files from multiple partitioned folders| + +### Bugs Fixed +||| +|:---|:---| +|[#1785](https://github.com/NVIDIA/spark-rapids/issues/1785)|[BUG] Rapids pytest integration tests FAILED on Yarn cluster with unrecognized arguments: `--std_input_path=src/test/resources/`| +|[#999](https://github.com/NVIDIA/spark-rapids/issues/999)|[BUG] test_multi_types_window_aggs_for_rows_lead_lag fails against Spark 3.1.0| +|[#1818](https://github.com/NVIDIA/spark-rapids/issues/1818)|[BUG] unmoored doc comment warnings in GpuCast| +|[#1817](https://github.com/NVIDIA/spark-rapids/issues/1817)|[BUG] Developer build with local modifications fails during verify phase| +|[#1644](https://github.com/NVIDIA/spark-rapids/issues/1644)|[BUG] test_window_aggregate_udf_array_from_python fails on databricks| +|[#1771](https://github.com/NVIDIA/spark-rapids/issues/1771)|[BUG] Databricks AWS CI/CD failing to create cluster| +|[#1157](https://github.com/NVIDIA/spark-rapids/issues/1157)|[BUG] Fix regression supporting to_date on GPU with Spark 3.1.0| +|[#716](https://github.com/NVIDIA/spark-rapids/issues/716)|[BUG] 
Cast String to TimeStamp issues| +|[#1117](https://github.com/NVIDIA/spark-rapids/issues/1117)|[BUG] CAST string to date returns wrong values for dates with out-of-range values| +|[#1670](https://github.com/NVIDIA/spark-rapids/issues/1670)|[BUG] Some TPC-DS queries fail with AQE when decimal types enabled| +|[#1730](https://github.com/NVIDIA/spark-rapids/issues/1730)|[BUG] Range Partitioning can crash when processing is in the order-by| +|[#1726](https://github.com/NVIDIA/spark-rapids/issues/1726)|[BUG] java url decode test failing on databricks, emr, and dataproc| +|[#1651](https://github.com/NVIDIA/spark-rapids/issues/1651)|[BUG] GDS exception when writing shuffle file| +|[#1702](https://github.com/NVIDIA/spark-rapids/issues/1702)|[BUG] check all tests marked xfail for Spark 3.1.1| +|[#575](https://github.com/NVIDIA/spark-rapids/issues/575)|[BUG] Spark 3.1 FAILED join_test.py::test_broadcast_join_mixed[FullOuter][IGNORE_ORDER] failed| +|[#577](https://github.com/NVIDIA/spark-rapids/issues/577)|[BUG] Spark 3.1 log arithmetic functions fail| +|[#1541](https://github.com/NVIDIA/spark-rapids/issues/1541)|[BUG] Tests fail in integration in distributed mode after allowing nested types through in sort and shuffle| +|[#1626](https://github.com/NVIDIA/spark-rapids/issues/1626)|[BUG] TPC-DS-like query 77 at scale=3TB fails with maxResultSize exceeded error| +|[#1576](https://github.com/NVIDIA/spark-rapids/issues/1576)|[BUG] loading SPARK-32639 example parquet file triggers a JVM crash | +|[#1643](https://github.com/NVIDIA/spark-rapids/issues/1643)|[BUG] TPC-DS-Like q10, q35, and q69 - slow or hanging at leftSemiJoin| +|[#1650](https://github.com/NVIDIA/spark-rapids/issues/1650)|[BUG] BenchmarkRunner does not include query name in JSON summary filename when running multiple queries| +|[#1654](https://github.com/NVIDIA/spark-rapids/issues/1654)|[BUG] TPC-DS-like query 59 at scale=3TB with AQE fails with join mismatch| +|[#1274](https://github.com/NVIDIA/spark-rapids/issues/1274)|[BUG] OutOfMemoryError - Maximum pool size exceeded while running 24 day criteo ETL Transform stage| +|[#1497](https://github.com/NVIDIA/spark-rapids/issues/1497)|[BUG] Spark-rapids v0.3.0 pytest integration tests with UCX on FAILED on Yarn cluster| +|[#1534](https://github.com/NVIDIA/spark-rapids/issues/1534)|[BUG] Spark 3.1.1 test failure in writing due to removal of InMemoryFileIndex.shouldFilterOut| +|[#1155](https://github.com/NVIDIA/spark-rapids/issues/1155)|[BUG] on shutdown don't print `Socket closed` exception when shutting down UCX.scala| +|[#1510](https://github.com/NVIDIA/spark-rapids/issues/1510)|[BUG] IllegalArgumentException during shuffle| +|[#1513](https://github.com/NVIDIA/spark-rapids/issues/1513)|[BUG] executor not fully initialized may get calls from Spark, in the process setting the `catalog` incorrectly| +|[#1466](https://github.com/NVIDIA/spark-rapids/issues/1466)|[BUG] Databricks build must run before the rapids nightly| +|[#1456](https://github.com/NVIDIA/spark-rapids/issues/1456)|[BUG] Databricks 0.4 parquet integration tests fail| +|[#1400](https://github.com/NVIDIA/spark-rapids/issues/1400)|[BUG] Regressions in spark-shell usage of benchmark utilities| +|[#1119](https://github.com/NVIDIA/spark-rapids/issues/1119)|[BUG] inner join fails with Column size cannot be negative| +|[#1079](https://github.com/NVIDIA/spark-rapids/issues/1079)|[BUG]The Scala UDF function cannot invoke the UDF compiler when it's passed to "explode"| +|[#1298](https://github.com/NVIDIA/spark-rapids/issues/1298)|TPCxBB query16 
failed at UnsupportedOperationException: org.apache.parquet.column.values.dictionary.PlainValuesDictionary$PlainIntegerDictionary| +|[#1271](https://github.com/NVIDIA/spark-rapids/issues/1271)|[BUG] CastOpSuite and AnsiCastOpSuite failing with ArithmeticException on Spark 3.1| +|[#84](https://github.com/NVIDIA/spark-rapids/issues/84)|[BUG] sort does not match spark for -0.0 and 0.0| +|[#578](https://github.com/NVIDIA/spark-rapids/issues/578)|[BUG] Spark 3.1 qa_nightly_select_test.py Full join test failures| +|[#586](https://github.com/NVIDIA/spark-rapids/issues/586)|[BUG] Spark3.1 tpch failures| +|[#837](https://github.com/NVIDIA/spark-rapids/issues/837)|[BUG] Distinct count of floating point values differs with regular spark| +|[#953](https://github.com/NVIDIA/spark-rapids/issues/953)|[BUG] 3.1.0 pos_explode tests are failing| +|[#127](https://github.com/NVIDIA/spark-rapids/issues/127)|[BUG] String CSV parsing does not respect nullValues| +|[#1203](https://github.com/NVIDIA/spark-rapids/issues/1203)|[BUG] tpcds query 51 fails with join error on Spark 3.1.0| +|[#750](https://github.com/NVIDIA/spark-rapids/issues/750)|[BUG] udf_cudf_test::test_with_column fails with IPC error | +|[#1348](https://github.com/NVIDIA/spark-rapids/issues/1348)|[BUG] Host columnar decimal conversions are failing| +|[#1270](https://github.com/NVIDIA/spark-rapids/issues/1270)|[BUG] Benchmark runner fails to produce report if benchmark fails due to an invalid query plan| +|[#1179](https://github.com/NVIDIA/spark-rapids/issues/1179)|[BUG] SerializeConcatHostBuffersDeserializeBatch may have thread issues| +|[#1115](https://github.com/NVIDIA/spark-rapids/issues/1115)|[BUG] Unchecked type warning in SparkQueryCompareTestSuite| + +### PRs +||| +|:---|:---| +|[#1842](https://github.com/NVIDIA/spark-rapids/pull/1842)|Update to note support for 3.0.2| +|[#1832](https://github.com/NVIDIA/spark-rapids/pull/1832)|Spark 3.1.1 shim no longer a snapshot shim| +|[#1831](https://github.com/NVIDIA/spark-rapids/pull/1831)|Spark 3.0.2 shim no longer a snapshot shim| +|[#1826](https://github.com/NVIDIA/spark-rapids/pull/1826)|Remove benchmarks| +|[#1828](https://github.com/NVIDIA/spark-rapids/pull/1828)|Update cudf dependency to 0.18| +|[#1813](https://github.com/NVIDIA/spark-rapids/pull/1813)|Fix LEAD/LAG failures in Spark 3.1.1| +|[#1819](https://github.com/NVIDIA/spark-rapids/pull/1819)|Fix scaladoc warning in GpuCast| +|[#1820](https://github.com/NVIDIA/spark-rapids/pull/1820)|[BUG] make modified check pre-merge only| +|[#1780](https://github.com/NVIDIA/spark-rapids/pull/1780)|Remove SNAPSHOT from test and integration_test READMEs| +|[#1809](https://github.com/NVIDIA/spark-rapids/pull/1809)|check if modified files after update_config/supported| +|[#1804](https://github.com/NVIDIA/spark-rapids/pull/1804)|Update UCX documentation for RX_QUEUE_LEN and Docker| +|[#1810](https://github.com/NVIDIA/spark-rapids/pull/1810)|Pandas UDF: Sort the data before computing the sum.| +|[#1751](https://github.com/NVIDIA/spark-rapids/pull/1751)|Exclude foldable expressions from GPU if constant folding is disabled| +|[#1798](https://github.com/NVIDIA/spark-rapids/pull/1798)|Add documentation about explain not on GPU when AQE is on| +|[#1766](https://github.com/NVIDIA/spark-rapids/pull/1766)|Branch 0.4 release docs| +|[#1794](https://github.com/NVIDIA/spark-rapids/pull/1794)|Build python output schema from udf expressions| +|[#1783](https://github.com/NVIDIA/spark-rapids/pull/1783)|Fix the collect_list over window tests failures on db| 
+|[#1781](https://github.com/NVIDIA/spark-rapids/pull/1781)|Better float/double cases for casting tests| +|[#1790](https://github.com/NVIDIA/spark-rapids/pull/1790)|Record row counts in benchmark runs that call collect| +|[#1779](https://github.com/NVIDIA/spark-rapids/pull/1779)|Add support of DateType and TimestampType for GetTimestamp expression| +|[#1768](https://github.com/NVIDIA/spark-rapids/pull/1768)|Updating getting started Databricks docs| +|[#1742](https://github.com/NVIDIA/spark-rapids/pull/1742)|Fix regression supporting to_date with Spark-3.1| +|[#1775](https://github.com/NVIDIA/spark-rapids/pull/1775)|Fix ambiguous ordering for some tests| +|[#1760](https://github.com/NVIDIA/spark-rapids/pull/1760)|Update GpuDataSourceScanExec and GpuBroadcastExchangeExec to fix audit issues| +|[#1750](https://github.com/NVIDIA/spark-rapids/pull/1750)|Detect task failures in benchmarks| +|[#1767](https://github.com/NVIDIA/spark-rapids/pull/1767)|Consistent Spark version for test and production| +|[#1741](https://github.com/NVIDIA/spark-rapids/pull/1741)|Reduce regex use in CAST| +|[#1756](https://github.com/NVIDIA/spark-rapids/pull/1756)|Skip RAPIDS accelerated Java UDF tests if UDF fails to load| +|[#1716](https://github.com/NVIDIA/spark-rapids/pull/1716)|Update RapidsShuffleManager documentation for branch 0.4| +|[#1740](https://github.com/NVIDIA/spark-rapids/pull/1740)|Disable ORC writes until bug can be fixed| +|[#1747](https://github.com/NVIDIA/spark-rapids/pull/1747)|Fix resource leaks in unit tests| +|[#1725](https://github.com/NVIDIA/spark-rapids/pull/1725)|Branch 0.4 FAQ reorg| +|[#1718](https://github.com/NVIDIA/spark-rapids/pull/1718)|CAST string to temporal type now calls isTimestamp| +|[#1734](https://github.com/NVIDIA/spark-rapids/pull/1734)|Disable range partitioning if computation is needed| +|[#1723](https://github.com/NVIDIA/spark-rapids/pull/1723)|Removed StructTypes support for ParquetCachedBatchSerializer as cudf doesn't support it yet| +|[#1714](https://github.com/NVIDIA/spark-rapids/pull/1714)|Add support for RAPIDS accelerated Java UDFs| +|[#1713](https://github.com/NVIDIA/spark-rapids/pull/1713)|Call GpuDeviceManager.shutdown when the executor plugin is shutting down| +|[#1596](https://github.com/NVIDIA/spark-rapids/pull/1596)|Added in Decimal support to ParquetCachedBatchSerializer| +|[#1706](https://github.com/NVIDIA/spark-rapids/pull/1706)|cleanup unused is_before_spark_310| +|[#1685](https://github.com/NVIDIA/spark-rapids/pull/1685)|Fix CustomShuffleReader replacement when decimal types enabled| +|[#1699](https://github.com/NVIDIA/spark-rapids/pull/1699)|Add docs about Spark 3.1 in standalone modes not needing extra class path| +|[#1701](https://github.com/NVIDIA/spark-rapids/pull/1701)|remove xfail for orc test_input_meta for spark 3.1.0| +|[#1703](https://github.com/NVIDIA/spark-rapids/pull/1703)|Remove xfail for spark 3.1.0 test_broadcast_join_mixed FullOuter| +|[#1676](https://github.com/NVIDIA/spark-rapids/pull/1676)|BenchmarkRunner option to generate query plan diagrams in DOT format| +|[#1695](https://github.com/NVIDIA/spark-rapids/pull/1695)|support alternate jar paths| +|[#1694](https://github.com/NVIDIA/spark-rapids/pull/1694)|increase mem and limit parallelism for pre-merge| +|[#1691](https://github.com/NVIDIA/spark-rapids/pull/1691)|add validate_execs_in_gpu_plan to pytest.ini| +|[#1692](https://github.com/NVIDIA/spark-rapids/pull/1692)|Add the integration test resources to the test tarball| 
+|[#1677](https://github.com/NVIDIA/spark-rapids/pull/1677)|When PTDS is enabled, print warning if the allocator is not ARENA| +|[#1683](https://github.com/NVIDIA/spark-rapids/pull/1683)|update changelog to verify autotmerge 0.5 setup [skip ci]| +|[#1673](https://github.com/NVIDIA/spark-rapids/pull/1673)|support auto-merge for branch 0.5 [skip ci]| +|[#1681](https://github.com/NVIDIA/spark-rapids/pull/1681)|Xfail the collect_list tests for databricks| +|[#1678](https://github.com/NVIDIA/spark-rapids/pull/1678)|Fix array/struct checks in Sort and HashAggregate and sorting tests in distributed mode| +|[#1671](https://github.com/NVIDIA/spark-rapids/pull/1671)|Allow metrics to be configurable by level| +|[#1675](https://github.com/NVIDIA/spark-rapids/pull/1675)|add run_pyspark_from_build.sh to the pytest distribution tarball| +|[#1548](https://github.com/NVIDIA/spark-rapids/pull/1548)|Support executing collect_list on GPU with windowing.| +|[#1593](https://github.com/NVIDIA/spark-rapids/pull/1593)|Avoid unnecessary Table instances after contiguous split| +|[#1592](https://github.com/NVIDIA/spark-rapids/pull/1592)|Add in support for Decimal divide| +|[#1668](https://github.com/NVIDIA/spark-rapids/pull/1668)|Implement way for python integration tests to validate Exec is in GPU plan| +|[#1669](https://github.com/NVIDIA/spark-rapids/pull/1669)|Add FAQ entries for executor-per-GPU questions| +|[#1661](https://github.com/NVIDIA/spark-rapids/pull/1661)|Enable Parquet test for file containing map struct key| +|[#1664](https://github.com/NVIDIA/spark-rapids/pull/1664)|Filter nulls for left semi and left anti join to work around cudf| +|[#1665](https://github.com/NVIDIA/spark-rapids/pull/1665)|Add better automated tests for Arrow columnar copy in HostColumnarToGpu| +|[#1614](https://github.com/NVIDIA/spark-rapids/pull/1614)|add alluxio getting start document| +|[#1639](https://github.com/NVIDIA/spark-rapids/pull/1639)|support GpuScalarSubquery| +|[#1656](https://github.com/NVIDIA/spark-rapids/pull/1656)|Move UDF to Catalyst Expressions to its own document| +|[#1663](https://github.com/NVIDIA/spark-rapids/pull/1663)|BenchmarkRunner - Include query name in JSON summary filename| +|[#1655](https://github.com/NVIDIA/spark-rapids/pull/1655)|Fix extraneous shuffles added by AQE| +|[#1652](https://github.com/NVIDIA/spark-rapids/pull/1652)|Fix typo in arrow optimized config name - spark.rapids.arrowCopyOptimizationEnabled| +|[#1645](https://github.com/NVIDIA/spark-rapids/pull/1645)|Run Databricks IT with python-xdist parallel, includes test fixes and xfail| +|[#1649](https://github.com/NVIDIA/spark-rapids/pull/1649)|Move building from source docs to contributing guide| +|[#1637](https://github.com/NVIDIA/spark-rapids/pull/1637)|Fail DivModLike on zero divisor in ANSI mode| +|[#1646](https://github.com/NVIDIA/spark-rapids/pull/1646)|Update links in rapids-udfs.md after moving to subfolder| +|[#1641](https://github.com/NVIDIA/spark-rapids/pull/1641)|Xfail struct and array order by tests on Dataproc| +|[#1565](https://github.com/NVIDIA/spark-rapids/pull/1565)|Add GPU accelerated array_contains operator| +|[#1617](https://github.com/NVIDIA/spark-rapids/pull/1617)|Enable nightly test checks for Apache Spark| +|[#1636](https://github.com/NVIDIA/spark-rapids/pull/1636)|RAPIDS accelerated Spark Scala UDF support| +|[#1634](https://github.com/NVIDIA/spark-rapids/pull/1634)|Fix databricks build since Arrow code added| +|[#1599](https://github.com/NVIDIA/spark-rapids/pull/1599)|Add division by zero tests for Spark 3.1 
behavior| +|[#1619](https://github.com/NVIDIA/spark-rapids/pull/1619)|Update GpuFileSourceScanExec to be in sync with DataSourceScanExec| +|[#1631](https://github.com/NVIDIA/spark-rapids/pull/1631)|Explicitly add maven-jar-plugin version to improve incremental build time.| +|[#1624](https://github.com/NVIDIA/spark-rapids/pull/1624)|Update explain format to show what will and will not run on the GPU| +|[#1622](https://github.com/NVIDIA/spark-rapids/pull/1622)|Support faster copy for a custom DataSource V2 which supplies Arrow data| +|[#1621](https://github.com/NVIDIA/spark-rapids/pull/1621)|Additional functionality docs| +|[#1618](https://github.com/NVIDIA/spark-rapids/pull/1618)|update blossom-ci for security updates [skip ci]| +|[#1562](https://github.com/NVIDIA/spark-rapids/pull/1562)|add alluxio support| +|[#1597](https://github.com/NVIDIA/spark-rapids/pull/1597)|Documentation for Parquet serializer| +|[#1611](https://github.com/NVIDIA/spark-rapids/pull/1611)|Add in flag for integration tests to not skip required tests| +|[#1609](https://github.com/NVIDIA/spark-rapids/pull/1609)|Disable float round/bround by default| +|[#1615](https://github.com/NVIDIA/spark-rapids/pull/1615)|Add in window support for average| +|[#1610](https://github.com/NVIDIA/spark-rapids/pull/1610)|Limit length of spark app name in BenchmarkRunner| +|[#1579](https://github.com/NVIDIA/spark-rapids/pull/1579)|Support TakeOrderedAndProject| +|[#1581](https://github.com/NVIDIA/spark-rapids/pull/1581)|Support Decimal type for CollectLimitExec| +|[#1591](https://github.com/NVIDIA/spark-rapids/pull/1591)|Add support for running multiple queries in BenchmarkRunner| +|[#1595](https://github.com/NVIDIA/spark-rapids/pull/1595)|Fix Github documentation issue template| +|[#1577](https://github.com/NVIDIA/spark-rapids/pull/1577)|rename directory from spark310 to spark311| +|[#1578](https://github.com/NVIDIA/spark-rapids/pull/1578)|Test to track RAPIDS-side issues re SPARK-32639| +|[#1583](https://github.com/NVIDIA/spark-rapids/pull/1583)|fix request-action issue [skip ci]| +|[#1555](https://github.com/NVIDIA/spark-rapids/pull/1555)|Enable ANSI mode for CAST string to timestamp| +|[#1531](https://github.com/NVIDIA/spark-rapids/pull/1531)|Decimal Support for writing Parquet| +|[#1545](https://github.com/NVIDIA/spark-rapids/pull/1545)|Support comparing ORC data| +|[#1570](https://github.com/NVIDIA/spark-rapids/pull/1570)|Branch 0.4 doc cleanup| +|[#1569](https://github.com/NVIDIA/spark-rapids/pull/1569)|Add shim method shouldIgnorePath| +|[#1564](https://github.com/NVIDIA/spark-rapids/pull/1564)|Add in support for Decimal Multiply and DIV| +|[#1561](https://github.com/NVIDIA/spark-rapids/pull/1561)|Decimal support for add and subtract| +|[#1560](https://github.com/NVIDIA/spark-rapids/pull/1560)|support sum in window aggregation for decimal| +|[#1546](https://github.com/NVIDIA/spark-rapids/pull/1546)|Cleanup shutdown logging for UCX shuffle| +|[#1551](https://github.com/NVIDIA/spark-rapids/pull/1551)|RAPIDS-accelerated Hive UDFs support all types| +|[#1543](https://github.com/NVIDIA/spark-rapids/pull/1543)|Shuffle/transport enabled by default| +|[#1552](https://github.com/NVIDIA/spark-rapids/pull/1552)|Disable blackduck signature check| +|[#1540](https://github.com/NVIDIA/spark-rapids/pull/1540)|Handle ShuffleManager api calls when plugin is not fully initialized| +|[#1547](https://github.com/NVIDIA/spark-rapids/pull/1547)|Cleanup shuffle transport receive calls| +|[#1512](https://github.com/NVIDIA/spark-rapids/pull/1512)|Support 
window operations on Decimal| +|[#1532](https://github.com/NVIDIA/spark-rapids/pull/1532)|Support casting from decimal to decimal| +|[#1542](https://github.com/NVIDIA/spark-rapids/pull/1542)|Change the number of partitions to zero when a range is empty| +|[#1506](https://github.com/NVIDIA/spark-rapids/pull/1506)|Add --use-decimals flag to TPC-DS ConvertFiles| +|[#1511](https://github.com/NVIDIA/spark-rapids/pull/1511)|Remove unused Jenkinsfiles [skip ci]| +|[#1505](https://github.com/NVIDIA/spark-rapids/pull/1505)|Add least, greatest and eqNullSafe support for DecimalType| +|[#1484](https://github.com/NVIDIA/spark-rapids/pull/1484)|add doc for nsight systems bundled with cuda toolkit| +|[#1478](https://github.com/NVIDIA/spark-rapids/pull/1478)|Documentation for RAPIDS-accelerated Hive UDFs| +|[#1477](https://github.com/NVIDIA/spark-rapids/pull/1477)|Allow structs and arrays to pass through for Shuffle and Sort | +|[#1489](https://github.com/NVIDIA/spark-rapids/pull/1489)|Adds in some support for the array sql function| +|[#1438](https://github.com/NVIDIA/spark-rapids/pull/1438)|Cast from numeric types to decimal type| +|[#1493](https://github.com/NVIDIA/spark-rapids/pull/1493)|Moved ParquetRecordMaterializer to the shim package to follow convention| +|[#1495](https://github.com/NVIDIA/spark-rapids/pull/1495)|Fix merge conflict, merge branch 0.3 to branch 0.4 [skip ci]| +|[#1472](https://github.com/NVIDIA/spark-rapids/pull/1472)|Add an example RAPIDS-accelerated Hive UDF using native code| +|[#1488](https://github.com/NVIDIA/spark-rapids/pull/1488)|Rename Spark 3.1.0 shim to Spark 3.1.1 to match community| +|[#1474](https://github.com/NVIDIA/spark-rapids/pull/1474)|Fix link| +|[#1476](https://github.com/NVIDIA/spark-rapids/pull/1476)|DecimalType support for Aggregate Count| +|[#1475](https://github.com/NVIDIA/spark-rapids/pull/1475)| Join support for DecimalType| +|[#1244](https://github.com/NVIDIA/spark-rapids/pull/1244)|Support round and bround SQL functions | +|[#1458](https://github.com/NVIDIA/spark-rapids/pull/1458)|Add in support for struct and named_struct| +|[#1465](https://github.com/NVIDIA/spark-rapids/pull/1465)|DecimalType support for UnionExec and ExpandExec| +|[#1450](https://github.com/NVIDIA/spark-rapids/pull/1450)|Add dynamic configs for the spark-rapids IT pipelines| +|[#1207](https://github.com/NVIDIA/spark-rapids/pull/1207)|Spark SQL hash function using murmur3| +|[#1457](https://github.com/NVIDIA/spark-rapids/pull/1457)|Support reading decimal columns from parquet files on Databricks| +|[#1455](https://github.com/NVIDIA/spark-rapids/pull/1455)|Upgrade Scala Maven Plugin to 4.3.0| +|[#1453](https://github.com/NVIDIA/spark-rapids/pull/1453)|DecimalType support for IfElse and Coalesce| +|[#1452](https://github.com/NVIDIA/spark-rapids/pull/1452)|Support DecimalType for CaseWhen| +|[#1444](https://github.com/NVIDIA/spark-rapids/pull/1444)|Improve UX when running benchmarks from Spark shell| +|[#1294](https://github.com/NVIDIA/spark-rapids/pull/1294)|Support reading decimal columns from parquet files| +|[#1153](https://github.com/NVIDIA/spark-rapids/pull/1153)|Scala UDF will compile children expressions in Project| +|[#1416](https://github.com/NVIDIA/spark-rapids/pull/1416)|Optimize mvn dependency download scripts| +|[#1430](https://github.com/NVIDIA/spark-rapids/pull/1430)|Add project for testing code that requires Spark 3.1.0 or later| +|[#1425](https://github.com/NVIDIA/spark-rapids/pull/1425)|Add in Decimal support for abs, floor, ceil, unary - and unary +| 
+|[#1427](https://github.com/NVIDIA/spark-rapids/pull/1427)|Revert "Make the multi-threaded parquet reader the default"| +|[#1420](https://github.com/NVIDIA/spark-rapids/pull/1420)|Add udf jar to nightly integration tests| +|[#1422](https://github.com/NVIDIA/spark-rapids/pull/1422)|Log the number of concurrent gpu tasks allowed on Executor startup| +|[#1401](https://github.com/NVIDIA/spark-rapids/pull/1401)|Accelerate the coalescing parquet reader when reading files from multiple partitioned folders| +|[#1413](https://github.com/NVIDIA/spark-rapids/pull/1413)|Add config for cast float to integral types| +|[#1313](https://github.com/NVIDIA/spark-rapids/pull/1313)|Support spilling to disk directly via cuFile/GDS| +|[#1411](https://github.com/NVIDIA/spark-rapids/pull/1411)|Add udf-examples jar to databricks build| +|[#1412](https://github.com/NVIDIA/spark-rapids/pull/1412)|Fix a lot of tests marked with xfail for Spark 3.1.0 that no longer fail| +|[#1414](https://github.com/NVIDIA/spark-rapids/pull/1414)|Build merged code of HEAD and BASE branch for pre-merge [skip ci]| +|[#1409](https://github.com/NVIDIA/spark-rapids/pull/1409)|Add option to use decimals in tpc-ds csv to parquet conversion| +|[#1410](https://github.com/NVIDIA/spark-rapids/pull/1410)|Add Decimal support for In, InSet, AtLeastNNonNulls, GetArrayItem, GetStructField, and GenerateExec| +|[#1408](https://github.com/NVIDIA/spark-rapids/pull/1408)|Support RAPIDS-accelerated HiveGenericUDF| +|[#1407](https://github.com/NVIDIA/spark-rapids/pull/1407)|Update docs and tests for null CSV support| +|[#1393](https://github.com/NVIDIA/spark-rapids/pull/1393)|Support RAPIDS-accelerated HiveSimpleUDF| +|[#1392](https://github.com/NVIDIA/spark-rapids/pull/1392)|Turn on hash partitioning for decimal support| +|[#1402](https://github.com/NVIDIA/spark-rapids/pull/1402)|Better GPU Cast type checks| +|[#1404](https://github.com/NVIDIA/spark-rapids/pull/1404)|Fix branch 0.4 merge conflict| +|[#1323](https://github.com/NVIDIA/spark-rapids/pull/1323)|More advanced type checking and documentation| +|[#1391](https://github.com/NVIDIA/spark-rapids/pull/1391)|Remove extra null join filtering because cudf is fast for this now.| +|[#1395](https://github.com/NVIDIA/spark-rapids/pull/1395)|Fix branch-0.3 -> branch-0.4 automerge| +|[#1382](https://github.com/NVIDIA/spark-rapids/pull/1382)|Handle "MM[/-]dd" and "dd[/-]MM" datetime formats in UnixTimeExprMeta| +|[#1390](https://github.com/NVIDIA/spark-rapids/pull/1390)|Accelerated columnar to row/row to columnar for decimal| +|[#1380](https://github.com/NVIDIA/spark-rapids/pull/1380)|Adds in basic support for decimal sort, sum, and some shuffle| +|[#1367](https://github.com/NVIDIA/spark-rapids/pull/1367)|Reuse gpu expression conversion rules when checking sort order| +|[#1349](https://github.com/NVIDIA/spark-rapids/pull/1349)|Add canonicalization tests| +|[#1368](https://github.com/NVIDIA/spark-rapids/pull/1368)|Move to cudf 0.18-SNAPSHOT| +|[#1361](https://github.com/NVIDIA/spark-rapids/pull/1361)|Use the correct precision when reading spark columnar data.| +|[#1273](https://github.com/NVIDIA/spark-rapids/pull/1273)|Update docs and scripts to 0.4.0-SNAPSHOT| +|[#1321](https://github.com/NVIDIA/spark-rapids/pull/1321)|Refactor to stop inheriting from HashJoin| +|[#1311](https://github.com/NVIDIA/spark-rapids/pull/1311)|ParquetCachedBatchSerializer code cleanup| +|[#1303](https://github.com/NVIDIA/spark-rapids/pull/1303)|Add explicit outputOrdering for BHJ and SHJ in spark310 shim| 
+|[#1299](https://github.com/NVIDIA/spark-rapids/pull/1299)|Benchmark runner improved error handling| ## Release 0.3 diff --git a/scripts/generate-changelog b/scripts/generate-changelog index 31fbba8f0b7..5b888ab6f35 100755 --- a/scripts/generate-changelog +++ b/scripts/generate-changelog @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -44,11 +44,11 @@ Github personal access token: https://github.com/settings/tokens, and make you h Usage: cd spark-rapids/ - # generate changelog for release 0.1,0.2,0.3 - scripts/generate-changelog --token= --releases=0.1,0.2,0.3 + # generate changelog for release 0.1,0.2,0.3,0.4 + scripts/generate-changelog --token= --releases=0.1,0.2,0.3,0.4 - # generate changelog for release 0.1,0.2,0.3 to /tmp/CHANGELOG.md - GITHUB_TOKEN= scripts/generate-changelog --releases=0.1,0.2,0.3 --path=/tmp/CHANGELOG.md + # generate changelog for release 0.1,0.2,0.3,0.4 to /tmp/CHANGELOG.md + GITHUB_TOKEN= scripts/generate-changelog --releases=0.1,0.2,0.3,0.4 --path=/tmp/CHANGELOG.md """ import os import sys From 40c0eda18221c67cf69e4f3f2b45ceb1dd91c746 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 2 Mar 2021 17:42:34 -0600 Subject: [PATCH 16/28] Refactor join code to reduce duplicated code (#1839) * Refactor join code to reduce duplicated code Signed-off-by: Jason Lowe * Move nodeName override to base class --- .../spark300/GpuShuffledHashJoinExec.scala | 84 +------------- .../spark301db/GpuShuffledHashJoinExec.scala | 85 ++------------ .../spark311/GpuShuffledHashJoinExec.scala | 86 +------------- .../rapids/GpuShuffledHashJoinBase.scala | 107 ++++++++++++++++++ .../execution/GpuShuffleHashJoinExec.scala | 35 ------ .../spark/rapids/AdaptiveQueryExecSuite.scala | 2 +- 6 files changed, 123 insertions(+), 276 deletions(-) create mode 100644 sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinBase.scala delete mode 100644 sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleHashJoinExec.scala diff --git a/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/GpuShuffledHashJoinExec.scala b/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/GpuShuffledHashJoinExec.scala index 43540feef2e..941aabdadc6 100644 --- a/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/GpuShuffledHashJoinExec.scala +++ b/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/GpuShuffledHashJoinExec.scala @@ -18,16 +18,11 @@ package com.nvidia.spark.rapids.shims.spark300 import com.nvidia.spark.rapids._ -import org.apache.spark.TaskContext -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.physical.{Distribution, HashClusteredDistribution} import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, BuildSide, ShuffledHashJoinExec} -import org.apache.spark.sql.rapids.execution.{GpuHashJoin, GpuShuffledHashJoinBase} -import org.apache.spark.sql.vectorized.ColumnarBatch +import org.apache.spark.sql.rapids.execution.GpuHashJoin object GpuJoinUtils { def getGpuBuildSide(buildSide: BuildSide): GpuBuildSide = { @@ -85,79 +80,6 @@ case class 
GpuShuffledHashJoinExec( extends GpuShuffledHashJoinBase( leftKeys, rightKeys, - joinType, + buildSide, condition, - left, - right, - isSkewJoin) - with GpuHashJoin { - import GpuMetric._ - - override val outputRowsLevel: MetricsLevel = ESSENTIAL_LEVEL - override val outputBatchesLevel: MetricsLevel = MODERATE_LEVEL - override lazy val additionalMetrics: Map[String, GpuMetric] = Map( - BUILD_DATA_SIZE -> createSizeMetric(ESSENTIAL_LEVEL, DESCRIPTION_BUILD_DATA_SIZE), - BUILD_TIME -> createNanoTimingMetric(ESSENTIAL_LEVEL, DESCRIPTION_BUILD_TIME), - STREAM_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_STREAM_TIME), - JOIN_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_JOIN_TIME), - JOIN_OUTPUT_ROWS -> createMetric(MODERATE_LEVEL, DESCRIPTION_JOIN_OUTPUT_ROWS), - FILTER_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_FILTER_TIME)) - - override def requiredChildDistribution: Seq[Distribution] = - HashClusteredDistribution(leftKeys) :: HashClusteredDistribution(rightKeys) :: Nil - - override protected def doExecute(): RDD[InternalRow] = { - throw new UnsupportedOperationException( - "GpuShuffledHashJoin does not support the execute() code path.") - } - - override def childrenCoalesceGoal: Seq[CoalesceGoal] = buildSide match { - case GpuBuildLeft => Seq(RequireSingleBatch, null) - case GpuBuildRight => Seq(null, RequireSingleBatch) - } - - override def doExecuteColumnar() : RDD[ColumnarBatch] = { - val buildDataSize = gpuLongMetric(BUILD_DATA_SIZE) - val numOutputRows = gpuLongMetric(NUM_OUTPUT_ROWS) - val numOutputBatches = gpuLongMetric(NUM_OUTPUT_BATCHES) - val totalTime = gpuLongMetric(TOTAL_TIME) - val buildTime = gpuLongMetric(BUILD_TIME) - val streamTime = gpuLongMetric(STREAM_TIME) - val joinTime = gpuLongMetric(JOIN_TIME) - val filterTime = gpuLongMetric(FILTER_TIME) - val joinOutputRows = gpuLongMetric(JOIN_OUTPUT_ROWS) - - val boundCondition = condition.map(GpuBindReferences.bindReference(_, output)) - - streamedPlan.executeColumnar().zipPartitions(buildPlan.executeColumnar()) { - (streamIter, buildIter) => { - var combinedSize = 0 - - val startTime = System.nanoTime() - val builtTable = withResource(ConcatAndConsumeAll.getSingleBatchWithVerification( - buildIter, localBuildOutput)) { buildBatch: ColumnarBatch => - withResource(GpuProjectExec.project(buildBatch, gpuBuildKeys)) { keys => - val combined = GpuHashJoin.incRefCount(combine(keys, buildBatch)) - withResource(combined) { combined => - combinedSize = - GpuColumnVector.extractColumns(combined) - .map(_.getBase.getDeviceMemorySize).sum.toInt - GpuColumnVector.from(combined) - } - } - } - - val delta = System.nanoTime() - startTime - buildTime += delta - totalTime += delta - buildDataSize += combinedSize - val context = TaskContext.get() - context.addTaskCompletionListener[Unit](_ => builtTable.close()) - - doJoin(builtTable, streamIter, boundCondition, - numOutputRows, joinOutputRows, numOutputBatches, - streamTime, joinTime, filterTime, totalTime) - } - } - } -} + isSkewJoin = isSkewJoin) diff --git a/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuShuffledHashJoinExec.scala b/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuShuffledHashJoinExec.scala index 21f76b2f29f..ebf511cce29 100644 --- a/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuShuffledHashJoinExec.scala +++ b/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuShuffledHashJoinExec.scala @@ -18,17 +18,12 @@ package 
com.nvidia.spark.rapids.shims.spark301db import com.nvidia.spark.rapids._ -import org.apache.spark.TaskContext -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.physical.{Distribution, HashClusteredDistribution} -import org.apache.spark.sql.execution.{BinaryExecNode, SparkPlan} +import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec import org.apache.spark.sql.rapids.execution.GpuHashJoin -import org.apache.spark.sql.vectorized.ColumnarBatch object GpuJoinUtils { def getGpuBuildSide(buildSide: BuildSide): GpuBuildSide = { @@ -80,74 +75,10 @@ case class GpuShuffledHashJoinExec( buildSide: GpuBuildSide, condition: Option[Expression], left: SparkPlan, - right: SparkPlan) extends BinaryExecNode with GpuHashJoin { - import GpuMetric._ - - override val outputRowsLevel: MetricsLevel = ESSENTIAL_LEVEL - override val outputBatchesLevel: MetricsLevel = MODERATE_LEVEL - override lazy val additionalMetrics: Map[String, GpuMetric] = Map( - BUILD_DATA_SIZE -> createSizeMetric(ESSENTIAL_LEVEL, DESCRIPTION_BUILD_DATA_SIZE), - BUILD_TIME -> createNanoTimingMetric(ESSENTIAL_LEVEL, DESCRIPTION_BUILD_TIME), - STREAM_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_STREAM_TIME), - JOIN_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_JOIN_TIME), - JOIN_OUTPUT_ROWS -> createMetric(MODERATE_LEVEL, DESCRIPTION_JOIN_OUTPUT_ROWS), - FILTER_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_FILTER_TIME)) - - override def requiredChildDistribution: Seq[Distribution] = - HashClusteredDistribution(leftKeys) :: HashClusteredDistribution(rightKeys) :: Nil - - override protected def doExecute(): RDD[InternalRow] = { - throw new UnsupportedOperationException( - "GpuShuffledHashJoin does not support the execute() code path.") - } - - override def childrenCoalesceGoal: Seq[CoalesceGoal] = buildSide match { - case GpuBuildLeft => Seq(RequireSingleBatch, null) - case GpuBuildRight => Seq(null, RequireSingleBatch) - } - - override def doExecuteColumnar() : RDD[ColumnarBatch] = { - val buildDataSize = gpuLongMetric(BUILD_DATA_SIZE) - val numOutputRows = gpuLongMetric(NUM_OUTPUT_ROWS) - val numOutputBatches = gpuLongMetric(NUM_OUTPUT_BATCHES) - val totalTime = gpuLongMetric(TOTAL_TIME) - val buildTime = gpuLongMetric(BUILD_TIME) - val streamTime = gpuLongMetric(STREAM_TIME) - val joinTime = gpuLongMetric(JOIN_TIME) - val filterTime = gpuLongMetric(FILTER_TIME) - val joinOutputRows = gpuLongMetric(JOIN_OUTPUT_ROWS) - - val boundCondition = condition.map(GpuBindReferences.bindReference(_, output)) - - streamedPlan.executeColumnar().zipPartitions(buildPlan.executeColumnar()) { - (streamIter, buildIter) => { - var combinedSize = 0 - - val startTime = System.nanoTime() - val builtTable = withResource(ConcatAndConsumeAll.getSingleBatchWithVerification( - buildIter, localBuildOutput)) { buildBatch: ColumnarBatch => - withResource(GpuProjectExec.project(buildBatch, gpuBuildKeys)) { keys => - val combined = GpuHashJoin.incRefCount(combine(keys, buildBatch)) - withResource(combined) { combined => - combinedSize = - GpuColumnVector.extractColumns(combined) - .map(_.getBase.getDeviceMemorySize).sum.toInt - GpuColumnVector.from(combined) - } - } - } - - val delta = System.nanoTime() - 
startTime - buildTime += delta - totalTime += delta - buildDataSize += combinedSize - val context = TaskContext.get() - context.addTaskCompletionListener[Unit](_ => builtTable.close()) - - doJoin(builtTable, streamIter, boundCondition, - numOutputRows, joinOutputRows, numOutputBatches, - streamTime, joinTime, filterTime, totalTime) - } - } - } -} + right: SparkPlan) + extends GpuShuffledHashJoinBase( + leftKeys, + rightKeys, + buildSide, + condition, + isSkewJoin = false) diff --git a/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/GpuShuffledHashJoinExec.scala b/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/GpuShuffledHashJoinExec.scala index 30c50804c86..fa2b3439681 100644 --- a/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/GpuShuffledHashJoinExec.scala +++ b/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/GpuShuffledHashJoinExec.scala @@ -18,17 +18,12 @@ package com.nvidia.spark.rapids.shims.spark311 import com.nvidia.spark.rapids._ -import org.apache.spark.TaskContext -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Expression, SortOrder} +import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.physical.{Distribution, HashClusteredDistribution} import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec -import org.apache.spark.sql.rapids.execution.{GpuHashJoin, GpuShuffledHashJoinBase} -import org.apache.spark.sql.vectorized.ColumnarBatch +import org.apache.spark.sql.rapids.execution.GpuHashJoin object GpuJoinUtils { def getGpuBuildSide(buildSide: BuildSide): GpuBuildSide = { @@ -86,79 +81,6 @@ case class GpuShuffledHashJoinExec( extends GpuShuffledHashJoinBase( leftKeys, rightKeys, - joinType, + buildSide, condition, - left, - right, - isSkewJoin) - with GpuHashJoin { - import GpuMetric._ - - override val outputRowsLevel: MetricsLevel = ESSENTIAL_LEVEL - override val outputBatchesLevel: MetricsLevel = MODERATE_LEVEL - override lazy val additionalMetrics: Map[String, GpuMetric] = Map( - BUILD_DATA_SIZE -> createSizeMetric(ESSENTIAL_LEVEL, DESCRIPTION_BUILD_DATA_SIZE), - BUILD_TIME -> createNanoTimingMetric(ESSENTIAL_LEVEL, DESCRIPTION_BUILD_TIME), - STREAM_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_STREAM_TIME), - JOIN_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_JOIN_TIME), - JOIN_OUTPUT_ROWS -> createMetric(MODERATE_LEVEL, DESCRIPTION_JOIN_OUTPUT_ROWS), - FILTER_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_FILTER_TIME)) - - override def requiredChildDistribution: Seq[Distribution] = - HashClusteredDistribution(leftKeys) :: HashClusteredDistribution(rightKeys) :: Nil - - override protected def doExecute(): RDD[InternalRow] = { - throw new UnsupportedOperationException( - "GpuShuffledHashJoin does not support the execute() code path.") - } - - override def childrenCoalesceGoal: Seq[CoalesceGoal] = buildSide match { - case GpuBuildLeft => Seq(RequireSingleBatch, null) - case GpuBuildRight => Seq(null, RequireSingleBatch) - } - - override def doExecuteColumnar() : RDD[ColumnarBatch] = { - val buildDataSize = gpuLongMetric(BUILD_DATA_SIZE) - val numOutputRows = gpuLongMetric(NUM_OUTPUT_ROWS) - val numOutputBatches = 
gpuLongMetric(NUM_OUTPUT_BATCHES) - val totalTime = gpuLongMetric(TOTAL_TIME) - val buildTime = gpuLongMetric(BUILD_TIME) - val streamTime = gpuLongMetric(STREAM_TIME) - val joinTime = gpuLongMetric(JOIN_TIME) - val filterTime = gpuLongMetric(FILTER_TIME) - val joinOutputRows = gpuLongMetric(JOIN_OUTPUT_ROWS) - - val boundCondition = condition.map(GpuBindReferences.bindReference(_, output)) - - streamedPlan.executeColumnar().zipPartitions(buildPlan.executeColumnar()) { - (streamIter, buildIter) => { - var combinedSize = 0 - - val startTime = System.nanoTime() - val builtTable = withResource(ConcatAndConsumeAll.getSingleBatchWithVerification( - buildIter, localBuildOutput)) { buildBatch: ColumnarBatch => - withResource(GpuProjectExec.project(buildBatch, gpuBuildKeys)) { keys => - val combined = GpuHashJoin.incRefCount(combine(keys, buildBatch)) - withResource(combined) { combined => - combinedSize = - GpuColumnVector.extractColumns(combined) - .map(_.getBase.getDeviceMemorySize).sum.toInt - GpuColumnVector.from(combined) - } - } - } - - val delta = System.nanoTime() - startTime - buildTime += delta - totalTime += delta - buildDataSize += combinedSize - val context = TaskContext.get() - context.addTaskCompletionListener[Unit](_ => builtTable.close()) - - doJoin(builtTable, streamIter, boundCondition, - numOutputRows, joinOutputRows, numOutputBatches, - streamTime, joinTime, filterTime, totalTime) - } - } - } -} + isSkewJoin = isSkewJoin) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinBase.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinBase.scala new file mode 100644 index 00000000000..efa0f84d316 --- /dev/null +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuShuffledHashJoinBase.scala @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids + +import org.apache.spark.TaskContext +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.physical.{Distribution, HashClusteredDistribution} +import org.apache.spark.sql.execution.BinaryExecNode +import org.apache.spark.sql.rapids.execution.GpuHashJoin +import org.apache.spark.sql.vectorized.ColumnarBatch + +abstract class GpuShuffledHashJoinBase( + leftKeys: Seq[Expression], + rightKeys: Seq[Expression], + buildSide: GpuBuildSide, + condition: Option[Expression], + val isSkewJoin: Boolean) extends BinaryExecNode with GpuHashJoin { + import GpuMetric._ + + override val outputRowsLevel: MetricsLevel = ESSENTIAL_LEVEL + override val outputBatchesLevel: MetricsLevel = MODERATE_LEVEL + override lazy val additionalMetrics: Map[String, GpuMetric] = Map( + BUILD_DATA_SIZE -> createSizeMetric(ESSENTIAL_LEVEL, DESCRIPTION_BUILD_DATA_SIZE), + BUILD_TIME -> createNanoTimingMetric(ESSENTIAL_LEVEL, DESCRIPTION_BUILD_TIME), + STREAM_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_STREAM_TIME), + JOIN_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_JOIN_TIME), + JOIN_OUTPUT_ROWS -> createMetric(MODERATE_LEVEL, DESCRIPTION_JOIN_OUTPUT_ROWS), + FILTER_TIME -> createNanoTimingMetric(MODERATE_LEVEL, DESCRIPTION_FILTER_TIME)) + + override def requiredChildDistribution: Seq[Distribution] = + HashClusteredDistribution(leftKeys) :: HashClusteredDistribution(rightKeys) :: Nil + + override protected def doExecute(): RDD[InternalRow] = { + throw new UnsupportedOperationException( + "GpuShuffledHashJoin does not support the execute() code path.") + } + + override def childrenCoalesceGoal: Seq[CoalesceGoal] = buildSide match { + case GpuBuildLeft => Seq(RequireSingleBatch, null) + case GpuBuildRight => Seq(null, RequireSingleBatch) + } + + override def doExecuteColumnar() : RDD[ColumnarBatch] = { + val buildDataSize = gpuLongMetric(BUILD_DATA_SIZE) + val numOutputRows = gpuLongMetric(NUM_OUTPUT_ROWS) + val numOutputBatches = gpuLongMetric(NUM_OUTPUT_BATCHES) + val totalTime = gpuLongMetric(TOTAL_TIME) + val buildTime = gpuLongMetric(BUILD_TIME) + val streamTime = gpuLongMetric(STREAM_TIME) + val joinTime = gpuLongMetric(JOIN_TIME) + val filterTime = gpuLongMetric(FILTER_TIME) + val joinOutputRows = gpuLongMetric(JOIN_OUTPUT_ROWS) + + val boundCondition = condition.map(GpuBindReferences.bindReference(_, output)) + + streamedPlan.executeColumnar().zipPartitions(buildPlan.executeColumnar()) { + (streamIter, buildIter) => { + var combinedSize = 0 + + val startTime = System.nanoTime() + val builtTable = withResource(ConcatAndConsumeAll.getSingleBatchWithVerification( + buildIter, localBuildOutput)) { buildBatch: ColumnarBatch => + withResource(GpuProjectExec.project(buildBatch, gpuBuildKeys)) { keys => + val combined = GpuHashJoin.incRefCount(combine(keys, buildBatch)) + withResource(combined) { combined => + combinedSize = + GpuColumnVector.extractColumns(combined) + .map(_.getBase.getDeviceMemorySize).sum.toInt + GpuColumnVector.from(combined) + } + } + } + + val delta = System.nanoTime() - startTime + buildTime += delta + totalTime += delta + buildDataSize += combinedSize + val context = TaskContext.get() + context.addTaskCompletionListener[Unit](_ => builtTable.close()) + + doJoin(builtTable, streamIter, boundCondition, + numOutputRows, joinOutputRows, numOutputBatches, + streamTime, joinTime, filterTime, totalTime) + } + 
} + } + + override def nodeName: String = { + if (isSkewJoin) super.nodeName + "(skew=true)" else super.nodeName + } +} diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleHashJoinExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleHashJoinExec.scala deleted file mode 100644 index 16e5f7a6d55..00000000000 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/GpuShuffleHashJoinExec.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.rapids.execution - -import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.execution.{BinaryExecNode, SparkPlan} - -abstract class GpuShuffledHashJoinBase( - leftKeys: Seq[Expression], - rightKeys: Seq[Expression], - joinType: JoinType, - condition: Option[Expression], - left: SparkPlan, - right: SparkPlan, - val isSkewJoin: Boolean) extends BinaryExecNode { - - override def nodeName: String = { - if (isSkewJoin) super.nodeName + "(skew=true)" else super.nodeName - } -} \ No newline at end of file diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala index 5de8e891450..dd78bbef58f 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec} import org.apache.spark.sql.execution.joins.SortMergeJoinExec import org.apache.spark.sql.functions.{col, when} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.rapids.execution.{GpuCustomShuffleReaderExec, GpuShuffledHashJoinBase} +import org.apache.spark.sql.rapids.execution.GpuCustomShuffleReaderExec import org.apache.spark.sql.types.{ArrayType, DecimalType, IntegerType, StructField, StructType} object AdaptiveQueryExecSuite { From 19d1f0504993b38e3691fbaa506c86fe02b21512 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 3 Mar 2021 08:04:41 -0600 Subject: [PATCH 17/28] Add shim for Spark 3.0.3 (#1834) * Add shim for Spark 3.0.3 Signed-off-by: Jason Lowe * Add premerge testing for Spark 3.0.2 and Spark 3.0.3 --- .../rapids-shuffle.md | 1 + jenkins/Jenkinsfile-blossom.premerge | 2 +- jenkins/spark-nightly-build.sh | 1 + jenkins/spark-premerge-build.sh | 2 + pom.xml | 7 ++ shims/aggregator/pom.xml | 6 ++ shims/pom.xml | 1 + shims/spark303/pom.xml | 96 +++++++++++++++++++ ...idia.spark.rapids.SparkShimServiceProvider | 1 + .../rapids/shims/spark303/Spark303Shims.scala | 30 ++++++ .../spark303/SparkShimServiceProvider.scala | 34 +++++++ .../spark303/RapidsShuffleManager.scala | 26 +++++ tests/README.md | 1 + 13 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 shims/spark303/pom.xml 
create mode 100644 shims/spark303/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider create mode 100644 shims/spark303/src/main/scala/com/nvidia/spark/rapids/shims/spark303/Spark303Shims.scala create mode 100644 shims/spark303/src/main/scala/com/nvidia/spark/rapids/shims/spark303/SparkShimServiceProvider.scala create mode 100644 shims/spark303/src/main/scala/com/nvidia/spark/rapids/spark303/RapidsShuffleManager.scala diff --git a/docs/additional-functionality/rapids-shuffle.md b/docs/additional-functionality/rapids-shuffle.md index 85cf8bc02e2..45e3f1734fb 100644 --- a/docs/additional-functionality/rapids-shuffle.md +++ b/docs/additional-functionality/rapids-shuffle.md @@ -257,6 +257,7 @@ In this section, we are using a docker container built using the sample dockerfi | 3.0.1 | com.nvidia.spark.rapids.spark301.RapidsShuffleManager | | 3.0.1 EMR | com.nvidia.spark.rapids.spark301emr.RapidsShuffleManager | | 3.0.2 | com.nvidia.spark.rapids.spark302.RapidsShuffleManager | + | 3.0.3 | com.nvidia.spark.rapids.spark303.RapidsShuffleManager | | 3.1.1 | com.nvidia.spark.rapids.spark311.RapidsShuffleManager | | 3.2.0 | com.nvidia.spark.rapids.spark320.RapidsShuffleManager | diff --git a/jenkins/Jenkinsfile-blossom.premerge b/jenkins/Jenkinsfile-blossom.premerge index 15cf6fdddc4..306e7b5783e 100644 --- a/jenkins/Jenkinsfile-blossom.premerge +++ b/jenkins/Jenkinsfile-blossom.premerge @@ -161,7 +161,7 @@ pipeline { step([$class : 'JacocoPublisher', execPattern : '**/target/jacoco.exec', classPattern : 'target/jacoco_classes/', - sourcePattern : 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,sql-plugin/src/main/java/,sql-plugin/src/main/scala/,shims/spark311/src/main/scala/,shims/spark300/src/main/scala/,shims/spark301db/src/main/scala/,shims/spark301/src/main/scala/,shims/spark302/src/main/scala/', + sourcePattern : 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,sql-plugin/src/main/java/,sql-plugin/src/main/scala/,shims/spark311/src/main/scala/,shims/spark300/src/main/scala/,shims/spark301db/src/main/scala/,shims/spark301/src/main/scala/,shims/spark302/src/main/scala/,shims/spark303/src/main/scala/', sourceInclusionPattern: '**/*.java,**/*.scala' ]) } diff --git a/jenkins/spark-nightly-build.sh b/jenkins/spark-nightly-build.sh index afd3b0976bf..b0c5029fae9 100755 --- a/jenkins/spark-nightly-build.sh +++ b/jenkins/spark-nightly-build.sh @@ -25,6 +25,7 @@ mvn -U -B -Pinclude-databricks,snapshot-shims clean deploy $MVN_URM_MIRROR -Dmav # Run unit tests against other spark versions mvn -U -B -Pspark301tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR mvn -U -B -Pspark302tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR +mvn -U -B -Pspark303tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR mvn -U -B -Pspark311tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR # Parse cudf and spark files from local mvn repo diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh index 9e66b97c770..5cf4e39df67 100755 --- a/jenkins/spark-premerge-build.sh +++ b/jenkins/spark-premerge-build.sh @@ -40,6 +40,8 @@ tar zxf $SPARK_HOME.tgz -C $ARTF_ROOT && \ mvn -U -B $MVN_URM_MIRROR '-P!snapshot-shims,pre-merge' clean verify -Dpytest.TEST_TAGS='' -Dpytest.TEST_TYPE="pre-commit" -Dpytest.TEST_PARALLEL=4 # Run the unit tests for other Spark versions but dont run full python integration tests env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark301tests,snapshot-shims test 
-Dpytest.TEST_TAGS='' +env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark302tests,snapshot-shims test -Dpytest.TEST_TAGS='' +env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark303tests,snapshot-shims test -Dpytest.TEST_TAGS='' env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark311tests,snapshot-shims test -Dpytest.TEST_TAGS='' # The jacoco coverage should have been collected, but because of how the shade plugin diff --git a/pom.xml b/pom.xml index caa35293b89..8251ca4704b 100644 --- a/pom.xml +++ b/pom.xml @@ -150,6 +150,12 @@ ${spark302.version} + + spark303tests + + ${spark303.version} + + spark311tests @@ -205,6 +211,7 @@ 3.0.1 3.0.1-databricks 3.0.2 + 3.0.3-SNAPSHOT 3.1.1 3.2.0-SNAPSHOT 3.6.0 diff --git a/shims/aggregator/pom.xml b/shims/aggregator/pom.xml index e05bdae7f91..2f06dc2d2c2 100644 --- a/shims/aggregator/pom.xml +++ b/shims/aggregator/pom.xml @@ -68,6 +68,12 @@ ${project.version} compile + + com.nvidia + rapids-4-spark-shims-spark303_${scala.binary.version} + ${project.version} + compile + diff --git a/shims/pom.xml b/shims/pom.xml index 7d0490f8406..84669d2bbda 100644 --- a/shims/pom.xml +++ b/shims/pom.xml @@ -45,6 +45,7 @@ true + spark303 spark320 diff --git a/shims/spark303/pom.xml b/shims/spark303/pom.xml new file mode 100644 index 00000000000..e264e0cc953 --- /dev/null +++ b/shims/spark303/pom.xml @@ -0,0 +1,96 @@ + + + + 4.0.0 + + + com.nvidia + rapids-4-spark-shims_2.12 + 0.5.0-SNAPSHOT + ../pom.xml + + com.nvidia + rapids-4-spark-shims-spark303_2.12 + RAPIDS Accelerator for Apache Spark SQL Plugin Spark 3.0.3 Shim + The RAPIDS SQL plugin for Apache Spark 3.0.3 Shim + 0.5.0-SNAPSHOT + + + + + + + maven-antrun-plugin + + + dependency + generate-resources + + + + + + + + + + + + + run + + + + + + org.scalastyle + scalastyle-maven-plugin + + + + + + + ${project.build.directory}/extra-resources + + + src/main/resources + + + + + + + com.nvidia + rapids-4-spark-shims-spark301_${scala.binary.version} + ${project.version} + + + com.nvidia + rapids-4-spark-shims-spark302_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark303.version} + provided + + + diff --git a/shims/spark303/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider b/shims/spark303/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider new file mode 100644 index 00000000000..a7e60bf9239 --- /dev/null +++ b/shims/spark303/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider @@ -0,0 +1 @@ +com.nvidia.spark.rapids.shims.spark303.SparkShimServiceProvider diff --git a/shims/spark303/src/main/scala/com/nvidia/spark/rapids/shims/spark303/Spark303Shims.scala b/shims/spark303/src/main/scala/com/nvidia/spark/rapids/shims/spark303/Spark303Shims.scala new file mode 100644 index 00000000000..1b9ca917d64 --- /dev/null +++ b/shims/spark303/src/main/scala/com/nvidia/spark/rapids/shims/spark303/Spark303Shims.scala @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.shims.spark303 + +import com.nvidia.spark.rapids.ShimVersion +import com.nvidia.spark.rapids.shims.spark302.Spark302Shims +import com.nvidia.spark.rapids.spark303.RapidsShuffleManager + +class Spark303Shims extends Spark302Shims { + + override def getSparkShimVersion: ShimVersion = SparkShimServiceProvider.VERSION + + override def getRapidsShuffleManagerClass: String = { + classOf[RapidsShuffleManager].getCanonicalName + } +} diff --git a/shims/spark303/src/main/scala/com/nvidia/spark/rapids/shims/spark303/SparkShimServiceProvider.scala b/shims/spark303/src/main/scala/com/nvidia/spark/rapids/shims/spark303/SparkShimServiceProvider.scala new file mode 100644 index 00000000000..dfd86328572 --- /dev/null +++ b/shims/spark303/src/main/scala/com/nvidia/spark/rapids/shims/spark303/SparkShimServiceProvider.scala @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.shims.spark303 + +import com.nvidia.spark.rapids.{SparkShims, SparkShimVersion} + +object SparkShimServiceProvider { + val VERSION = SparkShimVersion(3, 0, 3) + val VERSIONNAMES = Seq(s"$VERSION", s"$VERSION-SNAPSHOT") +} +class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider { + + def matchesVersion(version: String): Boolean = { + SparkShimServiceProvider.VERSIONNAMES.contains(version) + } + + def buildShim: SparkShims = { + new Spark303Shims() + } +} diff --git a/shims/spark303/src/main/scala/com/nvidia/spark/rapids/spark303/RapidsShuffleManager.scala b/shims/spark303/src/main/scala/com/nvidia/spark/rapids/spark303/RapidsShuffleManager.scala new file mode 100644 index 00000000000..9f253c1babe --- /dev/null +++ b/shims/spark303/src/main/scala/com/nvidia/spark/rapids/spark303/RapidsShuffleManager.scala @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.spark303 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.rapids.shims.spark301.RapidsShuffleInternalManager + +/** A shuffle manager optimized for the RAPIDS Plugin for Apache Spark. 
*/ +sealed class RapidsShuffleManager( + conf: SparkConf, + isDriver: Boolean) extends RapidsShuffleInternalManager(conf, isDriver) { +} diff --git a/tests/README.md b/tests/README.md index 178449145d9..f86a120d0fd 100644 --- a/tests/README.md +++ b/tests/README.md @@ -30,6 +30,7 @@ default version runs against Spark 3.0.0, to run against other versions use one profiles: - `-Pspark301tests` (spark 3.0.1) - `-Pspark302tests` (spark 3.0.2) + - `-Pspark303tests` (spark 3.0.3) - `-Pspark311tests` (spark 3.1.1) Please refer to the [tests project POM](pom.xml) to see the list of test profiles supported. From 32213faee35a023bd3fdb91fe14a42e4d62adbfa Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 3 Mar 2021 07:33:37 -0700 Subject: [PATCH 18/28] Cost-based optimizer (#1616) --- .../spark/rapids/CostBasedOptimizer.scala | 271 +++++++++++++ .../nvidia/spark/rapids/GpuOverrides.scala | 47 ++- .../com/nvidia/spark/rapids/RapidsConf.scala | 72 +++- .../com/nvidia/spark/rapids/RapidsMeta.scala | 29 +- .../rapids/CostBasedOptimizerSuite.scala | 383 ++++++++++++++++++ .../rapids/SparkQueryCompareTestSuite.scala | 13 + 6 files changed, 811 insertions(+), 4 deletions(-) create mode 100644 sql-plugin/src/main/scala/com/nvidia/spark/rapids/CostBasedOptimizer.scala create mode 100644 tests/src/test/scala/com/nvidia/spark/rapids/CostBasedOptimizerSuite.scala diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/CostBasedOptimizer.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/CostBasedOptimizer.scala new file mode 100644 index 00000000000..ff3a3fc6b34 --- /dev/null +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/CostBasedOptimizer.scala @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids + +import scala.collection.mutable.ListBuffer + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression} +import org.apache.spark.sql.execution.{ProjectExec, SparkPlan} +import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.internal.SQLConf + +class CostBasedOptimizer(conf: RapidsConf) extends Logging { + + // the intention is to make the cost model pluggable since we are probably going to need to + // experiment a fair bit with this part + private val costModel = new DefaultCostModel(conf) + + /** + * Walk the plan and determine CPU and GPU costs for each operator and then make decisions + * about whether operators should run on CPU or GPU. 
+ * + * @param plan The plan to optimize + * @return A list of optimizations that were applied + */ + def optimize(plan: SparkPlanMeta[SparkPlan]): Seq[Optimization] = { + val optimizations = new ListBuffer[Optimization]() + recursivelyOptimize(plan, optimizations, finalOperator = true, "") + optimizations + } + + private def recursivelyOptimize( + plan: SparkPlanMeta[SparkPlan], + optimizations: ListBuffer[Optimization], + finalOperator: Boolean, + indent: String = ""): (Double, Double) = { + + // get the CPU and GPU cost of the child plan(s) + val childCosts = plan.childPlans + .map(child => recursivelyOptimize( + child.asInstanceOf[SparkPlanMeta[SparkPlan]], + optimizations, + finalOperator = false, + indent + " ")) + + val (childCpuCosts, childGpuCosts) = childCosts.unzip + + // get the CPU and GPU cost of this operator + val (operatorCpuCost, operatorGpuCost) = costModel.applyCost(plan) + + // calculate total (this operator + children) + val totalCpuCost = operatorCpuCost + childCpuCosts.sum + var totalGpuCost = operatorGpuCost + childGpuCosts.sum + + // determine how many transitions between CPU and GPU are taking place between + // the child operators and this operator + val numTransitions = plan.childPlans + .count(_.canThisBeReplaced != plan.canThisBeReplaced) + + if (numTransitions > 0) { + if (plan.canThisBeReplaced) { + // at least one child is transitioning from CPU to GPU + val transitionCost = plan.childPlans.filter(!_.canThisBeReplaced) + .map(costModel.transitionToGpuCost).sum + val gpuCost = operatorGpuCost + transitionCost + if (gpuCost > operatorCpuCost) { + optimizations.append(AvoidTransition(plan)) + plan.costPreventsRunningOnGpu() + // stay on CPU, so costs are same + totalGpuCost = totalCpuCost; + } else { + totalGpuCost += transitionCost + } + } else { + // at least one child is transitioning from GPU to CPU + plan.childPlans.zip(childCosts).foreach { + case (child, childCosts) => + val (childCpuCost, childGpuCost) = childCosts + val transitionCost = costModel.transitionToCpuCost(child) + val childGpuTotal = childGpuCost + transitionCost + if (child.canThisBeReplaced && childGpuTotal > childCpuCost) { + optimizations.append(ReplaceSection( + child.asInstanceOf[SparkPlanMeta[SparkPlan]], totalCpuCost, totalGpuCost)) + child.recursiveCostPreventsRunningOnGpu() + } + } + + // recalculate the transition costs because child plans may have changed + val transitionCost = plan.childPlans + .filter(_.canThisBeReplaced) + .map(costModel.transitionToCpuCost).sum + totalGpuCost += transitionCost + } + } + + // special behavior if this is the final operator in the plan + if (finalOperator && plan.canThisBeReplaced) { + totalGpuCost += costModel.transitionToCpuCost(plan) + } + + if (totalGpuCost > totalCpuCost) { + // we have reached a point where we have transitioned onto GPU for part of this + // plan but with no benefit from doing so, so we want to undo this and go back to CPU + if (plan.canThisBeReplaced) { + // this plan would have been on GPU so we move it and onto CPU and recurse down + // until we reach a part of the plan that is already on CPU and then stop + optimizations.append(ReplaceSection(plan, totalCpuCost, totalGpuCost)) + plan.recursiveCostPreventsRunningOnGpu() + } + + // reset the costs because this section of the plan was not moved to GPU + totalGpuCost = totalCpuCost + } + + if (!plan.canThisBeReplaced) { + // reset the costs because this section of the plan was not moved to GPU + totalGpuCost = totalCpuCost + } + + (totalCpuCost, totalGpuCost) + } + +} + 
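(Illustrative note, not part of the patch.) The decision rule implemented by recursivelyOptimize above can be summarized as: keep a section of the plan on the GPU only when its GPU operator cost plus any CPU/GPU transition costs does not exceed the plain CPU cost, and charge one extra transition when the final operator would have to hand results back to the CPU. The following self-contained Scala sketch mirrors that rule with made-up plan nodes and the default cost constants introduced by this patch; PlanNode, cost and the hard-coded numbers are assumptions for the example and are not plugin APIs.

// Sketch only: a simplified version of the cost comparison above, using
// assumed types and the default costs (1.0 CPU, 0.8 GPU, 0.15 transition).
object CostModelSketch {
  case class PlanNode(name: String, gpuSupported: Boolean, children: Seq[PlanNode] = Nil)

  private val cpuOpCost = 1.0       // constant CPU cost, as in DefaultCostModel
  private val gpuOpCost = 0.8       // default relative GPU operator cost
  private val transitionCost = 0.15 // default CPU<->GPU transition cost

  /** Returns (cpuCost, gpuCost, runsOnGpu) for the subtree rooted at `node`. */
  def cost(node: PlanNode, finalOperator: Boolean = false): (Double, Double, Boolean) = {
    val childCosts = node.children.map(c => cost(c))
    val totalCpu = cpuOpCost + childCosts.map(_._1).sum
    // pay a transition for every child that runs on the other processor
    val transitions = childCosts.count(_._3 != node.gpuSupported) * transitionCost
    var totalGpu = (if (node.gpuSupported) gpuOpCost else cpuOpCost) +
      childCosts.map(_._2).sum + transitions
    // the last operator in a plan always hands its results back to the CPU
    if (finalOperator && node.gpuSupported) totalGpu += transitionCost
    val runsOnGpu = node.gpuSupported && totalGpu <= totalCpu
    if (runsOnGpu) (totalCpu, totalGpu, true) else (totalCpu, totalCpu, false)
  }

  def main(args: Array[String]): Unit = {
    // a trivial GPU projection over a CPU-only child is not worth two transitions
    val plan = PlanNode("Project", gpuSupported = true,
      Seq(PlanNode("Filter", gpuSupported = false)))
    println(cost(plan, finalOperator = true)) // (2.0, 2.0, false) -> stays on CPU
  }
}

In this sketch the GPU path for the root costs 0.8 + 1.0 + 0.15 + 0.15 = 2.10 against a CPU cost of 2.0, so the section is kept on CPU, which is the same outcome the "Avoid move to GPU for trivial projection" tests below expect from the real optimizer.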
+/** + * The cost model is behind a trait so that we can consider making this pluggable in the future + * so that users can override the cost model to suit specific use cases. + */ +trait CostModel { + + /** + * Determine the CPU and GPU cost for an individual operator. + * @param plan Operator + * @return (cpuCost, gpuCost) + */ + def applyCost(plan: SparkPlanMeta[_]): (Double, Double) + + /** + * Determine the cost of transitioning data from CPU to GPU for a specific operator + * @param plan Operator + * @return Cost + */ + def transitionToGpuCost(plan: SparkPlanMeta[_]): Double + + /** + * Determine the cost of transitioning data from GPU to CPU for a specific operator + */ + def transitionToCpuCost(plan: SparkPlanMeta[_]): Double +} + +class DefaultCostModel(conf: RapidsConf) extends CostModel { + + def transitionToGpuCost(plan: SparkPlanMeta[_]) = { + // this is a placeholder for now - we would want to try and calculate the transition cost + // based on the data types and size (if known) + conf.defaultTransitionToGpuCost + } + + def transitionToCpuCost(plan: SparkPlanMeta[_]) = { + // this is a placeholder for now - we would want to try and calculate the transition cost + // based on the data types and size (if known) + conf.defaultTransitionToCpuCost + } + + override def applyCost(plan: SparkPlanMeta[_]): (Double, Double) = { + + // for now we have a constant cost for CPU operations and we make the GPU cost relative + // to this but later we may want to calculate actual CPU costs + val cpuCost = 1.0 + + // always check for user overrides first + val gpuCost = plan.conf.getOperatorCost(plan.wrapped.getClass.getSimpleName).getOrElse { + plan.wrapped match { + case _: ProjectExec => + // the cost of a projection is the average cost of its expressions + plan.childExprs + .map(expr => exprCost(expr.asInstanceOf[BaseExprMeta[Expression]])) + .sum / plan.childExprs.length + + case _: ShuffleExchangeExec => + // setting the GPU cost of ShuffleExchangeExec to 1.0 avoids moving from CPU to GPU for + // a shuffle. 
This must happen before the join consistency or we risk running into issues + // with disabling one exchange that would make a join inconsistent + 1.0 + + case _ => conf.defaultOperatorCost + } + } + + plan.cpuCost = cpuCost + plan.gpuCost = gpuCost + + (cpuCost, gpuCost) + } + + private def exprCost[INPUT <: Expression](expr: BaseExprMeta[INPUT]): Double = { + // always check for user overrides first + expr.conf.getExpressionCost(expr.getClass.getSimpleName).getOrElse { + expr match { + case cast: CastExprMeta[_] => + // different CAST operations have different costs, so we allow these to be configured + // based on the data types involved + expr.conf.getExpressionCost(s"Cast${cast.fromType}To${cast.toType}") + .getOrElse(conf.defaultExpressionCost) + case _ => + // many of our BaseExprMeta implementations are anonymous classes so we look directly at + // the wrapped expressions in some cases + expr.wrapped match { + case _: AttributeReference => 1.0 // no benefit on GPU + case Alias(_: AttributeReference, _) => 1.0 // no benefit on GPU + case _ => conf.defaultExpressionCost + } + } + } + } + +} + +sealed abstract class Optimization + +case class AvoidTransition[INPUT <: SparkPlan](plan: SparkPlanMeta[INPUT]) extends Optimization { + override def toString: String = s"It is not worth moving to GPU for operator: " + + s"${Explain.format(plan)}" +} + +case class ReplaceSection[INPUT <: SparkPlan]( + plan: SparkPlanMeta[INPUT], + totalCpuCost: Double, + totalGpuCost: Double) extends Optimization { + override def toString: String = s"It is not worth keeping this section on GPU; " + + s"gpuCost=$totalGpuCost, cpuCost=$totalCpuCost:\n${Explain.format(plan)}" +} + +object Explain { + + def format(plan: SparkPlanMeta[_]): String = { + plan.wrapped match { + case p: SparkPlan => p.simpleString(SQLConf.get.maxToStringFields) + case other => other.toString + } + } + + def formatTree(plan: SparkPlanMeta[_]): String = { + val b = new StringBuilder + formatTree(plan, b, "") + b.toString + } + + def formatTree(plan: SparkPlanMeta[_], b: StringBuilder, indent: String): Unit = { + b.append(indent) + b.append(format(plan)) + b.append('\n') + plan.childPlans.filter(_.canThisBeReplaced) + .foreach(child => formatTree(child, b, indent + " ")) + } + +} \ No newline at end of file diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 3ae3b84d8cc..4ee26765271 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -18,6 +18,7 @@ package com.nvidia.spark.rapids import java.time.ZoneId +import scala.collection.mutable.ListBuffer import scala.reflect.ClassTag import ai.rapids.cudf.DType @@ -397,6 +398,16 @@ final class CreateDataSourceTableAsSelectCommandMeta( } } +/** + * Listener trait so that tests can confirm that the expected optimizations are being applied + */ +trait GpuOverridesListener { + def optimizedPlan( + plan: SparkPlanMeta[SparkPlan], + sparkPlan: SparkPlan, + costOptimizations: Seq[Optimization]) +} + object GpuOverrides { val FLOAT_DIFFERS_GROUP_INCOMPAT = "when enabling these, there may be extra groups produced for floating point grouping " + @@ -412,6 +423,21 @@ object GpuOverrides { "\\S", "\\v", "\\V", "\\w", "\\w", "\\p", "$", "\\b", "\\B", "\\A", "\\G", "\\Z", "\\z", "\\R", "?", "|", "(", ")", "{", "}", "\\k", "\\Q", "\\E", ":", "!", "<=", ">") + // this listener mechanism is global and 
is intended for use by unit tests only + private val listeners: ListBuffer[GpuOverridesListener] = new ListBuffer[GpuOverridesListener]() + + def addListener(listener: GpuOverridesListener): Unit = { + listeners += listener + } + + def removeListener(listener: GpuOverridesListener): Unit = { + listeners -= listener + } + + def removeAllListeners(): Unit = { + listeners.clear() + } + def canRegexpBeTreatedLikeARegularString(strLit: UTF8String): Boolean = { val s = strLit.toString !regexList.exists(pattern => s.contains(pattern)) @@ -2746,7 +2772,10 @@ case class GpuQueryStagePrepOverrides() extends Rule[SparkPlan] with Logging { } case class GpuOverrides() extends Rule[SparkPlan] with Logging { - override def apply(plan: SparkPlan): SparkPlan = { + + // Spark calls this method once for the whole plan when AQE is off. When AQE is on, it + // gets called once for each query stage (where a query stage is an `Exchange`). + override def apply(plan: SparkPlan) :SparkPlan = { val conf = new RapidsConf(plan.conf) if (conf.isSqlEnabled) { val updatedPlan = if (plan.conf.adaptiveExecutionEnabled) { @@ -2774,16 +2803,30 @@ case class GpuOverrides() extends Rule[SparkPlan] with Logging { } plan } else { + val optimizations = if (conf.optimizerEnabled) { + // we need to run these rules both before and after CBO because the cost + // is impacted by forcing operators onto CPU due to other rules that we have + wrap.runAfterTagRules() + val optimizer = new CostBasedOptimizer(conf) + optimizer.optimize(wrap) + } else { + Seq.empty + } wrap.runAfterTagRules() if (!exp.equalsIgnoreCase("NONE")) { wrap.tagForExplain() val explain = wrap.explain(exp.equalsIgnoreCase("ALL")) if (!explain.isEmpty) { logWarning(s"\n$explain") + if (conf.optimizerExplain.equalsIgnoreCase("ALL") && optimizations.nonEmpty) { + logWarning(s"Cost-based optimizations applied:\n${optimizations.mkString("\n")}") + } } } val convertedPlan = wrap.convertIfNeeded() - addSortsIfNeeded(convertedPlan, conf) + val sparkPlan = addSortsIfNeeded(convertedPlan, conf) + GpuOverrides.listeners.foreach(_.optimizedPlan(wrap, sparkPlan, optimizations)) + sparkPlan } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 41048a8eb6e..eb30ceb4e72 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -889,7 +889,48 @@ object RapidsConf { .booleanConf .createWithDefault(true) - val USE_ARROW_OPT = conf("spark.rapids.arrowCopyOptimizationEnabled") + val OPTIMIZER_ENABLED = conf("spark.rapids.sql.optimizer.enabled") + .internal() + .doc("Enable cost-based optimizer that will attempt to avoid " + + "transitions to GPU for operations that will not result in improved performance " + + "over CPU") + .booleanConf + .createWithDefault(false) + + val OPTIMIZER_EXPLAIN = conf("spark.rapids.sql.optimizer.explain") + .internal() + .doc("Explain why some parts of a query were not placed on a GPU due to " + + "optimization rules. 
Possible values are ALL: print everything, NONE: print nothing") + .stringConf + .createWithDefault("NONE") + + val OPTIMIZER_DEFAULT_GPU_OPERATOR_COST = conf("spark.rapids.sql.optimizer.defaultExecGpuCost") + .internal() + .doc("Default relative GPU cost of running an operator on the GPU") + .doubleConf + .createWithDefault(0.8) + + val OPTIMIZER_DEFAULT_GPU_EXPRESSION_COST = conf("spark.rapids.sql.optimizer.defaultExprGpuCost") + .internal() + .doc("Default relative GPU cost of running an expression on the GPU") + .doubleConf + .createWithDefault(0.8) + + val OPTIMIZER_DEFAULT_TRANSITION_TO_CPU_COST = conf( + "spark.rapids.sql.optimizer.defaultTransitionToCpuCost") + .internal() + .doc("Default cost of transitioning from GPU to CPU") + .doubleConf + .createWithDefault(0.15) + + val OPTIMIZER_DEFAULT_TRANSITION_TO_GPU_COST = conf( + "spark.rapids.sql.optimizer.defaultTransitionToGpuCost") + .internal() + .doc("Default cost of transitioning from CPU to GPU") + .doubleConf + .createWithDefault(0.15) + + val USE_ARROW_OPT = conf("spark.rapids.arrowCopyOptmizationEnabled") .doc("Option to turn off using the optimized Arrow copy code when reading from " + "ArrowColumnVector in HostColumnarToGpu. Left as internal as user shouldn't " + "have to turn it off, but its convenient for testing.") @@ -1202,10 +1243,39 @@ class RapidsConf(conf: Map[String, String]) extends Logging { lazy val getCloudSchemes: Option[Seq[String]] = get(CLOUD_SCHEMES) + lazy val optimizerEnabled: Boolean = get(OPTIMIZER_ENABLED) + + lazy val optimizerExplain: String = get(OPTIMIZER_EXPLAIN) + + lazy val defaultOperatorCost: Double = get(OPTIMIZER_DEFAULT_GPU_OPERATOR_COST) + + lazy val defaultExpressionCost: Double = get(OPTIMIZER_DEFAULT_GPU_EXPRESSION_COST) + + lazy val defaultTransitionToCpuCost: Double = get(OPTIMIZER_DEFAULT_TRANSITION_TO_CPU_COST) + + lazy val defaultTransitionToGpuCost: Double = get(OPTIMIZER_DEFAULT_TRANSITION_TO_GPU_COST) + lazy val getAlluxioPathsToReplace: Option[Seq[String]] = get(ALLUXIO_PATHS_REPLACE) def isOperatorEnabled(key: String, incompat: Boolean, isDisabledByDefault: Boolean): Boolean = { val default = !(isDisabledByDefault || incompat) || (incompat && isIncompatEnabled) conf.get(key).map(toBoolean(_, key)).getOrElse(default) } + + /** + * Get the GPU cost of an expression, for use in the cost-based optimizer. + */ + def getExpressionCost(operatorName: String): Option[Double] = { + val key = s"spark.rapids.sql.optimizer.expr.$operatorName" + conf.get(key).map(toDouble(_, key)) + } + + /** + * Get the GPU cost of an operator, for use in the cost-based optimizer. 
+ */ + def getOperatorCost(operatorName: String): Option[Double] = { + val key = s"spark.rapids.sql.optimizer.exec.$operatorName" + conf.get(key).map(toDouble(_, key)) + } + } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala index 00df25aa698..c75a3fbb8f3 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsMeta.scala @@ -116,9 +116,29 @@ abstract class RapidsMeta[INPUT <: BASE, BASE, OUTPUT <: BASE]( private var cannotReplaceAnyOfPlanReasons: Option[mutable.Set[String]] = None private var shouldBeRemovedReasons: Option[mutable.Set[String]] = None protected var cannotRunOnGpuBecauseOfSparkPlan: Boolean = false + protected var cannotRunOnGpuBecauseOfCost: Boolean = false val gpuSupportedTag = TreeNodeTag[Set[String]]("rapids.gpu.supported") + /** + * Recursively force a section of the plan back onto CPU, stopping once a plan + * is reached that is already on CPU. + */ + final def recursiveCostPreventsRunningOnGpu(): Unit = { + if (canThisBeReplaced) { + costPreventsRunningOnGpu() + childDataWriteCmds.foreach(_.recursiveCostPreventsRunningOnGpu()) + } + } + + final def costPreventsRunningOnGpu(): Unit = { + cannotRunOnGpuBecauseOfCost = true + willNotWorkOnGpu("Removed by cost-based optimizer") + childExprs.foreach(_.recursiveCostPreventsRunningOnGpu()) + childParts.foreach(_.recursiveCostPreventsRunningOnGpu()) + childScans.foreach(_.recursiveCostPreventsRunningOnGpu()) + } + final def recursiveSparkPlanPreventsRunningOnGpu(): Unit = { cannotRunOnGpuBecauseOfSparkPlan = true childExprs.foreach(_.recursiveSparkPlanPreventsRunningOnGpu()) @@ -290,9 +310,13 @@ abstract class RapidsMeta[INPUT <: BASE, BASE, OUTPUT <: BASE]( final private def getIndicatorChar: String = { if (shouldThisBeRemoved) { "#" + } else if (cannotRunOnGpuBecauseOfCost) { + "$" } else if (canThisBeReplaced) { if (cannotRunOnGpuBecauseOfSparkPlan) { "@" + } else if (cannotRunOnGpuBecauseOfCost) { + "$" } else { "*" } @@ -495,6 +519,9 @@ abstract class SparkPlanMeta[INPUT <: SparkPlan](plan: INPUT, override val childParts: Seq[PartMeta[_]] = Seq.empty override val childDataWriteCmds: Seq[DataWritingCommandMeta[_]] = Seq.empty + var cpuCost: Double = 0 + var gpuCost: Double = 0 + override def convertToCpu(): SparkPlan = { wrapped.withNewChildren(childPlans.map(_.convertIfNeeded())) } @@ -947,4 +974,4 @@ final class RuleNotFoundExprMeta[INPUT <: Expression]( override def convertToGpu(): GpuExpression = throw new IllegalStateException("Cannot be converted to GPU") -} +} \ No newline at end of file diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/CostBasedOptimizerSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/CostBasedOptimizerSuite.scala new file mode 100644 index 00000000000..0118ef1baed --- /dev/null +++ b/tests/src/test/scala/com/nvidia/spark/rapids/CostBasedOptimizerSuite.scala @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids + +import scala.collection.mutable.ListBuffer + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.execution.{ProjectExec, SortExec, SparkPlan, WholeStageCodegenExec} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.rapids.execution.GpuShuffleExchangeExecBase +import org.apache.spark.sql.types.DataTypes + +class CostBasedOptimizerSuite extends SparkQueryCompareTestSuite with BeforeAndAfter { + + before { + GpuOverrides.removeAllListeners() + } + + after { + GpuOverrides.removeAllListeners() + } + + test("Force section of plan back onto CPU, AQE on") { + + val conf = new SparkConf() + .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "true") + .set(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1") + .set(RapidsConf.OPTIMIZER_ENABLED.key, "true") + .set(RapidsConf.ENABLE_CAST_STRING_TO_TIMESTAMP.key, "false") + .set(RapidsConf.EXPLAIN.key, "ALL") + .set(RapidsConf.ENABLE_REPLACE_SORTMERGEJOIN.key, "false") + .set(RapidsConf.TEST_ALLOWED_NONGPU.key, + "ProjectExec,BroadcastExchangeExec,BroadcastHashJoinExec,SortExec,SortMergeJoinExec," + + "Alias,Cast,LessThan") + + val optimizations: ListBuffer[Seq[Optimization]] = new ListBuffer[Seq[Optimization]]() + GpuOverrides.addListener( + (plan: SparkPlanMeta[SparkPlan], + sparkPlan: SparkPlan, + costOptimizations: Seq[Optimization]) => { + optimizations += costOptimizations + }) + + withGpuSparkSession(spark => { + val df1: DataFrame = createQuery(spark) + .alias("df1") + .orderBy("more_strings_1") + val df2: DataFrame = createQuery(spark) + .alias("df2") + .orderBy("more_strings_2") + val df = df1.join(df2, col("df1.more_strings_1").equalTo(col("df2.more_strings_2"))) + .orderBy("df2.more_strings_2") + + df.collect() + + // check that the expected optimization was applied + val opt = optimizations.last.last.asInstanceOf[ReplaceSection[_]] + assert(opt.totalGpuCost > opt.totalCpuCost) + assert(opt.plan.wrapped.isInstanceOf[SortExec]) + + // check that the final plan has a CPU sort and no GPU sort + val cpuSort = ShimLoader.getSparkShims + .findOperators(df.queryExecution.executedPlan, + _.isInstanceOf[SortExec]) + + val gpuSort = ShimLoader.getSparkShims + .findOperators(df.queryExecution.executedPlan, + _.isInstanceOf[GpuSortExec]) + + assert(cpuSort.nonEmpty) + assert(gpuSort.isEmpty) + + df + }, conf) + + } + + test("Force section of plan back onto CPU, AQE off") { + + val conf = new SparkConf() + .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") + .set(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1") + .set(RapidsConf.OPTIMIZER_ENABLED.key, "true") + .set(RapidsConf.ENABLE_CAST_STRING_TO_TIMESTAMP.key, "false") + .set(RapidsConf.EXPLAIN.key, "ALL") + .set(RapidsConf.ENABLE_REPLACE_SORTMERGEJOIN.key, "false") + .set(RapidsConf.TEST_ALLOWED_NONGPU.key, + "ProjectExec,BroadcastExchangeExec,BroadcastHashJoinExec,SortExec,SortMergeJoinExec," + + "Alias,Cast,LessThan") + + val optimizations: ListBuffer[Seq[Optimization]] = new ListBuffer[Seq[Optimization]]() + GpuOverrides.addListener( + (plan: SparkPlanMeta[SparkPlan], + sparkPlan: SparkPlan, + costOptimizations: Seq[Optimization]) => { + optimizations += costOptimizations + }) + + withGpuSparkSession(spark => { + val 
df1: DataFrame = createQuery(spark) + .alias("df1") + .orderBy("more_strings_1") + val df2: DataFrame = createQuery(spark) + .alias("df2") + .orderBy("more_strings_2") + val df = df1.join(df2, col("df1.more_strings_1").equalTo(col("df2.more_strings_2"))) + .orderBy("df2.more_strings_2") + + df.collect() + + // check that the expected optimization was applied + assert(7 == optimizations.flatten + .filter(_.isInstanceOf[ReplaceSection[_]]) + .map(_.asInstanceOf[ReplaceSection[_]]) + .count(_.plan.wrapped.isInstanceOf[SortExec])) + + // check that the final plan has a CPU sort and no GPU sort + val cpuSort = ShimLoader.getSparkShims + .findOperators(df.queryExecution.executedPlan, + _.isInstanceOf[SortExec]) + + val gpuSort = ShimLoader.getSparkShims + .findOperators(df.queryExecution.executedPlan, + _.isInstanceOf[GpuSortExec]) + + assert(cpuSort.nonEmpty) + assert(gpuSort.isEmpty) + + df + }, conf) + + } + + test("Force last section of plan back onto CPU, AQE on") { + + val conf = new SparkConf() + .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "true") + .set(RapidsConf.OPTIMIZER_ENABLED.key, "true") + .set(RapidsConf.ENABLE_CAST_STRING_TO_TIMESTAMP.key, "false") + .set(RapidsConf.EXPLAIN.key, "ALL") + .set(RapidsConf.TEST_ALLOWED_NONGPU.key, + "ProjectExec,BroadcastExchangeExec,BroadcastHashJoinExec,SortExec," + + "Alias,Cast,LessThan") + + val optimizations: ListBuffer[Seq[Optimization]] = new ListBuffer[Seq[Optimization]]() + GpuOverrides.addListener( + (plan: SparkPlanMeta[SparkPlan], + sparkPlan: SparkPlan, + costOptimizations: Seq[Optimization]) => { + optimizations += costOptimizations + }) + + withGpuSparkSession(spark => { + val df: DataFrame = createQuery(spark) + .orderBy("more_strings_1") + df.collect() + + // check that the expected optimization was applied + val opt = optimizations.last.last.asInstanceOf[ReplaceSection[_]] + assert(opt.totalGpuCost > opt.totalCpuCost) + assert(opt.plan.wrapped.isInstanceOf[SortExec]) + + //assert that the top-level sort stayed on the CPU + df.queryExecution.executedPlan.asInstanceOf[AdaptiveSparkPlanExec] + .executedPlan.asInstanceOf[WholeStageCodegenExec] + .child.asInstanceOf[SortExec] + + df + }, conf) + + } + + test("Force last section of plan back onto CPU, AQE off") { + + val conf = new SparkConf() + .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") + .set(RapidsConf.OPTIMIZER_ENABLED.key, "true") + .set(RapidsConf.ENABLE_CAST_STRING_TO_TIMESTAMP.key, "false") + .set(RapidsConf.EXPLAIN.key, "ALL") + .set(RapidsConf.TEST_ALLOWED_NONGPU.key, + "ProjectExec,BroadcastExchangeExec,BroadcastHashJoinExec,SortExec," + + "Alias,Cast,LessThan") + + val optimizations: ListBuffer[Seq[Optimization]] = new ListBuffer[Seq[Optimization]]() + GpuOverrides.addListener( + (plan: SparkPlanMeta[SparkPlan], + sparkPlan: SparkPlan, + costOptimizations: Seq[Optimization]) => { + optimizations += costOptimizations + }) + + withGpuSparkSession(spark => { + val df: DataFrame = createQuery(spark) + .orderBy("more_strings_1") + df.collect() + + // check that the expected optimization was applied + val opt = optimizations.last.last.asInstanceOf[ReplaceSection[_]] + assert(opt.totalGpuCost > opt.totalCpuCost) + assert(opt.plan.wrapped.isInstanceOf[SortExec]) + + //assert that the top-level sort stayed on the CPU + df.queryExecution.executedPlan.asInstanceOf[WholeStageCodegenExec] + .child.asInstanceOf[SortExec] + + df + }, conf) + + } + + test("Avoid move to GPU for trivial projection, AQE on") { + + val conf = new SparkConf() + 
.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "true") + .set(RapidsConf.OPTIMIZER_ENABLED.key, "true") + .set(RapidsConf.ENABLE_CAST_STRING_TO_TIMESTAMP.key, "false") + .set(RapidsConf.EXPLAIN.key, "ALL") + .set(RapidsConf.TEST_ALLOWED_NONGPU.key, + "ProjectExec,BroadcastExchangeExec,BroadcastHashJoinExec," + + "Alias,Cast,LessThan") + + val optimizations: ListBuffer[Seq[Optimization]] = new ListBuffer[Seq[Optimization]]() + GpuOverrides.addListener( + (plan: SparkPlanMeta[SparkPlan], + sparkPlan: SparkPlan, + costOptimizations: Seq[Optimization]) => { + optimizations += costOptimizations + }) + + withGpuSparkSession(spark => { + val df: DataFrame = createQuery(spark) + df.collect() + + // assert that the top-level projection stayed on the CPU + df.queryExecution.executedPlan.asInstanceOf[AdaptiveSparkPlanExec] + .executedPlan.asInstanceOf[WholeStageCodegenExec] + .child.asInstanceOf[ProjectExec] + + df + }, conf) + + } + + test("Avoid move to GPU for trivial projection, AQE off") { + + val conf = new SparkConf() + .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") + .set(RapidsConf.OPTIMIZER_ENABLED.key, "true") + .set(RapidsConf.ENABLE_CAST_STRING_TO_TIMESTAMP.key, "false") + .set(RapidsConf.EXPLAIN.key, "ALL") + .set(RapidsConf.TEST_ALLOWED_NONGPU.key, + "ProjectExec,BroadcastExchangeExec,BroadcastHashJoinExec," + + "Alias,Cast,LessThan") + + var optimizations: ListBuffer[Seq[Optimization]] = new ListBuffer[Seq[Optimization]]() + GpuOverrides.addListener( + (plan: SparkPlanMeta[SparkPlan], + sparkPlan: SparkPlan, + costOptimizations: Seq[Optimization]) => { + optimizations += costOptimizations + }) + + withGpuSparkSession(spark => { + val df: DataFrame = createQuery(spark) + df.collect() + + // check that the expected optimization was applied + assert(3 == optimizations + .flatten + .filter(_.isInstanceOf[AvoidTransition[_]]) + .map(_.asInstanceOf[AvoidTransition[_]]) + .count(_.plan.wrapped.isInstanceOf[ProjectExec])) + + // check that the expected optimization was applied + assert(3 == optimizations + .flatten + .filter(_.isInstanceOf[AvoidTransition[_]]) + .map(_.asInstanceOf[AvoidTransition[_]]) + .count(_.plan.wrapped.isInstanceOf[ProjectExec])) + + // assert that the top-level projection stayed on the CPU + assert(df.queryExecution.executedPlan.asInstanceOf[WholeStageCodegenExec] + .child.isInstanceOf[ProjectExec]) + + df + }, conf) + } + + test("Avoid move to GPU for shuffle, AQE on") { + + val conf = new SparkConf() + .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "true") + .set(RapidsConf.OPTIMIZER_ENABLED.key, "true") + .set(RapidsConf.ENABLE_CAST_STRING_TO_TIMESTAMP.key, "false") + .set(RapidsConf.EXPLAIN.key, "ALL") + .set(RapidsConf.TEST_ALLOWED_NONGPU.key, + "ProjectExec,BroadcastExchangeExec,BroadcastHashJoinExec," + + "Alias,Cast,LessThan") + + withGpuSparkSession(spark => { + val df: DataFrame = createQuery(spark) + df.collect() + + val gpuExchanges = ShimLoader.getSparkShims + .findOperators(df.queryExecution.executedPlan, + _.isInstanceOf[GpuShuffleExchangeExecBase]) + assert(gpuExchanges.isEmpty) + + df + }, conf) + } + + test("Avoid move to GPU for shuffle, AQE off") { + + val conf = new SparkConf() + .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") + .set(RapidsConf.OPTIMIZER_ENABLED.key, "true") + .set(RapidsConf.ENABLE_CAST_STRING_TO_TIMESTAMP.key, "false") + .set(RapidsConf.EXPLAIN.key, "ALL") + .set(RapidsConf.TEST_ALLOWED_NONGPU.key, + "ProjectExec,BroadcastExchangeExec,BroadcastHashJoinExec," + + "Alias,Cast,LessThan") + + withGpuSparkSession(spark => { 
+ val df: DataFrame = createQuery(spark) + df.collect() + + val gpuExchanges = ShimLoader.getSparkShims + .findOperators(df.queryExecution.executedPlan, + _.isInstanceOf[GpuShuffleExchangeExecBase]) + assert(gpuExchanges.isEmpty) + + df + }, conf) + } + + private def createQuery(spark: SparkSession) = { + val df1 = nullableStringsDf(spark) + .repartition(2) + .withColumnRenamed("more_strings", "more_strings_1") + + val df2 = nullableStringsDf(spark) + .repartition(2) + .withColumnRenamed("more_strings", "more_strings_2") + + val df = df1.join(df2, "strings") + // filter on unsupported CAST to force operation onto CPU + .filter(col("more_strings_2").cast(DataTypes.TimestampType) + .lt(col("more_strings_1").cast(DataTypes.TimestampType))) + // this projection just swaps the order of the attributes and we want CBO to keep + // this on CPU + .select("more_strings_2", "more_strings_1") + df + } + + private def addListener(optimizations: ListBuffer[Optimization]): Unit = { + GpuOverrides.addListener( + (plan: SparkPlanMeta[SparkPlan], + sparkPlan: SparkPlan, + costOptimizations: Seq[Optimization]) => { + optimizations.appendAll(costOptimizations) + }) + } +} diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala index 8878d9ed645..33c455bf4aa 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/SparkQueryCompareTestSuite.scala @@ -1521,6 +1521,19 @@ trait SparkQueryCompareTestSuite extends FunSuite with Arm { ).toDF("float", "int") } + def nullableStringsDf(session: SparkSession): DataFrame = { + import session.sqlContext.implicits._ + Seq[(String, String)]( + ("100.0", "1.0"), + (null, "2.0"), + ("300.0", "3.0"), + ("400.0", null), + ("500.0", "5.0"), + ("-100.0", null), + ("-500.0", "0.0") + ).toDF("strings", "more_strings") + } + def nullableStringsIntsDf(session: SparkSession): DataFrame = { import session.sqlContext.implicits._ Seq[(String, Integer)]( From 24ab0ae9073d9567d3ed3caabf23f0a93c27fef5 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Wed, 3 Mar 2021 10:29:54 -0600 Subject: [PATCH 19/28] Fix Part Suite Tests (#1852) * Fix Part Suite Tests Signed-off-by: Robert (Bobby) Evans * Addressed review comments --- .../spark/rapids/GpuPartitioningSuite.scala | 68 ++++++++++++------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/GpuPartitioningSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/GpuPartitioningSuite.scala index 7d36fc73e8e..abcb0466860 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/GpuPartitioningSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/GpuPartitioningSuite.scala @@ -19,7 +19,7 @@ package com.nvidia.spark.rapids import java.io.File import java.math.RoundingMode -import ai.rapids.cudf.{ColumnVector, DType, Table} +import ai.rapids.cudf.{ColumnVector, Cuda, DType, Table} import org.scalatest.FunSuite import org.apache.spark.SparkConf @@ -43,45 +43,63 @@ class GpuPartitioningSuite extends FunSuite with Arm { } /** - * Retrieves the underlying column vectors for a batch without incrementing - * the refcounts of those columns. Therefore the column vectors are only - * valid as long as the batch is valid. + * Retrieves the underlying column vectors for a batch. It increments the reference counts for + * them if needed so the results need to be closed. 
*/ - private def extractBases(batch: ColumnarBatch): Array[ColumnVector] = { + private def extractColumnVectors(batch: ColumnarBatch): Array[ColumnVector] = { if (GpuPackedTableColumn.isBatchPacked(batch)) { val packedColumn = batch.column(0).asInstanceOf[GpuPackedTableColumn] val table = packedColumn.getContiguousTable.getTable // The contiguous table is still responsible for closing these columns. - (0 until table.getNumberOfColumns).map(table.getColumn).toArray + (0 until table.getNumberOfColumns).map(i => table.getColumn(i).incRefCount()).toArray + } else if (GpuCompressedColumnVector.isBatchCompressed(batch)) { + val compressedColumn = batch.column(0).asInstanceOf[GpuCompressedColumnVector] + val descr = compressedColumn.getTableMeta.bufferMeta.codecBufferDescrs(0) + val codec = TableCompressionCodec.getCodec(descr.codec) + withResource(codec.createBatchDecompressor(100 * 1024 * 1024L, + Cuda.DEFAULT_STREAM)) { decompressor => + compressedColumn.getTableBuffer.incRefCount() + decompressor.addBufferToDecompress(compressedColumn.getTableBuffer, + compressedColumn.getTableMeta.bufferMeta) + withResource(decompressor.finishAsync()) { outputBuffers => + val outputBuffer = outputBuffers.head + // There should be only one + withResource( + MetaUtils.getTableFromMeta(outputBuffer, compressedColumn.getTableMeta)) { table => + (0 until table.getNumberOfColumns).map(i => table.getColumn(i).incRefCount()).toArray + } + } + } } else { - GpuColumnVector.extractBases(batch) + GpuColumnVector.extractBases(batch).map(_.incRefCount()) } } private def buildSubBatch(batch: ColumnarBatch, startRow: Int, endRow: Int): ColumnarBatch = { - val columns = extractBases(batch) - val types = GpuColumnVector.extractTypes(batch) - val sliced = columns.zip(types).map { case (c, t) => - GpuColumnVector.from(c.subVector(startRow, endRow), t) + withResource(extractColumnVectors(batch)) { columns => + val types = GpuColumnVector.extractTypes(batch) + val sliced = columns.zip(types).map { case (c, t) => + GpuColumnVector.from(c.subVector(startRow, endRow), t) + } + new ColumnarBatch(sliced.toArray, endRow - startRow) } - new ColumnarBatch(sliced.toArray, endRow - startRow) } private def compareBatches(expected: ColumnarBatch, actual: ColumnarBatch): Unit = { assertResult(expected.numRows)(actual.numRows) - val expectedColumns = extractBases(expected) - val actualColumns = extractBases(expected) - assertResult(expectedColumns.length)(actualColumns.length) - expectedColumns.zip(actualColumns).foreach { case (expected, actual) => - // FIXME: For decimal types, NULL_EQUALS has not been supported in cuDF yet - val cpVec = if (expected.getType.isDecimalType) { - expected.equalTo(actual) - } else { - expected.equalToNullAware(actual) - } - withResource(cpVec) { compareVector => - withResource(compareVector.all()) { compareResult => - assert(compareResult.getBoolean) + withResource(extractColumnVectors(expected)) { expectedColumns => + withResource(extractColumnVectors(actual)) { actualColumns => + assertResult(expectedColumns.length)(actualColumns.length) + expectedColumns.zip(actualColumns).foreach { case (expected, actual) => + if (expected.getRowCount == 0) { + assertResult(expected.getType)(actual.getType) + } else { + withResource(expected.equalToNullAware(actual)) { compareVector => + withResource(compareVector.all()) { compareResult => + assert(compareResult.getBoolean) + } + } + } } } } From eab507e3614ad9fa9efe2da103380be744034c4b Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 3 Mar 2021 14:03:02 -0600 Subject: 
[PATCH 20/28] Add shim for Spark 3.1.2 (#1836) * Add shim for Spark 3.1.2 Signed-off-by: Jason Lowe * Add Spark 3.1.2 to premerge testing --- .../rapids-shuffle.md | 1 + jenkins/Jenkinsfile-blossom.premerge | 2 +- jenkins/spark-nightly-build.sh | 1 + jenkins/spark-premerge-build.sh | 1 + pom.xml | 10 ++ shims/aggregator/pom.xml | 6 ++ shims/pom.xml | 1 + shims/spark312/pom.xml | 91 +++++++++++++++++++ ...idia.spark.rapids.SparkShimServiceProvider | 1 + .../rapids/shims/spark312/Spark312Shims.scala | 30 ++++++ .../spark312/SparkShimServiceProvider.scala | 35 +++++++ .../spark312/RapidsShuffleManager.scala | 26 ++++++ tests/README.md | 1 + 13 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 shims/spark312/pom.xml create mode 100644 shims/spark312/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider create mode 100644 shims/spark312/src/main/scala/com/nvidia/spark/rapids/shims/spark312/Spark312Shims.scala create mode 100644 shims/spark312/src/main/scala/com/nvidia/spark/rapids/shims/spark312/SparkShimServiceProvider.scala create mode 100644 shims/spark312/src/main/scala/com/nvidia/spark/rapids/spark312/RapidsShuffleManager.scala diff --git a/docs/additional-functionality/rapids-shuffle.md b/docs/additional-functionality/rapids-shuffle.md index 45e3f1734fb..ca89ddb2b29 100644 --- a/docs/additional-functionality/rapids-shuffle.md +++ b/docs/additional-functionality/rapids-shuffle.md @@ -259,6 +259,7 @@ In this section, we are using a docker container built using the sample dockerfi | 3.0.2 | com.nvidia.spark.rapids.spark302.RapidsShuffleManager | | 3.0.3 | com.nvidia.spark.rapids.spark303.RapidsShuffleManager | | 3.1.1 | com.nvidia.spark.rapids.spark311.RapidsShuffleManager | + | 3.1.2 | com.nvidia.spark.rapids.spark312.RapidsShuffleManager | | 3.2.0 | com.nvidia.spark.rapids.spark320.RapidsShuffleManager | 2. 
Recommended settings for UCX 1.9.0+ diff --git a/jenkins/Jenkinsfile-blossom.premerge b/jenkins/Jenkinsfile-blossom.premerge index 306e7b5783e..3d7799cee90 100644 --- a/jenkins/Jenkinsfile-blossom.premerge +++ b/jenkins/Jenkinsfile-blossom.premerge @@ -161,7 +161,7 @@ pipeline { step([$class : 'JacocoPublisher', execPattern : '**/target/jacoco.exec', classPattern : 'target/jacoco_classes/', - sourcePattern : 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,sql-plugin/src/main/java/,sql-plugin/src/main/scala/,shims/spark311/src/main/scala/,shims/spark300/src/main/scala/,shims/spark301db/src/main/scala/,shims/spark301/src/main/scala/,shims/spark302/src/main/scala/,shims/spark303/src/main/scala/', + sourcePattern : 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,sql-plugin/src/main/java/,sql-plugin/src/main/scala/,shims/spark311/src/main/scala/,shims/spark300/src/main/scala/,shims/spark301db/src/main/scala/,shims/spark301/src/main/scala/,shims/spark302/src/main/scala/,shims/spark303/src/main/scala/,shims/spark312/src/main/scala/', sourceInclusionPattern: '**/*.java,**/*.scala' ]) } diff --git a/jenkins/spark-nightly-build.sh b/jenkins/spark-nightly-build.sh index b0c5029fae9..501aa96985d 100755 --- a/jenkins/spark-nightly-build.sh +++ b/jenkins/spark-nightly-build.sh @@ -27,6 +27,7 @@ mvn -U -B -Pspark301tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local mvn -U -B -Pspark302tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR mvn -U -B -Pspark303tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR mvn -U -B -Pspark311tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR +mvn -U -B -Pspark312tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR # Parse cudf and spark files from local mvn repo jenkins/printJarVersion.sh "CUDFVersion" "$M2DIR/ai/rapids/cudf/${CUDF_VER}" "cudf-${CUDF_VER}" "-${CUDA_CLASSIFIER}.jar" $SERVER_ID diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh index 5cf4e39df67..909236e995e 100755 --- a/jenkins/spark-premerge-build.sh +++ b/jenkins/spark-premerge-build.sh @@ -43,6 +43,7 @@ env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark301tests,snapshot-shims test env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark302tests,snapshot-shims test -Dpytest.TEST_TAGS='' env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark303tests,snapshot-shims test -Dpytest.TEST_TAGS='' env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark311tests,snapshot-shims test -Dpytest.TEST_TAGS='' +env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark312tests,snapshot-shims test -Dpytest.TEST_TAGS='' # The jacoco coverage should have been collected, but because of how the shade plugin # works and jacoco we need to clean some things up so jacoco will only report for the diff --git a/pom.xml b/pom.xml index 8251ca4704b..9c3c67c865d 100644 --- a/pom.xml +++ b/pom.xml @@ -165,6 +165,15 @@ tests-spark310+ + + spark312tests + + ${spark312.version} + + + tests-spark310+ + + spark320tests @@ -213,6 +222,7 @@ 3.0.2 3.0.3-SNAPSHOT 3.1.1 + 3.1.2-SNAPSHOT 3.2.0-SNAPSHOT 3.6.0 4.3.0 diff --git a/shims/aggregator/pom.xml b/shims/aggregator/pom.xml index 2f06dc2d2c2..86b4f67931e 100644 --- a/shims/aggregator/pom.xml +++ b/shims/aggregator/pom.xml @@ -74,6 +74,12 @@ ${project.version} compile + + com.nvidia + rapids-4-spark-shims-spark312_${scala.binary.version} + ${project.version} + compile + diff --git a/shims/pom.xml b/shims/pom.xml index 84669d2bbda..72785c2db08 100644 --- a/shims/pom.xml +++ 
b/shims/pom.xml @@ -46,6 +46,7 @@ spark303 + spark312 spark320 diff --git a/shims/spark312/pom.xml b/shims/spark312/pom.xml new file mode 100644 index 00000000000..f61ba9b4618 --- /dev/null +++ b/shims/spark312/pom.xml @@ -0,0 +1,91 @@ + + + + 4.0.0 + + + com.nvidia + rapids-4-spark-shims_2.12 + 0.5.0-SNAPSHOT + ../pom.xml + + com.nvidia + rapids-4-spark-shims-spark312_2.12 + RAPIDS Accelerator for Apache Spark SQL Plugin Spark 3.1.2 Shim + The RAPIDS SQL plugin for Apache Spark 3.1.2 Shim + 0.5.0-SNAPSHOT + + + + + + + maven-antrun-plugin + + + dependency + generate-resources + + + + + + + + + + + + + run + + + + + + org.scalastyle + scalastyle-maven-plugin + + + + + + + ${project.build.directory}/extra-resources + + + src/main/resources + + + + + + + com.nvidia + rapids-4-spark-shims-spark311_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark312.version} + provided + + + diff --git a/shims/spark312/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider b/shims/spark312/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider new file mode 100644 index 00000000000..9778962c1fe --- /dev/null +++ b/shims/spark312/src/main/resources/META-INF/services/com.nvidia.spark.rapids.SparkShimServiceProvider @@ -0,0 +1 @@ +com.nvidia.spark.rapids.shims.spark312.SparkShimServiceProvider diff --git a/shims/spark312/src/main/scala/com/nvidia/spark/rapids/shims/spark312/Spark312Shims.scala b/shims/spark312/src/main/scala/com/nvidia/spark/rapids/shims/spark312/Spark312Shims.scala new file mode 100644 index 00000000000..e37f6ea07e1 --- /dev/null +++ b/shims/spark312/src/main/scala/com/nvidia/spark/rapids/shims/spark312/Spark312Shims.scala @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.shims.spark312 + +import com.nvidia.spark.rapids._ +import com.nvidia.spark.rapids.shims.spark311.Spark311Shims +import com.nvidia.spark.rapids.spark312.RapidsShuffleManager + +class Spark312Shims extends Spark311Shims { + + override def getSparkShimVersion: ShimVersion = SparkShimServiceProvider.VERSION + + override def getRapidsShuffleManagerClass: String = { + classOf[RapidsShuffleManager].getCanonicalName + } +} diff --git a/shims/spark312/src/main/scala/com/nvidia/spark/rapids/shims/spark312/SparkShimServiceProvider.scala b/shims/spark312/src/main/scala/com/nvidia/spark/rapids/shims/spark312/SparkShimServiceProvider.scala new file mode 100644 index 00000000000..acd6b364325 --- /dev/null +++ b/shims/spark312/src/main/scala/com/nvidia/spark/rapids/shims/spark312/SparkShimServiceProvider.scala @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.shims.spark312 + +import com.nvidia.spark.rapids.{SparkShims, SparkShimVersion} + +object SparkShimServiceProvider { + val VERSION = SparkShimVersion(3, 1, 2) + val VERSIONNAMES = Seq(s"$VERSION", s"$VERSION-SNAPSHOT") +} + +class SparkShimServiceProvider extends com.nvidia.spark.rapids.SparkShimServiceProvider { + + def matchesVersion(version: String): Boolean = { + SparkShimServiceProvider.VERSIONNAMES.contains(version) + } + + def buildShim: SparkShims = { + new Spark312Shims() + } +} diff --git a/shims/spark312/src/main/scala/com/nvidia/spark/rapids/spark312/RapidsShuffleManager.scala b/shims/spark312/src/main/scala/com/nvidia/spark/rapids/spark312/RapidsShuffleManager.scala new file mode 100644 index 00000000000..93535c1c999 --- /dev/null +++ b/shims/spark312/src/main/scala/com/nvidia/spark/rapids/spark312/RapidsShuffleManager.scala @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.spark312 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.rapids.shims.spark311.RapidsShuffleInternalManager + +/** A shuffle manager optimized for the RAPIDS Plugin for Apache Spark. */ +sealed class RapidsShuffleManager( + conf: SparkConf, + isDriver: Boolean) extends RapidsShuffleInternalManager(conf, isDriver) { +} diff --git a/tests/README.md b/tests/README.md index f86a120d0fd..656f4881921 100644 --- a/tests/README.md +++ b/tests/README.md @@ -32,6 +32,7 @@ default version runs against Spark 3.0.0, to run against other versions use one - `-Pspark302tests` (spark 3.0.2) - `-Pspark303tests` (spark 3.0.3) - `-Pspark311tests` (spark 3.1.1) + - `-Pspark312tests` (spark 3.1.2) Please refer to the [tests project POM](pom.xml) to see the list of test profiles supported. 
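For example, mirroring the nightly and premerge scripts above, `mvn -Pspark312tests,snapshot-shims test` runs the unit tests against the Spark 3.1.2 shim (`snapshot-shims` is needed while 3.1.2 is still a snapshot release); treat this as an illustrative invocation, the authoritative profile list lives in the POM.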
Apache Spark specific configurations can be passed in by setting the `SPARK_CONF` environment From ad0b6d93b250841f69568ebebcd44415be6fd555 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 4 Mar 2021 06:30:05 -0800 Subject: [PATCH 21/28] fix shuffle manager doc on ucx library path (#1858) * fix shuffle manager doc on ucx library path Signed-off-by: Rong Ou * remove ld library path line Signed-off-by: Rong Ou --- docs/additional-functionality/rapids-shuffle.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/additional-functionality/rapids-shuffle.md b/docs/additional-functionality/rapids-shuffle.md index ca89ddb2b29..3346674d958 100644 --- a/docs/additional-functionality/rapids-shuffle.md +++ b/docs/additional-functionality/rapids-shuffle.md @@ -273,7 +273,6 @@ In this section, we are using a docker container built using the sample dockerfi --conf spark.executorEnv.UCX_MAX_RNDV_RAILS=1 \ --conf spark.executorEnv.UCX_MEMTYPE_CACHE=n \ --conf spark.executorEnv.UCX_IB_RX_QUEUE_LEN=1024 \ ---conf spark.executorEnv.LD_LIBRARY_PATH=/usr/lib:/usr/lib/ucx \ --conf spark.executor.extraClassPath=${SPARK_CUDF_JAR}:${SPARK_RAPIDS_PLUGIN_JAR} ``` From dc2847c8eca5fe089737bcb9c48b30ac39d8795d Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Thu, 4 Mar 2021 16:10:31 -0600 Subject: [PATCH 22/28] Disable coalesce batch spilling to avoid cudf contiguous_split bug (#1871) Signed-off-by: Jason Lowe --- .../nvidia/spark/rapids/GpuCoalesceBatches.scala | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCoalesceBatches.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCoalesceBatches.scala index 22ce1e30c59..5de0f4f6884 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCoalesceBatches.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCoalesceBatches.scala @@ -608,13 +608,15 @@ case class GpuCoalesceBatches(child: SparkPlan, goal: CoalesceGoal) // cache in local vars to avoid serializing the plan val outputSchema = schema val decompressMemoryTarget = maxDecompressBatchMemory - val cannotSpill = child.schema.fields.exists { f => - f.dataType match { - case MapType(_, _, _) | ArrayType(_, _) | StructType(_) => true - case _ => false - } - } - + // disabling spillable batches due to + // https://github.com/rapidsai/cudf/issues/7514 + val cannotSpill = true +// val cannotSpill = child.schema.fields.exists { f => +// f.dataType match { +// case MapType(_, _, _) | ArrayType(_, _) | StructType(_) => true +// case _ => false +// } +// } val batches = child.executeColumnar() batches.mapPartitions { iter => if (outputSchema.isEmpty) { From 2439b4be5ac0dda77a72235480ab179dda07f945 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Thu, 4 Mar 2021 17:41:07 -0600 Subject: [PATCH 23/28] Fix tests for Spark 3.2.0 shim (#1869) Signed-off-by: Robert (Bobby) Evans --- integration_tests/src/main/python/orc_test.py | 22 ++++++++-------- jenkins/spark-nightly-build.sh | 1 + jenkins/spark-premerge-build.sh | 4 ++- .../rapids/shims/spark300/Spark300Shims.scala | 21 +++++++++++++--- .../ParquetCachedBatchSerializer.scala | 4 +-- .../rapids/shims/spark320/Spark320Shims.scala | 25 ++++++++++++++++++- .../scala/com/nvidia/spark/RebaseHelper.scala | 9 +++---- .../spark/rapids/GpuParquetFileFormat.scala | 8 +++--- .../nvidia/spark/rapids/GpuParquetScan.scala | 8 +++--- .../com/nvidia/spark/rapids/SparkShims.scala | 11 +++++++- ...CreateDataSourceTableAsSelectCommand.scala | 9 ++++--- 
.../spark/rapids/AdaptiveQueryExecSuite.scala | 17 +++++++++++-- .../com/nvidia/spark/rapids/CastOpSuite.scala | 13 +++++++--- .../spark/rapids/ParquetWriterSuite.scala | 9 +------ 14 files changed, 112 insertions(+), 49 deletions(-) diff --git a/integration_tests/src/main/python/orc_test.py b/integration_tests/src/main/python/orc_test.py index 3f248d27e43..1feb8d9fc69 100644 --- a/integration_tests/src/main/python/orc_test.py +++ b/integration_tests/src/main/python/orc_test.py @@ -169,26 +169,28 @@ def test_input_meta(spark_tmp_path): 'input_file_block_start()', 'input_file_block_length()')) -def setup_orc_file_no_column_names(spark): - drop_query = "DROP TABLE IF EXISTS test_orc_data" - create_query = "CREATE TABLE `test_orc_data` (`_col1` INT, `_col2` STRING, `_col3` INT) USING orc" - insert_query = "INSERT INTO test_orc_data VALUES(13, '155', 2020)" +def setup_orc_file_no_column_names(spark, table_name): + drop_query = "DROP TABLE IF EXISTS {}".format(table_name) + create_query = "CREATE TABLE `{}` (`_col1` INT, `_col2` STRING, `_col3` INT) USING orc".format(table_name) + insert_query = "INSERT INTO {} VALUES(13, '155', 2020)".format(table_name) spark.sql(drop_query).collect spark.sql(create_query).collect spark.sql(insert_query).collect -def test_missing_column_names(): +def test_missing_column_names(spark_tmp_table_factory): if is_spark_300(): pytest.skip("Apache Spark 3.0.0 does not handle ORC files without column names") - with_cpu_session(setup_orc_file_no_column_names) + table_name = spark_tmp_table_factory.get() + with_cpu_session(lambda spark : setup_orc_file_no_column_names(spark, table_name)) assert_gpu_and_cpu_are_equal_collect( - lambda spark : spark.sql("SELECT _col3,_col2 FROM test_orc_data")) + lambda spark : spark.sql("SELECT _col3,_col2 FROM {}".format(table_name))) -def test_missing_column_names_filter(): +def test_missing_column_names_filter(spark_tmp_table_factory): if is_spark_300(): pytest.skip("Apache Spark 3.0.0 does not handle ORC files without column names") - with_cpu_session(setup_orc_file_no_column_names) + table_name = spark_tmp_table_factory.get() + with_cpu_session(lambda spark : setup_orc_file_no_column_names(spark, table_name)) assert_gpu_and_cpu_are_equal_collect( - lambda spark : spark.sql("SELECT _col3,_col2 FROM test_orc_data WHERE _col2 = '155'")) + lambda spark : spark.sql("SELECT _col3,_col2 FROM {} WHERE _col2 = '155'".format(table_name))) diff --git a/jenkins/spark-nightly-build.sh b/jenkins/spark-nightly-build.sh index 501aa96985d..6e75e28d9ae 100755 --- a/jenkins/spark-nightly-build.sh +++ b/jenkins/spark-nightly-build.sh @@ -28,6 +28,7 @@ mvn -U -B -Pspark302tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local mvn -U -B -Pspark303tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR mvn -U -B -Pspark311tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR mvn -U -B -Pspark312tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR +mvn -U -B -Pspark320tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR # Parse cudf and spark files from local mvn repo jenkins/printJarVersion.sh "CUDFVersion" "$M2DIR/ai/rapids/cudf/${CUDF_VER}" "cudf-${CUDF_VER}" "-${CUDA_CLASSIFIER}.jar" $SERVER_ID diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh index 909236e995e..6ea3b972ae3 100755 --- a/jenkins/spark-premerge-build.sh +++ b/jenkins/spark-premerge-build.sh @@ -39,11 +39,13 @@ tar zxf $SPARK_HOME.tgz -C $ARTF_ROOT && \ mvn -U -B $MVN_URM_MIRROR 
'-P!snapshot-shims,pre-merge' clean verify -Dpytest.TEST_TAGS='' -Dpytest.TEST_TYPE="pre-commit" -Dpytest.TEST_PARALLEL=4 # Run the unit tests for other Spark versions but dont run full python integration tests -env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark301tests,snapshot-shims test -Dpytest.TEST_TAGS='' +# NOT ALL TESTS NEEDED FOR PREMERGE +#env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark301tests,snapshot-shims test -Dpytest.TEST_TAGS='' env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark302tests,snapshot-shims test -Dpytest.TEST_TAGS='' env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark303tests,snapshot-shims test -Dpytest.TEST_TAGS='' env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark311tests,snapshot-shims test -Dpytest.TEST_TAGS='' env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark312tests,snapshot-shims test -Dpytest.TEST_TAGS='' +env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark320tests,snapshot-shims test -Dpytest.TEST_TAGS='' # The jacoco coverage should have been collected, but because of how the shade plugin # works and jacoco we need to clean some things up so jacoco will only report for the diff --git a/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/Spark300Shims.scala b/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/Spark300Shims.scala index 96cbbbebda4..01e1cf4e279 100644 --- a/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/Spark300Shims.scala +++ b/shims/spark300/src/main/scala/com/nvidia/spark/rapids/shims/spark300/Spark300Shims.scala @@ -29,7 +29,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkEnv import org.apache.spark.rdd.RDD import org.apache.spark.sql.{SparkSession, SparkSessionExtensions} -import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.errors.attachTree @@ -42,13 +42,13 @@ import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.connector.read.Scan import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, BroadcastQueryStageExec, ShuffleQueryStageExec} +import org.apache.spark.sql.execution.command.{AlterTableRecoverPartitionsCommand, RunnableCommand} import org.apache.spark.sql.execution.datasources.{FileIndex, FilePartition, FileScanRDD, HadoopFsRelation, InMemoryFileIndex, PartitionDirectory, PartitionedFile} import org.apache.spark.sql.execution.datasources.rapids.GpuPartitioningUtils import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec, ShuffleExchangeExec} -import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, HashJoin, SortMergeJoinExec} -import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec +import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, HashJoin, ShuffledHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.execution.python.WindowInPandasExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids.{GpuFileSourceScanExec, GpuStringReplace, GpuTimeSub, ShuffleManagerShimBase} @@ -62,6 +62,21 @@ import org.apache.spark.unsafe.types.CalendarInterval class 
Spark300Shims extends SparkShims { override def getSparkShimVersion: ShimVersion = SparkShimServiceProvider.VERSION + override def parquetRebaseReadKey: String = + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key + override def parquetRebaseWriteKey: String = + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key + override def avroRebaseReadKey: String = + SQLConf.LEGACY_AVRO_REBASE_MODE_IN_READ.key + override def avroRebaseWriteKey: String = + SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key + override def parquetRebaseRead(conf: SQLConf): String = + conf.getConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ) + override def parquetRebaseWrite(conf: SQLConf): String = + conf.getConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE) + + override def v1RepairTableCommand(tableName: TableIdentifier): RunnableCommand = + AlterTableRecoverPartitionsCommand(tableName) override def getScalaUDFAsExpression( function: AnyRef, diff --git a/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/ParquetCachedBatchSerializer.scala b/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/ParquetCachedBatchSerializer.scala index 1075eb3139c..336097783fc 100644 --- a/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/ParquetCachedBatchSerializer.scala +++ b/shims/spark311/src/main/scala/com/nvidia/spark/rapids/shims/spark311/ParquetCachedBatchSerializer.scala @@ -1077,7 +1077,7 @@ class ParquetCachedBatchSerializer extends CachedBatchSerializer with Arm { // at least a single block val stream = new ByteArrayOutputStream(ByteArrayOutputFile.BLOCK_SIZE) val outputFile: OutputFile = new ByteArrayOutputFile(stream) - sharedConf.setConfString(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key, + sharedConf.setConfString(ShimLoader.getSparkShims.parquetRebaseWriteKey, LegacyBehaviorPolicy.CORRECTED.toString) val recordWriter = SQLConf.withExistingConf(sharedConf) { parquetOutputFileFormat.getRecordWriter(outputFile, sharedHadoopConf) @@ -1218,7 +1218,7 @@ class ParquetCachedBatchSerializer extends CachedBatchSerializer with Arm { hadoopConf.setBoolean(SQLConf.PARQUET_BINARY_AS_STRING.key, false) hadoopConf.setBoolean(SQLConf.PARQUET_INT96_AS_TIMESTAMP.key, false) - hadoopConf.set(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key, + hadoopConf.set(ShimLoader.getSparkShims.parquetRebaseWriteKey, LegacyBehaviorPolicy.CORRECTED.toString) hadoopConf.set(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key, diff --git a/shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/Spark320Shims.scala b/shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/Spark320Shims.scala index ceddf82f741..49a02af0883 100644 --- a/shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/Spark320Shims.scala +++ b/shims/spark320/src/main/scala/com/nvidia/spark/rapids/shims/spark320/Spark320Shims.scala @@ -20,15 +20,38 @@ import com.nvidia.spark.rapids.ShimVersion import com.nvidia.spark.rapids.shims.spark311.Spark311Shims import com.nvidia.spark.rapids.spark320.RapidsShuffleManager +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.adaptive.{BroadcastQueryStageExec, ShuffleQueryStageExec} +import org.apache.spark.sql.execution.command.{RepairTableCommand, RunnableCommand} import org.apache.spark.sql.execution.exchange.ReusedExchangeExec +import org.apache.spark.sql.internal.SQLConf class Spark320Shims extends Spark311Shims { - override def 
getSparkShimVersion: ShimVersion = SparkShimServiceProvider.VERSION320 + override def parquetRebaseReadKey: String = + SQLConf.PARQUET_REBASE_MODE_IN_READ.key + override def parquetRebaseWriteKey: String = + SQLConf.PARQUET_REBASE_MODE_IN_WRITE.key + override def avroRebaseReadKey: String = + SQLConf.AVRO_REBASE_MODE_IN_READ.key + override def avroRebaseWriteKey: String = + SQLConf.AVRO_REBASE_MODE_IN_WRITE.key + override def parquetRebaseRead(conf: SQLConf): String = + conf.getConf(SQLConf.PARQUET_REBASE_MODE_IN_READ) + override def parquetRebaseWrite(conf: SQLConf): String = + conf.getConf(SQLConf.PARQUET_REBASE_MODE_IN_WRITE) + + override def v1RepairTableCommand(tableName: TableIdentifier): RunnableCommand = + RepairTableCommand(tableName, + // These match the one place that this is called, if we start to call this in more places + // we will need to change the API to pass these values in. + enableAddPartitions = true, + enableDropPartitions = false) + + override def getRapidsShuffleManagerClass: String = { classOf[RapidsShuffleManager].getCanonicalName } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/RebaseHelper.scala b/sql-plugin/src/main/scala/com/nvidia/spark/RebaseHelper.scala index 858fc74da2c..46c66a34078 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/RebaseHelper.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/RebaseHelper.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,10 +17,9 @@ package com.nvidia.spark import ai.rapids.cudf.{ColumnVector, DType, Scalar} -import com.nvidia.spark.rapids.Arm +import com.nvidia.spark.rapids.{Arm, ShimLoader} import org.apache.spark.sql.catalyst.util.RebaseDateTime -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids.execution.TrampolineUtil object RebaseHelper extends Arm { @@ -67,9 +66,9 @@ object RebaseHelper extends Arm { def newRebaseExceptionInRead(format: String): Exception = { val config = if (format == "Parquet") { - SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key + ShimLoader.getSparkShims.parquetRebaseReadKey } else if (format == "Avro") { - SQLConf.LEGACY_AVRO_REBASE_MODE_IN_READ.key + ShimLoader.getSparkShims.avroRebaseReadKey } else { throw new IllegalStateException("unrecognized format " + format) } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetFileFormat.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetFileFormat.scala index 3146c8daed2..f4c15987d35 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetFileFormat.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetFileFormat.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType import org.apache.spark.sql.rapids.ColumnarWriteTaskStatsTracker import org.apache.spark.sql.rapids.execution.TrampolineUtil -import org.apache.spark.sql.types.{ArrayType, DataType, DataTypes, DateType, DecimalType, MapType, StructType, TimestampType} +import org.apache.spark.sql.types.{ArrayType, DataType, DataTypes, DateType, DecimalType, StructType, TimestampType} import org.apache.spark.sql.vectorized.ColumnarBatch object GpuParquetFileFormat { @@ -83,7 +83,7 @@ object GpuParquetFileFormat { TrampolineUtil.dataTypeExistsRecursively(field.dataType, _.isInstanceOf[DateType]) 
} - sqlConf.getConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE) match { + ShimLoader.getSparkShims.parquetRebaseWrite(sqlConf) match { case "EXCEPTION" => //Good case "CORRECTED" => //Good case "LEGACY" => @@ -148,8 +148,8 @@ class GpuParquetFileFormat extends ColumnarFileFormat with Logging { val conf = ContextUtil.getConfiguration(job) - val dateTimeRebaseException = - "EXCEPTION".equals(conf.get(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key)) + val dateTimeRebaseException = "EXCEPTION".equals( + sparkSession.sqlContext.getConf(ShimLoader.getSparkShims.parquetRebaseWriteKey)) val committerClass = conf.getClass( diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetScan.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetScan.scala index bd65d862d16..1e39a544713 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetScan.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetScan.scala @@ -182,16 +182,16 @@ object GpuParquetScanBase { meta.willNotWorkOnGpu("GpuParquetScan does not support int96 timestamp conversion") } - sqlConf.get(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key) match { + sqlConf.get(ShimLoader.getSparkShims.parquetRebaseReadKey) match { case "EXCEPTION" => if (schemaMightNeedNestedRebase) { meta.willNotWorkOnGpu("Nested timestamp and date values are not supported when " + - s"${SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key} is EXCEPTION") + s"${ShimLoader.getSparkShims.parquetRebaseReadKey} is EXCEPTION") } case "CORRECTED" => // Good case "LEGACY" => // really is EXCEPTION for us... if (schemaMightNeedNestedRebase) { meta.willNotWorkOnGpu("Nested timestamp and date values are not supported when " + - s"${SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key} is LEGACY") + s"${ShimLoader.getSparkShims.parquetRebaseReadKey} is LEGACY") } case other => meta.willNotWorkOnGpu(s"$other is not a supported read rebase mode") @@ -294,7 +294,7 @@ private case class GpuParquetFileFilterHandler(@transient sqlConf: SQLConf) exte private val pushDownStringStartWith = sqlConf.parquetFilterPushDownStringStartWith private val pushDownInFilterThreshold = sqlConf.parquetFilterPushDownInFilterThreshold private val isCorrectedRebase = - "CORRECTED" == sqlConf.getConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ) + "CORRECTED" == ShimLoader.getSparkShims.parquetRebaseRead(sqlConf) def filterBlocks( file: PartitionedFile, diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SparkShims.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SparkShims.scala index 1ba4a055956..cc221a37996 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SparkShims.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/SparkShims.scala @@ -23,7 +23,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD import org.apache.spark.sql.{SparkSession, SparkSessionExtensions} -import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, ExprId, NullOrdering, SortDirection, SortOrder} @@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.connector.read.Scan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.adaptive.ShuffleQueryStageExec +import 
org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.execution.datasources.{FileIndex, FilePartition, HadoopFsRelation, PartitionDirectory, PartitionedFile} import org.apache.spark.sql.execution.exchange.{ReusedExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.execution.joins._ @@ -69,6 +70,14 @@ case class EMRShimVersion(major: Int, minor: Int, patch: Int) extends ShimVersio trait SparkShims { def getSparkShimVersion: ShimVersion + def parquetRebaseReadKey: String + def parquetRebaseWriteKey: String + def avroRebaseReadKey: String + def avroRebaseWriteKey: String + def parquetRebaseRead(conf: SQLConf): String + def parquetRebaseWrite(conf: SQLConf): String + def v1RepairTableCommand(tableName: TableIdentifier): RunnableCommand + def isGpuBroadcastHashJoin(plan: SparkPlan): Boolean def isGpuShuffledHashJoin(plan: SparkPlan): Boolean def isBroadcastExchangeLike(plan: SparkPlan): Boolean diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCreateDataSourceTableAsSelectCommand.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCreateDataSourceTableAsSelectCommand.scala index 5b5509df512..505471ef411 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCreateDataSourceTableAsSelectCommand.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCreateDataSourceTableAsSelectCommand.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,13 +18,13 @@ package org.apache.spark.sql.rapids import java.net.URI -import com.nvidia.spark.rapids.{ColumnarFileFormat, GpuDataWritingCommand} +import com.nvidia.spark.rapids.{ColumnarFileFormat, GpuDataWritingCommand, ShimLoader} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.command.{AlterTableRecoverPartitionsCommand, CommandUtils} +import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.vectorized.ColumnarBatch @@ -84,7 +84,8 @@ case class GpuCreateDataSourceTableAsSelectCommand( case fs: HadoopFsRelation if table.partitionColumnNames.nonEmpty && sparkSession.sqlContext.conf.manageFilesourcePartitions => // Need to recover partitions into the metastore so our saved data is visible. 
- sessionState.executePlan(AlterTableRecoverPartitionsCommand(table.identifier)).toRdd + sessionState.executePlan( + ShimLoader.getSparkShims.v1RepairTableCommand(table.identifier)).toRdd case _ => } } diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala index dd78bbef58f..7f2a2ae4779 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/AdaptiveQueryExecSuite.scala @@ -127,8 +127,9 @@ class AdaptiveQueryExecSuite } } - test("Join partitioned tables") { + test("Join partitioned tables DPP fallback") { assumeSpark301orLater + assumePriorToSpark320 // In 3.2.0 AQE works with DPP val conf = new SparkConf() .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "true") @@ -443,7 +444,19 @@ class AdaptiveQueryExecSuite case DatabricksShimVersion(3, 0, 0) => false case _ => true } - assume(isValidTestForSparkVersion) + assume(isValidTestForSparkVersion, "SPARK 3.1.0 or later required") + } + + private def assumePriorToSpark320 = { + val sparkShimVersion = ShimLoader.getSparkShims.getSparkShimVersion + val isValidTestForSparkVersion = sparkShimVersion match { + case ver: SparkShimVersion => + (ver.major == 3 && ver.minor < 2) || ver.major < 3 + case ver: DatabricksShimVersion => + (ver.major == 3 && ver.minor < 2) || ver.major < 3 + case _ => true + } + assume(isValidTestForSparkVersion, "Prior to SPARK 3.2.0 required") } def checkSkewJoin( diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/CastOpSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/CastOpSuite.scala index 403fe2aadfb..667f4f1be47 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/CastOpSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/CastOpSuite.scala @@ -935,10 +935,15 @@ object CastOpSuite { "2010-1-7 T") } - val timestampWithoutDate = Seq( - "23:59:59.333666Z", - "T21:34:56.333666Z" - ) + val timestampWithoutDate = if (validOnly && !castStringToTimestamp) { + // 3.2.0+ throws exceptions on string to date ANSI cast errors + Seq.empty + } else { + Seq( + "23:59:59.333666Z", + "T21:34:56.333666Z" + ) + } val allValues = specialDates ++ validYear ++ diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/ParquetWriterSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/ParquetWriterSuite.scala index 47a2393c52c..a850612dfba 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/ParquetWriterSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/ParquetWriterSuite.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,19 +17,12 @@ package com.nvidia.spark.rapids import java.io.File -import java.lang.reflect.Method import java.nio.charset.StandardCharsets -import ai.rapids.cudf.{ColumnVector, DType, Table, TableWriter} import org.apache.hadoop.fs.Path import org.apache.parquet.hadoop.ParquetFileReader -import org.mockito.ArgumentMatchers._ -import org.mockito.Mockito._ -import org.mockito.invocation.InvocationOnMock import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.sql.types.{ByteType, DataType} -import org.apache.spark.sql.vectorized.ColumnarBatch /** * Tests for writing Parquet files with the GPU. 
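The shim changes in the patch above follow one pattern: every Spark-version-specific name (the renamed rebase-mode keys, the repair-table command) becomes a method on the shim trait, and call sites ask the active shim instead of referencing a version-bound SQLConf constant directly. Below is a minimal, self-contained sketch of that indirection; the trait and object names are illustrative stand-ins rather than the plugin's real ShimLoader classes, and the literal key strings are shown only for illustration.

    // Simplified model of the shim indirection added in this patch: each Spark
    // version supplies its own spelling of renamed configuration keys, and the
    // shared code only ever talks to the trait.
    trait VersionShims {
      def parquetRebaseReadKey: String
      def parquetRebaseWriteKey: String
    }

    // Spark 3.0.x / 3.1.x spelling (the "legacy"-prefixed keys).
    object Shims30x extends VersionShims {
      val parquetRebaseReadKey  = "spark.sql.legacy.parquet.datetimeRebaseModeInRead"
      val parquetRebaseWriteKey = "spark.sql.legacy.parquet.datetimeRebaseModeInWrite"
    }

    // Spark 3.2.0 spelling, where the keys were renamed.
    object Shims320 extends VersionShims {
      val parquetRebaseReadKey  = "spark.sql.parquet.datetimeRebaseModeInRead"
      val parquetRebaseWriteKey = "spark.sql.parquet.datetimeRebaseModeInWrite"
    }

    object ShimKeyDemo extends App {
      // The real plugin selects the shim from the running Spark version at startup;
      // here one is hard-coded to show that the call site does not change.
      val shims: VersionShims = Shims320
      println(s"parquet rebase read key:  ${shims.parquetRebaseReadKey}")
      println(s"parquet rebase write key: ${shims.parquetRebaseWriteKey}")
    }

Keeping the lookup behind a single trait method is what lets files such as RebaseHelper.scala, GpuParquetScan.scala, and GpuParquetFileFormat.scala compile once and run against either key spelling.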
From 6e57e2790598b3d50a4f2d7baed6e60734acc046 Mon Sep 17 00:00:00 2001 From: Niranjan Artal <50492963+nartal1@users.noreply.github.com> Date: Thu, 4 Mar 2021 16:24:32 -0800 Subject: [PATCH 24/28] Add in support for DateAddInterval (#1841) Signed-off-by: Niranjan Artal --- docs/configs.md | 1 + docs/supported_ops.md | 132 ++++++++++++++++++ .../src/main/python/date_time_test.py | 8 ++ .../nvidia/spark/rapids/GpuOverrides.scala | 25 ++++ .../sql/rapids/datetimeExpressions.scala | 67 ++++++++- 5 files changed, 228 insertions(+), 5 deletions(-) diff --git a/docs/configs.md b/docs/configs.md index 1cfd46ab609..3959eea23ec 100644 --- a/docs/configs.md +++ b/docs/configs.md @@ -139,6 +139,7 @@ Name | SQL Function(s) | Description | Default Value | Notes spark.rapids.sql.expression.CreateNamedStruct|`named_struct`, `struct`|Creates a struct with the given field names and values|true|None| spark.rapids.sql.expression.CurrentRow$| |Special boundary for a window frame, indicating stopping at the current row|true|None| spark.rapids.sql.expression.DateAdd|`date_add`|Returns the date that is num_days after start_date|true|None| +spark.rapids.sql.expression.DateAddInterval| |Adds interval to date|true|None| spark.rapids.sql.expression.DateDiff|`datediff`|Returns the number of days from startDate to endDate|true|None| spark.rapids.sql.expression.DateSub|`date_sub`|Returns the date that is num_days before start_date|true|None| spark.rapids.sql.expression.DayOfMonth|`dayofmonth`, `day`|Returns the day of the month from a date or timestamp|true|None| diff --git a/docs/supported_ops.md b/docs/supported_ops.md index f18b65ffb8c..ecd8573bdf5 100644 --- a/docs/supported_ops.md +++ b/docs/supported_ops.md @@ -4042,6 +4042,138 @@ Accelerator support is described below. +DateAddInterval + +Adds interval to date +None +project +start + + + + + + + +S + + + + + + + + + + + + +interval + + + + + + + + + + + + + +PS (month intervals are not supported; Literal value only) + + + + + + +result + + + + + + + +S + + + + + + + + + + + + +lambda +start + + + + + + + +NS + + + + + + + + + + + + +interval + + + + + + + + + + + + + +NS + + + + + + +result + + + + + + + +NS + + + + + + + + + + + + DateDiff `datediff` Returns the number of days from startDate to endDate diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index 4ac34b608aa..a2a21643aaf 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -41,6 +41,14 @@ def test_timeadd(data_gen): lambda spark: unary_op_df(spark, TimestampGen(start=datetime(5, 1, 1, tzinfo=timezone.utc), end=datetime(15, 1, 1, tzinfo=timezone.utc)), seed=1) .selectExpr("a + (interval {} days {} seconds)".format(days, seconds))) +@pytest.mark.parametrize('data_gen', vals, ids=idfn) +def test_dateaddinterval(data_gen): + days, seconds = data_gen + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, DateGen(start=date(200, 1, 1), end=date(800, 1, 1)), seed=1) + .selectExpr('a + (interval {} days {} seconds)'.format(days, seconds), + 'a - (interval {} days {} seconds)'.format(days, seconds))) + @pytest.mark.parametrize('data_gen', date_gens, ids=idfn) def test_datediff(data_gen): assert_gpu_and_cpu_are_equal_collect( diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 4ee26765271..989ee7c09ef 100644 --- 
a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -1330,6 +1330,31 @@ object GpuOverrides { override def convertToGpu(lhs: Expression, rhs: Expression): GpuExpression = GpuTimeAdd(lhs, rhs) }), + expr[DateAddInterval]( + "Adds interval to date", + ExprChecks.binaryProjectNotLambda(TypeSig.DATE, TypeSig.DATE, + ("start", TypeSig.DATE, TypeSig.DATE), + ("interval", TypeSig.lit(TypeEnum.CALENDAR) + .withPsNote(TypeEnum.CALENDAR, "month intervals are not supported"), + TypeSig.CALENDAR)), + (a, conf, p, r) => new BinaryExprMeta[DateAddInterval](a, conf, p, r) { + override def tagExprForGpu(): Unit = { + GpuOverrides.extractLit(a.interval).foreach { lit => + val intvl = lit.value.asInstanceOf[CalendarInterval] + if (intvl.months != 0) { + willNotWorkOnGpu("interval months isn't supported") + } + } + a.timeZoneId.foreach { + case zoneId if ZoneId.of(zoneId).normalized() != GpuOverrides.UTC_TIMEZONE_ID => + willNotWorkOnGpu(s"Only UTC zone id is supported. Actual zone id: $zoneId") + case _ => + } + } + + override def convertToGpu(lhs: Expression, rhs: Expression): GpuExpression = + GpuDateAddInterval(lhs, rhs) + }), expr[ToUnixTimestamp]( "Returns the UNIX timestamp of the given time", ExprChecks.binaryProjectNotLambda(TypeSig.LONG, TypeSig.LONG, diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala index 37013f38aab..fbeeb4f7ac8 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala @@ -17,8 +17,9 @@ package org.apache.spark.sql.rapids import java.time.ZoneId +import java.util.concurrent.TimeUnit -import ai.rapids.cudf.{BinaryOp, ColumnVector, DType, Scalar} +import ai.rapids.cudf.{BinaryOp, ColumnVector, ColumnView, DType, Scalar} import com.nvidia.spark.rapids.{Arm, BinaryExprMeta, DataFromReplacementRule, DateUtils, GpuBinaryExpression, GpuColumnVector, GpuExpression, GpuOverrides, GpuScalar, GpuUnaryExpression, RapidsConf, RapidsMeta} import com.nvidia.spark.rapids.DateUtils.TimestampFormatConversionException import com.nvidia.spark.rapids.GpuOverrides.{extractStringLit, getTimeParserPolicy} @@ -149,7 +150,7 @@ abstract class GpuTimeMath( val usToSub = intvl.days.toLong * 24 * 60 * 60 * 1000 * 1000 + intvl.microseconds if (usToSub != 0) { withResource(Scalar.fromLong(usToSub)) { us_s => - withResource(l.getBase.castTo(DType.INT64)) { us => + withResource(l.getBase.logicalCastTo(DType.INT64)) { us => withResource(intervalMath(us_s, us)) { longResult => GpuColumnVector.from(longResult.castTo(DType.TIMESTAMP_MICROSECONDS), dataType) } @@ -172,7 +173,7 @@ abstract class GpuTimeMath( } } - def intervalMath(us_s: Scalar, us: ColumnVector): ColumnVector + def intervalMath(us_s: Scalar, us: ColumnView): ColumnVector } case class GpuTimeAdd(start: Expression, @@ -184,7 +185,7 @@ case class GpuTimeAdd(start: Expression, copy(timeZoneId = Option(timeZoneId)) } - override def intervalMath(us_s: Scalar, us: ColumnVector): ColumnVector = { + override def intervalMath(us_s: Scalar, us: ColumnView): ColumnVector = { us.add(us_s) } } @@ -198,11 +199,67 @@ case class GpuTimeSub(start: Expression, copy(timeZoneId = Option(timeZoneId)) } - def intervalMath(us_s: Scalar, us: ColumnVector): ColumnVector = { + override def intervalMath(us_s: Scalar, us: ColumnView): 
ColumnVector = { us.sub(us_s) } } +case class GpuDateAddInterval(start: Expression, + interval: Expression, + timeZoneId: Option[String] = None) + extends GpuTimeMath(start, interval, timeZoneId) { + + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = { + copy(timeZoneId = Option(timeZoneId)) + } + + override def intervalMath(us_s: Scalar, us: ColumnView): ColumnVector = { + us.add(us_s) + } + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType, CalendarIntervalType) + + override def dataType: DataType = DateType + + override def columnarEval(batch: ColumnarBatch): Any = { + + withResourceIfAllowed(left.columnarEval(batch)) { lhs => + withResourceIfAllowed(right.columnarEval(batch)) { rhs => + (lhs, rhs) match { + case (l: GpuColumnVector, intvl: CalendarInterval) => + if (intvl.months != 0) { + throw new UnsupportedOperationException("Months aren't supported at the moment") + } + val microSecondsInOneDay = TimeUnit.DAYS.toMicros(1) + val microSecToDays = if (intvl.microseconds < 0) { + // This is to calculate when subtraction is performed. Need to take into account the + // interval( which are less than days). Convert it into days which needs to be + // subtracted along with intvl.days(if provided). + (intvl.microseconds.abs.toDouble / microSecondsInOneDay).ceil.toInt * -1 + } else { + (intvl.microseconds.toDouble / microSecondsInOneDay).toInt + } + val daysToAdd = intvl.days + microSecToDays + if (daysToAdd != 0) { + withResource(Scalar.fromInt(daysToAdd)) { us_s => + withResource(l.getBase.logicalCastTo(DType.INT32)) { us => + withResource(intervalMath(us_s, us)) { intResult => + GpuColumnVector.from(intResult.castTo(DType.TIMESTAMP_DAYS), dataType) + } + } + } + } else { + l.incRefCount() + } + case _ => + throw new UnsupportedOperationException("GpuDateAddInterval takes column and " + + "interval as an argument only") + } + } + } + } +} + case class GpuDateDiff(endDate: Expression, startDate: Expression) extends GpuBinaryExpression with ImplicitCastInputTypes { From 1a32484a861578fe7321e0090dde1da129e4308b Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Mon, 22 Feb 2021 19:09:55 +0800 Subject: [PATCH 25/28] spillable cache for GpuCartesianRDD Signed-off-by: sperlingxx --- .../sql/rapids/GpuCartesianProductExec.scala | 50 ++++++++++++------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala index fc6c9b89f28..64916825a2c 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql.rapids import java.io.{IOException, ObjectInputStream, ObjectOutputStream} +import scala.collection.mutable + import ai.rapids.cudf.{JCudfSerialization, NvtxColor, NvtxRange, Table} -import com.nvidia.spark.rapids.{Arm, GpuBindReferences, GpuBuildLeft, GpuColumnVector, GpuExec, GpuExpression, GpuMetric, GpuSemaphore, MetricsLevel} +import com.nvidia.spark.rapids.{Arm, GpuBindReferences, GpuBuildLeft, GpuColumnVector, GpuExec, GpuExpression, GpuMetric, GpuSemaphore, MetricsLevel, SpillableColumnarBatch, SpillPriorities} import com.nvidia.spark.rapids.RapidsPluginImplicits._ import org.apache.spark.{Dependency, NarrowDependency, Partition, SparkContext, TaskContext} @@ -141,27 +143,41 @@ class GpuCartesianRDD( override def 
compute(split: Partition, context: TaskContext): Iterator[ColumnarBatch] = { val currSplit = split.asInstanceOf[GpuCartesianPartition] + + // create a buffer to cache stream-side data in a spillable manner + val spillBatchBuffer = mutable.ArrayBuffer[SpillableColumnarBatch]() + closeOnExcept(spillBatchBuffer) { buffer => + rdd2.iterator(currSplit.s2, context).foreach { cb => + // TODO: is it necessary to create a specific spill priorities for spillBatchBuffer? + buffer += SpillableColumnarBatch( + cb.getBatch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY) + } + } + rdd1.iterator(currSplit.s1, context).flatMap { lhs => val table = withResource(lhs) { lhs => GpuColumnVector.from(lhs.getBatch) } - // Ideally instead of looping through and recomputing rdd2 for - // each batch in rdd1 we would instead cache rdd2 in a way that - // it could spill to disk so we can avoid re-computation - val ret = GpuBroadcastNestedLoopJoinExecBase.innerLikeJoin( - rdd2.iterator(currSplit.s2, context).map(i => i.getBatch), - table, - GpuBuildLeft, - boundCondition, - outputSchema, - joinTime, - joinOutputRows, - numOutputRows, - numOutputBatches, - filterTime, - totalTime) + val ret = closeOnExcept(spillBatchBuffer) { buffer => + GpuBroadcastNestedLoopJoinExecBase.innerLikeJoin( + // fetch stream-side data from buffer in case of re-computation + buffer.toIterator.map(spill => spill.getColumnarBatch()), + table, + GpuBuildLeft, + boundCondition, + outputSchema, + joinTime, + joinOutputRows, + numOutputRows, + numOutputBatches, + filterTime, + totalTime) + } - CompletionIterator[ColumnarBatch, Iterator[ColumnarBatch]](ret, table.close()) + CompletionIterator[ColumnarBatch, Iterator[ColumnarBatch]](ret, { + table.close() + spillBatchBuffer.safeClose() + }) } } From b908c73f187e06b4b82dd9f4a66168a16ec5cd68 Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Tue, 23 Feb 2021 18:05:22 +0800 Subject: [PATCH 26/28] lazy cache --- .../sql/rapids/GpuCartesianProductExec.scala | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala index 64916825a2c..e88b44e4a82 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala @@ -146,34 +146,40 @@ class GpuCartesianRDD( // create a buffer to cache stream-side data in a spillable manner val spillBatchBuffer = mutable.ArrayBuffer[SpillableColumnarBatch]() - closeOnExcept(spillBatchBuffer) { buffer => - rdd2.iterator(currSplit.s2, context).foreach { cb => - // TODO: is it necessary to create a specific spill priorities for spillBatchBuffer? 
- buffer += SpillableColumnarBatch( - cb.getBatch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY) - } - } - rdd1.iterator(currSplit.s1, context).flatMap { lhs => + rdd1.iterator(currSplit.s1, context).zipWithIndex.flatMap { case (lhs, index) => val table = withResource(lhs) { lhs => GpuColumnVector.from(lhs.getBatch) } - val ret = closeOnExcept(spillBatchBuffer) { buffer => - GpuBroadcastNestedLoopJoinExecBase.innerLikeJoin( - // fetch stream-side data from buffer in case of re-computation - buffer.toIterator.map(spill => spill.getColumnarBatch()), - table, - GpuBuildLeft, - boundCondition, - outputSchema, - joinTime, - joinOutputRows, - numOutputRows, - numOutputBatches, - filterTime, - totalTime) + + val streamIterator = if (index == 0) { + // lazily compute and cache stream-side data + rdd2.iterator(currSplit.s2, context).map { serializableBatch => + closeOnExcept(spillBatchBuffer) { buffer => + val batch = SpillableColumnarBatch( + serializableBatch.getBatch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY) + buffer += batch + batch.getColumnarBatch() + } + } + } else { + // fetch stream-side data directly if they are cached + spillBatchBuffer.toIterator.map(_.getColumnarBatch()) } + val ret = GpuBroadcastNestedLoopJoinExecBase.innerLikeJoin( + streamIterator, + table, + GpuBuildLeft, + boundCondition, + outputSchema, + joinTime, + joinOutputRows, + numOutputRows, + numOutputBatches, + filterTime, + totalTime) + CompletionIterator[ColumnarBatch, Iterator[ColumnarBatch]](ret, { table.close() spillBatchBuffer.safeClose() From 4718d00592bf3298c2005d90bafd51e392fc63b9 Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Fri, 5 Mar 2021 17:47:38 +0800 Subject: [PATCH 27/28] adapt new interface of SpillableColumnarBatch --- .../apache/spark/sql/rapids/GpuCartesianProductExec.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala index e88b44e4a82..65b8ff62c32 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala @@ -21,7 +21,7 @@ import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable import ai.rapids.cudf.{JCudfSerialization, NvtxColor, NvtxRange, Table} -import com.nvidia.spark.rapids.{Arm, GpuBindReferences, GpuBuildLeft, GpuColumnVector, GpuExec, GpuExpression, GpuMetric, GpuSemaphore, MetricsLevel, SpillableColumnarBatch, SpillPriorities} +import com.nvidia.spark.rapids.{Arm, GpuBindReferences, GpuBuildLeft, GpuColumnVector, GpuExec, GpuExpression, GpuMetric, GpuSemaphore, MetricsLevel, RapidsBuffer, SpillableColumnarBatch, SpillPriorities} import com.nvidia.spark.rapids.RapidsPluginImplicits._ import org.apache.spark.{Dependency, NarrowDependency, Partition, SparkContext, TaskContext} @@ -156,8 +156,9 @@ class GpuCartesianRDD( // lazily compute and cache stream-side data rdd2.iterator(currSplit.s2, context).map { serializableBatch => closeOnExcept(spillBatchBuffer) { buffer => - val batch = SpillableColumnarBatch( - serializableBatch.getBatch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY) + val batch = SpillableColumnarBatch(serializableBatch.getBatch, + SpillPriorities.ACTIVE_ON_DECK_PRIORITY, + RapidsBuffer.defaultSpillCallback) buffer += batch batch.getColumnarBatch() } From 8a46305350482cd083374869290b88a51562f774 Mon Sep 17 00:00:00 2001 
From: sperlingxx Date: Fri, 5 Mar 2021 17:50:41 +0800 Subject: [PATCH 28/28] fix merge conflicts --- .../apache/spark/sql/rapids/GpuCartesianProductExec.scala | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala index eebab7d3b0a..65b8ff62c32 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuCartesianProductExec.scala @@ -156,14 +156,9 @@ class GpuCartesianRDD( // lazily compute and cache stream-side data rdd2.iterator(currSplit.s2, context).map { serializableBatch => closeOnExcept(spillBatchBuffer) { buffer => -<<<<<<< HEAD val batch = SpillableColumnarBatch(serializableBatch.getBatch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY, RapidsBuffer.defaultSpillCallback) -======= - val batch = SpillableColumnarBatch( - serializableBatch.getBatch, SpillPriorities.ACTIVE_ON_DECK_PRIORITY) ->>>>>>> 07b2d1585ea918d21c668653f8429096ece21f0b buffer += batch batch.getColumnarBatch() }
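Taken together, the last four patches replace the straight re-computation of the stream side with a buffer of spillable batches that is filled lazily while the first build-side batch is being joined and replayed for every later build-side batch. The sketch below models only that control flow with plain collections; SpillableBatch is a hypothetical stand-in for SpillableColumnarBatch, and no actual spilling or GPU memory handling is shown.

    import scala.collection.mutable

    // Hypothetical stand-in for a columnar batch that could spill off the GPU;
    // materialize() plays the role of getColumnarBatch().
    final case class SpillableBatch(rows: Seq[Int]) {
      def materialize(): Seq[Int] = rows
    }

    object LazyStreamCacheDemo extends App {
      val buildSide  = Seq(Seq(1, 2), Seq(3))         // plays the role of rdd1 batches
      def streamSide = Iterator(Seq(10), Seq(20, 30)) // recomputable, plays the role of rdd2

      val cache = mutable.ArrayBuffer[SpillableBatch]()

      val passes = buildSide.iterator.zipWithIndex.map { case (lhs, index) =>
        // Pass 0 pulls from the stream side and fills the cache as it goes;
        // every later pass replays the cached batches instead of recomputing.
        val stream: Iterator[Seq[Int]] =
          if (index == 0) {
            streamSide.map { b =>
              val cached = SpillableBatch(b)
              cache += cached
              cached.materialize()
            }
          } else {
            cache.iterator.map(_.materialize())
          }

        // Inner (cartesian) join of one build batch against every stream batch.
        stream.flatMap(rhs => for (l <- lhs; r <- rhs) yield (l, r)).toList
      }.toList

      passes.foreach(println)
      cache.clear() // the real code closes each SpillableColumnarBatch instead
    }

Because the cache is only populated once per partition, a failure while filling it must still release whatever has been added so far, which is what the closeOnExcept wrapper guards in the actual GpuCartesianRDD code.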