From 5dadf52beb822f67bf294dfda0e0380e46674656 Mon Sep 17 00:00:00 2001 From: chenzhx Date: Thu, 11 Aug 2022 10:39:57 +0800 Subject: [PATCH] [SPARK-38899][SQL][FOLLOWUP] Fix bug extract datetime in DS V2 pushdown ### What changes were proposed in this pull request? [SPARK-38899](https://github.com/apache/spark/pull/36663) supports the extract function in the JDBC data source, but the implementation is incorrect. This PR adds a test case that fails without the fix. The test case is shown below. ``` test("scan with filter push-down with date time functions") { val df9 = sql("SELECT name FROM h2.test.datetime WHERE " + "dayofyear(date1) > 100 order by dayofyear(date1) limit 1") checkFiltersRemoved(df9) val expectedPlanFragment9 = "PushedFilters: [DATE1 IS NOT NULL, EXTRACT(DAY_OF_YEAR FROM DATE1) > 100], " + "PushedTopN: ORDER BY [EXTRACT(DAY_OF_YEAR FROM DATE1) ASC NULLS FIRST] LIMIT 1," checkPushedInfo(df9, expectedPlanFragment9) checkAnswer(df9, Seq(Row("alex"))) } ``` The test case's failure output is shown below. 
``` "== Parsed Logical Plan == 'GlobalLimit 1 +- 'LocalLimit 1 +- 'Sort ['dayofyear('date1) ASC NULLS FIRST], true +- 'Project ['name] +- 'Filter ('dayofyear('date1) > 100) +- 'UnresolvedRelation [h2, test, datetime], [], false == Analyzed Logical Plan == name: string GlobalLimit 1 +- LocalLimit 1 +- Project [name#x] +- Sort [dayofyear(date1#x) ASC NULLS FIRST], true +- Project [name#x, date1#x] +- Filter (dayofyear(date1#x) > 100) +- SubqueryAlias h2.test.datetime +- RelationV2[NAME#x, DATE1#x, TIME1#x] h2.test.datetime test.datetime == Optimized Logical Plan == Project [name#x] +- RelationV2[NAME#x] test.datetime == Physical Plan == *(1) Scan org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCScan$$anon$145f6181a [NAME#x] PushedFilters: [DATE1 IS NOT NULL, EXTRACT(DAY_OF_YEAR FROM DATE1) > 100], PushedTopN: ORDER BY [org.apache.spark.sql.connector.expressions.Extract3b95fce9 ASC NULLS FIRST] LIMIT 1, ReadSchema: struct " did not contain "PushedFilters: [DATE1 IS NOT NULL, EXTRACT(DAY_OF_YEAR FROM DATE1) > 100], PushedTopN: ORDER BY [EXTRACT(DAY_OF_YEAR FROM DATE1) ASC NULLS FIRST] LIMIT 1," ``` ### Why are the changes needed? Fix an implementation bug. The reason for the bug is that the Extract function does not implement the toString method when pushing down to the JDBC data source. ### Does this PR introduce _any_ user-facing change? 'No'. Bug fix. ### How was this patch tested? New test case. Closes #37469 from chenzhx/spark-master. 
Authored-by: chenzhx Signed-off-by: Wenchen Fan --- .../apache/spark/sql/connector/expressions/Extract.java | 7 +++++++ .../scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Extract.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Extract.java index a925f1ee31a98..ed9f4415f7da1 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Extract.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Extract.java @@ -18,6 +18,7 @@ package org.apache.spark.sql.connector.expressions; import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.internal.connector.ToStringSQLBuilder; import java.io.Serializable; @@ -59,4 +60,10 @@ public Extract(String field, Expression source) { @Override public Expression[] children() { return new Expression[]{ source() }; } + + @Override + public String toString() { + ToStringSQLBuilder builder = new ToStringSQLBuilder(); + return builder.build(this); + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index a5ea2589b6303..f47efae88c865 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -1374,6 +1374,15 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel "PushedFilters: [DATE1 IS NOT NULL, ((EXTRACT(DAY_OF_WEEK FROM DATE1) % 7) + 1) = 4]" checkPushedInfo(df8, expectedPlanFragment8) checkAnswer(df8, Seq(Row("alex"))) + + val df9 = sql("SELECT name FROM h2.test.datetime WHERE " + + "dayofyear(date1) > 100 order by dayofyear(date1) limit 1") + checkFiltersRemoved(df9) + val expectedPlanFragment9 = + "PushedFilters: [DATE1 IS NOT NULL, EXTRACT(DAY_OF_YEAR FROM DATE1) > 100], " + + 
"PushedTopN: ORDER BY [EXTRACT(DAY_OF_YEAR FROM DATE1) ASC NULLS FIRST] LIMIT 1," + checkPushedInfo(df9, expectedPlanFragment9) + checkAnswer(df9, Seq(Row("alex"))) } test("scan with filter push-down with misc functions") {