-
Notifications
You must be signed in to change notification settings - Fork 28.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-31255][SQL] Add SupportsMetadataColumns to DSv2
### What changes were proposed in this pull request?

This adds support for metadata columns to DataSourceV2. If a source implements `SupportsMetadataColumns` it must also implement `SupportsPushDownRequiredColumns` to support projecting those columns. The analyzer is updated to resolve metadata columns from `LogicalPlan.metadataOutput`, and this adds a rule that will add metadata columns to the output of `DataSourceV2Relation` if one is used.

### Why are the changes needed?

This is the solution discussed for exposing additional data in the Kafka source. It is also needed for a generic `MERGE INTO` plan.

### Does this PR introduce any user-facing change?

Yes. Users can project additional columns from sources that implement the new API. This also updates `DescribeTableExec` to show metadata columns.

### How was this patch tested?

Will include new unit tests.

Closes #28027 from rdblue/add-dsv2-metadata-columns.

Authored-by: Ryan Blue <blue@apache.org>
Signed-off-by: Burak Yavuz <brkyvz@gmail.com>
- Loading branch information
Showing
11 changed files
with
296 additions
and
22 deletions.
There are no files selected for viewing
58 changes: 58 additions & 0 deletions
58
sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package org.apache.spark.sql.connector.catalog; | ||
|
||
import org.apache.spark.annotation.Evolving; | ||
import org.apache.spark.sql.connector.expressions.Transform; | ||
import org.apache.spark.sql.types.DataType; | ||
|
||
/** | ||
* Interface for a metadata column.
* <p>
* A metadata column can expose additional metadata about a row. For example, rows from Kafka can
* use metadata columns to expose a message's topic, partition number, and offset.
* <p>
* A metadata column could also be the result of a transform applied to a value in the row. For
* example, a partition value produced by bucket(id, 16) could be exposed by a metadata column. In
* this case, {@link #transform()} should return a non-null {@link Transform} that produced the
* metadata column's values.
* <p>
* Only {@link #name()} and {@link #dataType()} are abstract; {@link #isNullable()},
* {@link #comment()}, and {@link #transform()} have default implementations.
*/ | ||
@Evolving | ||
public interface MetadataColumn { | ||
/** | ||
* The name of this metadata column.
*
* @return a String name
*/ | ||
String name(); | ||
|
||
/** | ||
* The data type of values in this metadata column.
*
* @return a {@link DataType}
*/ | ||
DataType dataType(); | ||
|
||
/** | ||
* Whether values produced by this metadata column may be null.
* <p>
* The default implementation returns true.
*
* @return whether values produced by this metadata column may be null
*/ | ||
default boolean isNullable() { | ||
return true; | ||
} | ||
|
||
/** | ||
* Documentation for this metadata column, or null.
* <p>
* The default implementation returns null.
*
* @return a documentation String, or null if there is none
*/ | ||
default String comment() { | ||
return null; | ||
} | ||
|
||
/** | ||
* The {@link Transform} used to produce this metadata column from data rows, or null.
* <p>
* The default implementation returns null.
*
* @return a {@link Transform} used to produce the column's values, or null if there isn't one
*/ | ||
default Transform transform() { | ||
return null; | ||
} | ||
} |
37 changes: 37 additions & 0 deletions
37
...atalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package org.apache.spark.sql.connector.catalog; | ||
|
||
import org.apache.spark.annotation.Evolving; | ||
import org.apache.spark.sql.connector.read.SupportsPushDownRequiredColumns; | ||
import org.apache.spark.sql.types.StructField; | ||
import org.apache.spark.sql.types.StructType; | ||
|
||
/** | ||
* An interface for exposing data columns for a table that are not in the table schema. For example,
* a file source could expose a "file" column that contains the path of the file that contained each
* row.
* <p>
* The columns returned by {@link #metadataColumns()} may be passed as {@link StructField} in
* requested projections. Sources that implement this interface and column projection using
* {@link SupportsPushDownRequiredColumns} must accept metadata fields passed to
* {@link SupportsPushDownRequiredColumns#pruneColumns(StructType)}.
* <p>
* If a table column and a metadata column have the same name, the metadata column will never be
* requested and is ignored. It is recommended that Table implementations reject data column names
* that conflict with metadata column names.
*/ | ||
@Evolving | ||
public interface SupportsMetadataColumns extends Table { | ||
/** | ||
* Metadata columns that are supported by this {@link Table}.
* <p>
* The columns returned by this method may be passed as {@link StructField} in requested
* projections using {@link SupportsPushDownRequiredColumns#pruneColumns(StructType)}.
* <p>
* If a table column and a metadata column have the same name, the metadata column will never be
* requested and is ignored. It is recommended that Table implementations reject data column names
* that conflict with metadata column names.
*
* @return an array of {@link MetadataColumn}
*/ | ||
MetadataColumn[] metadataColumns(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.