Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support multi-columns expr #10222

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions datafusion/core/src/datasource/listing/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool {
Ok(TreeNodeRecursion::Stop)
}
}
Expr::Columns(cols) => {
is_applicable &= cols.iter().all(|col| col_names.contains(&col.name));
if is_applicable {
Ok(TreeNodeRecursion::Jump)
} else {
Ok(TreeNodeRecursion::Stop)
}
}
Expr::Literal(_)
| Expr::Alias(_)
| Expr::OuterReferenceColumn(_, _)
Expand Down
4 changes: 4 additions & 0 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result<String> {
Ok(c.flat_name())
}
}
Expr::Columns(c) => {
let names: Vec<String> = c.iter().map(|c| c.flat_name()).collect();
Ok(names.join(","))
}
Expr::Alias(Alias { name, .. }) => Ok(name.clone()),
Expr::ScalarVariable(_, variable_names) => Ok(variable_names.join(".")),
Expr::Literal(value) => Ok(format!("{value:?}")),
Expand Down
11 changes: 11 additions & 0 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ pub enum Expr {
Alias(Alias),
/// A named reference to a qualified filed in a schema.
Column(Column),
/// A collection of references to a qualified filed in a schema.
Columns(Vec<Column>),
/// A named reference to a variable in a registry.
ScalarVariable(DataType, Vec<String>),
/// A constant value.
Expand Down Expand Up @@ -922,6 +924,7 @@ impl Expr {
Expr::Case { .. } => "Case",
Expr::Cast { .. } => "Cast",
Expr::Column(..) => "Column",
Expr::Columns(..) => "Columns",
Expr::OuterReferenceColumn(_, _) => "Outer",
Expr::Exists { .. } => "Exists",
Expr::GetIndexedField { .. } => "GetIndexedField",
Expand Down Expand Up @@ -1329,6 +1332,7 @@ impl Expr {
| Expr::Between(..)
| Expr::Cast(..)
| Expr::Column(..)
| Expr::Columns(..)
| Expr::Exists(..)
| Expr::GetIndexedField(..)
| Expr::GroupingSet(..)
Expand Down Expand Up @@ -1398,6 +1402,9 @@ impl fmt::Display for Expr {
match self {
Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"),
Expr::Column(c) => write!(f, "{c}"),
Expr::Columns(c) => {
Ok(c.iter().try_for_each(|column| write!(f, "{},", column))?)
}
Expr::OuterReferenceColumn(_, c) => write!(f, "outer_ref({c})"),
Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")),
Expr::Literal(v) => write!(f, "{v:?}"),
Expand Down Expand Up @@ -1657,6 +1664,10 @@ fn create_name(e: &Expr) -> Result<String> {
match e {
Expr::Alias(Alias { name, .. }) => Ok(name.clone()),
Expr::Column(c) => Ok(c.flat_name()),
Expr::Columns(c) => {
let names: Vec<String> = c.iter().map(|c| c.flat_name()).collect();
Ok(names.join(", "))
}
Expr::OuterReferenceColumn(_, c) => Ok(format!("outer_ref({})", c.flat_name())),
Expr::ScalarVariable(_, variable_names) => Ok(variable_names.join(".")),
Expr::Literal(value) => Ok(format!("{value:?}")),
Expand Down
23 changes: 20 additions & 3 deletions datafusion/expr/src/expr_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ use crate::type_coercion::binary::get_result_type;
use crate::type_coercion::functions::data_types;
use crate::{utils, LogicalPlan, Projection, Subquery};
use arrow::compute::can_cast_types;
use arrow::datatypes::{DataType, Field};
use arrow::datatypes::{DataType, Field, Fields};
use datafusion_common::{
internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, ExprSchema,
Result, TableReference,
internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError,
ExprSchema, Result, TableReference,
};
use std::collections::HashMap;
use std::sync::Arc;
Expand Down Expand Up @@ -109,6 +109,15 @@ impl ExprSchemable for Expr {
},
Expr::Sort(Sort { expr, .. }) | Expr::Negative(expr) => expr.get_type(schema),
Expr::Column(c) => Ok(schema.data_type(c)?.clone()),
Expr::Columns(c) => {
let fields = c.iter()
.map(|col| {
let data_type = schema.data_type(col)?;
let nullable = schema.nullable(col)?;
Ok(Field::new(col.name.clone(), data_type.clone(), nullable))
}).collect::<Result<Vec<Field>,DataFusionError>>();
Ok(DataType::Struct(Fields::from(fields?)))
}
Expr::OuterReferenceColumn(ty, _) => Ok(ty.clone()),
Expr::ScalarVariable(ty, _) => Ok(ty.clone()),
Expr::Literal(l) => Ok(l.data_type()),
Expand Down Expand Up @@ -277,6 +286,13 @@ impl ExprSchemable for Expr {
|| high.nullable(input_schema)?),

Expr::Column(c) => input_schema.nullable(c),
Expr::Columns(c) => {
let column_nullables: Vec<bool> = c
.iter()
.map(|col| input_schema.nullable(col))
.collect::<Result<Vec<_>>>()?;
Ok(column_nullables.iter().any(|&x| x))
}
Expr::OuterReferenceColumn(_, _) => Ok(true),
Expr::Literal(value) => Ok(value.is_null()),
Expr::Case(case) => {
Expand Down Expand Up @@ -326,6 +342,7 @@ impl ExprSchemable for Expr {
| Expr::SimilarTo(Like { expr, pattern, .. }) => {
Ok(expr.nullable(input_schema)? || pattern.nullable(input_schema)?)
}

Expr::Wildcard { .. } => internal_err!(
"Wildcard expressions are not valid in a logical query plan"
),
Expand Down
2 changes: 2 additions & 0 deletions datafusion/expr/src/tree_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ impl TreeNode for Expr {
lists_of_exprs.iter().flatten().collect()
}
Expr::Column(_)
| Expr::Columns(_)
// Treat OuterReferenceColumn as a leaf expression
| Expr::OuterReferenceColumn(_, _)
| Expr::ScalarVariable(_, _)
Expand Down Expand Up @@ -140,6 +141,7 @@ impl TreeNode for Expr {
) -> Result<Transformed<Self>> {
Ok(match self {
Expr::Column(_)
| Expr::Columns(_)
| Expr::Wildcard { .. }
| Expr::Placeholder(Placeholder { .. })
| Expr::OuterReferenceColumn(_, _)
Expand Down
5 changes: 5 additions & 0 deletions datafusion/expr/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,11 @@ pub fn expr_to_columns(expr: &Expr, accum: &mut HashSet<Column>) -> Result<()> {
Expr::Column(qc) => {
accum.insert(qc.clone());
}
Expr::Columns(cols) => {
cols.iter().for_each(|c| {
accum.insert(c.clone());
});
}
// Use explicit pattern match instead of a default
// implementation, so that in the future if someone adds
// new Expr types, they will check here as well
Expand Down
1 change: 1 addition & 0 deletions datafusion/optimizer/src/analyzer/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ impl TreeNodeRewriter for TypeCoercionRewriter {
}
Expr::Alias(_)
| Expr::Column(_)
| Expr::Columns(_)
| Expr::ScalarVariable(_, _)
| Expr::Literal(_)
| Expr::SimilarTo(_)
Expand Down
1 change: 1 addition & 0 deletions datafusion/optimizer/src/push_down_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result<bool> {
let mut is_evaluate = true;
predicate.apply(|expr| match expr {
Expr::Column(_)
| Expr::Columns(_)
| Expr::Literal(_)
| Expr::Placeholder(_)
| Expr::ScalarVariable(_, _) => Ok(TreeNodeRecursion::Jump),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ impl<'a> ConstEvaluator<'a> {
| Expr::AggregateFunction { .. }
| Expr::ScalarVariable(_, _)
| Expr::Column(_)
| Expr::Columns(_)
| Expr::OuterReferenceColumn(_, _)
| Expr::Exists { .. }
| Expr::InSubquery(_)
Expand Down
1 change: 1 addition & 0 deletions datafusion/proto/gen/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ fn main() -> Result<(), String> {
.file_descriptor_set_path(&descriptor_path)
.out_dir("src")
.compile_well_known_types()
.protoc_arg("--experimental_allow_proto3_optional")
.extern_path(".google.protobuf", "::pbjson_types")
.compile_protos(&[proto_path], &["proto"])
.map_err(|e| format!("protobuf compilation failed: {e}"))?;
Expand Down
6 changes: 6 additions & 0 deletions datafusion/proto/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ message Column {
ColumnRelation relation = 2;
}

message Columns {
repeated Column column = 1;
}

message DfField{
Field field = 1;
ColumnRelation qualifier = 2;
Expand Down Expand Up @@ -422,6 +426,8 @@ message LogicalExprNode {

Unnest unnest = 35;

Columns columns = 36;

}
}

Expand Down
104 changes: 104 additions & 0 deletions datafusion/proto/src/generated/pbjson.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 9 additions & 1 deletion datafusion/proto/src/generated/prost.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions datafusion/proto/src/logical_plan/from_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,11 @@ pub fn parse_expr(
)))
}
ExprType::Column(column) => Ok(Expr::Column(column.into())),
ExprType::Columns(columns) => {
let columns: Vec<Column> =
columns.column.iter().map(|c| c.into()).collect::<Vec<_>>();
Ok(Expr::Columns(columns))
}
ExprType::Literal(literal) => {
let scalar_value: ScalarValue = literal.try_into()?;
Ok(Expr::Literal(scalar_value))
Expand Down
Loading