Skip to content

Commit

Permalink
feat(python,rust): extend dtype/selector matching for Datetime with…
Browse files Browse the repository at this point in the history
… a "*" wildcard for timezones (#9641)
  • Loading branch information
alexander-beedie authored Jul 2, 2023
1 parent 7b32d82 commit a5f2604
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 87 deletions.
31 changes: 22 additions & 9 deletions polars/polars-lazy/polars-plan/src/logical_plan/projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,20 @@ pub(super) fn replace_dtype_with_column(mut expr: Expr, column_name: Arc<str>) -
expr
}

/// Reports whether two dtypes should be treated as matching during dtype-based
/// column selection / exclusion.
///
/// For a pair of `Datetime` dtypes the time units must be identical, and the
/// timezones match when either:
/// - they are equal (including both being `None`), or
/// - one side is the `"*"` wildcard and the other side has *some* timezone set.
///
/// Note that the wildcard never matches a timezone-naive (`None`) Datetime.
/// Any other combination of dtypes requires exact equality.
fn dtypes_match(d1: &DataType, d2: &DataType) -> bool {
    if let (DataType::Datetime(unit_a, zone_a), DataType::Datetime(unit_b, zone_b)) = (d1, d2) {
        if unit_a != unit_b {
            return false;
        }
        // a side is a wildcard only if it literally carries the "*" timezone string
        let a_is_wildcard = zone_a.as_deref() == Some("*");
        let b_is_wildcard = zone_b.as_deref() == Some("*");

        return zone_a == zone_b
            || (a_is_wildcard && zone_b.is_some())
            || (b_is_wildcard && zone_a.is_some());
    }
    // non-Datetime (or mixed) pairs: no wildcard semantics, exact match only
    d1 == d2
}

/// replace `DtypeColumn` with `col("foo")..col("bar")`
fn expand_dtypes(
expr: &Expr,
Expand All @@ -205,10 +219,10 @@ fn expand_dtypes(
) -> PolarsResult<()> {
// note: we loop over the schema to guarantee that we return a stable
// field-order, irrespective of which dtypes are filtered against
for field in schema
.iter_fields()
.filter(|f| (dtypes.contains(&f.dtype) && !exclude.contains(f.name().as_str())))
{
for field in schema.iter_fields().filter(|f| {
dtypes.iter().any(|dtype| dtypes_match(dtype, &f.dtype))
&& !exclude.contains(f.name().as_str())
}) {
let name = field.name();
let new_expr = expr.clone();
let new_expr = replace_dtype_with_column(new_expr, Arc::from(name.as_str()));
Expand All @@ -230,10 +244,9 @@ fn prepare_excluded(
if let Expr::Exclude(_, to_exclude) = e {
#[cfg(feature = "regex")]
{
// instead of matching the names for regex patterns
// and expanding the matches in the schema we
// reuse the `replace_regex` function. This is a bit
// slower but DRY.
// instead of matching the names for regex patterns and
// expanding the matches in the schema we reuse the
// `replace_regex` func; this is a bit slower but DRY.
let mut buf = vec![];
for to_exclude_single in to_exclude {
match to_exclude_single {
Expand All @@ -249,7 +262,7 @@ fn prepare_excluded(
}
Excluded::Dtype(dt) => {
for fld in schema.iter_fields() {
if fld.data_type() == dt {
if dtypes_match(fld.data_type(), dt) {
exclude.insert(Arc::from(fld.name().as_ref()));
}
}
Expand Down
6 changes: 4 additions & 2 deletions py-polars/polars/datatypes/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,10 +311,12 @@ def __init__(
Parameters
----------
time_unit : {'us', 'ns', 'ms'}
Unit of time.
Unit of time / precision.
time_zone
Time zone string as defined in zoneinfo (run
Time zone string, as defined in zoneinfo (to see valid strings run
``import zoneinfo; zoneinfo.available_timezones()`` for a full list).
When used to match dtypes, "*" can be given to check for Datetime columns
that have any timezone.
"""
if isinstance(time_zone, timezone):
Expand Down
3 changes: 3 additions & 0 deletions py-polars/polars/datatypes/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
Datetime("ms"),
Datetime("us"),
Datetime("ns"),
Datetime("ms", "*"),
Datetime("us", "*"),
Datetime("ns", "*"),
]
)
DURATION_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
Expand Down
Loading

0 comments on commit a5f2604

Please sign in to comment.