From 4fdc94983793fc88701057bb6c4fad4b336c4c47 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Thu, 2 May 2024 10:11:43 -0400 Subject: [PATCH 1/3] Convert date/datetime in lit construction --- py-polars/polars/functions/lit.py | 47 ++++++++--- .../namespaces/temporal/test_datetime.py | 78 +++++++++++++++++++ 2 files changed, 115 insertions(+), 10 deletions(-) diff --git a/py-polars/polars/functions/lit.py b/py-polars/polars/functions/lit.py index 92d43f88b726..5ae39f0eeb0a 100644 --- a/py-polars/polars/functions/lit.py +++ b/py-polars/polars/functions/lit.py @@ -78,25 +78,43 @@ def lit( time_unit: TimeUnit if isinstance(value, datetime): + if dtype == Date: + dt_int = date_to_int(value.date()) + return lit(dt_int).cast(Date) + + # parse time unit if dtype is not None and (tu := getattr(dtype, "time_unit", "us")) is not None: time_unit = tu # type: ignore[assignment] else: time_unit = "us" - time_zone: str | None = getattr(dtype, "time_zone", None) - if (tzinfo := value.tzinfo) is not None: - tzinfo_str = str(tzinfo) - if time_zone is not None and time_zone != tzinfo_str: - msg = f"time zone of dtype ({time_zone!r}) differs from time zone of value ({tzinfo!r})" + # parse time zone + dtype_tz = getattr(dtype, "time_zone", None) + value_tz = value.tzinfo + if value_tz is None: + tz = dtype_tz + else: + if dtype_tz is None: + # value has time zone, but dtype does not: keep value time zone + tz = str(value_tz) + elif str(value_tz) == dtype_tz: + # dtype and value both have same time zone + tz = str(value_tz) + else: + # value has time zone that differs from dtype time zone + msg = ( + f"time zone of dtype ({dtype_tz!r}) differs from time zone of " + f"value ({value_tz!r})" + ) raise TypeError(msg) - time_zone = tzinfo_str dt_utc = value.replace(tzinfo=timezone.utc) dt_int = datetime_to_int(dt_utc, time_unit) expr = lit(dt_int).cast(Datetime(time_unit)) - if time_zone is not None: + if tz is not None: + print(f"tz is {tz}") expr = expr.dt.replace_time_zone( - time_zone, ambiguous="earliest" if value.fold == 0 else "latest" + tz, ambiguous="earliest" if value.fold == 0 else "latest" ) return expr @@ -114,8 +132,17 @@ def lit( return lit(time_int).cast(Time) elif isinstance(value, date): - date_int = date_to_int(value) - return lit(date_int).cast(Date) + if dtype == Datetime: + time_unit = getattr(dtype, "time_unit", "us") or "us" + dt_utc = datetime(value.year, value.month, value.day) + dt_int = datetime_to_int(dt_utc, time_unit) + expr = lit(dt_int).cast(Datetime(time_unit)) + if (time_zone := getattr(dtype, "time_zone", None)) is not None: + expr = expr.dt.replace_time_zone(str(time_zone)) + return expr + else: + date_int = date_to_int(value) + return lit(date_int).cast(Date) elif isinstance(value, pl.Series): value = value._s diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index c9a43984cd45..5ea3259f866e 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections import OrderedDict from datetime import date, datetime, time, timedelta from typing import TYPE_CHECKING @@ -16,6 +17,7 @@ from zoneinfo import ZoneInfo from polars._typing import TemporalLiteral, TimeUnit + from polars.datatypes import PolarsDataType else: from polars._utils.convert import string_to_zoneinfo as ZoneInfo @@ -1350,3 +1352,79 @@ def test_dt_mean_deprecated() -> None: with pytest.deprecated_call(): result = s.dt.mean() assert result == s.mean() + + +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("ms", "EST"), + pl.Datetime("us"), + pl.Datetime("us", "EST"), + pl.Datetime("ns"), + pl.Datetime("ns", "EST"), + ], +) +@pytest.mark.parametrize( + "value", + [ + date(1677, 9, 22), + date(1970, 1, 1), + date(2024, 2, 29), + date(2262, 4, 11), + ], +) +def test_literal_from_date( + value: date, + dtype: PolarsDataType, +) -> None: + out = pl.select(pl.lit(value, dtype=dtype)) + assert out.schema == OrderedDict({"literal": dtype}) + if dtype == pl.Datetime: + tz = ZoneInfo(dtype.time_zone) if dtype.time_zone is not None else None # type: ignore[union-attr] + value = datetime(value.year, value.month, value.day, tzinfo=tz) + assert out.item() == value + + +@pytest.mark.parametrize( + "dtype", + [ + pl.Date, + pl.Datetime("ms"), + pl.Datetime("ms", "EST"), + pl.Datetime("us"), + pl.Datetime("us", "EST"), + pl.Datetime("ns"), + pl.Datetime("ns", "EST"), + ], +) +@pytest.mark.parametrize( + "value", + [ + datetime(1677, 9, 22), + datetime(1677, 9, 22, tzinfo=ZoneInfo("EST")), + datetime(1970, 1, 1), + datetime(1970, 1, 1, tzinfo=ZoneInfo("EST")), + datetime(2024, 2, 29), + datetime(2024, 2, 29, tzinfo=ZoneInfo("EST")), + datetime(2262, 4, 11), + datetime(2262, 4, 11, tzinfo=ZoneInfo("EST")), + ], +) +def test_literal_from_datetime( + value: datetime, + dtype: pl.Date | pl.Datetime, +) -> None: + out = pl.select(pl.lit(value, dtype=dtype)) + if dtype == pl.Date: + value = value.date() # type: ignore[assignment] + elif dtype.time_zone is None and value.tzinfo is not None: # type: ignore[union-attr] + # update the dtype with the supplied time zone in the value + dtype = pl.Datetime(dtype.time_unit, str(value.tzinfo)) # type: ignore[arg-type, union-attr] + elif dtype.time_zone is not None and value.tzinfo is None: # type: ignore[union-attr] + # cast from dt without tz to dtype with tz + value = value.replace(tzinfo=ZoneInfo(dtype.time_zone)) # type: ignore[union-attr] + + assert out.schema == OrderedDict({"literal": dtype}) + assert out.item() == value From c2c162f70526560553393802a1f5f67774c76d6a Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Mon, 3 Jun 2024 10:05:07 -0400 Subject: [PATCH 2/3] Remove oopsie print --- py-polars/polars/functions/lit.py | 1 - .../unit/operations/namespaces/temporal/test_datetime.py | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/py-polars/polars/functions/lit.py b/py-polars/polars/functions/lit.py index 5ae39f0eeb0a..5bf327257694 100644 --- a/py-polars/polars/functions/lit.py +++ b/py-polars/polars/functions/lit.py @@ -112,7 +112,6 @@ def lit( dt_int = datetime_to_int(dt_utc, time_unit) expr = lit(dt_int).cast(Datetime(time_unit)) if tz is not None: - print(f"tz is {tz}") expr = expr.dt.replace_time_zone( tz, ambiguous="earliest" if value.fold == 0 else "latest" ) diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index 5ea3259f866e..b212c7be133d 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -16,8 +16,7 @@ if TYPE_CHECKING: from zoneinfo import ZoneInfo - from polars._typing import TemporalLiteral, TimeUnit - from polars.datatypes import PolarsDataType + from polars._typing import PolarsDataType, TemporalLiteral, TimeUnit else: from polars._utils.convert import string_to_zoneinfo as ZoneInfo @@ -1421,7 +1420,7 @@ def test_literal_from_datetime( value = value.date() # type: ignore[assignment] elif dtype.time_zone is None and value.tzinfo is not None: # type: ignore[union-attr] # update the dtype with the supplied time zone in the value - dtype = pl.Datetime(dtype.time_unit, str(value.tzinfo)) # type: ignore[arg-type, union-attr] + dtype = pl.Datetime(dtype.time_unit, str(value.tzinfo)) # type: ignore[union-attr] elif dtype.time_zone is not None and value.tzinfo is None: # type: ignore[union-attr] # cast from dt without tz to dtype with tz value = value.replace(tzinfo=ZoneInfo(dtype.time_zone)) # type: ignore[union-attr] From 5480c5a52a030450f5eddd604de872490d0535c1 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Thu, 15 Aug 2024 11:07:46 -0400 Subject: [PATCH 3/3] Enable direct lit --- py-polars/polars/functions/lit.py | 16 ++++++---------- py-polars/src/functions/lazy.rs | 5 ++++- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/py-polars/polars/functions/lit.py b/py-polars/polars/functions/lit.py index 5bf327257694..700c65dbee7c 100644 --- a/py-polars/polars/functions/lit.py +++ b/py-polars/polars/functions/lit.py @@ -7,8 +7,6 @@ import polars._reexport as pl from polars._utils.convert import ( - date_to_int, - datetime_to_int, time_to_int, timedelta_to_int, ) @@ -79,8 +77,7 @@ def lit( if isinstance(value, datetime): if dtype == Date: - dt_int = date_to_int(value.date()) - return lit(dt_int).cast(Date) + return wrap_expr(plr.lit(value.date(), allow_object=False)) # parse time unit if dtype is not None and (tu := getattr(dtype, "time_unit", "us")) is not None: @@ -109,8 +106,7 @@ def lit( raise TypeError(msg) dt_utc = value.replace(tzinfo=timezone.utc) - dt_int = datetime_to_int(dt_utc, time_unit) - expr = lit(dt_int).cast(Datetime(time_unit)) + expr = wrap_expr(plr.lit(dt_utc, allow_object=False)).cast(Datetime(time_unit)) if tz is not None: expr = expr.dt.replace_time_zone( tz, ambiguous="earliest" if value.fold == 0 else "latest" @@ -134,14 +130,14 @@ def lit( if dtype == Datetime: time_unit = getattr(dtype, "time_unit", "us") or "us" dt_utc = datetime(value.year, value.month, value.day) - dt_int = datetime_to_int(dt_utc, time_unit) - expr = lit(dt_int).cast(Datetime(time_unit)) + expr = wrap_expr(plr.lit(dt_utc, allow_object=False)).cast( + Datetime(time_unit) + ) if (time_zone := getattr(dtype, "time_zone", None)) is not None: expr = expr.dt.replace_time_zone(str(time_zone)) return expr else: - date_int = date_to_int(value) - return lit(date_int).cast(Date) + return wrap_expr(plr.lit(value, allow_object=False)) elif isinstance(value, pl.Series): value = value._s diff --git a/py-polars/src/functions/lazy.rs b/py-polars/src/functions/lazy.rs index 49325e617170..aa098aee2cb0 100644 --- a/py-polars/src/functions/lazy.rs +++ b/py-polars/src/functions/lazy.rs @@ -435,7 +435,10 @@ pub fn lit(value: &Bound<'_, PyAny>, allow_object: bool) -> PyResult { Ok(dsl::lit(Null {}).into()) } else if let Ok(value) = value.downcast::() { Ok(dsl::lit(value.as_bytes()).into()) - } else if value.get_type().qualname().unwrap() == "Decimal" { + } else if matches!( + value.get_type().qualname().unwrap().as_str(), + "date" | "datetime" | "Decimal" + ) { let av = py_object_to_any_value(value, true)?; Ok(Expr::Literal(LiteralValue::try_from(av).unwrap()).into()) } else if allow_object {