diff --git a/Cargo.lock b/Cargo.lock index 99c5dc7fbd3c..8321bd405f38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3289,6 +3289,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" dependencies = [ "cfg-if", + "chrono", "indoc", "inventory", "libc", diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml index 6774a94e3845..b482d383d7ba 100644 --- a/py-polars/Cargo.toml +++ b/py-polars/Cargo.toml @@ -27,7 +27,7 @@ ndarray = { workspace = true } num-traits = { workspace = true } numpy = { version = "0.21", default-features = false } once_cell = { workspace = true } -pyo3 = { workspace = true, features = ["abi3-py38", "extension-module", "multiple-pymethods"] } +pyo3 = { workspace = true, features = ["abi3-py38", "chrono", "extension-module", "multiple-pymethods"] } pyo3-built = { version = "0.5", optional = true } recursive = { workspace = true } serde_json = { workspace = true, optional = true } diff --git a/py-polars/src/conversion/any_value.rs b/py-polars/src/conversion/any_value.rs index 7f42a69816a7..96ba2bf26ca9 100644 --- a/py-polars/src/conversion/any_value.rs +++ b/py-polars/src/conversion/any_value.rs @@ -4,12 +4,17 @@ use std::borrow::Cow; use polars::chunked_array::object::PolarsObjectSafe; use polars::datatypes::{DataType, Field, OwnedObject, PlHashMap, TimeUnit}; use polars::prelude::{AnyValue, Series}; +use polars_core::export::chrono::{NaiveDate, NaiveTime, TimeDelta, Timelike}; use polars_core::utils::any_values_to_supertype_and_n_dtypes; +use polars_core::utils::arrow::temporal_conversions::date32_to_date; use pyo3::exceptions::{PyOverflowError, PyTypeError}; use pyo3::intern; use pyo3::prelude::*; use pyo3::types::{PyBool, PyBytes, PyDict, PyFloat, PyInt, PyList, PySequence, PyString, PyTuple}; +use super::datetime::{ + elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, timestamp_to_naive_datetime, +}; use super::{decimal_to_digits, struct_dict, ObjectValue, Wrap}; use crate::error::PyPolarsErr; use crate::py_modules::{SERIES, UTILS}; @@ -59,26 +64,32 @@ pub(crate) fn any_value_into_py_object(av: AnyValue, py: Python) -> PyObject { s.into_py(py) }, AnyValue::Date(v) => { - let convert = utils.getattr(intern!(py, "to_py_date")).unwrap(); - convert.call1((v,)).unwrap().into_py(py) + let date = date32_to_date(v); + date.into_py(py) }, AnyValue::Datetime(v, time_unit, time_zone) => { - let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap(); - let time_unit = time_unit.to_ascii(); - convert - .call1((v, time_unit, time_zone.as_ref().map(|s| s.as_str()))) - .unwrap() - .into_py(py) + if let Some(time_zone) = time_zone { + // When https://github.com/pola-rs/polars/issues/16199 is + // implemented, we'll switch to something like: + // + // let tz: chrono_tz::Tz = time_zone.parse().unwrap(); + // let datetime = tz.from_local_datetime(&naive_datetime).earliest().unwrap(); + // datetime.into_py(py) + let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap(); + let time_unit = time_unit.to_ascii(); + convert + .call1((v, time_unit, time_zone.as_str())) + .unwrap() + .into_py(py) + } else { + timestamp_to_naive_datetime(v, time_unit).into_py(py) + } }, AnyValue::Duration(v, time_unit) => { - let convert = utils.getattr(intern!(py, "to_py_timedelta")).unwrap(); - let time_unit = time_unit.to_ascii(); - convert.call1((v, time_unit)).unwrap().into_py(py) - }, - AnyValue::Time(v) => { - let convert = utils.getattr(intern!(py, "to_py_time")).unwrap(); - convert.call1((v,)).unwrap().into_py(py) + let time_delta = elapsed_offset_to_timedelta(v, time_unit); + time_delta.into_py(py) }, + AnyValue::Time(v) => nanos_since_midnight_to_naivetime(v).into_py(py), AnyValue::Array(v, _) | AnyValue::List(v) => PySeries::new(v).to_list(), ref av @ AnyValue::Struct(_, _, flds) => struct_dict(py, av._iter_struct_av(), flds), AnyValue::StructOwned(payload) => struct_dict(py, payload.0.into_iter(), &payload.1), @@ -176,19 +187,16 @@ pub(crate) fn py_object_to_any_value<'py>( } fn get_date(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { - Python::with_gil(|py| { - let date = UTILS - .bind(py) - .getattr(intern!(py, "date_to_int")) - .unwrap() - .call1((ob,)) - .unwrap(); - let v = date.extract::().unwrap(); - Ok(AnyValue::Date(v)) - }) + // unwrap() isn't yet const safe. + const UNIX_EPOCH: Option = NaiveDate::from_ymd_opt(1970, 1, 1); + let date = ob.extract::()?; + let elapsed = date.signed_duration_since(UNIX_EPOCH.unwrap()); + Ok(AnyValue::Date(elapsed.num_days() as i32)) } fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { + // Probably needs to wait for + // https://github.com/pola-rs/polars/issues/16199 to do it a faster way. Python::with_gil(|py| { let date = UTILS .bind(py) @@ -202,36 +210,23 @@ pub(crate) fn py_object_to_any_value<'py>( } fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { - Python::with_gil(|py| { - let f = UTILS - .bind(py) - .getattr(intern!(py, "timedelta_to_int")) - .unwrap(); - let py_int = f.call1((ob, intern!(py, "us"))).unwrap(); - - let av = if let Ok(v) = py_int.extract::() { - AnyValue::Duration(v, TimeUnit::Microseconds) - } else { - // This should be faster than calling `timedelta_to_int` again with `"ms"` input. - let v_us = py_int.extract::().unwrap(); - let v = (v_us / 1000) as i64; - AnyValue::Duration(v, TimeUnit::Milliseconds) - }; - Ok(av) - }) + let timedelta = ob.extract::()?; + if let Some(micros) = timedelta.num_microseconds() { + Ok(AnyValue::Duration(micros, TimeUnit::Microseconds)) + } else { + Ok(AnyValue::Duration( + timedelta.num_milliseconds(), + TimeUnit::Milliseconds, + )) + } } fn get_time(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { - Python::with_gil(|py| { - let time = UTILS - .bind(py) - .getattr(intern!(py, "time_to_int")) - .unwrap() - .call1((ob,)) - .unwrap(); - let v = time.extract::().unwrap(); - Ok(AnyValue::Time(v)) - }) + let time = ob.extract::()?; + + Ok(AnyValue::Time( + (time.num_seconds_from_midnight() as i64) * 1_000_000_000 + time.nanosecond() as i64, + )) } fn get_decimal(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult> { diff --git a/py-polars/src/conversion/chunked_array.rs b/py-polars/src/conversion/chunked_array.rs index 455b3a0fd525..4a970ca04880 100644 --- a/py-polars/src/conversion/chunked_array.rs +++ b/py-polars/src/conversion/chunked_array.rs @@ -1,7 +1,12 @@ +use polars_core::export::chrono::NaiveTime; +use polars_core::utils::arrow::temporal_conversions::date32_to_date; use pyo3::intern; use pyo3::prelude::*; use pyo3::types::{PyBytes, PyList, PyTuple}; +use super::datetime::{ + elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, timestamp_to_naive_datetime, +}; use super::{decimal_to_digits, struct_dict}; use crate::prelude::*; use crate::py_modules::UTILS; @@ -43,56 +48,58 @@ impl ToPyObject for Wrap<&StructChunked> { impl ToPyObject for Wrap<&DurationChunked> { fn to_object(&self, py: Python) -> PyObject { - let utils = UTILS.bind(py); - let convert = utils.getattr(intern!(py, "to_py_timedelta")).unwrap(); - let time_unit = self.0.time_unit().to_ascii(); + let time_unit = self.0.time_unit(); let iter = self .0 .iter() - .map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit)).unwrap())); + .map(|opt_v| opt_v.map(|v| elapsed_offset_to_timedelta(v, time_unit))); PyList::new_bound(py, iter).into_py(py) } } impl ToPyObject for Wrap<&DatetimeChunked> { fn to_object(&self, py: Python) -> PyObject { - let utils = UTILS.bind(py); - let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap(); - let time_unit = self.0.time_unit().to_ascii(); - let time_zone = self.0.time_zone().to_object(py); - let iter = self - .0 - .iter() - .map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit, &time_zone)).unwrap())); - PyList::new_bound(py, iter).into_py(py) + let time_zone = self.0.time_zone(); + if time_zone.is_some() { + // Switch to more efficient code path in + // https://github.com/pola-rs/polars/issues/16199 + let utils = UTILS.bind(py); + let convert = utils.getattr(intern!(py, "to_py_datetime")).unwrap(); + let time_unit = self.0.time_unit().to_ascii(); + let time_zone = time_zone.to_object(py); + let iter = self + .0 + .iter() + .map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit, &time_zone)).unwrap())); + PyList::new_bound(py, iter).into_py(py) + } else { + let time_unit = self.0.time_unit(); + let iter = self + .0 + .iter() + .map(|opt_v| opt_v.map(|v| timestamp_to_naive_datetime(v, time_unit))); + PyList::new_bound(py, iter).into_py(py) + } } } impl ToPyObject for Wrap<&TimeChunked> { fn to_object(&self, py: Python) -> PyObject { - let iter = time_to_pyobject_iter(py, self.0); + let iter = time_to_pyobject_iter(self.0); PyList::new_bound(py, iter).into_py(py) } } -pub(crate) fn time_to_pyobject_iter<'a>( - py: Python<'a>, - ca: &'a TimeChunked, -) -> impl ExactSizeIterator>> { - let utils = UTILS.bind(py); - let convert = utils.getattr(intern!(py, "to_py_time")).unwrap().clone(); +pub(crate) fn time_to_pyobject_iter( + ca: &TimeChunked, +) -> impl '_ + ExactSizeIterator> { ca.0.iter() - .map(move |opt_v| opt_v.map(|v| convert.call1((v,)).unwrap())) + .map(move |opt_v| opt_v.map(nanos_since_midnight_to_naivetime)) } impl ToPyObject for Wrap<&DateChunked> { fn to_object(&self, py: Python) -> PyObject { - let utils = UTILS.bind(py); - let convert = utils.getattr(intern!(py, "to_py_date")).unwrap(); - let iter = self - .0 - .into_iter() - .map(|opt_v| opt_v.map(|v| convert.call1((v,)).unwrap())); + let iter = self.0.into_iter().map(|opt_v| opt_v.map(date32_to_date)); PyList::new_bound(py, iter).into_py(py) } } diff --git a/py-polars/src/conversion/datetime.rs b/py-polars/src/conversion/datetime.rs new file mode 100644 index 000000000000..4d7e6339c685 --- /dev/null +++ b/py-polars/src/conversion/datetime.rs @@ -0,0 +1,31 @@ +//! Utilities for converting dates, times, datetimes, and so on. + +use polars::datatypes::TimeUnit; +use polars_core::export::chrono::{NaiveDateTime, NaiveTime, TimeDelta}; + +pub fn elapsed_offset_to_timedelta(elapsed: i64, time_unit: TimeUnit) -> TimeDelta { + let (in_second, nano_multiplier) = match time_unit { + TimeUnit::Nanoseconds => (1_000_000_000, 1), + TimeUnit::Microseconds => (1_000_000, 1_000), + TimeUnit::Milliseconds => (1_000, 1_000_000), + }; + let mut elapsed_sec = elapsed / in_second; + let mut elapsed_nanos = nano_multiplier * (elapsed % in_second); + if elapsed_nanos < 0 { + // TimeDelta expects nanos to always be positive. + elapsed_sec -= 1; + elapsed_nanos += 1_000_000_000; + } + TimeDelta::new(elapsed_sec, elapsed_nanos as u32).unwrap() +} + +/// Convert time-units-since-epoch to a more structured object. +pub fn timestamp_to_naive_datetime(since_epoch: i64, time_unit: TimeUnit) -> NaiveDateTime { + NaiveDateTime::UNIX_EPOCH + elapsed_offset_to_timedelta(since_epoch, time_unit) +} + +/// Convert nanoseconds-since-midnight to a more structured object. +pub fn nanos_since_midnight_to_naivetime(nanos_since_midnight: i64) -> NaiveTime { + NaiveTime::from_hms_opt(0, 0, 0).unwrap() + + elapsed_offset_to_timedelta(nanos_since_midnight, TimeUnit::Nanoseconds) +} diff --git a/py-polars/src/conversion/mod.rs b/py-polars/src/conversion/mod.rs index b664392aab0f..ec46726a6b2e 100644 --- a/py-polars/src/conversion/mod.rs +++ b/py-polars/src/conversion/mod.rs @@ -1,5 +1,6 @@ pub(crate) mod any_value; pub(crate) mod chunked_array; +mod datetime; use std::fmt::{Display, Formatter}; use std::hash::{Hash, Hasher}; diff --git a/py-polars/src/series/export.rs b/py-polars/src/series/export.rs index c59c6129d02b..8af513cc718b 100644 --- a/py-polars/src/series/export.rs +++ b/py-polars/src/series/export.rs @@ -241,7 +241,7 @@ fn series_to_numpy_with_copy(py: Python, s: &Series) -> PyResult { }, Time => { let ca = s.time().unwrap(); - let values = time_to_pyobject_iter(py, ca).map(|v| v.into_py(py)); + let values = time_to_pyobject_iter(ca).map(|v| v.into_py(py)); PyArray1::from_iter_bound(py, values).into_py(py) }, String => {