Skip to content

Commit

Permalink
depr(python): Rename Expr.meta.write_json/Expr.from_json to `Expr.meta.serialize/Expr.deserialize` (#14490)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Feb 27, 2024
1 parent 493da8b commit bdcbf1a
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 42 deletions.
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expressions/meta.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@ The following methods are available under the `expr.meta` attribute.
Expr.meta.pop
Expr.meta.tree_format
Expr.meta.root_names
Expr.meta.serialize
Expr.meta.undo_aliases
Expr.meta.write_json
5 changes: 3 additions & 2 deletions py-polars/docs/source/reference/expressions/miscellaneous.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ Miscellaneous
.. autosummary::
:toctree: api/

Expr.from_json
Expr.set_sorted
Expr.deserialize
Expr.from_json
Expr.set_sorted
2 changes: 1 addition & 1 deletion py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9224,7 +9224,7 @@ def approx_n_unique(self) -> DataFrame:
"""
Approximate count of unique values.
.. deprecated: 0.20.11
.. deprecated:: 0.20.11
Use `select(pl.all().approx_n_unique())` instead.
This is done using the HyperLogLog++ algorithm for cardinality estimation.
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ def round(
- `'earliest'`: use the earliest datetime
- `'latest'`: use the latest datetime
.. deprecated: 0.19.3
.. deprecated:: 0.19.3
This is now auto-inferred, you can safely remove this argument.
Returns
Expand Down
56 changes: 51 additions & 5 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import warnings
from datetime import timedelta
from functools import reduce
from io import BytesIO, StringIO
from pathlib import Path
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -60,6 +62,7 @@
BUILDING_SPHINX_DOCS,
find_stacklevel,
no_default,
normalize_filepath,
sphinx_accessor,
warn_null_comparison,
)
Expand All @@ -72,6 +75,7 @@

if TYPE_CHECKING:
import sys
from io import IOBase

from polars import DataFrame, LazyFrame, Series
from polars.type_aliases import (
Expand Down Expand Up @@ -330,17 +334,36 @@ def function(s: Series) -> Series: # pragma: no cover
return root_expr.map_batches(function, is_elementwise=True).meta.undo_aliases()

@classmethod
def from_json(cls, value: str) -> Self:
def deserialize(cls, source: str | Path | IOBase) -> Self:
"""
Read an expression from a JSON encoded string to construct an Expression.
Read an expression from a JSON file.
Parameters
----------
value
JSON encoded string value
source
Path to a file or a file-like object (by file-like object, we refer to
objects that have a `read()` method, such as a file handler (e.g.
via builtin `open` function) or `BytesIO`).
See Also
--------
Expr.meta.serialize
Examples
--------
>>> from io import StringIO
>>> expr = pl.col("foo").sum().over("bar")
>>> json = expr.meta.serialize()
>>> pl.Expr.deserialize(StringIO(json)) # doctest: +ELLIPSIS
<Expr ['col("foo").sum().over([col("ba…'] at ...>
"""
if isinstance(source, StringIO):
source = BytesIO(source.getvalue().encode())
elif isinstance(source, (str, Path)):
source = normalize_filepath(source)

expr = cls.__new__(cls)
expr._pyexpr = PyExpr.meta_read_json(value)
expr._pyexpr = PyExpr.deserialize(source)
return expr

def to_physical(self) -> Self:
Expand Down Expand Up @@ -9850,6 +9873,29 @@ def map_dict(
"""
return self.replace(mapping, default=default, return_dtype=return_dtype)

@classmethod
def from_json(cls, value: str) -> Self:
    """
    Construct an expression from a JSON encoded string.

    .. deprecated:: 0.20.11
        This method has been renamed to :meth:`deserialize`.
        Note that the new method operates on file-like inputs rather than strings.
        Enclose your input in `io.StringIO` to keep the same behavior.

    Parameters
    ----------
    value
        JSON encoded string value
    """
    message = (
        "`Expr.from_json` is deprecated. It has been renamed to `Expr.deserialize`."
        " Note that the new method operates on file-like inputs rather than strings."
        " Enclose your input in `io.StringIO` to keep the same behavior."
    )
    issue_deprecation_warning(message, version="0.20.11")

    # `deserialize` is called with a file-like object here, so wrap the string.
    buffer = StringIO(value)
    return cls.deserialize(buffer)

@property
def bin(self) -> ExprBinaryNameSpace:
"""
Expand Down
62 changes: 55 additions & 7 deletions py-polars/polars/expr/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@

from polars.exceptions import ComputeError
from polars.utils._wrap import wrap_expr
from polars.utils.deprecation import deprecate_nonkeyword_arguments
from polars.utils.deprecation import (
deprecate_nonkeyword_arguments,
deprecate_renamed_function,
)
from polars.utils.various import normalize_filepath

if TYPE_CHECKING:
Expand Down Expand Up @@ -218,21 +221,48 @@ def _selector_and(self, other: Expr) -> Expr:
return wrap_expr(self._pyexpr._meta_selector_and(other._pyexpr))

@overload
def write_json(self, file: None = ...) -> str:
def serialize(self, file: None = ...) -> str:
...

@overload
def write_json(self, file: IOBase | str | Path) -> None:
def serialize(self, file: IOBase | str | Path) -> None:
...

def write_json(self, file: IOBase | str | Path | None = None) -> str | None:
"""Write expression to json."""
def serialize(self, file: IOBase | str | Path | None = None) -> str | None:
"""
Serialize this expression to a file or string in JSON format.
Parameters
----------
file
File path to which the result should be written. If set to `None`
(default), the output is returned as a string instead.
See Also
--------
Expr.deserialize
Examples
--------
Serialize the expression into a JSON string.
>>> expr = pl.col("foo").sum().over("bar")
>>> json = expr.meta.serialize()
>>> json
'{"Window":{"function":{"Agg":{"Sum":{"Column":"foo"}}},"partition_by":[{"Column":"bar"}],"options":{"Over":"GroupsToRows"}}}'
The expression can later be deserialized back into an `Expr` object.
>>> from io import StringIO
>>> pl.Expr.deserialize(StringIO(json)) # doctest: +ELLIPSIS
<Expr ['col("foo").sum().over([col("ba…'] at ...>
"""
if isinstance(file, (str, Path)):
file = normalize_filepath(file)
to_string_io = (file is not None) and isinstance(file, StringIO)
if file is None or to_string_io:
with BytesIO() as buf:
self._pyexpr.meta_write_json(buf)
self._pyexpr.serialize(buf)
json_bytes = buf.getvalue()

json_str = json_bytes.decode("utf8")
Expand All @@ -241,9 +271,27 @@ def write_json(self, file: IOBase | str | Path | None = None) -> str | None:
else:
return json_str
else:
self._pyexpr.meta_write_json(file)
self._pyexpr.serialize(file)
return None

@overload
def write_json(self, file: None = ...) -> str:
    ...

@overload
def write_json(self, file: IOBase | str | Path) -> None:
    ...

@deprecate_renamed_function("Expr.meta.serialize", version="0.20.11")
def write_json(self, file: IOBase | str | Path | None = None) -> str | None:
    """
    Write expression to json.

    .. deprecated:: 0.20.11
        This method has been renamed to :meth:`serialize`.

    Parameters
    ----------
    file
        Passed through unchanged to :meth:`serialize`.

    Returns
    -------
    str or None
        Whatever :meth:`serialize` returns for the given `file`.
    """
    # Thin forwarder kept for backwards compatibility; all work happens in
    # `serialize`.
    return self.serialize(file)

@overload
def tree_format(self, *, return_as_string: Literal[False]) -> None:
...
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4817,7 +4817,7 @@ def approx_n_unique(self) -> Self:
"""
Approximate count of unique values.
.. deprecated: 0.20.11
.. deprecated:: 0.20.11
Use `select(pl.all().approx_n_unique())` instead.
This is done using the HyperLogLog++ algorithm for cardinality estimation.
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4315,7 +4315,7 @@ def to_numpy(
the underlying data. Data copy occurs, for example, when the Series contains
nulls or non-numeric types.
.. deprecated: 0.20.10
.. deprecated:: 0.20.10
Use the `allow_copy` parameter instead, which is the inverse of this
one.
Expand Down
35 changes: 23 additions & 12 deletions py-polars/src/expr/meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,25 +89,36 @@ impl PyExpr {
}

#[cfg(all(feature = "json", feature = "serde_json"))]
fn meta_write_json(&self, py_f: PyObject) -> PyResult<()> {
// Serialize the wrapped expression as JSON into the writer obtained from `py_f`.
fn serialize(&self, py_f: PyObject) -> PyResult<()> {
    let writer = BufWriter::new(get_file_like(py_f, true)?);
    match serde_json::to_writer(writer, &self.inner) {
        Ok(()) => Ok(()),
        // Report serialization failures to Python as a `ValueError`.
        Err(err) => Err(PyValueError::new_err(format!("{err:?}"))),
    }
}

#[staticmethod]
fn meta_read_json(value: &str) -> PyResult<PyExpr> {
#[cfg(feature = "json")]
{
let inner: polars_lazy::prelude::Expr = serde_json::from_str(value)
.map_err(|_| PyPolarsErr::from(polars_err!(ComputeError: "could not serialize")))?;
Ok(PyExpr { inner })
}
#[cfg(not(feature = "json"))]
{
panic!("activate 'json' feature")
}
#[cfg(feature = "json")]
fn deserialize(py_f: PyObject) -> PyResult<PyExpr> {
// it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
// so don't bother with files.
let mut json = String::new();
let _ = get_file_like(py_f, false)?
.read_to_string(&mut json)
.unwrap();

// SAFETY:
// we skipped the serializing/deserializing of the static in lifetime in `DataType`
// so we actually don't have a lifetime at all when serializing.

// &str still has a lifetime. But it's ok, because we drop it immediately
// in this scope
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };

let inner: polars_lazy::prelude::Expr = serde_json::from_str(json).map_err(|_| {
let msg = "could not deserialize input into an expression";
PyPolarsErr::from(polars_err!(ComputeError: msg))
})?;
Ok(PyExpr { inner })
}

fn meta_tree_format(&self) -> PyResult<String> {
Expand Down
47 changes: 35 additions & 12 deletions py-polars/tests/unit/test_serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,6 @@ def test_deser_empty_list() -> None:
assert s.to_list() == [[[42.0]], []]


def test_expression_json() -> None:
e = pl.col("foo").sum().over("bar")
json = e.meta.write_json()

round_tripped = pl.Expr.from_json(json)
assert round_tripped.meta == e


def times2(x: pl.Series) -> pl.Series:
return x * 2

Expand Down Expand Up @@ -202,9 +194,40 @@ def test_serde_array_dtype() -> None:
assert_series_equal(pickle.loads(pickle.dumps(nested_s)), nested_s)


def test_expr_serialization_roundtrip() -> None:
    """Serializing an expression and deserializing the result compares equal."""
    original = pl.col("foo").sum().over("bar")
    serialized = original.meta.serialize()
    buffer = io.StringIO(serialized)
    restored = pl.Expr.deserialize(buffer)
    assert restored.meta == original


def test_expr_deserialize_file_not_found() -> None:
    """A plain string is treated as a file path; a missing file must raise."""
    missing_path = "abcdef"
    with pytest.raises(FileNotFoundError):
        pl.Expr.deserialize(missing_path)


def test_expr_deserialize_invalid_json() -> None:
    """Input that is not valid expression JSON raises a `ComputeError`."""
    invalid_input = io.StringIO("abcdef")
    expected_message = "could not deserialize input into an expression"
    with pytest.raises(pl.ComputeError, match=expected_message):
        pl.Expr.deserialize(invalid_input)


def test_expr_write_json_from_json_deprecated() -> None:
    """The deprecated `write_json`/`from_json` pair warns and still round-trips."""
    original = pl.col("foo").sum().over("bar")

    # Each deprecated entry point must emit its own deprecation warning.
    with pytest.deprecated_call():
        serialized = original.meta.write_json()

    with pytest.deprecated_call():
        restored = pl.Expr.from_json(serialized)

    assert restored.meta == original


def test_expression_json_13991() -> None:
e = pl.col("foo").cast(pl.Decimal)
json = e.meta.write_json()
expr = pl.col("foo").cast(pl.Decimal)
json = expr.meta.serialize()

round_tripped = pl.Expr.from_json(json)
assert round_tripped.meta == e
round_tripped = pl.Expr.deserialize(io.StringIO(json))
assert round_tripped.meta == expr

0 comments on commit bdcbf1a

Please sign in to comment.