Skip to content

Commit

Permalink
feat(python,rust): add str.reverse (pola-rs#12878)
Browse files Browse the repository at this point in the history
Co-authored-by: Stijn de Gooijer <stijndegooijer@gmail.com>
  • Loading branch information
ion-elgreco and stinodego authored Dec 5, 2023
1 parent d2aea50 commit a4757b2
Show file tree
Hide file tree
Showing 18 changed files with 138 additions and 0 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ strum_macros = "0.25"
thiserror = "1"
tokio = "1.26"
tokio-util = "0.7.8"
unicode-reverse = "1.0.8"
url = "2.4"
version_check = "0.9.4"
xxhash-rust = { version = "0.8.6", features = ["xxh3"] }
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ list_to_struct = ["polars-plan/list_to_struct"]
python = ["pyo3", "polars-plan/python", "polars-core/python", "polars-io/python"]
row_hash = ["polars-plan/row_hash"]
string_pad = ["polars-plan/string_pad"]
string_reverse = ["polars-plan/string_reverse"]
string_to_integer = ["polars-plan/string_to_integer"]
arg_where = ["polars-plan/arg_where"]
search_sorted = ["polars-plan/search_sorted"]
Expand Down Expand Up @@ -251,6 +252,7 @@ features = [
"top_k",
"approx_unique",
"concat_str",
"string_reverse",
"string_to_integer",
"cse",
"dot_diagram",
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ regex = { workspace = true }
serde = { workspace = true, features = ["derive"], optional = true }
serde_json = { workspace = true, optional = true }
smartstring = { workspace = true }
unicode-reverse = { workspace = true, optional = true }

[dev-dependencies]
rand = { workspace = true }
Expand Down Expand Up @@ -87,6 +88,7 @@ diff = []
pct_change = ["diff"]
strings = ["polars-core/strings"]
string_pad = ["polars-core/strings"]
string_reverse = ["polars-core/strings", "unicode-reverse"]
string_to_integer = ["polars-core/strings"]
extract_jsonpath = ["serde_json", "jsonpath_lib", "polars-json"]
log = []
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-ops/src/chunked_array/strings/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ use polars_core::prelude::*;
pub use split::*;
#[cfg(feature = "strings")]
pub use strip::*;
#[cfg(feature = "string_reverse")]
mod reverse;

pub trait AsUtf8 {
fn as_utf8(&self) -> &Utf8Chunked;
Expand Down
8 changes: 8 additions & 0 deletions crates/polars-ops/src/chunked_array/strings/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,14 @@ pub trait Utf8NameSpaceImpl: AsUtf8 {
ca + other
}

/// Reverse each string value.
///
/// The values are read through `as_utf8` and handed to `reverse::reverse`,
/// which produces the resulting `Utf8Chunked`.
#[must_use]
#[cfg(feature = "string_reverse")]
fn str_reverse(&self) -> Utf8Chunked {
    reverse::reverse(self.as_utf8())
}

/// Slice the string values.
///
/// Determines a substring starting from `start` and with optional length `length` of each of the elements in `array`.
Expand Down
14 changes: 14 additions & 0 deletions crates/polars-ops/src/chunked_array/strings/reverse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use polars_core::prelude::Utf8Chunked;
use unicode_reverse::reverse_grapheme_clusters_in_place;

/// Reverse a single optional string value.
///
/// A `None` input yields `None`. For `Some`, the text is copied into an owned
/// `String`, which is then reversed with `reverse_grapheme_clusters_in_place`.
fn to_reverse_helper(s: Option<&str>) -> Option<String> {
    // `?` short-circuits on a missing value, so nulls pass through untouched.
    let mut reversed = s?.to_owned();
    reverse_grapheme_clusters_in_place(&mut reversed);
    Some(reversed)
}

/// Reverse every string value in `ca`, returning the result as a new `Utf8Chunked`.
///
/// Each element is passed through `to_reverse_helper` by way of `apply_generic`.
pub fn reverse(ca: &Utf8Chunked) -> Utf8Chunked {
ca.apply_generic(to_reverse_helper)
}
1 change: 1 addition & 0 deletions crates/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ chunked_ids = ["polars-core/chunked_ids"]
list_to_struct = ["polars-ops/list_to_struct"]
row_hash = ["polars-core/row_hash", "polars-ops/hash"]
string_pad = ["polars-ops/string_pad"]
string_reverse = ["polars-ops/string_reverse"]
string_to_integer = ["polars-ops/string_to_integer"]
arg_where = []
search_sorted = ["polars-ops/search_sorted"]
Expand Down
14 changes: 14 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ pub enum StringFunction {
n: i64,
literal: bool,
},
#[cfg(feature = "string_reverse")]
Reverse,
#[cfg(feature = "string_pad")]
PadStart {
length: usize,
Expand Down Expand Up @@ -131,6 +133,8 @@ impl StringFunction {
LenChars => mapper.with_dtype(DataType::UInt32),
#[cfg(feature = "regex")]
Replace { .. } => mapper.with_same_dtype(),
#[cfg(feature = "string_reverse")]
Reverse => mapper.with_same_dtype(),
#[cfg(feature = "temporal")]
Strptime(dtype, _) => mapper.with_dtype(dtype.clone()),
Split(_) => mapper.with_dtype(DataType::List(Box::new(DataType::Utf8))),
Expand Down Expand Up @@ -202,6 +206,8 @@ impl Display for StringFunction {
PadStart { .. } => "pad_start",
#[cfg(feature = "regex")]
Replace { .. } => "replace",
#[cfg(feature = "string_reverse")]
Reverse => "reverse",
#[cfg(feature = "string_encoding")]
HexEncode => "hex_encode",
#[cfg(feature = "binary_encoding")]
Expand Down Expand Up @@ -303,6 +309,8 @@ impl From<StringFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
ConcatHorizontal(delimiter) => map_as_slice!(strings::concat_hor, &delimiter),
#[cfg(feature = "regex")]
Replace { n, literal } => map_as_slice!(strings::replace, literal, n),
#[cfg(feature = "string_reverse")]
Reverse => map!(strings::reverse),
Uppercase => map!(strings::uppercase),
Lowercase => map!(strings::lowercase),
#[cfg(feature = "nightly")]
Expand Down Expand Up @@ -802,6 +810,12 @@ pub(super) fn replace(s: &[Series], literal: bool, n: i64) -> PolarsResult<Serie
.map(|ca| ca.into_series())
}

/// Reverse every string of a series via `str_reverse`.
///
/// # Errors
/// Propagates any error returned by `s.utf8()`.
#[cfg(feature = "string_reverse")]
pub(super) fn reverse(s: &Series) -> PolarsResult<Series> {
    s.utf8().map(|ca| ca.str_reverse().into_series())
}

#[cfg(feature = "string_to_integer")]
pub(super) fn to_integer(s: &Series, base: u32, strict: bool) -> PolarsResult<Series> {
let ca = s.utf8()?;
Expand Down
11 changes: 11 additions & 0 deletions crates/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,17 @@ impl StringNameSpace {
)
}

/// Reverse each string.
///
/// Builds an expression that applies `StringFunction::Reverse` to this
/// expression, with no additional input expressions.
#[cfg(feature = "string_reverse")]
pub fn reverse(self) -> Expr {
    let function = FunctionExpr::StringExpr(StringFunction::Reverse);
    self.0.map_many_private(function, &[], false, false)
}

/// Remove leading and trailing characters, or whitespace if matches is None.
pub fn strip_chars(self, matches: Expr) -> Expr {
self.0.map_many_private(
Expand Down
2 changes: 2 additions & 0 deletions crates/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ list_gather = ["polars-ops/list_gather", "polars-lazy?/list_gather"]
describe = ["polars-core/describe"]
timezones = ["polars-core/timezones", "polars-lazy?/timezones", "polars-io/timezones"]
string_pad = ["polars-lazy?/string_pad", "polars-ops/string_pad"]
string_reverse = ["polars-lazy?/string_reverse", "polars-ops/string_reverse"]
string_to_integer = ["polars-lazy?/string_to_integer", "polars-ops/string_to_integer"]
arg_where = ["polars-lazy?/arg_where"]
search_sorted = ["polars-lazy?/search_sorted"]
Expand Down Expand Up @@ -315,6 +316,7 @@ docs-selection = [
"asof_join",
"cross_join",
"concat_str",
"string_reverse",
"string_to_integer",
"decompress",
"mode",
Expand Down
1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ features = [
"semi_anti_join",
"serde-lazy",
"string_encoding",
"string_reverse",
"string_to_integer",
"string_pad",
"strings",
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expressions/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ The following methods are available under the `expr.str` attribute.
Expr.str.pad_start
Expr.str.replace
Expr.str.replace_all
Expr.str.reverse
Expr.str.rjust
Expr.str.rstrip
Expr.str.slice
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ The following methods are available under the `Series.str` attribute.
Series.str.pad_start
Series.str.replace
Series.str.replace_all
Series.str.reverse
Series.str.rjust
Series.str.rstrip
Series.str.slice
Expand Down
21 changes: 21 additions & 0 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1944,6 +1944,27 @@ def replace_all(
value = parse_as_expression(value, str_as_lit=True)
return wrap_expr(self._pyexpr.str_replace_all(pattern, value, literal))

def reverse(self) -> Expr:
    """
    Reverse each string value.

    A base character and the combining marks attached to it are kept
    together, as the last row of the example shows.

    Examples
    --------
    >>> df = pl.DataFrame({"text": ["foo", "bar", "man\u0303ana"]})
    >>> df.with_columns(pl.col("text").str.reverse().alias("reversed"))
    shape: (3, 2)
    ┌────────┬──────────┐
    │ text   ┆ reversed │
    │ ---    ┆ ---      │
    │ str    ┆ str      │
    ╞════════╪══════════╡
    │ foo    ┆ oof      │
    │ bar    ┆ rab      │
    │ mañana ┆ anañam   │
    └────────┴──────────┘
    """
    reversed_pyexpr = self._pyexpr.str_reverse()
    return wrap_expr(reversed_pyexpr)

def slice(self, offset: int, length: int | None = None) -> Expr:
"""
Create subslices of the string values of a Utf8 Series.
Expand Down
17 changes: 17 additions & 0 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1441,6 +1441,23 @@ def to_titlecase(self) -> Series:
"""

def reverse(self) -> Series:
    """
    Reverse each string value.

    A base character and the combining marks attached to it are kept
    together, as the last value of the example shows.

    Examples
    --------
    >>> s = pl.Series("text", ["foo", "bar", "man\u0303ana"])
    >>> s.str.reverse()
    shape: (3,)
    Series: 'text' [str]
    [
        "oof"
        "rab"
        "anañam"
    ]
    """

def slice(self, offset: int, length: int | None = None) -> Series:
"""
Create subslices of the string values of a Utf8 Series.
Expand Down
4 changes: 4 additions & 0 deletions py-polars/src/expr/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,10 @@ impl PyExpr {
.into()
}

// Expose the string-namespace `reverse` expression: clone the wrapped
// expression, apply the reversal, and convert the result back into `Self`.
fn str_reverse(&self) -> Self {
    let reversed = self.inner.clone().str().reverse();
    reversed.into()
}

fn str_pad_start(&self, length: usize, fill_char: char) -> Self {
self.inner.clone().str().pad_start(length, fill_char).into()
}
Expand Down
20 changes: 20 additions & 0 deletions py-polars/tests/unit/namespaces/string/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1137,3 +1137,23 @@ def test_string_extract_groups_lazy_schema_10305() -> None:
)

assert df.schema == {"candidate": pl.Utf8, "ref": pl.Utf8}


def test_string_reverse() -> None:
    # Covers nulls, plain ASCII, punctuation, and a combining mark (U+0303)
    # that must stay attached to its base character after reversal.
    values = [None, "foo", "bar", "i like pizza&#", None, "man\u0303ana"]
    reversed_values = [None, "oof", "rab", "#&azzip ekil i", None, "anan\u0303am"]

    df = pl.DataFrame({"text": values})
    expected = pl.DataFrame([pl.Series("text", reversed_values, dtype=pl.Utf8)])

    result = df.select(pl.col("text").str.reverse())
    assert_frame_equal(result, expected)

0 comments on commit a4757b2

Please sign in to comment.