Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: String transform to_titlecase was too narrowly defined #18122

Merged
merged 1 commit into from
Aug 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/polars-ops/src/chunked_array/strings/case.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ pub(super) fn to_titlecase<'a>(ca: &'a StringChunked) -> StringChunked {
} else {
s.push(c);
}
next_is_upper = c.is_whitespace();
next_is_upper = !c.is_alphanumeric();
}

// Put buf back for next iteration.
Expand Down
33 changes: 24 additions & 9 deletions py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ def len_chars(self) -> Expr:

def to_uppercase(self) -> Expr:
"""
Transform to uppercase variant.
Modify strings to their uppercase equivalent.

Examples
--------
Expand All @@ -467,7 +467,7 @@ def to_uppercase(self) -> Expr:

def to_lowercase(self) -> Expr:
"""
Transform to lowercase variant.
Modify strings to their lowercase equivalent.

Examples
--------
Expand All @@ -487,22 +487,37 @@ def to_lowercase(self) -> Expr:

def to_titlecase(self) -> Expr:
"""
Transform to titlecase variant.
Modify strings to their titlecase equivalent.

Notes
-----
This is a form of case transform where the first letter of each word is
capitalized, with the rest of the word in lowercase. Non-alphanumeric
characters define the word boundaries.

Examples
--------
>>> df = pl.DataFrame(
... {"sing": ["welcome to my world", "THERE'S NO TURNING BACK"]}
... {
... "quotes": [
... "'e.t. phone home'",
... "you talkin' to me?",
... "to infinity,and BEYOND!",
... ]
... }
... )
>>> df.with_columns(foo_title=pl.col("sing").str.to_titlecase())
shape: (2, 2)
>>> df.with_columns(
... quotes_title=pl.col("quotes").str.to_titlecase(),
... )
shape: (3, 2)
┌─────────────────────────┬─────────────────────────┐
│ sing ┆ foo_title │
│ quotes ┆ quotes_title │
│ --- ┆ --- │
│ str ┆ str │
╞═════════════════════════╪═════════════════════════╡
│ welcome to my world ┆ Welcome To My World │
│ THERE'S NO TURNING BACK ┆ There's No Turning Back │
│ 'e.t. phone home' ┆ 'E.T. Phone Home' │
│ you talkin' to me? ┆ You Talkin' To Me? │
│ to infinity,and BEYOND! ┆ To Infinity,And Beyond! │
└─────────────────────────┴─────────────────────────┘
"""
return wrap_expr(self._pyexpr.str_to_titlecase())
Expand Down
30 changes: 22 additions & 8 deletions py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1481,7 +1481,7 @@ def zfill(self, length: int | IntoExprColumn) -> Series:

def to_lowercase(self) -> Series:
"""
Modify the strings to their lowercase equivalent.
Modify strings to their lowercase equivalent.

Examples
--------
Expand All @@ -1497,7 +1497,7 @@ def to_lowercase(self) -> Series:

def to_uppercase(self) -> Series:
"""
Modify the strings to their uppercase equivalent.
Modify strings to their uppercase equivalent.

Examples
--------
Expand All @@ -1513,17 +1513,31 @@ def to_uppercase(self) -> Series:

def to_titlecase(self) -> Series:
"""
Modify the strings to their titlecase equivalent.
Modify strings to their titlecase equivalent.

Notes
-----
This is a form of case transform where the first letter of each word is
capitalized, with the rest of the word in lowercase. Non-alphanumeric
characters define the word boundaries.

Examples
--------
>>> s = pl.Series("sing", ["welcome to my world", "THERE'S NO TURNING BACK"])
>>> s = pl.Series(
... "quotes",
... [
... "'e.t. phone home'",
... "you talkin' to me?",
... "to infinity,and BEYOND!",
... ],
... )
>>> s.str.to_titlecase()
shape: (2,)
Series: 'sing' [str]
shape: (3,)
Series: 'quotes' [str]
[
"Welcome To My World"
"There's No Turning Back"
"'E.T. Phone Home'"
"You Talkin' To Me?"
"To Infinity,And Beyond!"
]
"""

Expand Down
51 changes: 40 additions & 11 deletions py-polars/tests/unit/operations/namespaces/string/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1489,23 +1489,52 @@ def test_splitn_expr() -> None:
def test_titlecase() -> None:
df = pl.DataFrame(
{
"sing": [
"misc": [
"welcome to my world",
"THERE'S NO TURNING BACK",
"double space",
"and\ta\t tab",
]
"by jean-paul sartre, 'esq'",
"SOMETIMES/life/gives/you/a/2nd/chance",
],
}
)
expected = [
"Welcome To My World",
"Double Space",
"And\tA\t Tab",
"By Jean-Paul Sartre, 'Esq'",
"Sometimes/Life/Gives/You/A/2nd/Chance",
]
actual = df.select(pl.col("misc").str.to_titlecase()).to_series()
for ex, act in zip(expected, actual):
assert ex == act, f"{ex} != {act}"

assert df.select(pl.col("sing").str.to_titlecase()).to_dict(as_series=False) == {
"sing": [
"Welcome To My World",
"There's No Turning Back",
"Double Space",
"And\tA\t Tab",
]
}
df = pl.DataFrame(
{
"quotes": [
"'e.t. phone home'",
"you talkin' to me?",
"i feel the need--the need for speed",
"to infinity,and BEYOND!",
"say 'what' again!i dare you - I\u00a0double-dare you!",
"What.we.got.here... is#failure#to#communicate",
]
}
)
expected_str = [
"'E.T. Phone Home'",
"You Talkin' To Me?",
"I Feel The Need--The Need For Speed",
"To Infinity,And Beyond!",
"Say 'What' Again!I Dare You - I\u00a0Double-Dare You!",
"What.We.Got.Here... Is#Failure#To#Communicate",
]
expected_py = [s.title() for s in df["quotes"].to_list()]
for ex_str, ex_py, act in zip(
expected_str, expected_py, df["quotes"].str.to_titlecase()
):
assert ex_str == act, f"{ex_str} != {act}"
assert ex_py == act, f"{ex_py} != {act}"


def test_string_replace_with_nulls_10124() -> None:
Expand Down