Skip to content

Commit

Permalink
fix(rust): Do not set sorted flag on lexical sorting (#16032)
Browse files Browse the repository at this point in the history
  • Loading branch information
c-peters authored May 4, 2024
1 parent c94de80 commit fd0ae1d
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub use revmap::*;
use super::*;
use crate::chunked_array::Settings;
use crate::prelude::*;
use crate::series::IsSorted;
use crate::using_string_cache;

bitflags! {
Expand All @@ -29,7 +30,6 @@ pub struct CategoricalChunked {
physical: Logical<CategoricalType, UInt32Type>,
/// 1st bit: original local categorical
/// meaning that n_unique is the same as the cat map length
/// 2nd bit: use lexical sorting
bit_settings: BitSettings,
}

Expand Down Expand Up @@ -178,7 +178,11 @@ impl CategoricalChunked {
}

/// Set flags for the Chunked Array.
///
/// The (possibly adjusted) `flags` are forwarded to the physical array through
/// `physical_mut().set_flags(flags)`. When `uses_lexical_ordering()` returns true,
/// the sorted flag inside `flags` is first overridden with `IsSorted::Not`.
// NOTE(review): this span is a diff rendering whose +/- markers were lost. Going by
// the hunk header (-178,7 +178,11), the signature on the next line is the one the
// commit removed, and the signature after it is its replacement.
pub(crate) fn set_flags(&mut self, flags: Settings) {
// Replacement signature: `flags` is bound mutably so the sorted flag can be
// overridden before the flags are passed on.
pub(crate) fn set_flags(&mut self, mut flags: Settings) {
// We should not set the sorted flag if we are sorting in lexical order
// NOTE(review): presumably because the flag describes the order of the physical
// values rather than the lexical order of the categories — confirm.
if self.uses_lexical_ordering() {
flags.set_sorted_flag(IsSorted::Not)
}
self.physical_mut().set_flags(flags)
}

Expand Down
9 changes: 9 additions & 0 deletions py-polars/tests/unit/datatypes/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -826,3 +826,12 @@ def test_cat_preserve_lexical_ordering_on_concat() -> None:
df = pl.DataFrame({"x": ["b", "a", "c"]}).with_columns(pl.col("x").cast(dtype))
df2 = pl.concat([df, df])
assert df2["x"].dtype == dtype


def test_cat_append_lexical_sorted_flag() -> None:
    """Concatenated lexical categoricals must not report themselves as sorted.

    Each partition of ``x`` is sorted on ``y`` individually and the pieces are
    then concatenated; the combined ``y`` column (``B``, then ``A``/``B`` in
    sorted order) is not globally sorted, so ``is_sorted()`` has to be false.
    """
    lexical_dtype = pl.Categorical(ordering="lexical")
    frame = pl.DataFrame({"x": [0, 1, 1], "y": ["B", "B", "A"]}).with_columns(
        pl.col("y").cast(lexical_dtype)
    )

    # Sort every partition on its own, then stitch the pieces back together.
    sorted_parts = [part.sort("y") for part in frame.partition_by("x")]
    combined = pl.concat(sorted_parts)

    assert not combined["y"].is_sorted()

0 comments on commit fd0ae1d

Please sign in to comment.