refactor: Another set of new-stream test skip/fixes (#18952)

pola-rs · Sep 26, 2024 · bb214d6
1 parent 71a8b05
commit bb214d6
Show file tree

Hide file tree

Showing 7 changed files with 53 additions and 39 deletions.
diff --git a/crates/polars-plan/src/plans/conversion/expr_expansion.rs b/crates/polars-plan/src/plans/conversion/expr_expansion.rs
@@ -643,7 +643,7 @@ fn find_flags(expr: &Expr) -> PolarsResult<ExpansionFlags> {
 
 #[cfg(feature = "dtype-struct")]
 fn toggle_cse(opt_flags: &mut OptFlags) {
-    if opt_flags.contains(OptFlags::EAGER) {
+    if opt_flags.contains(OptFlags::EAGER) && !opt_flags.contains(OptFlags::NEW_STREAMING) {
         #[cfg(debug_assertions)]
         {
             use polars_core::config::verbose;

diff --git a/crates/polars-stream/src/physical_plan/lower_expr.rs b/crates/polars-stream/src/physical_plan/lower_expr.rs
@@ -348,7 +348,7 @@ fn build_fallback_node_with_ctx(
                 expr,
                 Context::Default,
                 ctx.expr_arena,
-                None,
+                Some(&ctx.phys_sm[input_node].output_schema),
                 &mut conv_state,
             )
         })

diff --git a/py-polars/tests/unit/operations/namespaces/test_categorical.py b/py-polars/tests/unit/operations/namespaces/test_categorical.py
@@ -1,3 +1,5 @@
+import pytest
+
 import polars as pl
 from polars.testing import assert_frame_equal
 
@@ -58,20 +60,26 @@ def test_categorical_lexical_ordering_after_concat() -> None:
         }
 
 
-def test_sort_categoricals_6014() -> None:
+@pytest.mark.may_fail_auto_streaming
+def test_sort_categoricals_6014_internal() -> None:
     with pl.StringCache():
         # create basic categorical
-        df1 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns(
+        df = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns(
             pl.col("key").cast(pl.Categorical)
         )
+
+    out = df.sort("key")
+    assert out.to_dict(as_series=False) == {"key": ["bbb", "aaa", "ccc"]}
+
+
+def test_sort_categoricals_6014_lexical() -> None:
+    with pl.StringCache():
         # create lexically-ordered categorical
-        df2 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns(
+        df = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns(
             pl.col("key").cast(pl.Categorical("lexical"))
         )
 
-    out = df1.sort("key")
-    assert out.to_dict(as_series=False) == {"key": ["bbb", "aaa", "ccc"]}
-    out = df2.sort("key")
+    out = df.sort("key")
     assert out.to_dict(as_series=False) == {"key": ["aaa", "bbb", "ccc"]}
 
 

diff --git a/py-polars/tests/unit/operations/test_transpose.py b/py-polars/tests/unit/operations/test_transpose.py
@@ -13,6 +13,7 @@
 from polars.testing import assert_frame_equal, assert_series_equal
 
 
+@pytest.mark.may_fail_auto_streaming
 def test_transpose_supertype() -> None:
     df = pl.DataFrame({"a": [1, 2, 3], "b": ["foo", "bar", "ham"]})
     result = df.transpose()
@@ -26,6 +27,7 @@ def test_transpose_supertype() -> None:
     assert_frame_equal(result, expected)
 
 
+@pytest.mark.may_fail_auto_streaming
 def test_transpose_tz_naive_and_tz_aware() -> None:
     df = pl.DataFrame(
         {
@@ -41,6 +43,7 @@ def test_transpose_tz_naive_and_tz_aware() -> None:
         df.transpose()
 
 
+@pytest.mark.may_fail_auto_streaming
 def test_transpose_struct() -> None:
     df = pl.DataFrame(
         {
@@ -82,6 +85,7 @@ def test_transpose_struct() -> None:
     assert_frame_equal(result, expected)
 
 
+@pytest.mark.may_fail_auto_streaming
 def test_transpose_arguments() -> None:
     df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
     expected = pl.DataFrame(
@@ -136,6 +140,7 @@ def name_generator() -> Iterator[str]:
     assert_frame_equal(expected, out)
 
 
+@pytest.mark.may_fail_auto_streaming
 def test_transpose_categorical_data() -> None:
     with pl.StringCache():
         df = pl.DataFrame(
@@ -174,6 +179,7 @@ def test_transpose_categorical_data() -> None:
         ).transpose()
 
 
+@pytest.mark.may_fail_auto_streaming
 def test_transpose_logical_data() -> None:
     df = pl.DataFrame(
         {
@@ -192,6 +198,7 @@ def test_transpose_logical_data() -> None:
     assert_frame_equal(result, expected)
 
 
+@pytest.mark.may_fail_auto_streaming
 def test_err_transpose_object() -> None:
     class CustomObject:
         pass
@@ -200,12 +207,14 @@ class CustomObject:
         pl.DataFrame([CustomObject()]).transpose()
 
 
+@pytest.mark.may_fail_auto_streaming
 def test_transpose_name_from_column_13777() -> None:
     csv_file = io.BytesIO(b"id,kc\nhi,3")
     df = pl.read_csv(csv_file).transpose(column_names="id")
     assert_series_equal(df.to_series(0), pl.Series("hi", [3]))
 
 
+@pytest.mark.may_fail_auto_streaming
 def test_transpose_multiple_chunks() -> None:
     df = pl.DataFrame({"a": ["1"]})
     expected = pl.DataFrame({"column_0": ["1"], "column_1": ["1"]})

diff --git a/py-polars/tests/unit/operations/test_unpivot.py b/py-polars/tests/unit/operations/test_unpivot.py
@@ -7,48 +7,44 @@
 
 def test_unpivot() -> None:
     df = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]})
+    expected = {
+        ("a", "B", 1),
+        ("b", "B", 3),
+        ("c", "B", 5),
+        ("a", "C", 2),
+        ("b", "C", 4),
+        ("c", "C", 6),
+    }
     for _idv, _vv in (("A", ("B", "C")), (cs.string(), cs.integer())):
         unpivoted_eager = df.unpivot(index="A", on=["B", "C"])
-        assert all(unpivoted_eager["value"] == [1, 3, 5, 2, 4, 6])
+        assert set(unpivoted_eager.iter_rows()) == expected
 
-        unpivoted_lazy = df.lazy().unpivot(index="A", on=["B", "C"])
-        assert all(unpivoted_lazy.collect()["value"] == [1, 3, 5, 2, 4, 6])
+        unpivoted_lazy = df.lazy().unpivot(index="A", on=["B", "C"]).collect()
+        assert set(unpivoted_lazy.iter_rows()) == expected
 
     unpivoted = df.unpivot(index="A", on="B")
-    assert all(unpivoted["value"] == [1, 3, 5])
-    n = 3
-
+    assert set(unpivoted["value"]) == {1, 3, 5}
+
+    expected_full = {
+        ("A", "a"),
+        ("A", "b"),
+        ("A", "c"),
+        ("B", "1"),
+        ("B", "3"),
+        ("B", "5"),
+        ("C", "2"),
+        ("C", "4"),
+        ("C", "6"),
+    }
     for unpivoted in [df.unpivot(), df.lazy().unpivot().collect()]:
-        assert unpivoted["variable"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n
-        assert unpivoted["value"].to_list() == [
-            "a",
-            "b",
-            "c",
-            "1",
-            "3",
-            "5",
-            "2",
-            "4",
-            "6",
-        ]
+        assert set(unpivoted.iter_rows()) == expected_full
 
     with pytest.deprecated_call(match="unpivot"):
         for unpivoted in [
             df.melt(value_name="foo", variable_name="bar"),
             df.lazy().melt(value_name="foo", variable_name="bar").collect(),
         ]:
-            assert unpivoted["bar"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n
-            assert unpivoted["foo"].to_list() == [
-                "a",
-                "b",
-                "c",
-                "1",
-                "3",
-                "5",
-                "2",
-                "4",
-                "6",
-            ]
+            assert set(unpivoted.iter_rows()) == expected_full
 
 
 def test_unpivot_projection_pd_7747() -> None:

diff --git a/py-polars/tests/unit/sql/test_wildcard_opts.py b/py-polars/tests/unit/sql/test_wildcard_opts.py
@@ -180,6 +180,6 @@ def test_select_wildcard_errors(df: pl.DataFrame) -> None:
     # note: missing "()" around the exclude option results in dupe col
     with pytest.raises(
         DuplicateError,
-        match="the name 'City' is duplicate",
+        match="City",
     ):
         assert df.sql("SELECT * EXCLUDE Address, City FROM self")
diff --git a/py-polars/tests/unit/test_datatypes.py b/py-polars/tests/unit/test_datatypes.py
@@ -138,6 +138,7 @@ def test_repr(dtype: PolarsDataType, representation: str) -> None:
     assert repr(dtype) == representation
 
 
+@pytest.mark.may_fail_auto_streaming
 def test_conversion_dtype() -> None:
     df = (
         pl.DataFrame(