From 034339a0548683c1a1a3414851587364757b04b2 Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijn@degooijer.io>
Date: Fri, 17 Feb 2023 10:20:53 +0100
Subject: [PATCH 1/8] Add some module level docstrings

---
 py-polars/tests/unit/io/__init__.py              |  1 +
 py-polars/tests/unit/namespaces/__init__.py      | 11 +++++++++++
 py-polars/tests/unit/namespaces/test_strptime.py |  5 +++++
 3 files changed, 17 insertions(+)
 create mode 100644 py-polars/tests/unit/io/__init__.py
 create mode 100644 py-polars/tests/unit/namespaces/__init__.py

diff --git a/py-polars/tests/unit/io/__init__.py b/py-polars/tests/unit/io/__init__.py
new file mode 100644
index 000000000000..6657aa68ec3e
--- /dev/null
+++ b/py-polars/tests/unit/io/__init__.py
@@ -0,0 +1 @@
+"""Test module containing tests for all input/output methods."""
diff --git a/py-polars/tests/unit/namespaces/__init__.py b/py-polars/tests/unit/namespaces/__init__.py
new file mode 100644
index 000000000000..96614f45647b
--- /dev/null
+++ b/py-polars/tests/unit/namespaces/__init__.py
@@ -0,0 +1,11 @@
+"""
+Test module containing dedicated tests for all namespace methods.
+
+Namespace methods are methods that are available on Series and Expr classes for
+operations that are only available for specific data types. For example,
+``Series.str.to_lowercase()``.
+
+These methods are almost exclusively implemented as expressions, with the Series method
+dispatching to this implementation through a decorator. This means we only need to test
+the Series method, as this will indirectly test the Expr method as well.
+"""
diff --git a/py-polars/tests/unit/namespaces/test_strptime.py b/py-polars/tests/unit/namespaces/test_strptime.py
index 8f28f0721612..816b339fdd39 100644
--- a/py-polars/tests/unit/namespaces/test_strptime.py
+++ b/py-polars/tests/unit/namespaces/test_strptime.py
@@ -1,3 +1,8 @@
+"""
+Module for testing ``.str.strptime`` of the string namespace.
+
+This method gets its own module due to its complexity.
+"""
 from __future__ import annotations
 
 import sys

From 7879d022fde57032d7ce71343c9bb74d4bacd7c7 Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijn@degooijer.io>
Date: Fri, 17 Feb 2023 10:21:03 +0100
Subject: [PATCH 2/8] Move test_binary to namespaces

---
 py-polars/tests/unit/{ => namespaces}/test_binary.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename py-polars/tests/unit/{ => namespaces}/test_binary.py (100%)

diff --git a/py-polars/tests/unit/test_binary.py b/py-polars/tests/unit/namespaces/test_binary.py
similarity index 100%
rename from py-polars/tests/unit/test_binary.py
rename to py-polars/tests/unit/namespaces/test_binary.py

From 7f5047a437df6305527bcc321e6c2333a28e0a7d Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijn@degooijer.io>
Date: Fri, 17 Feb 2023 11:31:58 +0100
Subject: [PATCH 3/8] Move struct

---
 .../tests/unit/namespaces/test_struct.py      | 28 +++++++++++++++++++
 py-polars/tests/unit/test_struct.py           | 24 ----------------
 2 files changed, 28 insertions(+), 24 deletions(-)
 create mode 100644 py-polars/tests/unit/namespaces/test_struct.py

diff --git a/py-polars/tests/unit/namespaces/test_struct.py b/py-polars/tests/unit/namespaces/test_struct.py
new file mode 100644
index 000000000000..b0ab63ea094d
--- /dev/null
+++ b/py-polars/tests/unit/namespaces/test_struct.py
@@ -0,0 +1,28 @@
+from __future__ import annotations
+
+import polars as pl
+from polars.testing import assert_frame_equal
+
+
+def test_struct_various() -> None:
+    df = pl.DataFrame(
+        {"int": [1, 2], "str": ["a", "b"], "bool": [True, None], "list": [[1, 2], [3]]}
+    )
+    s = df.to_struct("my_struct")
+
+    assert s.struct.fields == ["int", "str", "bool", "list"]
+    assert s[0] == {"int": 1, "str": "a", "bool": True, "list": [1, 2]}
+    assert s[1] == {"int": 2, "str": "b", "bool": None, "list": [3]}
+    assert s.struct.field("list").to_list() == [[1, 2], [3]]
+    assert s.struct.field("int").to_list() == [1, 2]
+
+    assert_frame_equal(df.to_struct("my_struct").struct.unnest(), df)
+    assert s.struct._ipython_key_completions_() == s.struct.fields
+
+
+def test_rename_fields() -> None:
+    df = pl.DataFrame({"int": [1, 2], "str": ["a", "b"], "bool": [True, None]})
+    assert df.to_struct("my_struct").struct.rename_fields(["a", "b"]).struct.fields == [
+        "a",
+        "b",
+    ]
diff --git a/py-polars/tests/unit/test_struct.py b/py-polars/tests/unit/test_struct.py
index a89c36667eb5..39ca45ef738e 100644
--- a/py-polars/tests/unit/test_struct.py
+++ b/py-polars/tests/unit/test_struct.py
@@ -12,22 +12,6 @@
 from polars.testing import assert_frame_equal
 
 
-def test_struct_various() -> None:
-    df = pl.DataFrame(
-        {"int": [1, 2], "str": ["a", "b"], "bool": [True, None], "list": [[1, 2], [3]]}
-    )
-    s = df.to_struct("my_struct")
-
-    assert s.struct.fields == ["int", "str", "bool", "list"]
-    assert s[0] == {"int": 1, "str": "a", "bool": True, "list": [1, 2]}
-    assert s[1] == {"int": 2, "str": "b", "bool": None, "list": [3]}
-    assert s.struct.field("list").to_list() == [[1, 2], [3]]
-    assert s.struct.field("int").to_list() == [1, 2]
-
-    assert_frame_equal(df.to_struct("my_struct").struct.unnest(), df)
-    assert s.struct._ipython_key_completions_() == s.struct.fields
-
-
 def test_struct_to_list() -> None:
     assert pl.DataFrame(
         {"int": [1, 2], "str": ["a", "b"], "bool": [True, None], "list": [[1, 2], [3]]}
@@ -57,14 +41,6 @@ def test_apply_unnest() -> None:
     assert_frame_equal(df, expected)
 
 
-def test_rename_fields() -> None:
-    df = pl.DataFrame({"int": [1, 2], "str": ["a", "b"], "bool": [True, None]})
-    assert df.to_struct("my_struct").struct.rename_fields(["a", "b"]).struct.fields == [
-        "a",
-        "b",
-    ]
-
-
 def test_struct_unnesting() -> None:
     df = pl.DataFrame({"a": [1, 2]})
     out = df.select(

From e195d3742a6f2e5b8fc5e54a6de1199198d7a9fd Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijn@degooijer.io>
Date: Fri, 17 Feb 2023 11:40:39 +0100
Subject: [PATCH 4/8] Move categorical

---
 .../tests/unit/namespaces/test_categorical.py | 74 +++++++++++++++++++
 py-polars/tests/unit/test_categorical.py      | 72 ------------------
 2 files changed, 74 insertions(+), 72 deletions(-)
 create mode 100644 py-polars/tests/unit/namespaces/test_categorical.py

diff --git a/py-polars/tests/unit/namespaces/test_categorical.py b/py-polars/tests/unit/namespaces/test_categorical.py
new file mode 100644
index 000000000000..1159c2160a30
--- /dev/null
+++ b/py-polars/tests/unit/namespaces/test_categorical.py
@@ -0,0 +1,74 @@
+import polars as pl
+from polars.testing import assert_frame_equal
+
+
+def test_categorical_lexical_sort() -> None:
+    df = pl.DataFrame(
+        {"cats": ["z", "z", "k", "a", "b"], "vals": [3, 1, 2, 2, 3]}
+    ).with_columns(
+        [
+            pl.col("cats").cast(pl.Categorical).cat.set_ordering("lexical"),
+        ]
+    )
+
+    out = df.sort(["cats"])
+    assert out["cats"].dtype == pl.Categorical
+    expected = pl.DataFrame(
+        {"cats": ["a", "b", "k", "z", "z"], "vals": [2, 3, 2, 3, 1]}
+    )
+    assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected)
+    out = df.sort(["cats", "vals"])
+    expected = pl.DataFrame(
+        {"cats": ["a", "b", "k", "z", "z"], "vals": [2, 3, 2, 1, 3]}
+    )
+    assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected)
+    out = df.sort(["vals", "cats"])
+
+    expected = pl.DataFrame(
+        {"cats": ["z", "a", "k", "b", "z"], "vals": [1, 2, 2, 3, 3]}
+    )
+    assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected)
+
+
+def test_categorical_lexical_ordering_after_concat() -> None:
+    with pl.StringCache():
+        ldf1 = (
+            pl.DataFrame([pl.Series("key1", [8, 5]), pl.Series("key2", ["fox", "baz"])])
+            .lazy()
+            .with_columns(
+                pl.col("key2").cast(pl.Categorical).cat.set_ordering("lexical")
+            )
+        )
+        ldf2 = (
+            pl.DataFrame(
+                [pl.Series("key1", [6, 8, 6]), pl.Series("key2", ["fox", "foo", "bar"])]
+            )
+            .lazy()
+            .with_columns(
+                pl.col("key2").cast(pl.Categorical).cat.set_ordering("lexical")
+            )
+        )
+        df = (
+            pl.concat([ldf1, ldf2])
+            .with_columns(pl.col("key2").cat.set_ordering("lexical"))
+            .collect()
+        )
+
+        df.sort(["key1", "key2"])
+
+
+def test_sort_categoricals_6014() -> None:
+    with pl.StringCache():
+        # create basic categorical
+        df1 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns(
+            pl.col("key").cast(pl.Categorical)
+        )
+        # create lexically-ordered categorical
+        df2 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns(
+            pl.col("key").cast(pl.Categorical).cat.set_ordering("lexical")
+        )
+
+    out = df1.sort("key")
+    assert out.to_dict(False) == {"key": ["bbb", "aaa", "ccc"]}
+    out = df2.sort("key")
+    assert out.to_dict(False) == {"key": ["aaa", "bbb", "ccc"]}
diff --git a/py-polars/tests/unit/test_categorical.py b/py-polars/tests/unit/test_categorical.py
index c2a51979ffdb..716570e9eff9 100644
--- a/py-polars/tests/unit/test_categorical.py
+++ b/py-polars/tests/unit/test_categorical.py
@@ -61,61 +61,6 @@ def test_read_csv_categorical() -> None:
     assert df["col1"].dtype == pl.Categorical
 
 
-def test_categorical_lexical_sort() -> None:
-    df = pl.DataFrame(
-        {"cats": ["z", "z", "k", "a", "b"], "vals": [3, 1, 2, 2, 3]}
-    ).with_columns(
-        [
-            pl.col("cats").cast(pl.Categorical).cat.set_ordering("lexical"),
-        ]
-    )
-
-    out = df.sort(["cats"])
-    assert out["cats"].dtype == pl.Categorical
-    expected = pl.DataFrame(
-        {"cats": ["a", "b", "k", "z", "z"], "vals": [2, 3, 2, 3, 1]}
-    )
-    assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected)
-    out = df.sort(["cats", "vals"])
-    expected = pl.DataFrame(
-        {"cats": ["a", "b", "k", "z", "z"], "vals": [2, 3, 2, 1, 3]}
-    )
-    assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected)
-    out = df.sort(["vals", "cats"])
-
-    expected = pl.DataFrame(
-        {"cats": ["z", "a", "k", "b", "z"], "vals": [1, 2, 2, 3, 3]}
-    )
-    assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected)
-
-
-def test_categorical_lexical_ordering_after_concat() -> None:
-    with pl.StringCache():
-        ldf1 = (
-            pl.DataFrame([pl.Series("key1", [8, 5]), pl.Series("key2", ["fox", "baz"])])
-            .lazy()
-            .with_columns(
-                pl.col("key2").cast(pl.Categorical).cat.set_ordering("lexical")
-            )
-        )
-        ldf2 = (
-            pl.DataFrame(
-                [pl.Series("key1", [6, 8, 6]), pl.Series("key2", ["fox", "foo", "bar"])]
-            )
-            .lazy()
-            .with_columns(
-                pl.col("key2").cast(pl.Categorical).cat.set_ordering("lexical")
-            )
-        )
-        df = (
-            pl.concat([ldf1, ldf2])
-            .with_columns(pl.col("key2").cat.set_ordering("lexical"))
-            .collect()
-        )
-
-        df.sort(["key1", "key2"])
-
-
 def test_cat_to_dummies() -> None:
     df = pl.DataFrame({"foo": [1, 2, 3, 4], "bar": ["a", "b", "a", "c"]})
     df = df.with_columns(pl.col("bar").cast(pl.Categorical))
@@ -295,23 +240,6 @@ def test_categorical_in_struct_nulls() -> None:
     assert s[2] == {"job": "waiter", "counts": 1}
 
 
-def test_sort_categoricals_6014() -> None:
-    with pl.StringCache():
-        # create basic categorical
-        df1 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns(
-            pl.col("key").cast(pl.Categorical)
-        )
-        # create lexically-ordered categorical
-        df2 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns(
-            pl.col("key").cast(pl.Categorical).cat.set_ordering("lexical")
-        )
-
-    out = df1.sort("key")
-    assert out.to_dict(False) == {"key": ["bbb", "aaa", "ccc"]}
-    out = df2.sort("key")
-    assert out.to_dict(False) == {"key": ["aaa", "bbb", "ccc"]}
-
-
 def test_cast_inner_categorical() -> None:
     dtype = pl.List(pl.Categorical)
     out = pl.Series("foo", [["a"], ["a", "b"]]).cast(dtype)

From c709cf147c93c1509c1f9fd19b64c8a74e441a4f Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijn@degooijer.io>
Date: Fri, 17 Feb 2023 12:24:18 +0100
Subject: [PATCH 5/8] List namespace

---
 py-polars/tests/unit/namespaces/test_list.py | 379 +++++++++++++++++++
 py-polars/tests/unit/test_lists.py           | 372 ------------------
 2 files changed, 379 insertions(+), 372 deletions(-)
 create mode 100644 py-polars/tests/unit/namespaces/test_list.py

diff --git a/py-polars/tests/unit/namespaces/test_list.py b/py-polars/tests/unit/namespaces/test_list.py
new file mode 100644
index 000000000000..cd0dc5ddc21a
--- /dev/null
+++ b/py-polars/tests/unit/namespaces/test_list.py
@@ -0,0 +1,379 @@
+from __future__ import annotations
+
+import typing
+
+import numpy as np
+import pytest
+
+import polars as pl
+from polars.testing import assert_frame_equal, assert_series_equal
+
+
+def test_list_arr_get() -> None:
+    a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]])
+    out = a.arr.get(0)
+    expected = pl.Series("a", [1, 4, 6])
+    assert_series_equal(out, expected)
+    out = a.arr[0]
+    expected = pl.Series("a", [1, 4, 6])
+    assert_series_equal(out, expected)
+    out = a.arr.first()
+    assert_series_equal(out, expected)
+    out = pl.select(pl.lit(a).arr.first()).to_series()
+    assert_series_equal(out, expected)
+
+    out = a.arr.get(-1)
+    expected = pl.Series("a", [3, 5, 9])
+    assert_series_equal(out, expected)
+    out = a.arr.last()
+    assert_series_equal(out, expected)
+    out = pl.select(pl.lit(a).arr.last()).to_series()
+    assert_series_equal(out, expected)
+
+    a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]])
+    out = a.arr.get(-3)
+    expected = pl.Series("a", [1, None, 7])
+    assert_series_equal(out, expected)
+
+    assert pl.DataFrame(
+        {"a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]]}
+    ).with_columns(
+        [pl.col("a").arr.get(i).alias(f"get_{i}") for i in range(4)]
+    ).to_dict(
+        False
+    ) == {
+        "a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]],
+        "get_0": [1, 2, 3, 4, 7, None],
+        "get_1": [None, None, None, 5, 8, 11],
+        "get_2": [None, None, None, 6, 9, None],
+        "get_3": [None, None, None, None, None, None],
+    }
+
+    # get by indexes where some are out of bounds
+    df = pl.DataFrame({"cars": [[1, 2, 3], [2, 3], [4], []], "indexes": [-2, 1, -3, 0]})
+
+    assert df.select([pl.col("cars").arr.get("indexes")]).to_dict(False) == {
+        "cars": [2, 3, None, None]
+    }
+    # exact on oob boundary
+    df = pl.DataFrame(
+        {
+            "index": [3, 3, 3],
+            "lists": [[3, 4, 5], [4, 5, 6], [7, 8, 9, 4]],
+        }
+    )
+
+    assert df.select(pl.col("lists").arr.get(3)).to_dict(False) == {
+        "lists": [None, None, 4]
+    }
+    assert df.select(pl.col("lists").arr.get(pl.col("index"))).to_dict(False) == {
+        "lists": [None, None, 4]
+    }
+
+
+def test_contains() -> None:
+    a = pl.Series("a", [[1, 2, 3], [2, 5], [6, 7, 8, 9]])
+    out = a.arr.contains(2)
+    expected = pl.Series("a", [True, True, False])
+    assert_series_equal(out, expected)
+
+    out = pl.select(pl.lit(a).arr.contains(2)).to_series()
+    assert_series_equal(out, expected)
+
+
+def test_list_concat() -> None:
+    df = pl.DataFrame({"a": [[1, 2], [1], [1, 2, 3]]})
+
+    out = df.select([pl.col("a").arr.concat(pl.Series([[1, 2]]))])
+    assert out["a"][0].to_list() == [1, 2, 1, 2]
+
+    out = df.select([pl.col("a").arr.concat([1, 4])])
+    assert out["a"][0].to_list() == [1, 2, 1, 4]
+
+    out_s = df["a"].arr.concat([4, 1])
+    assert out_s[0].to_list() == [1, 2, 4, 1]
+
+
+def test_list_arr_empty() -> None:
+    df = pl.DataFrame({"cars": [[1, 2, 3], [2, 3], [4], []]})
+
+    out = df.select(
+        [
+            pl.col("cars").arr.first().alias("cars_first"),
+            pl.when(pl.col("cars").arr.first() == 2)
+            .then(1)
+            .when(pl.col("cars").arr.contains(2))
+            .then(2)
+            .otherwise(3)
+            .alias("cars_literal"),
+        ]
+    )
+    expected = pl.DataFrame(
+        {"cars_first": [1, 2, 4, None], "cars_literal": [2, 1, 3, 3]},
+        schema_overrides={"cars_literal": pl.Int32},  # Literals default to Int32
+    )
+    assert_frame_equal(out, expected)
+
+
+def test_list_argminmax() -> None:
+    s = pl.Series("a", [[1, 2], [3, 2, 1]])
+    expected = pl.Series("a", [0, 2], dtype=pl.UInt32)
+    assert_series_equal(s.arr.arg_min(), expected)
+    expected = pl.Series("a", [1, 0], dtype=pl.UInt32)
+    assert_series_equal(s.arr.arg_max(), expected)
+
+
+def test_list_shift() -> None:
+    s = pl.Series("a", [[1, 2], [3, 2, 1]])
+    expected = pl.Series("a", [[None, 1], [None, 3, 2]])
+    assert s.arr.shift().to_list() == expected.to_list()
+
+
+def test_list_diff() -> None:
+    s = pl.Series("a", [[1, 2], [10, 2, 1]])
+    expected = pl.Series("a", [[None, 1], [None, -8, -1]])
+    assert s.arr.diff().to_list() == expected.to_list()
+
+
+def test_slice() -> None:
+    vals = [[1, 2, 3, 4], [10, 2, 1]]
+    s = pl.Series("a", vals)
+    assert s.arr.head(2).to_list() == [[1, 2], [10, 2]]
+    assert s.arr.tail(2).to_list() == [[3, 4], [2, 1]]
+    assert s.arr.tail(200).to_list() == vals
+    assert s.arr.head(200).to_list() == vals
+    assert s.arr.slice(1, 2).to_list() == [[2, 3], [2, 1]]
+
+
+def test_list_eval_dtype_inference() -> None:
+    grades = pl.DataFrame(
+        {
+            "student": ["bas", "laura", "tim", "jenny"],
+            "arithmetic": [10, 5, 6, 8],
+            "biology": [4, 6, 2, 7],
+            "geography": [8, 4, 9, 7],
+        }
+    )
+
+    rank_pct = pl.col("").rank(reverse=True) / pl.col("").count().cast(pl.UInt16)
+
+    # the .arr.first() would fail if .arr.eval did not correctly infer the output type
+    assert grades.with_columns(
+        pl.concat_list(pl.all().exclude("student")).alias("all_grades")
+    ).select(
+        [
+            pl.col("all_grades")
+            .arr.eval(rank_pct, parallel=True)
+            .alias("grades_rank")
+            .arr.first()
+        ]
+    ).to_series().to_list() == [
+        0.3333333432674408,
+        0.6666666865348816,
+        0.6666666865348816,
+        0.3333333432674408,
+    ]
+
+
+def test_list_ternary_concat() -> None:
+    df = pl.DataFrame(
+        {
+            "list1": [["123", "456"], None],
+            "list2": [["789"], ["zzz"]],
+        }
+    )
+
+    assert df.with_columns(
+        pl.when(pl.col("list1").is_null())
+        .then(pl.col("list1").arr.concat(pl.col("list2")))
+        .otherwise(pl.col("list2"))
+        .alias("result")
+    ).to_dict(False) == {
+        "list1": [["123", "456"], None],
+        "list2": [["789"], ["zzz"]],
+        "result": [["789"], None],
+    }
+
+    assert df.with_columns(
+        pl.when(pl.col("list1").is_null())
+        .then(pl.col("list2"))
+        .otherwise(pl.col("list1").arr.concat(pl.col("list2")))
+        .alias("result")
+    ).to_dict(False) == {
+        "list1": [["123", "456"], None],
+        "list2": [["789"], ["zzz"]],
+        "result": [["123", "456", "789"], ["zzz"]],
+    }
+
+
+def test_arr_contains_categorical() -> None:
+    df = pl.DataFrame(
+        {"str": ["A", "B", "A", "B", "C"], "group": [1, 1, 2, 1, 2]}
+    ).lazy()
+    df = df.with_columns(pl.col("str").cast(pl.Categorical))
+    df_groups = df.groupby("group").agg([pl.col("str").alias("str_list")])
+    assert df_groups.filter(pl.col("str_list").arr.contains("C")).collect().to_dict(
+        False
+    ) == {"group": [2], "str_list": [["A", "C"]]}
+
+
+def test_list_eval_type_coercion() -> None:
+    last_non_null_value = pl.element().fill_null(3).last()
+    df = pl.DataFrame(
+        {
+            "array_cols": [[1, None]],
+        }
+    )
+
+    assert df.select(
+        [
+            pl.col("array_cols")
+            .arr.eval(last_non_null_value, parallel=False)
+            .alias("col_last")
+        ]
+    ).to_dict(False) == {"col_last": [[3]]}
+
+
+def test_list_slice() -> None:
+    df = pl.DataFrame(
+        {
+            "lst": [[1, 2, 3, 4], [10, 2, 1]],
+            "offset": [1, 2],
+            "len": [3, 2],
+        }
+    )
+
+    assert df.select([pl.col("lst").arr.slice("offset", "len")]).to_dict(False) == {
+        "lst": [[2, 3, 4], [1]]
+    }
+    assert df.select([pl.col("lst").arr.slice("offset", 1)]).to_dict(False) == {
+        "lst": [[2], [1]]
+    }
+    assert df.select([pl.col("lst").arr.slice(-2, "len")]).to_dict(False) == {
+        "lst": [[3, 4], [2, 1]]
+    }
+
+
+@typing.no_type_check
+def test_list_sliced_get_5186() -> None:
+    # https://github.com/pola-rs/polars/issues/5186
+    n = 30
+    df = pl.from_dict(
+        {
+            "ind": pl.arange(0, n, eager=True),
+            "inds": np.stack([np.arange(n), -np.arange(n)], axis=-1),
+        }
+    )
+
+    exprs = [
+        "ind",
+        pl.col("inds").arr.first().alias("first_element"),
+        pl.col("inds").arr.last().alias("last_element"),
+    ]
+    out1 = df.select(exprs)[10:20]
+    out2 = df[10:20].select(exprs)
+    assert_frame_equal(out1, out2)
+
+
+def test_empty_eval_dtype_5546() -> None:
+    # https://github.com/pola-rs/polars/issues/5546
+    df = pl.DataFrame([{"a": [{"name": 1}, {"name": 2}]}])
+
+    dtype = df.dtypes[0]
+
+    assert (
+        df.limit(0).with_columns(
+            pl.col("a")
+            .arr.eval(pl.element().filter(pl.first().struct.field("name") == 1))
+            .alias("a_filtered")
+        )
+    ).dtypes == [dtype, dtype]
+
+
+def test_list_amortized_apply_explode_5812() -> None:
+    s = pl.Series([None, [1, 3], [0, -3], [1, 2, 2]])
+    assert s.arr.sum().to_list() == [None, 4, -3, 5]
+    assert s.arr.min().to_list() == [None, 1, -3, 1]
+    assert s.arr.max().to_list() == [None, 3, 0, 2]
+    assert s.arr.arg_min().to_list() == [None, 0, 1, 0]
+    assert s.arr.arg_max().to_list() == [None, 1, 0, 1]
+
+
+def test_list_slice_5866() -> None:
+    vals = [[1, 2, 3, 4], [10, 2, 1]]
+    s = pl.Series("a", vals)
+    assert s.arr.slice(1).to_list() == [[2, 3, 4], [2, 1]]
+
+
+def test_list_take() -> None:
+    s = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8]])
+    # mypy: we make it work, but idiomatic is `arr.get`.
+    assert s.arr.take(0).to_list() == [[1], [4], [6]]  # type: ignore[arg-type]
+    assert s.arr.take([0, 1]).to_list() == [[1, 2], [4, 5], [6, 7]]
+
+    assert s.arr.take([-1, 1]).to_list() == [[3, 2], [5, 5], [8, 7]]
+
+    # use another list to make sure negative indices are respected
+    taker = pl.Series([[-1, 1], [-1, 1], [-1, -2]])
+    assert s.arr.take(taker).to_list() == [[3, 2], [5, 5], [8, 7]]
+    with pytest.raises(pl.ComputeError, match=r"Take indices are out of bounds"):
+        s.arr.take([1, 2])
+    s = pl.Series(
+        [["A", "B", "C"], ["A"], ["B"], ["1", "2"], ["e"]],
+    )
+
+    assert s.arr.take([0, 2], null_on_oob=True).to_list() == [
+        ["A", "C"],
+        ["A", None],
+        ["B", None],
+        ["1", None],
+        ["e", None],
+    ]
+    assert s.arr.take([0, 1, 2], null_on_oob=True).to_list() == [
+        ["A", "B", "C"],
+        ["A", None, None],
+        ["B", None, None],
+        ["1", "2", None],
+        ["e", None, None],
+    ]
+    s = pl.Series([[42, 1, 2], [5, 6, 7]])
+
+    with pytest.raises(pl.ComputeError, match=r"Take indices are out of bounds"):
+        s.arr.take([[0, 1, 2, 3], [0, 1, 2, 3]])
+
+    assert s.arr.take([0, 1, 2, 3], null_on_oob=True).to_list() == [
+        [42, 1, 2, None],
+        [5, 6, 7, None],
+    ]
+
+
+def test_list_eval_all_null() -> None:
+    df = pl.DataFrame({"foo": [1, 2, 3], "bar": [None, None, None]}).with_columns(
+        pl.col("bar").cast(pl.List(pl.Utf8))
+    )
+
+    assert df.select(pl.col("bar").arr.eval(pl.element())).to_dict(False) == {
+        "bar": [None, None, None]
+    }
+
+
+def test_list_function_group_awareness() -> None:
+    df = pl.DataFrame(
+        {
+            "a": [100, 103, 105, 106, 105, 104, 103, 106, 100, 102],
+            "group": [0, 0, 1, 1, 1, 1, 1, 1, 2, 2],
+        }
+    )
+
+    assert df.groupby("group").agg(
+        [
+            pl.col("a").list().arr.get(0).alias("get"),
+            pl.col("a").list().arr.take([0]).alias("take"),
+            pl.col("a").list().arr.slice(0, 3).alias("slice"),
+        ]
+    ).sort("group").to_dict(False) == {
+        "group": [0, 1, 2],
+        "get": [[100], [105], [100]],
+        "take": [[[100]], [[105]], [[100]]],
+        "slice": [[[100, 103]], [[105, 106, 105]], [[100, 102]]],
+    }
diff --git a/py-polars/tests/unit/test_lists.py b/py-polars/tests/unit/test_lists.py
index 3ff8ca57a980..ede5e72d1a3d 100644
--- a/py-polars/tests/unit/test_lists.py
+++ b/py-polars/tests/unit/test_lists.py
@@ -1,86 +1,10 @@
 from __future__ import annotations
 
-import typing
 from datetime import date, datetime, time
 
-import numpy as np
 import pandas as pd
-import pytest
 
 import polars as pl
-from polars.testing import assert_frame_equal, assert_series_equal
-
-
-def test_list_arr_get() -> None:
-    a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]])
-    out = a.arr.get(0)
-    expected = pl.Series("a", [1, 4, 6])
-    assert_series_equal(out, expected)
-    out = a.arr[0]
-    expected = pl.Series("a", [1, 4, 6])
-    assert_series_equal(out, expected)
-    out = a.arr.first()
-    assert_series_equal(out, expected)
-    out = pl.select(pl.lit(a).arr.first()).to_series()
-    assert_series_equal(out, expected)
-
-    out = a.arr.get(-1)
-    expected = pl.Series("a", [3, 5, 9])
-    assert_series_equal(out, expected)
-    out = a.arr.last()
-    assert_series_equal(out, expected)
-    out = pl.select(pl.lit(a).arr.last()).to_series()
-    assert_series_equal(out, expected)
-
-    a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]])
-    out = a.arr.get(-3)
-    expected = pl.Series("a", [1, None, 7])
-    assert_series_equal(out, expected)
-
-    assert pl.DataFrame(
-        {"a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]]}
-    ).with_columns(
-        [pl.col("a").arr.get(i).alias(f"get_{i}") for i in range(4)]
-    ).to_dict(
-        False
-    ) == {
-        "a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]],
-        "get_0": [1, 2, 3, 4, 7, None],
-        "get_1": [None, None, None, 5, 8, 11],
-        "get_2": [None, None, None, 6, 9, None],
-        "get_3": [None, None, None, None, None, None],
-    }
-
-    # get by indexes where some are out of bounds
-    df = pl.DataFrame({"cars": [[1, 2, 3], [2, 3], [4], []], "indexes": [-2, 1, -3, 0]})
-
-    assert df.select([pl.col("cars").arr.get("indexes")]).to_dict(False) == {
-        "cars": [2, 3, None, None]
-    }
-    # exact on oob boundary
-    df = pl.DataFrame(
-        {
-            "index": [3, 3, 3],
-            "lists": [[3, 4, 5], [4, 5, 6], [7, 8, 9, 4]],
-        }
-    )
-
-    assert df.select(pl.col("lists").arr.get(3)).to_dict(False) == {
-        "lists": [None, None, 4]
-    }
-    assert df.select(pl.col("lists").arr.get(pl.col("index"))).to_dict(False) == {
-        "lists": [None, None, 4]
-    }
-
-
-def test_contains() -> None:
-    a = pl.Series("a", [[1, 2, 3], [2, 5], [6, 7, 8, 9]])
-    out = a.arr.contains(2)
-    expected = pl.Series("a", [True, True, False])
-    assert_series_equal(out, expected)
-
-    out = pl.select(pl.lit(a).arr.contains(2)).to_series()
-    assert_series_equal(out, expected)
 
 
 def test_dtype() -> None:
@@ -206,70 +130,6 @@ def test_list_concat_rolling_window() -> None:
     }
 
 
-def test_list_append() -> None:
-    df = pl.DataFrame({"a": [[1, 2], [1], [1, 2, 3]]})
-
-    out = df.select([pl.col("a").arr.concat(pl.Series([[1, 2]]))])
-    assert out["a"][0].to_list() == [1, 2, 1, 2]
-
-    out = df.select([pl.col("a").arr.concat([1, 4])])
-    assert out["a"][0].to_list() == [1, 2, 1, 4]
-
-    out_s = df["a"].arr.concat([4, 1])
-    assert out_s[0].to_list() == [1, 2, 4, 1]
-
-
-def test_list_arr_empty() -> None:
-    df = pl.DataFrame({"cars": [[1, 2, 3], [2, 3], [4], []]})
-
-    out = df.select(
-        [
-            pl.col("cars").arr.first().alias("cars_first"),
-            pl.when(pl.col("cars").arr.first() == 2)
-            .then(1)
-            .when(pl.col("cars").arr.contains(2))
-            .then(2)
-            .otherwise(3)
-            .alias("cars_literal"),
-        ]
-    )
-    expected = pl.DataFrame(
-        {"cars_first": [1, 2, 4, None], "cars_literal": [2, 1, 3, 3]},
-        schema_overrides={"cars_literal": pl.Int32},  # Literals default to Int32
-    )
-    assert_frame_equal(out, expected)
-
-
-def test_list_argminmax() -> None:
-    s = pl.Series("a", [[1, 2], [3, 2, 1]])
-    expected = pl.Series("a", [0, 2], dtype=pl.UInt32)
-    assert_series_equal(s.arr.arg_min(), expected)
-    expected = pl.Series("a", [1, 0], dtype=pl.UInt32)
-    assert_series_equal(s.arr.arg_max(), expected)
-
-
-def test_list_shift() -> None:
-    s = pl.Series("a", [[1, 2], [3, 2, 1]])
-    expected = pl.Series("a", [[None, 1], [None, 3, 2]])
-    assert s.arr.shift().to_list() == expected.to_list()
-
-
-def test_list_diff() -> None:
-    s = pl.Series("a", [[1, 2], [10, 2, 1]])
-    expected = pl.Series("a", [[None, 1], [None, -8, -1]])
-    assert s.arr.diff().to_list() == expected.to_list()
-
-
-def test_slice() -> None:
-    vals = [[1, 2, 3, 4], [10, 2, 1]]
-    s = pl.Series("a", vals)
-    assert s.arr.head(2).to_list() == [[1, 2], [10, 2]]
-    assert s.arr.tail(2).to_list() == [[3, 4], [2, 1]]
-    assert s.arr.tail(200).to_list() == vals
-    assert s.arr.head(200).to_list() == vals
-    assert s.arr.slice(1, 2).to_list() == [[2, 3], [2, 1]]
-
-
 def test_cast_inner() -> None:
     a = pl.Series([[1, 2]])
     for t in [bool, pl.Boolean]:
@@ -284,36 +144,6 @@ def test_cast_inner() -> None:
     )
 
 
-def test_list_eval_dtype_inference() -> None:
-    grades = pl.DataFrame(
-        {
-            "student": ["bas", "laura", "tim", "jenny"],
-            "arithmetic": [10, 5, 6, 8],
-            "biology": [4, 6, 2, 7],
-            "geography": [8, 4, 9, 7],
-        }
-    )
-
-    rank_pct = pl.col("").rank(reverse=True) / pl.col("").count().cast(pl.UInt16)
-
-    # the .arr.first() would fail if .arr.eval did not correctly infer the output type
-    assert grades.with_columns(
-        pl.concat_list(pl.all().exclude("student")).alias("all_grades")
-    ).select(
-        [
-            pl.col("all_grades")
-            .arr.eval(rank_pct, parallel=True)
-            .alias("grades_rank")
-            .arr.first()
-        ]
-    ).to_series().to_list() == [
-        0.3333333432674408,
-        0.6666666865348816,
-        0.6666666865348816,
-        0.3333333432674408,
-    ]
-
-
 def test_list_empty_groupby_result_3521() -> None:
     # Create a left relation where the join column contains a null value
     left = pl.DataFrame().with_columns(
@@ -374,37 +204,6 @@ def test_empty_list_construction() -> None:
     assert df.rows() == []
 
 
-def test_list_ternary_concat() -> None:
-    df = pl.DataFrame(
-        {
-            "list1": [["123", "456"], None],
-            "list2": [["789"], ["zzz"]],
-        }
-    )
-
-    assert df.with_columns(
-        pl.when(pl.col("list1").is_null())
-        .then(pl.col("list1").arr.concat(pl.col("list2")))
-        .otherwise(pl.col("list2"))
-        .alias("result")
-    ).to_dict(False) == {
-        "list1": [["123", "456"], None],
-        "list2": [["789"], ["zzz"]],
-        "result": [["789"], None],
-    }
-
-    assert df.with_columns(
-        pl.when(pl.col("list1").is_null())
-        .then(pl.col("list2"))
-        .otherwise(pl.col("list1").arr.concat(pl.col("list2")))
-        .alias("result")
-    ).to_dict(False) == {
-        "list1": [["123", "456"], None],
-        "list2": [["789"], ["zzz"]],
-        "result": [["123", "456", "789"], ["zzz"]],
-    }
-
-
 def test_list_concat_nulls() -> None:
     assert pl.DataFrame(
         {
@@ -436,17 +235,6 @@ def test_list_hash() -> None:
     assert out[0, "b"] == out[2, "b"]
 
 
-def test_arr_contains_categorical() -> None:
-    df = pl.DataFrame(
-        {"str": ["A", "B", "A", "B", "C"], "group": [1, 1, 2, 1, 2]}
-    ).lazy()
-    df = df.with_columns(pl.col("str").cast(pl.Categorical))
-    df_groups = df.groupby("group").agg([pl.col("str").alias("str_list")])
-    assert df_groups.filter(pl.col("str_list").arr.contains("C")).collect().to_dict(
-        False
-    ) == {"group": [2], "str_list": [["A", "C"]]}
-
-
 def test_list_diagonal_concat() -> None:
     df1 = pl.DataFrame({"a": [1, 2]})
 
@@ -458,23 +246,6 @@ def test_list_diagonal_concat() -> None:
     }
 
 
-def test_list_eval_type_coercion() -> None:
-    last_non_null_value = pl.element().fill_null(3).last()
-    df = pl.DataFrame(
-        {
-            "array_cols": [[1, None]],
-        }
-    )
-
-    assert df.select(
-        [
-            pl.col("array_cols")
-            .arr.eval(last_non_null_value, parallel=False)
-            .alias("col_last")
-        ]
-    ).to_dict(False) == {"col_last": [[3]]}
-
-
 def test_is_in_empty_list_4559() -> None:
     assert pl.Series(["a"]).is_in([]).to_list() == [False]
 
@@ -514,60 +285,6 @@ def test_groupby_list_column() -> None:
     }
 
 
-def test_list_slice() -> None:
-    df = pl.DataFrame(
-        {
-            "lst": [[1, 2, 3, 4], [10, 2, 1]],
-            "offset": [1, 2],
-            "len": [3, 2],
-        }
-    )
-
-    assert df.select([pl.col("lst").arr.slice("offset", "len")]).to_dict(False) == {
-        "lst": [[2, 3, 4], [1]]
-    }
-    assert df.select([pl.col("lst").arr.slice("offset", 1)]).to_dict(False) == {
-        "lst": [[2], [1]]
-    }
-    assert df.select([pl.col("lst").arr.slice(-2, "len")]).to_dict(False) == {
-        "lst": [[3, 4], [2, 1]]
-    }
-
-
-@typing.no_type_check
-def test_list_sliced_get_5186() -> None:
-    n = 30
-    df = pl.from_dict(
-        {
-            "ind": pl.arange(0, n, eager=True),
-            "inds": np.stack([np.arange(n), -np.arange(n)], axis=-1),
-        }
-    )
-
-    exprs = [
-        "ind",
-        pl.col("inds").arr.first().alias("first_element"),
-        pl.col("inds").arr.last().alias("last_element"),
-    ]
-    out1 = df.select(exprs)[10:20]
-    out2 = df[10:20].select(exprs)
-    assert_frame_equal(out1, out2)
-
-
-def test_empty_eval_dtype_5546() -> None:
-    df = pl.DataFrame([{"a": [{"name": 1}, {"name": 2}]}])
-
-    dtype = df.dtypes[0]
-
-    assert (
-        df.limit(0).with_columns(
-            pl.col("a")
-            .arr.eval(pl.element().filter(pl.first().struct.field("name") == 1))
-            .alias("a_filtered")
-        )
-    ).dtypes == [dtype, dtype]
-
-
 def test_fast_explode_flag() -> None:
     df1 = pl.DataFrame({"values": [[[1, 2]]]})
     assert df1.clone().vstack(df1)["values"].flags["FAST_EXPLODE"]
@@ -583,63 +300,6 @@ def test_fast_explode_flag() -> None:
     ]
 
 
-def test_list_amortized_apply_explode_5812() -> None:
-    s = pl.Series([None, [1, 3], [0, -3], [1, 2, 2]])
-    assert s.arr.sum().to_list() == [None, 4, -3, 5]
-    assert s.arr.min().to_list() == [None, 1, -3, 1]
-    assert s.arr.max().to_list() == [None, 3, 0, 2]
-    assert s.arr.arg_min().to_list() == [None, 0, 1, 0]
-    assert s.arr.arg_max().to_list() == [None, 1, 0, 1]
-
-
-def test_list_slice_5866() -> None:
-    vals = [[1, 2, 3, 4], [10, 2, 1]]
-    s = pl.Series("a", vals)
-    assert s.arr.slice(1).to_list() == [[2, 3, 4], [2, 1]]
-
-
-def test_list_take() -> None:
-    s = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8]])
-    # mypy: we make it work, but idomatic is `arr.get`.
-    assert s.arr.take(0).to_list() == [[1], [4], [6]]  # type: ignore[arg-type]
-    assert s.arr.take([0, 1]).to_list() == [[1, 2], [4, 5], [6, 7]]
-
-    assert s.arr.take([-1, 1]).to_list() == [[3, 2], [5, 5], [8, 7]]
-
-    # use another list to make sure negative indices are respected
-    taker = pl.Series([[-1, 1], [-1, 1], [-1, -2]])
-    assert s.arr.take(taker).to_list() == [[3, 2], [5, 5], [8, 7]]
-    with pytest.raises(pl.ComputeError, match=r"Take indices are out of bounds"):
-        s.arr.take([1, 2])
-    s = pl.Series(
-        [["A", "B", "C"], ["A"], ["B"], ["1", "2"], ["e"]],
-    )
-
-    assert s.arr.take([0, 2], null_on_oob=True).to_list() == [
-        ["A", "C"],
-        ["A", None],
-        ["B", None],
-        ["1", None],
-        ["e", None],
-    ]
-    assert s.arr.take([0, 1, 2], null_on_oob=True).to_list() == [
-        ["A", "B", "C"],
-        ["A", None, None],
-        ["B", None, None],
-        ["1", "2", None],
-        ["e", None, None],
-    ]
-    s = pl.Series([[42, 1, 2], [5, 6, 7]])
-
-    with pytest.raises(pl.ComputeError, match=r"Take indices are out of bounds"):
-        s.arr.take([[0, 1, 2, 3], [0, 1, 2, 3]])
-
-    assert s.arr.take([0, 1, 2, 3], null_on_oob=True).to_list() == [
-        [42, 1, 2, None],
-        [5, 6, 7, None],
-    ]
-
-
 def test_fast_explode_on_list_struct_6208() -> None:
     data = [
         {
@@ -700,38 +360,6 @@ def test_concat_list_in_agg_6397() -> None:
     }
 
 
-def test_list_eval_all_null() -> None:
-    df = pl.DataFrame({"foo": [1, 2, 3], "bar": [None, None, None]}).with_columns(
-        pl.col("bar").cast(pl.List(pl.Utf8))
-    )
-
-    assert df.select(pl.col("bar").arr.eval(pl.element())).to_dict(False) == {
-        "bar": [None, None, None]
-    }
-
-
-def test_list_function_group_awareness() -> None:
-    df = pl.DataFrame(
-        {
-            "a": [100, 103, 105, 106, 105, 104, 103, 106, 100, 102],
-            "group": [0, 0, 1, 1, 1, 1, 1, 1, 2, 2],
-        }
-    )
-
-    assert df.groupby("group").agg(
-        [
-            pl.col("a").list().arr.get(0).alias("get"),
-            pl.col("a").list().arr.take([0]).alias("take"),
-            pl.col("a").list().arr.slice(0, 3).alias("slice"),
-        ]
-    ).sort("group").to_dict(False) == {
-        "group": [0, 1, 2],
-        "get": [[100], [105], [100]],
-        "take": [[[100]], [[105]], [[100]]],
-        "slice": [[[100, 103]], [[105, 106, 105]], [[100, 102]]],
-    }
-
-
 def test_flat_aggregation_to_list_conversion_6918() -> None:
     df = pl.DataFrame({"a": [1, 2, 2], "b": [[0, 1], [2, 3], [4, 5]]})
 

From 694cdf99e858d0dbb56e56a426ff52d93cbffee8 Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijn@degooijer.io>
Date: Fri, 17 Feb 2023 12:32:13 +0100
Subject: [PATCH 6/8] Datatypes folder

---
 py-polars/tests/unit/datatypes/__init__.py                 | 1 +
 py-polars/tests/unit/{ => datatypes}/test_bool.py          | 0
 py-polars/tests/unit/{ => datatypes}/test_categorical.py   | 0
 .../tests/unit/{test_lists.py => datatypes/test_list.py}   | 0
 py-polars/tests/unit/{ => datatypes}/test_object.py        | 0
 py-polars/tests/unit/{ => datatypes}/test_struct.py        | 0
 py-polars/tests/unit/{ => datatypes}/test_temporal.py      | 0
 py-polars/tests/unit/io/__init__.py                        | 7 ++++++-
 8 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 py-polars/tests/unit/datatypes/__init__.py
 rename py-polars/tests/unit/{ => datatypes}/test_bool.py (100%)
 rename py-polars/tests/unit/{ => datatypes}/test_categorical.py (100%)
 rename py-polars/tests/unit/{test_lists.py => datatypes/test_list.py} (100%)
 rename py-polars/tests/unit/{ => datatypes}/test_object.py (100%)
 rename py-polars/tests/unit/{ => datatypes}/test_struct.py (100%)
 rename py-polars/tests/unit/{ => datatypes}/test_temporal.py (100%)

diff --git a/py-polars/tests/unit/datatypes/__init__.py b/py-polars/tests/unit/datatypes/__init__.py
new file mode 100644
index 000000000000..1a7dfc2793d2
--- /dev/null
+++ b/py-polars/tests/unit/datatypes/__init__.py
@@ -0,0 +1 @@
+"""Test module for testing behaviour of specific data types in various operations."""
diff --git a/py-polars/tests/unit/test_bool.py b/py-polars/tests/unit/datatypes/test_bool.py
similarity index 100%
rename from py-polars/tests/unit/test_bool.py
rename to py-polars/tests/unit/datatypes/test_bool.py
diff --git a/py-polars/tests/unit/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py
similarity index 100%
rename from py-polars/tests/unit/test_categorical.py
rename to py-polars/tests/unit/datatypes/test_categorical.py
diff --git a/py-polars/tests/unit/test_lists.py b/py-polars/tests/unit/datatypes/test_list.py
similarity index 100%
rename from py-polars/tests/unit/test_lists.py
rename to py-polars/tests/unit/datatypes/test_list.py
diff --git a/py-polars/tests/unit/test_object.py b/py-polars/tests/unit/datatypes/test_object.py
similarity index 100%
rename from py-polars/tests/unit/test_object.py
rename to py-polars/tests/unit/datatypes/test_object.py
diff --git a/py-polars/tests/unit/test_struct.py b/py-polars/tests/unit/datatypes/test_struct.py
similarity index 100%
rename from py-polars/tests/unit/test_struct.py
rename to py-polars/tests/unit/datatypes/test_struct.py
diff --git a/py-polars/tests/unit/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py
similarity index 100%
rename from py-polars/tests/unit/test_temporal.py
rename to py-polars/tests/unit/datatypes/test_temporal.py
diff --git a/py-polars/tests/unit/io/__init__.py b/py-polars/tests/unit/io/__init__.py
index 6657aa68ec3e..3f77a4ecb7e6 100644
--- a/py-polars/tests/unit/io/__init__.py
+++ b/py-polars/tests/unit/io/__init__.py
@@ -1 +1,6 @@
-"""Test module containing tests for all input/output methods."""
+"""
+Test module containing tests for all input/output methods.
+
+Note that tests should never persistently change the state on disk - all tests use a
+temporary directory when testing write functionality.
+"""

From 4337eb3eb7821dda2fb10e508d883bb37d1e7363 Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijn@degooijer.io>
Date: Fri, 17 Feb 2023 12:41:42 +0100
Subject: [PATCH 7/8] Operations folder

---
 py-polars/tests/unit/operations/__init__.py                | 1 +
 py-polars/tests/unit/{ => operations}/test_aggregations.py | 0
 py-polars/tests/unit/{ => operations}/test_apply.py        | 0
 py-polars/tests/unit/{ => operations}/test_arithmetic.py   | 0
 py-polars/tests/unit/{ => operations}/test_comparison.py   | 0
 py-polars/tests/unit/{ => operations}/test_drop.py         | 0
 py-polars/tests/unit/{ => operations}/test_explode.py      | 0
 py-polars/tests/unit/{ => operations}/test_filter.py       | 0
 py-polars/tests/unit/{ => operations}/test_folds.py        | 0
 py-polars/tests/unit/{ => operations}/test_groupby.py      | 0
 py-polars/tests/unit/{ => operations}/test_joins.py        | 0
 py-polars/tests/unit/{ => operations}/test_pivot.py        | 0
 py-polars/tests/unit/{ => operations}/test_rolling.py      | 0
 py-polars/tests/unit/{ => operations}/test_sort.py         | 0
 py-polars/tests/unit/{ => operations}/test_window.py       | 0
 15 files changed, 1 insertion(+)
 create mode 100644 py-polars/tests/unit/operations/__init__.py
 rename py-polars/tests/unit/{ => operations}/test_aggregations.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_apply.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_arithmetic.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_comparison.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_drop.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_explode.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_filter.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_folds.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_groupby.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_joins.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_pivot.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_rolling.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_sort.py (100%)
 rename py-polars/tests/unit/{ => operations}/test_window.py (100%)

diff --git a/py-polars/tests/unit/operations/__init__.py b/py-polars/tests/unit/operations/__init__.py
new file mode 100644
index 000000000000..2560f5ab9e7a
--- /dev/null
+++ b/py-polars/tests/unit/operations/__init__.py
@@ -0,0 +1 @@
+"""Test module for extensive testing of specific operations like join or explode."""
diff --git a/py-polars/tests/unit/test_aggregations.py b/py-polars/tests/unit/operations/test_aggregations.py
similarity index 100%
rename from py-polars/tests/unit/test_aggregations.py
rename to py-polars/tests/unit/operations/test_aggregations.py
diff --git a/py-polars/tests/unit/test_apply.py b/py-polars/tests/unit/operations/test_apply.py
similarity index 100%
rename from py-polars/tests/unit/test_apply.py
rename to py-polars/tests/unit/operations/test_apply.py
diff --git a/py-polars/tests/unit/test_arithmetic.py b/py-polars/tests/unit/operations/test_arithmetic.py
similarity index 100%
rename from py-polars/tests/unit/test_arithmetic.py
rename to py-polars/tests/unit/operations/test_arithmetic.py
diff --git a/py-polars/tests/unit/test_comparison.py b/py-polars/tests/unit/operations/test_comparison.py
similarity index 100%
rename from py-polars/tests/unit/test_comparison.py
rename to py-polars/tests/unit/operations/test_comparison.py
diff --git a/py-polars/tests/unit/test_drop.py b/py-polars/tests/unit/operations/test_drop.py
similarity index 100%
rename from py-polars/tests/unit/test_drop.py
rename to py-polars/tests/unit/operations/test_drop.py
diff --git a/py-polars/tests/unit/test_explode.py b/py-polars/tests/unit/operations/test_explode.py
similarity index 100%
rename from py-polars/tests/unit/test_explode.py
rename to py-polars/tests/unit/operations/test_explode.py
diff --git a/py-polars/tests/unit/test_filter.py b/py-polars/tests/unit/operations/test_filter.py
similarity index 100%
rename from py-polars/tests/unit/test_filter.py
rename to py-polars/tests/unit/operations/test_filter.py
diff --git a/py-polars/tests/unit/test_folds.py b/py-polars/tests/unit/operations/test_folds.py
similarity index 100%
rename from py-polars/tests/unit/test_folds.py
rename to py-polars/tests/unit/operations/test_folds.py
diff --git a/py-polars/tests/unit/test_groupby.py b/py-polars/tests/unit/operations/test_groupby.py
similarity index 100%
rename from py-polars/tests/unit/test_groupby.py
rename to py-polars/tests/unit/operations/test_groupby.py
diff --git a/py-polars/tests/unit/test_joins.py b/py-polars/tests/unit/operations/test_joins.py
similarity index 100%
rename from py-polars/tests/unit/test_joins.py
rename to py-polars/tests/unit/operations/test_joins.py
diff --git a/py-polars/tests/unit/test_pivot.py b/py-polars/tests/unit/operations/test_pivot.py
similarity index 100%
rename from py-polars/tests/unit/test_pivot.py
rename to py-polars/tests/unit/operations/test_pivot.py
diff --git a/py-polars/tests/unit/test_rolling.py b/py-polars/tests/unit/operations/test_rolling.py
similarity index 100%
rename from py-polars/tests/unit/test_rolling.py
rename to py-polars/tests/unit/operations/test_rolling.py
diff --git a/py-polars/tests/unit/test_sort.py b/py-polars/tests/unit/operations/test_sort.py
similarity index 100%
rename from py-polars/tests/unit/test_sort.py
rename to py-polars/tests/unit/operations/test_sort.py
diff --git a/py-polars/tests/unit/test_window.py b/py-polars/tests/unit/operations/test_window.py
similarity index 100%
rename from py-polars/tests/unit/test_window.py
rename to py-polars/tests/unit/operations/test_window.py

From 581465740199ff82e8d4775dec759cd2956f9cca Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijn@degooijer.io>
Date: Fri, 17 Feb 2023 12:49:34 +0100
Subject: [PATCH 8/8] Utils folder

---
 py-polars/tests/unit/utils/__init__.py                 | 1 +
 py-polars/tests/unit/{ => utils}/test_build_info.py    | 0
 py-polars/tests/unit/{ => utils}/test_show_versions.py | 0
 py-polars/tests/unit/{ => utils}/test_utils.py         | 0
 4 files changed, 1 insertion(+)
 create mode 100644 py-polars/tests/unit/utils/__init__.py
 rename py-polars/tests/unit/{ => utils}/test_build_info.py (100%)
 rename py-polars/tests/unit/{ => utils}/test_show_versions.py (100%)
 rename py-polars/tests/unit/{ => utils}/test_utils.py (100%)

diff --git a/py-polars/tests/unit/utils/__init__.py b/py-polars/tests/unit/utils/__init__.py
new file mode 100644
index 000000000000..f27b760953b4
--- /dev/null
+++ b/py-polars/tests/unit/utils/__init__.py
@@ -0,0 +1 @@
+"""Test module for utility functions."""
diff --git a/py-polars/tests/unit/test_build_info.py b/py-polars/tests/unit/utils/test_build_info.py
similarity index 100%
rename from py-polars/tests/unit/test_build_info.py
rename to py-polars/tests/unit/utils/test_build_info.py
diff --git a/py-polars/tests/unit/test_show_versions.py b/py-polars/tests/unit/utils/test_show_versions.py
similarity index 100%
rename from py-polars/tests/unit/test_show_versions.py
rename to py-polars/tests/unit/utils/test_show_versions.py
diff --git a/py-polars/tests/unit/test_utils.py b/py-polars/tests/unit/utils/test_utils.py
similarity index 100%
rename from py-polars/tests/unit/test_utils.py
rename to py-polars/tests/unit/utils/test_utils.py