From 034339a0548683c1a1a3414851587364757b04b2 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 17 Feb 2023 10:20:53 +0100 Subject: [PATCH 1/8] Add some module level docstrings --- py-polars/tests/unit/io/__init__.py | 1 + py-polars/tests/unit/namespaces/__init__.py | 11 +++++++++++ py-polars/tests/unit/namespaces/test_strptime.py | 5 +++++ 3 files changed, 17 insertions(+) create mode 100644 py-polars/tests/unit/io/__init__.py create mode 100644 py-polars/tests/unit/namespaces/__init__.py diff --git a/py-polars/tests/unit/io/__init__.py b/py-polars/tests/unit/io/__init__.py new file mode 100644 index 000000000000..6657aa68ec3e --- /dev/null +++ b/py-polars/tests/unit/io/__init__.py @@ -0,0 +1 @@ +"""Test module containing tests for all input/output methods.""" diff --git a/py-polars/tests/unit/namespaces/__init__.py b/py-polars/tests/unit/namespaces/__init__.py new file mode 100644 index 000000000000..96614f45647b --- /dev/null +++ b/py-polars/tests/unit/namespaces/__init__.py @@ -0,0 +1,11 @@ +""" +Test module containing dedicated tests for all namespace methods. + +Namespace methods are methods that are available on Series and Expr classes for +operations that are only available for specific data types. For example, +``Series.str.to_lowercase()``. + +These methods are almost exclusively implemented as expressions, with the Series method +dispatching to this implementation through a decorator. This means we only need to test +the Series method, as this will indirectly test the Expr method as well. +""" diff --git a/py-polars/tests/unit/namespaces/test_strptime.py b/py-polars/tests/unit/namespaces/test_strptime.py index 8f28f0721612..816b339fdd39 100644 --- a/py-polars/tests/unit/namespaces/test_strptime.py +++ b/py-polars/tests/unit/namespaces/test_strptime.py @@ -1,3 +1,8 @@ +""" +Module for testing ``.str.strptime`` of the string namespace. + +This method gets its own module due to its complexity. +""" from __future__ import annotations import sys From 7879d022fde57032d7ce71343c9bb74d4bacd7c7 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 17 Feb 2023 10:21:03 +0100 Subject: [PATCH 2/8] Move test_binary to namespaces --- py-polars/tests/unit/{ => namespaces}/test_binary.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename py-polars/tests/unit/{ => namespaces}/test_binary.py (100%) diff --git a/py-polars/tests/unit/test_binary.py b/py-polars/tests/unit/namespaces/test_binary.py similarity index 100% rename from py-polars/tests/unit/test_binary.py rename to py-polars/tests/unit/namespaces/test_binary.py From 7f5047a437df6305527bcc321e6c2333a28e0a7d Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 17 Feb 2023 11:31:58 +0100 Subject: [PATCH 3/8] Move struct --- .../tests/unit/namespaces/test_struct.py | 28 +++++++++++++++++++ py-polars/tests/unit/test_struct.py | 24 ---------------- 2 files changed, 28 insertions(+), 24 deletions(-) create mode 100644 py-polars/tests/unit/namespaces/test_struct.py diff --git a/py-polars/tests/unit/namespaces/test_struct.py b/py-polars/tests/unit/namespaces/test_struct.py new file mode 100644 index 000000000000..b0ab63ea094d --- /dev/null +++ b/py-polars/tests/unit/namespaces/test_struct.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +import polars as pl +from polars.testing import assert_frame_equal + + +def test_struct_various() -> None: + df = pl.DataFrame( + {"int": [1, 2], "str": ["a", "b"], "bool": [True, None], "list": [[1, 2], [3]]} + ) + s = df.to_struct("my_struct") + + assert s.struct.fields == ["int", "str", "bool", "list"] + assert s[0] == {"int": 1, "str": "a", "bool": True, "list": [1, 2]} + assert s[1] == {"int": 2, "str": "b", "bool": None, "list": [3]} + assert s.struct.field("list").to_list() == [[1, 2], [3]] + assert s.struct.field("int").to_list() == [1, 2] + + assert_frame_equal(df.to_struct("my_struct").struct.unnest(), df) + assert s.struct._ipython_key_completions_() == s.struct.fields + + +def test_rename_fields() -> None: + df = pl.DataFrame({"int": [1, 2], "str": ["a", "b"], "bool": [True, None]}) + assert df.to_struct("my_struct").struct.rename_fields(["a", "b"]).struct.fields == [ + "a", + "b", + ] diff --git a/py-polars/tests/unit/test_struct.py b/py-polars/tests/unit/test_struct.py index a89c36667eb5..39ca45ef738e 100644 --- a/py-polars/tests/unit/test_struct.py +++ b/py-polars/tests/unit/test_struct.py @@ -12,22 +12,6 @@ from polars.testing import assert_frame_equal -def test_struct_various() -> None: - df = pl.DataFrame( - {"int": [1, 2], "str": ["a", "b"], "bool": [True, None], "list": [[1, 2], [3]]} - ) - s = df.to_struct("my_struct") - - assert s.struct.fields == ["int", "str", "bool", "list"] - assert s[0] == {"int": 1, "str": "a", "bool": True, "list": [1, 2]} - assert s[1] == {"int": 2, "str": "b", "bool": None, "list": [3]} - assert s.struct.field("list").to_list() == [[1, 2], [3]] - assert s.struct.field("int").to_list() == [1, 2] - - assert_frame_equal(df.to_struct("my_struct").struct.unnest(), df) - assert s.struct._ipython_key_completions_() == s.struct.fields - - def test_struct_to_list() -> None: assert pl.DataFrame( {"int": [1, 2], "str": ["a", "b"], "bool": [True, None], "list": [[1, 2], [3]]} @@ -57,14 +41,6 @@ def test_apply_unnest() -> None: assert_frame_equal(df, expected) -def test_rename_fields() -> None: - df = pl.DataFrame({"int": [1, 2], "str": ["a", "b"], "bool": [True, None]}) - assert df.to_struct("my_struct").struct.rename_fields(["a", "b"]).struct.fields == [ - "a", - "b", - ] - - def test_struct_unnesting() -> None: df = pl.DataFrame({"a": [1, 2]}) out = df.select( From e195d3742a6f2e5b8fc5e54a6de1199198d7a9fd Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 17 Feb 2023 11:40:39 +0100 Subject: [PATCH 4/8] Move categorical --- .../tests/unit/namespaces/test_categorical.py | 74 +++++++++++++++++++ py-polars/tests/unit/test_categorical.py | 72 ------------------ 2 files changed, 74 insertions(+), 72 deletions(-) create mode 100644 py-polars/tests/unit/namespaces/test_categorical.py diff --git a/py-polars/tests/unit/namespaces/test_categorical.py b/py-polars/tests/unit/namespaces/test_categorical.py new file mode 100644 index 000000000000..1159c2160a30 --- /dev/null +++ b/py-polars/tests/unit/namespaces/test_categorical.py @@ -0,0 +1,74 @@ +import polars as pl +from polars.testing import assert_frame_equal + + +def test_categorical_lexical_sort() -> None: + df = pl.DataFrame( + {"cats": ["z", "z", "k", "a", "b"], "vals": [3, 1, 2, 2, 3]} + ).with_columns( + [ + pl.col("cats").cast(pl.Categorical).cat.set_ordering("lexical"), + ] + ) + + out = df.sort(["cats"]) + assert out["cats"].dtype == pl.Categorical + expected = pl.DataFrame( + {"cats": ["a", "b", "k", "z", "z"], "vals": [2, 3, 2, 3, 1]} + ) + assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected) + out = df.sort(["cats", "vals"]) + expected = pl.DataFrame( + {"cats": ["a", "b", "k", "z", "z"], "vals": [2, 3, 2, 1, 3]} + ) + assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected) + out = df.sort(["vals", "cats"]) + + expected = pl.DataFrame( + {"cats": ["z", "a", "k", "b", "z"], "vals": [1, 2, 2, 3, 3]} + ) + assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected) + + +def test_categorical_lexical_ordering_after_concat() -> None: + with pl.StringCache(): + ldf1 = ( + pl.DataFrame([pl.Series("key1", [8, 5]), pl.Series("key2", ["fox", "baz"])]) + .lazy() + .with_columns( + pl.col("key2").cast(pl.Categorical).cat.set_ordering("lexical") + ) + ) + ldf2 = ( + pl.DataFrame( + [pl.Series("key1", [6, 8, 6]), pl.Series("key2", ["fox", "foo", "bar"])] + ) + .lazy() + .with_columns( + pl.col("key2").cast(pl.Categorical).cat.set_ordering("lexical") + ) + ) + df = ( + pl.concat([ldf1, ldf2]) + .with_columns(pl.col("key2").cat.set_ordering("lexical")) + .collect() + ) + + df.sort(["key1", "key2"]) + + +def test_sort_categoricals_6014() -> None: + with pl.StringCache(): + # create basic categorical + df1 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns( + pl.col("key").cast(pl.Categorical) + ) + # create lexically-ordered categorical + df2 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns( + pl.col("key").cast(pl.Categorical).cat.set_ordering("lexical") + ) + + out = df1.sort("key") + assert out.to_dict(False) == {"key": ["bbb", "aaa", "ccc"]} + out = df2.sort("key") + assert out.to_dict(False) == {"key": ["aaa", "bbb", "ccc"]} diff --git a/py-polars/tests/unit/test_categorical.py b/py-polars/tests/unit/test_categorical.py index c2a51979ffdb..716570e9eff9 100644 --- a/py-polars/tests/unit/test_categorical.py +++ b/py-polars/tests/unit/test_categorical.py @@ -61,61 +61,6 @@ def test_read_csv_categorical() -> None: assert df["col1"].dtype == pl.Categorical -def test_categorical_lexical_sort() -> None: - df = pl.DataFrame( - {"cats": ["z", "z", "k", "a", "b"], "vals": [3, 1, 2, 2, 3]} - ).with_columns( - [ - pl.col("cats").cast(pl.Categorical).cat.set_ordering("lexical"), - ] - ) - - out = df.sort(["cats"]) - assert out["cats"].dtype == pl.Categorical - expected = pl.DataFrame( - {"cats": ["a", "b", "k", "z", "z"], "vals": [2, 3, 2, 3, 1]} - ) - assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected) - out = df.sort(["cats", "vals"]) - expected = pl.DataFrame( - {"cats": ["a", "b", "k", "z", "z"], "vals": [2, 3, 2, 1, 3]} - ) - assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected) - out = df.sort(["vals", "cats"]) - - expected = pl.DataFrame( - {"cats": ["z", "a", "k", "b", "z"], "vals": [1, 2, 2, 3, 3]} - ) - assert_frame_equal(out.with_columns(pl.col("cats").cast(pl.Utf8)), expected) - - -def test_categorical_lexical_ordering_after_concat() -> None: - with pl.StringCache(): - ldf1 = ( - pl.DataFrame([pl.Series("key1", [8, 5]), pl.Series("key2", ["fox", "baz"])]) - .lazy() - .with_columns( - pl.col("key2").cast(pl.Categorical).cat.set_ordering("lexical") - ) - ) - ldf2 = ( - pl.DataFrame( - [pl.Series("key1", [6, 8, 6]), pl.Series("key2", ["fox", "foo", "bar"])] - ) - .lazy() - .with_columns( - pl.col("key2").cast(pl.Categorical).cat.set_ordering("lexical") - ) - ) - df = ( - pl.concat([ldf1, ldf2]) - .with_columns(pl.col("key2").cat.set_ordering("lexical")) - .collect() - ) - - df.sort(["key1", "key2"]) - - def test_cat_to_dummies() -> None: df = pl.DataFrame({"foo": [1, 2, 3, 4], "bar": ["a", "b", "a", "c"]}) df = df.with_columns(pl.col("bar").cast(pl.Categorical)) @@ -295,23 +240,6 @@ def test_categorical_in_struct_nulls() -> None: assert s[2] == {"job": "waiter", "counts": 1} -def test_sort_categoricals_6014() -> None: - with pl.StringCache(): - # create basic categorical - df1 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns( - pl.col("key").cast(pl.Categorical) - ) - # create lexically-ordered categorical - df2 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns( - pl.col("key").cast(pl.Categorical).cat.set_ordering("lexical") - ) - - out = df1.sort("key") - assert out.to_dict(False) == {"key": ["bbb", "aaa", "ccc"]} - out = df2.sort("key") - assert out.to_dict(False) == {"key": ["aaa", "bbb", "ccc"]} - - def test_cast_inner_categorical() -> None: dtype = pl.List(pl.Categorical) out = pl.Series("foo", [["a"], ["a", "b"]]).cast(dtype) From c709cf147c93c1509c1f9fd19b64c8a74e441a4f Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 17 Feb 2023 12:24:18 +0100 Subject: [PATCH 5/8] List namespace --- py-polars/tests/unit/namespaces/test_list.py | 379 +++++++++++++++++++ py-polars/tests/unit/test_lists.py | 372 ------------------ 2 files changed, 379 insertions(+), 372 deletions(-) create mode 100644 py-polars/tests/unit/namespaces/test_list.py diff --git a/py-polars/tests/unit/namespaces/test_list.py b/py-polars/tests/unit/namespaces/test_list.py new file mode 100644 index 000000000000..cd0dc5ddc21a --- /dev/null +++ b/py-polars/tests/unit/namespaces/test_list.py @@ -0,0 +1,379 @@ +from __future__ import annotations + +import typing + +import numpy as np +import pytest + +import polars as pl +from polars.testing import assert_frame_equal, assert_series_equal + + +def test_list_arr_get() -> None: + a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]]) + out = a.arr.get(0) + expected = pl.Series("a", [1, 4, 6]) + assert_series_equal(out, expected) + out = a.arr[0] + expected = pl.Series("a", [1, 4, 6]) + assert_series_equal(out, expected) + out = a.arr.first() + assert_series_equal(out, expected) + out = pl.select(pl.lit(a).arr.first()).to_series() + assert_series_equal(out, expected) + + out = a.arr.get(-1) + expected = pl.Series("a", [3, 5, 9]) + assert_series_equal(out, expected) + out = a.arr.last() + assert_series_equal(out, expected) + out = pl.select(pl.lit(a).arr.last()).to_series() + assert_series_equal(out, expected) + + a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]]) + out = a.arr.get(-3) + expected = pl.Series("a", [1, None, 7]) + assert_series_equal(out, expected) + + assert pl.DataFrame( + {"a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]]} + ).with_columns( + [pl.col("a").arr.get(i).alias(f"get_{i}") for i in range(4)] + ).to_dict( + False + ) == { + "a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]], + "get_0": [1, 2, 3, 4, 7, None], + "get_1": [None, None, None, 5, 8, 11], + "get_2": [None, None, None, 6, 9, None], + "get_3": [None, None, None, None, None, None], + } + + # get by indexes where some are out of bounds + df = pl.DataFrame({"cars": [[1, 2, 3], [2, 3], [4], []], "indexes": [-2, 1, -3, 0]}) + + assert df.select([pl.col("cars").arr.get("indexes")]).to_dict(False) == { + "cars": [2, 3, None, None] + } + # exact on oob boundary + df = pl.DataFrame( + { + "index": [3, 3, 3], + "lists": [[3, 4, 5], [4, 5, 6], [7, 8, 9, 4]], + } + ) + + assert df.select(pl.col("lists").arr.get(3)).to_dict(False) == { + "lists": [None, None, 4] + } + assert df.select(pl.col("lists").arr.get(pl.col("index"))).to_dict(False) == { + "lists": [None, None, 4] + } + + +def test_contains() -> None: + a = pl.Series("a", [[1, 2, 3], [2, 5], [6, 7, 8, 9]]) + out = a.arr.contains(2) + expected = pl.Series("a", [True, True, False]) + assert_series_equal(out, expected) + + out = pl.select(pl.lit(a).arr.contains(2)).to_series() + assert_series_equal(out, expected) + + +def test_list_concat() -> None: + df = pl.DataFrame({"a": [[1, 2], [1], [1, 2, 3]]}) + + out = df.select([pl.col("a").arr.concat(pl.Series([[1, 2]]))]) + assert out["a"][0].to_list() == [1, 2, 1, 2] + + out = df.select([pl.col("a").arr.concat([1, 4])]) + assert out["a"][0].to_list() == [1, 2, 1, 4] + + out_s = df["a"].arr.concat([4, 1]) + assert out_s[0].to_list() == [1, 2, 4, 1] + + +def test_list_arr_empty() -> None: + df = pl.DataFrame({"cars": [[1, 2, 3], [2, 3], [4], []]}) + + out = df.select( + [ + pl.col("cars").arr.first().alias("cars_first"), + pl.when(pl.col("cars").arr.first() == 2) + .then(1) + .when(pl.col("cars").arr.contains(2)) + .then(2) + .otherwise(3) + .alias("cars_literal"), + ] + ) + expected = pl.DataFrame( + {"cars_first": [1, 2, 4, None], "cars_literal": [2, 1, 3, 3]}, + schema_overrides={"cars_literal": pl.Int32}, # Literals default to Int32 + ) + assert_frame_equal(out, expected) + + +def test_list_argminmax() -> None: + s = pl.Series("a", [[1, 2], [3, 2, 1]]) + expected = pl.Series("a", [0, 2], dtype=pl.UInt32) + assert_series_equal(s.arr.arg_min(), expected) + expected = pl.Series("a", [1, 0], dtype=pl.UInt32) + assert_series_equal(s.arr.arg_max(), expected) + + +def test_list_shift() -> None: + s = pl.Series("a", [[1, 2], [3, 2, 1]]) + expected = pl.Series("a", [[None, 1], [None, 3, 2]]) + assert s.arr.shift().to_list() == expected.to_list() + + +def test_list_diff() -> None: + s = pl.Series("a", [[1, 2], [10, 2, 1]]) + expected = pl.Series("a", [[None, 1], [None, -8, -1]]) + assert s.arr.diff().to_list() == expected.to_list() + + +def test_slice() -> None: + vals = [[1, 2, 3, 4], [10, 2, 1]] + s = pl.Series("a", vals) + assert s.arr.head(2).to_list() == [[1, 2], [10, 2]] + assert s.arr.tail(2).to_list() == [[3, 4], [2, 1]] + assert s.arr.tail(200).to_list() == vals + assert s.arr.head(200).to_list() == vals + assert s.arr.slice(1, 2).to_list() == [[2, 3], [2, 1]] + + +def test_list_eval_dtype_inference() -> None: + grades = pl.DataFrame( + { + "student": ["bas", "laura", "tim", "jenny"], + "arithmetic": [10, 5, 6, 8], + "biology": [4, 6, 2, 7], + "geography": [8, 4, 9, 7], + } + ) + + rank_pct = pl.col("").rank(reverse=True) / pl.col("").count().cast(pl.UInt16) + + # the .arr.first() would fail if .arr.eval did not correctly infer the output type + assert grades.with_columns( + pl.concat_list(pl.all().exclude("student")).alias("all_grades") + ).select( + [ + pl.col("all_grades") + .arr.eval(rank_pct, parallel=True) + .alias("grades_rank") + .arr.first() + ] + ).to_series().to_list() == [ + 0.3333333432674408, + 0.6666666865348816, + 0.6666666865348816, + 0.3333333432674408, + ] + + +def test_list_ternary_concat() -> None: + df = pl.DataFrame( + { + "list1": [["123", "456"], None], + "list2": [["789"], ["zzz"]], + } + ) + + assert df.with_columns( + pl.when(pl.col("list1").is_null()) + .then(pl.col("list1").arr.concat(pl.col("list2"))) + .otherwise(pl.col("list2")) + .alias("result") + ).to_dict(False) == { + "list1": [["123", "456"], None], + "list2": [["789"], ["zzz"]], + "result": [["789"], None], + } + + assert df.with_columns( + pl.when(pl.col("list1").is_null()) + .then(pl.col("list2")) + .otherwise(pl.col("list1").arr.concat(pl.col("list2"))) + .alias("result") + ).to_dict(False) == { + "list1": [["123", "456"], None], + "list2": [["789"], ["zzz"]], + "result": [["123", "456", "789"], ["zzz"]], + } + + +def test_arr_contains_categorical() -> None: + df = pl.DataFrame( + {"str": ["A", "B", "A", "B", "C"], "group": [1, 1, 2, 1, 2]} + ).lazy() + df = df.with_columns(pl.col("str").cast(pl.Categorical)) + df_groups = df.groupby("group").agg([pl.col("str").alias("str_list")]) + assert df_groups.filter(pl.col("str_list").arr.contains("C")).collect().to_dict( + False + ) == {"group": [2], "str_list": [["A", "C"]]} + + +def test_list_eval_type_coercion() -> None: + last_non_null_value = pl.element().fill_null(3).last() + df = pl.DataFrame( + { + "array_cols": [[1, None]], + } + ) + + assert df.select( + [ + pl.col("array_cols") + .arr.eval(last_non_null_value, parallel=False) + .alias("col_last") + ] + ).to_dict(False) == {"col_last": [[3]]} + + +def test_list_slice() -> None: + df = pl.DataFrame( + { + "lst": [[1, 2, 3, 4], [10, 2, 1]], + "offset": [1, 2], + "len": [3, 2], + } + ) + + assert df.select([pl.col("lst").arr.slice("offset", "len")]).to_dict(False) == { + "lst": [[2, 3, 4], [1]] + } + assert df.select([pl.col("lst").arr.slice("offset", 1)]).to_dict(False) == { + "lst": [[2], [1]] + } + assert df.select([pl.col("lst").arr.slice(-2, "len")]).to_dict(False) == { + "lst": [[3, 4], [2, 1]] + } + + +@typing.no_type_check +def test_list_sliced_get_5186() -> None: + # https://github.com/pola-rs/polars/issues/5186 + n = 30 + df = pl.from_dict( + { + "ind": pl.arange(0, n, eager=True), + "inds": np.stack([np.arange(n), -np.arange(n)], axis=-1), + } + ) + + exprs = [ + "ind", + pl.col("inds").arr.first().alias("first_element"), + pl.col("inds").arr.last().alias("last_element"), + ] + out1 = df.select(exprs)[10:20] + out2 = df[10:20].select(exprs) + assert_frame_equal(out1, out2) + + +def test_empty_eval_dtype_5546() -> None: + # https://github.com/pola-rs/polars/issues/5546 + df = pl.DataFrame([{"a": [{"name": 1}, {"name": 2}]}]) + + dtype = df.dtypes[0] + + assert ( + df.limit(0).with_columns( + pl.col("a") + .arr.eval(pl.element().filter(pl.first().struct.field("name") == 1)) + .alias("a_filtered") + ) + ).dtypes == [dtype, dtype] + + +def test_list_amortized_apply_explode_5812() -> None: + s = pl.Series([None, [1, 3], [0, -3], [1, 2, 2]]) + assert s.arr.sum().to_list() == [None, 4, -3, 5] + assert s.arr.min().to_list() == [None, 1, -3, 1] + assert s.arr.max().to_list() == [None, 3, 0, 2] + assert s.arr.arg_min().to_list() == [None, 0, 1, 0] + assert s.arr.arg_max().to_list() == [None, 1, 0, 1] + + +def test_list_slice_5866() -> None: + vals = [[1, 2, 3, 4], [10, 2, 1]] + s = pl.Series("a", vals) + assert s.arr.slice(1).to_list() == [[2, 3, 4], [2, 1]] + + +def test_list_take() -> None: + s = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8]]) + # mypy: we make it work, but idomatic is `arr.get`. + assert s.arr.take(0).to_list() == [[1], [4], [6]] # type: ignore[arg-type] + assert s.arr.take([0, 1]).to_list() == [[1, 2], [4, 5], [6, 7]] + + assert s.arr.take([-1, 1]).to_list() == [[3, 2], [5, 5], [8, 7]] + + # use another list to make sure negative indices are respected + taker = pl.Series([[-1, 1], [-1, 1], [-1, -2]]) + assert s.arr.take(taker).to_list() == [[3, 2], [5, 5], [8, 7]] + with pytest.raises(pl.ComputeError, match=r"Take indices are out of bounds"): + s.arr.take([1, 2]) + s = pl.Series( + [["A", "B", "C"], ["A"], ["B"], ["1", "2"], ["e"]], + ) + + assert s.arr.take([0, 2], null_on_oob=True).to_list() == [ + ["A", "C"], + ["A", None], + ["B", None], + ["1", None], + ["e", None], + ] + assert s.arr.take([0, 1, 2], null_on_oob=True).to_list() == [ + ["A", "B", "C"], + ["A", None, None], + ["B", None, None], + ["1", "2", None], + ["e", None, None], + ] + s = pl.Series([[42, 1, 2], [5, 6, 7]]) + + with pytest.raises(pl.ComputeError, match=r"Take indices are out of bounds"): + s.arr.take([[0, 1, 2, 3], [0, 1, 2, 3]]) + + assert s.arr.take([0, 1, 2, 3], null_on_oob=True).to_list() == [ + [42, 1, 2, None], + [5, 6, 7, None], + ] + + +def test_list_eval_all_null() -> None: + df = pl.DataFrame({"foo": [1, 2, 3], "bar": [None, None, None]}).with_columns( + pl.col("bar").cast(pl.List(pl.Utf8)) + ) + + assert df.select(pl.col("bar").arr.eval(pl.element())).to_dict(False) == { + "bar": [None, None, None] + } + + +def test_list_function_group_awareness() -> None: + df = pl.DataFrame( + { + "a": [100, 103, 105, 106, 105, 104, 103, 106, 100, 102], + "group": [0, 0, 1, 1, 1, 1, 1, 1, 2, 2], + } + ) + + assert df.groupby("group").agg( + [ + pl.col("a").list().arr.get(0).alias("get"), + pl.col("a").list().arr.take([0]).alias("take"), + pl.col("a").list().arr.slice(0, 3).alias("slice"), + ] + ).sort("group").to_dict(False) == { + "group": [0, 1, 2], + "get": [[100], [105], [100]], + "take": [[[100]], [[105]], [[100]]], + "slice": [[[100, 103]], [[105, 106, 105]], [[100, 102]]], + } diff --git a/py-polars/tests/unit/test_lists.py b/py-polars/tests/unit/test_lists.py index 3ff8ca57a980..ede5e72d1a3d 100644 --- a/py-polars/tests/unit/test_lists.py +++ b/py-polars/tests/unit/test_lists.py @@ -1,86 +1,10 @@ from __future__ import annotations -import typing from datetime import date, datetime, time -import numpy as np import pandas as pd -import pytest import polars as pl -from polars.testing import assert_frame_equal, assert_series_equal - - -def test_list_arr_get() -> None: - a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]]) - out = a.arr.get(0) - expected = pl.Series("a", [1, 4, 6]) - assert_series_equal(out, expected) - out = a.arr[0] - expected = pl.Series("a", [1, 4, 6]) - assert_series_equal(out, expected) - out = a.arr.first() - assert_series_equal(out, expected) - out = pl.select(pl.lit(a).arr.first()).to_series() - assert_series_equal(out, expected) - - out = a.arr.get(-1) - expected = pl.Series("a", [3, 5, 9]) - assert_series_equal(out, expected) - out = a.arr.last() - assert_series_equal(out, expected) - out = pl.select(pl.lit(a).arr.last()).to_series() - assert_series_equal(out, expected) - - a = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8, 9]]) - out = a.arr.get(-3) - expected = pl.Series("a", [1, None, 7]) - assert_series_equal(out, expected) - - assert pl.DataFrame( - {"a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]]} - ).with_columns( - [pl.col("a").arr.get(i).alias(f"get_{i}") for i in range(4)] - ).to_dict( - False - ) == { - "a": [[1], [2], [3], [4, 5, 6], [7, 8, 9], [None, 11]], - "get_0": [1, 2, 3, 4, 7, None], - "get_1": [None, None, None, 5, 8, 11], - "get_2": [None, None, None, 6, 9, None], - "get_3": [None, None, None, None, None, None], - } - - # get by indexes where some are out of bounds - df = pl.DataFrame({"cars": [[1, 2, 3], [2, 3], [4], []], "indexes": [-2, 1, -3, 0]}) - - assert df.select([pl.col("cars").arr.get("indexes")]).to_dict(False) == { - "cars": [2, 3, None, None] - } - # exact on oob boundary - df = pl.DataFrame( - { - "index": [3, 3, 3], - "lists": [[3, 4, 5], [4, 5, 6], [7, 8, 9, 4]], - } - ) - - assert df.select(pl.col("lists").arr.get(3)).to_dict(False) == { - "lists": [None, None, 4] - } - assert df.select(pl.col("lists").arr.get(pl.col("index"))).to_dict(False) == { - "lists": [None, None, 4] - } - - -def test_contains() -> None: - a = pl.Series("a", [[1, 2, 3], [2, 5], [6, 7, 8, 9]]) - out = a.arr.contains(2) - expected = pl.Series("a", [True, True, False]) - assert_series_equal(out, expected) - - out = pl.select(pl.lit(a).arr.contains(2)).to_series() - assert_series_equal(out, expected) def test_dtype() -> None: @@ -206,70 +130,6 @@ def test_list_concat_rolling_window() -> None: } -def test_list_append() -> None: - df = pl.DataFrame({"a": [[1, 2], [1], [1, 2, 3]]}) - - out = df.select([pl.col("a").arr.concat(pl.Series([[1, 2]]))]) - assert out["a"][0].to_list() == [1, 2, 1, 2] - - out = df.select([pl.col("a").arr.concat([1, 4])]) - assert out["a"][0].to_list() == [1, 2, 1, 4] - - out_s = df["a"].arr.concat([4, 1]) - assert out_s[0].to_list() == [1, 2, 4, 1] - - -def test_list_arr_empty() -> None: - df = pl.DataFrame({"cars": [[1, 2, 3], [2, 3], [4], []]}) - - out = df.select( - [ - pl.col("cars").arr.first().alias("cars_first"), - pl.when(pl.col("cars").arr.first() == 2) - .then(1) - .when(pl.col("cars").arr.contains(2)) - .then(2) - .otherwise(3) - .alias("cars_literal"), - ] - ) - expected = pl.DataFrame( - {"cars_first": [1, 2, 4, None], "cars_literal": [2, 1, 3, 3]}, - schema_overrides={"cars_literal": pl.Int32}, # Literals default to Int32 - ) - assert_frame_equal(out, expected) - - -def test_list_argminmax() -> None: - s = pl.Series("a", [[1, 2], [3, 2, 1]]) - expected = pl.Series("a", [0, 2], dtype=pl.UInt32) - assert_series_equal(s.arr.arg_min(), expected) - expected = pl.Series("a", [1, 0], dtype=pl.UInt32) - assert_series_equal(s.arr.arg_max(), expected) - - -def test_list_shift() -> None: - s = pl.Series("a", [[1, 2], [3, 2, 1]]) - expected = pl.Series("a", [[None, 1], [None, 3, 2]]) - assert s.arr.shift().to_list() == expected.to_list() - - -def test_list_diff() -> None: - s = pl.Series("a", [[1, 2], [10, 2, 1]]) - expected = pl.Series("a", [[None, 1], [None, -8, -1]]) - assert s.arr.diff().to_list() == expected.to_list() - - -def test_slice() -> None: - vals = [[1, 2, 3, 4], [10, 2, 1]] - s = pl.Series("a", vals) - assert s.arr.head(2).to_list() == [[1, 2], [10, 2]] - assert s.arr.tail(2).to_list() == [[3, 4], [2, 1]] - assert s.arr.tail(200).to_list() == vals - assert s.arr.head(200).to_list() == vals - assert s.arr.slice(1, 2).to_list() == [[2, 3], [2, 1]] - - def test_cast_inner() -> None: a = pl.Series([[1, 2]]) for t in [bool, pl.Boolean]: @@ -284,36 +144,6 @@ def test_cast_inner() -> None: ) -def test_list_eval_dtype_inference() -> None: - grades = pl.DataFrame( - { - "student": ["bas", "laura", "tim", "jenny"], - "arithmetic": [10, 5, 6, 8], - "biology": [4, 6, 2, 7], - "geography": [8, 4, 9, 7], - } - ) - - rank_pct = pl.col("").rank(reverse=True) / pl.col("").count().cast(pl.UInt16) - - # the .arr.first() would fail if .arr.eval did not correctly infer the output type - assert grades.with_columns( - pl.concat_list(pl.all().exclude("student")).alias("all_grades") - ).select( - [ - pl.col("all_grades") - .arr.eval(rank_pct, parallel=True) - .alias("grades_rank") - .arr.first() - ] - ).to_series().to_list() == [ - 0.3333333432674408, - 0.6666666865348816, - 0.6666666865348816, - 0.3333333432674408, - ] - - def test_list_empty_groupby_result_3521() -> None: # Create a left relation where the join column contains a null value left = pl.DataFrame().with_columns( @@ -374,37 +204,6 @@ def test_empty_list_construction() -> None: assert df.rows() == [] -def test_list_ternary_concat() -> None: - df = pl.DataFrame( - { - "list1": [["123", "456"], None], - "list2": [["789"], ["zzz"]], - } - ) - - assert df.with_columns( - pl.when(pl.col("list1").is_null()) - .then(pl.col("list1").arr.concat(pl.col("list2"))) - .otherwise(pl.col("list2")) - .alias("result") - ).to_dict(False) == { - "list1": [["123", "456"], None], - "list2": [["789"], ["zzz"]], - "result": [["789"], None], - } - - assert df.with_columns( - pl.when(pl.col("list1").is_null()) - .then(pl.col("list2")) - .otherwise(pl.col("list1").arr.concat(pl.col("list2"))) - .alias("result") - ).to_dict(False) == { - "list1": [["123", "456"], None], - "list2": [["789"], ["zzz"]], - "result": [["123", "456", "789"], ["zzz"]], - } - - def test_list_concat_nulls() -> None: assert pl.DataFrame( { @@ -436,17 +235,6 @@ def test_list_hash() -> None: assert out[0, "b"] == out[2, "b"] -def test_arr_contains_categorical() -> None: - df = pl.DataFrame( - {"str": ["A", "B", "A", "B", "C"], "group": [1, 1, 2, 1, 2]} - ).lazy() - df = df.with_columns(pl.col("str").cast(pl.Categorical)) - df_groups = df.groupby("group").agg([pl.col("str").alias("str_list")]) - assert df_groups.filter(pl.col("str_list").arr.contains("C")).collect().to_dict( - False - ) == {"group": [2], "str_list": [["A", "C"]]} - - def test_list_diagonal_concat() -> None: df1 = pl.DataFrame({"a": [1, 2]}) @@ -458,23 +246,6 @@ def test_list_diagonal_concat() -> None: } -def test_list_eval_type_coercion() -> None: - last_non_null_value = pl.element().fill_null(3).last() - df = pl.DataFrame( - { - "array_cols": [[1, None]], - } - ) - - assert df.select( - [ - pl.col("array_cols") - .arr.eval(last_non_null_value, parallel=False) - .alias("col_last") - ] - ).to_dict(False) == {"col_last": [[3]]} - - def test_is_in_empty_list_4559() -> None: assert pl.Series(["a"]).is_in([]).to_list() == [False] @@ -514,60 +285,6 @@ def test_groupby_list_column() -> None: } -def test_list_slice() -> None: - df = pl.DataFrame( - { - "lst": [[1, 2, 3, 4], [10, 2, 1]], - "offset": [1, 2], - "len": [3, 2], - } - ) - - assert df.select([pl.col("lst").arr.slice("offset", "len")]).to_dict(False) == { - "lst": [[2, 3, 4], [1]] - } - assert df.select([pl.col("lst").arr.slice("offset", 1)]).to_dict(False) == { - "lst": [[2], [1]] - } - assert df.select([pl.col("lst").arr.slice(-2, "len")]).to_dict(False) == { - "lst": [[3, 4], [2, 1]] - } - - -@typing.no_type_check -def test_list_sliced_get_5186() -> None: - n = 30 - df = pl.from_dict( - { - "ind": pl.arange(0, n, eager=True), - "inds": np.stack([np.arange(n), -np.arange(n)], axis=-1), - } - ) - - exprs = [ - "ind", - pl.col("inds").arr.first().alias("first_element"), - pl.col("inds").arr.last().alias("last_element"), - ] - out1 = df.select(exprs)[10:20] - out2 = df[10:20].select(exprs) - assert_frame_equal(out1, out2) - - -def test_empty_eval_dtype_5546() -> None: - df = pl.DataFrame([{"a": [{"name": 1}, {"name": 2}]}]) - - dtype = df.dtypes[0] - - assert ( - df.limit(0).with_columns( - pl.col("a") - .arr.eval(pl.element().filter(pl.first().struct.field("name") == 1)) - .alias("a_filtered") - ) - ).dtypes == [dtype, dtype] - - def test_fast_explode_flag() -> None: df1 = pl.DataFrame({"values": [[[1, 2]]]}) assert df1.clone().vstack(df1)["values"].flags["FAST_EXPLODE"] @@ -583,63 +300,6 @@ def test_fast_explode_flag() -> None: ] -def test_list_amortized_apply_explode_5812() -> None: - s = pl.Series([None, [1, 3], [0, -3], [1, 2, 2]]) - assert s.arr.sum().to_list() == [None, 4, -3, 5] - assert s.arr.min().to_list() == [None, 1, -3, 1] - assert s.arr.max().to_list() == [None, 3, 0, 2] - assert s.arr.arg_min().to_list() == [None, 0, 1, 0] - assert s.arr.arg_max().to_list() == [None, 1, 0, 1] - - -def test_list_slice_5866() -> None: - vals = [[1, 2, 3, 4], [10, 2, 1]] - s = pl.Series("a", vals) - assert s.arr.slice(1).to_list() == [[2, 3, 4], [2, 1]] - - -def test_list_take() -> None: - s = pl.Series("a", [[1, 2, 3], [4, 5], [6, 7, 8]]) - # mypy: we make it work, but idomatic is `arr.get`. - assert s.arr.take(0).to_list() == [[1], [4], [6]] # type: ignore[arg-type] - assert s.arr.take([0, 1]).to_list() == [[1, 2], [4, 5], [6, 7]] - - assert s.arr.take([-1, 1]).to_list() == [[3, 2], [5, 5], [8, 7]] - - # use another list to make sure negative indices are respected - taker = pl.Series([[-1, 1], [-1, 1], [-1, -2]]) - assert s.arr.take(taker).to_list() == [[3, 2], [5, 5], [8, 7]] - with pytest.raises(pl.ComputeError, match=r"Take indices are out of bounds"): - s.arr.take([1, 2]) - s = pl.Series( - [["A", "B", "C"], ["A"], ["B"], ["1", "2"], ["e"]], - ) - - assert s.arr.take([0, 2], null_on_oob=True).to_list() == [ - ["A", "C"], - ["A", None], - ["B", None], - ["1", None], - ["e", None], - ] - assert s.arr.take([0, 1, 2], null_on_oob=True).to_list() == [ - ["A", "B", "C"], - ["A", None, None], - ["B", None, None], - ["1", "2", None], - ["e", None, None], - ] - s = pl.Series([[42, 1, 2], [5, 6, 7]]) - - with pytest.raises(pl.ComputeError, match=r"Take indices are out of bounds"): - s.arr.take([[0, 1, 2, 3], [0, 1, 2, 3]]) - - assert s.arr.take([0, 1, 2, 3], null_on_oob=True).to_list() == [ - [42, 1, 2, None], - [5, 6, 7, None], - ] - - def test_fast_explode_on_list_struct_6208() -> None: data = [ { @@ -700,38 +360,6 @@ def test_concat_list_in_agg_6397() -> None: } -def test_list_eval_all_null() -> None: - df = pl.DataFrame({"foo": [1, 2, 3], "bar": [None, None, None]}).with_columns( - pl.col("bar").cast(pl.List(pl.Utf8)) - ) - - assert df.select(pl.col("bar").arr.eval(pl.element())).to_dict(False) == { - "bar": [None, None, None] - } - - -def test_list_function_group_awareness() -> None: - df = pl.DataFrame( - { - "a": [100, 103, 105, 106, 105, 104, 103, 106, 100, 102], - "group": [0, 0, 1, 1, 1, 1, 1, 1, 2, 2], - } - ) - - assert df.groupby("group").agg( - [ - pl.col("a").list().arr.get(0).alias("get"), - pl.col("a").list().arr.take([0]).alias("take"), - pl.col("a").list().arr.slice(0, 3).alias("slice"), - ] - ).sort("group").to_dict(False) == { - "group": [0, 1, 2], - "get": [[100], [105], [100]], - "take": [[[100]], [[105]], [[100]]], - "slice": [[[100, 103]], [[105, 106, 105]], [[100, 102]]], - } - - def test_flat_aggregation_to_list_conversion_6918() -> None: df = pl.DataFrame({"a": [1, 2, 2], "b": [[0, 1], [2, 3], [4, 5]]}) From 694cdf99e858d0dbb56e56a426ff52d93cbffee8 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 17 Feb 2023 12:32:13 +0100 Subject: [PATCH 6/8] Datatypes folder --- py-polars/tests/unit/datatypes/__init__.py | 1 + py-polars/tests/unit/{ => datatypes}/test_bool.py | 0 py-polars/tests/unit/{ => datatypes}/test_categorical.py | 0 .../tests/unit/{test_lists.py => datatypes/test_list.py} | 0 py-polars/tests/unit/{ => datatypes}/test_object.py | 0 py-polars/tests/unit/{ => datatypes}/test_struct.py | 0 py-polars/tests/unit/{ => datatypes}/test_temporal.py | 0 py-polars/tests/unit/io/__init__.py | 7 ++++++- 8 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 py-polars/tests/unit/datatypes/__init__.py rename py-polars/tests/unit/{ => datatypes}/test_bool.py (100%) rename py-polars/tests/unit/{ => datatypes}/test_categorical.py (100%) rename py-polars/tests/unit/{test_lists.py => datatypes/test_list.py} (100%) rename py-polars/tests/unit/{ => datatypes}/test_object.py (100%) rename py-polars/tests/unit/{ => datatypes}/test_struct.py (100%) rename py-polars/tests/unit/{ => datatypes}/test_temporal.py (100%) diff --git a/py-polars/tests/unit/datatypes/__init__.py b/py-polars/tests/unit/datatypes/__init__.py new file mode 100644 index 000000000000..1a7dfc2793d2 --- /dev/null +++ b/py-polars/tests/unit/datatypes/__init__.py @@ -0,0 +1 @@ +"""Test module for testing behaviour of specific data types in various operations.""" diff --git a/py-polars/tests/unit/test_bool.py b/py-polars/tests/unit/datatypes/test_bool.py similarity index 100% rename from py-polars/tests/unit/test_bool.py rename to py-polars/tests/unit/datatypes/test_bool.py diff --git a/py-polars/tests/unit/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py similarity index 100% rename from py-polars/tests/unit/test_categorical.py rename to py-polars/tests/unit/datatypes/test_categorical.py diff --git a/py-polars/tests/unit/test_lists.py b/py-polars/tests/unit/datatypes/test_list.py similarity index 100% rename from py-polars/tests/unit/test_lists.py rename to py-polars/tests/unit/datatypes/test_list.py diff --git a/py-polars/tests/unit/test_object.py b/py-polars/tests/unit/datatypes/test_object.py similarity index 100% rename from py-polars/tests/unit/test_object.py rename to py-polars/tests/unit/datatypes/test_object.py diff --git a/py-polars/tests/unit/test_struct.py b/py-polars/tests/unit/datatypes/test_struct.py similarity index 100% rename from py-polars/tests/unit/test_struct.py rename to py-polars/tests/unit/datatypes/test_struct.py diff --git a/py-polars/tests/unit/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py similarity index 100% rename from py-polars/tests/unit/test_temporal.py rename to py-polars/tests/unit/datatypes/test_temporal.py diff --git a/py-polars/tests/unit/io/__init__.py b/py-polars/tests/unit/io/__init__.py index 6657aa68ec3e..3f77a4ecb7e6 100644 --- a/py-polars/tests/unit/io/__init__.py +++ b/py-polars/tests/unit/io/__init__.py @@ -1 +1,6 @@ -"""Test module containing tests for all input/output methods.""" +""" +Test module containing tests for all input/output methods. + +Note that tests should never persistently change the state on disk - all tests use a +temporary directory when testing write functionality. +""" From 4337eb3eb7821dda2fb10e508d883bb37d1e7363 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 17 Feb 2023 12:41:42 +0100 Subject: [PATCH 7/8] Operations folder --- py-polars/tests/unit/operations/__init__.py | 1 + py-polars/tests/unit/{ => operations}/test_aggregations.py | 0 py-polars/tests/unit/{ => operations}/test_apply.py | 0 py-polars/tests/unit/{ => operations}/test_arithmetic.py | 0 py-polars/tests/unit/{ => operations}/test_comparison.py | 0 py-polars/tests/unit/{ => operations}/test_drop.py | 0 py-polars/tests/unit/{ => operations}/test_explode.py | 0 py-polars/tests/unit/{ => operations}/test_filter.py | 0 py-polars/tests/unit/{ => operations}/test_folds.py | 0 py-polars/tests/unit/{ => operations}/test_groupby.py | 0 py-polars/tests/unit/{ => operations}/test_joins.py | 0 py-polars/tests/unit/{ => operations}/test_pivot.py | 0 py-polars/tests/unit/{ => operations}/test_rolling.py | 0 py-polars/tests/unit/{ => operations}/test_sort.py | 0 py-polars/tests/unit/{ => operations}/test_window.py | 0 15 files changed, 1 insertion(+) create mode 100644 py-polars/tests/unit/operations/__init__.py rename py-polars/tests/unit/{ => operations}/test_aggregations.py (100%) rename py-polars/tests/unit/{ => operations}/test_apply.py (100%) rename py-polars/tests/unit/{ => operations}/test_arithmetic.py (100%) rename py-polars/tests/unit/{ => operations}/test_comparison.py (100%) rename py-polars/tests/unit/{ => operations}/test_drop.py (100%) rename py-polars/tests/unit/{ => operations}/test_explode.py (100%) rename py-polars/tests/unit/{ => operations}/test_filter.py (100%) rename py-polars/tests/unit/{ => operations}/test_folds.py (100%) rename py-polars/tests/unit/{ => operations}/test_groupby.py (100%) rename py-polars/tests/unit/{ => operations}/test_joins.py (100%) rename py-polars/tests/unit/{ => operations}/test_pivot.py (100%) rename py-polars/tests/unit/{ => operations}/test_rolling.py (100%) rename py-polars/tests/unit/{ => operations}/test_sort.py (100%) rename py-polars/tests/unit/{ => operations}/test_window.py (100%) diff --git a/py-polars/tests/unit/operations/__init__.py b/py-polars/tests/unit/operations/__init__.py new file mode 100644 index 000000000000..2560f5ab9e7a --- /dev/null +++ b/py-polars/tests/unit/operations/__init__.py @@ -0,0 +1 @@ +"""Test module for extensive testing of specific operations like join or explode.""" diff --git a/py-polars/tests/unit/test_aggregations.py b/py-polars/tests/unit/operations/test_aggregations.py similarity index 100% rename from py-polars/tests/unit/test_aggregations.py rename to py-polars/tests/unit/operations/test_aggregations.py diff --git a/py-polars/tests/unit/test_apply.py b/py-polars/tests/unit/operations/test_apply.py similarity index 100% rename from py-polars/tests/unit/test_apply.py rename to py-polars/tests/unit/operations/test_apply.py diff --git a/py-polars/tests/unit/test_arithmetic.py b/py-polars/tests/unit/operations/test_arithmetic.py similarity index 100% rename from py-polars/tests/unit/test_arithmetic.py rename to py-polars/tests/unit/operations/test_arithmetic.py diff --git a/py-polars/tests/unit/test_comparison.py b/py-polars/tests/unit/operations/test_comparison.py similarity index 100% rename from py-polars/tests/unit/test_comparison.py rename to py-polars/tests/unit/operations/test_comparison.py diff --git a/py-polars/tests/unit/test_drop.py b/py-polars/tests/unit/operations/test_drop.py similarity index 100% rename from py-polars/tests/unit/test_drop.py rename to py-polars/tests/unit/operations/test_drop.py diff --git a/py-polars/tests/unit/test_explode.py b/py-polars/tests/unit/operations/test_explode.py similarity index 100% rename from py-polars/tests/unit/test_explode.py rename to py-polars/tests/unit/operations/test_explode.py diff --git a/py-polars/tests/unit/test_filter.py b/py-polars/tests/unit/operations/test_filter.py similarity index 100% rename from py-polars/tests/unit/test_filter.py rename to py-polars/tests/unit/operations/test_filter.py diff --git a/py-polars/tests/unit/test_folds.py b/py-polars/tests/unit/operations/test_folds.py similarity index 100% rename from py-polars/tests/unit/test_folds.py rename to py-polars/tests/unit/operations/test_folds.py diff --git a/py-polars/tests/unit/test_groupby.py b/py-polars/tests/unit/operations/test_groupby.py similarity index 100% rename from py-polars/tests/unit/test_groupby.py rename to py-polars/tests/unit/operations/test_groupby.py diff --git a/py-polars/tests/unit/test_joins.py b/py-polars/tests/unit/operations/test_joins.py similarity index 100% rename from py-polars/tests/unit/test_joins.py rename to py-polars/tests/unit/operations/test_joins.py diff --git a/py-polars/tests/unit/test_pivot.py b/py-polars/tests/unit/operations/test_pivot.py similarity index 100% rename from py-polars/tests/unit/test_pivot.py rename to py-polars/tests/unit/operations/test_pivot.py diff --git a/py-polars/tests/unit/test_rolling.py b/py-polars/tests/unit/operations/test_rolling.py similarity index 100% rename from py-polars/tests/unit/test_rolling.py rename to py-polars/tests/unit/operations/test_rolling.py diff --git a/py-polars/tests/unit/test_sort.py b/py-polars/tests/unit/operations/test_sort.py similarity index 100% rename from py-polars/tests/unit/test_sort.py rename to py-polars/tests/unit/operations/test_sort.py diff --git a/py-polars/tests/unit/test_window.py b/py-polars/tests/unit/operations/test_window.py similarity index 100% rename from py-polars/tests/unit/test_window.py rename to py-polars/tests/unit/operations/test_window.py From 581465740199ff82e8d4775dec759cd2956f9cca Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 17 Feb 2023 12:49:34 +0100 Subject: [PATCH 8/8] Utils folder --- py-polars/tests/unit/utils/__init__.py | 1 + py-polars/tests/unit/{ => utils}/test_build_info.py | 0 py-polars/tests/unit/{ => utils}/test_show_versions.py | 0 py-polars/tests/unit/{ => utils}/test_utils.py | 0 4 files changed, 1 insertion(+) create mode 100644 py-polars/tests/unit/utils/__init__.py rename py-polars/tests/unit/{ => utils}/test_build_info.py (100%) rename py-polars/tests/unit/{ => utils}/test_show_versions.py (100%) rename py-polars/tests/unit/{ => utils}/test_utils.py (100%) diff --git a/py-polars/tests/unit/utils/__init__.py b/py-polars/tests/unit/utils/__init__.py new file mode 100644 index 000000000000..f27b760953b4 --- /dev/null +++ b/py-polars/tests/unit/utils/__init__.py @@ -0,0 +1 @@ +"""Test module for utility functions.""" diff --git a/py-polars/tests/unit/test_build_info.py b/py-polars/tests/unit/utils/test_build_info.py similarity index 100% rename from py-polars/tests/unit/test_build_info.py rename to py-polars/tests/unit/utils/test_build_info.py diff --git a/py-polars/tests/unit/test_show_versions.py b/py-polars/tests/unit/utils/test_show_versions.py similarity index 100% rename from py-polars/tests/unit/test_show_versions.py rename to py-polars/tests/unit/utils/test_show_versions.py diff --git a/py-polars/tests/unit/test_utils.py b/py-polars/tests/unit/utils/test_utils.py similarity index 100% rename from py-polars/tests/unit/test_utils.py rename to py-polars/tests/unit/utils/test_utils.py