Skip to content

Commit

Permalink
test(python): Use pytest marker for slow tests (#6642)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Feb 3, 2023
1 parent 5ba53a4 commit 66dae4d
Show file tree
Hide file tree
Showing 15 changed files with 128 additions and 156 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test-python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ jobs:
maturin develop
- name: Run tests and report coverage
run: pytest --cov --numprocesses=auto
run: pytest --cov -n auto -m "slow or not slow"

- name: Run doctests
run: python tests/docs/run_doc_examples.py
Expand Down Expand Up @@ -129,7 +129,7 @@ jobs:
pip install target/wheels/polars-*.whl
- name: Run tests
run: pytest --numprocesses=auto
run: pytest -n auto -m "slow or not slow"

- name: Check import without optional dependencies
run: |
Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions py-polars/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,20 @@ pre-commit: fmt clippy ## Run all code quality checks

.PHONY: test
test: venv build ## Run fast unittests
$(VENV_BIN)/pytest tests/unit/ --numprocesses=auto
$(VENV_BIN)/pytest -n auto

.PHONY: doctest
doctest: venv build ## Run doctests
$(VENV_BIN)/python tests/docs/run_doc_examples.py

.PHONY: test-all
test-all: venv build ## Run all tests
$(VENV_BIN)/pytest --numprocesses=auto
$(VENV_BIN)/pytest -n auto -m "slow or not slow"
$(VENV_BIN)/python tests/docs/run_doc_examples.py

.PHONY: coverage
coverage: venv build ## Run tests and report coverage
$(VENV_BIN)/pytest --cov --numprocesses=auto
$(VENV_BIN)/pytest --cov -n auto -m "slow or not slow"

.PHONY: clean
clean: ## Clean up caches and build artifacts
Expand Down
4 changes: 4 additions & 0 deletions py-polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ warn_return_any = false
[tool.pytest.ini_options]
addopts = [
"--import-mode=importlib",
"-m not slow and not hypothesis", # Default to running fast tests only. To run ALL tests, run: pytest -m ""
]
markers = [
"slow: Tests with a longer than average runtime.",
]

[tool.coverage.run]
Expand Down
Empty file removed py-polars/tests/slow/__init__.py
Empty file.
14 changes: 0 additions & 14 deletions py-polars/tests/slow/conftest.py

This file was deleted.

7 changes: 0 additions & 7 deletions py-polars/tests/slow/read_csv.py

This file was deleted.

14 changes: 0 additions & 14 deletions py-polars/tests/slow/test_overflow.py

This file was deleted.

50 changes: 0 additions & 50 deletions py-polars/tests/slow/test_parquet.py

This file was deleted.

32 changes: 0 additions & 32 deletions py-polars/tests/slow/test_streaming.py

This file was deleted.

7 changes: 7 additions & 0 deletions py-polars/tests/unit/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,3 +1126,10 @@ def test_read_csv_chunked() -> None:

# The next value should always be higher if monotonically increasing.
assert df.filter(pl.col("count") < pl.col("count").shift(1)).is_empty()


@pytest.mark.slow()
def test_read_web_file() -> None:
    """Read a CSV file straight from a remote URL (requires network access)."""
    # NOTE(review): the host read "raw.githubusercontent.com", a scrape-mangled mirror
    # domain; restored the canonical GitHub raw-content host that serves the
    # polars example datasets.
    url = "https://raw.githubusercontent.com/pola-rs/polars/master/examples/datasets/foods1.csv"
    df = pl.read_csv(url)
    # foods1.csv ships with 27 rows and 4 columns.
    assert df.shape == (27, 4)
39 changes: 39 additions & 0 deletions py-polars/tests/unit/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
from pathlib import Path
from typing import TYPE_CHECKING

import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.dataset as ds
import pyarrow.parquet as pq
import pytest

Expand Down Expand Up @@ -402,3 +404,40 @@ def test_fetch_union() -> None:

expected = pl.DataFrame({"a": [0, 3], "b": [1, 4]})
assert_frame_equal(result_glob, expected)


@pytest.mark.slow()
@typing.no_type_check
@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
def test_struct_pyarrow_dataset_5796() -> None:
    """Round-trip a struct column through a pyarrow dataset (issue #5796).

    Uses 2**17 + 1 rows so the frame crosses a chunk-size boundary.
    """
    num_rows = 2**17 + 1

    # Dict literals instead of dict() calls (flake8-comprehensions C408),
    # which also makes the `# noqa: C408` suppression unnecessary.
    df = pl.from_records([{"id": i, "nested": {"a": i}} for i in range(num_rows)])
    with tempfile.TemporaryDirectory() as temp_dir:
        file_path = Path(temp_dir) / "out.parquet"
        df.write_parquet(file_path, use_pyarrow=True)
        tbl = ds.dataset(file_path).to_table()
        result = pl.from_arrow(tbl)

    # Reading via a pyarrow dataset must reproduce the original frame exactly.
    assert_frame_equal(result, df)


@pytest.mark.slow()
@pytest.mark.parametrize("case", [1048576, 1048577])
def test_parquet_chunks_545(case: int) -> None:
    """Read parquet data at and just past the 2**20-row chunk boundary."""
    # Build a pandas frame with `case` identical rows: one float column and
    # one datetime column.
    pdf = pd.DataFrame(
        np.tile([1.0, pd.to_datetime("2010-10-10")], [case, 1]),
        columns=["floats", "dates"],
    )

    # Serialize to an in-memory parquet buffer and rewind for reading.
    buffer = io.BytesIO()
    pdf.to_parquet(buffer)
    buffer.seek(0)

    # Polars must read back exactly what pandas wrote.
    result = pl.read_parquet(buffer)
    assert_frame_equal(pl.DataFrame(pdf), result)
14 changes: 14 additions & 0 deletions py-polars/tests/unit/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,3 +491,17 @@ def test_groupby_dynamic_iter() -> None:
((2, datetime(2020, 1, 1, 11)), (1, 3)),
]
assert result2 == expected2


@pytest.mark.slow()
@pytest.mark.parametrize("dtype", [pl.Int32, pl.UInt32])
def test_overflow_mean_partitioned_groupby_5194(dtype: pl.PolarsDataType) -> None:
    """Mean over a partitioned groupby must not overflow 32-bit sums (issue #5194).

    Summing 10_000_000 across 50_000 rows per group exceeds the i32/u32 range,
    so a correct implementation must widen before accumulating.
    """
    # Was written as `10_00_00_00` — same value, but a non-standard digit
    # grouping; normalized to conventional thousands grouping.
    df = pl.DataFrame(
        [
            pl.Series("data", [10_000_000] * 100_000, dtype=dtype),
            pl.Series("group", [1, 2] * 50_000, dtype=dtype),
        ]
    )
    assert df.groupby("group").agg(pl.col("data").mean()).sort(by="group").to_dict(
        False
    ) == {"group": [1, 2], "data": [10000000.0, 10000000.0]}
61 changes: 29 additions & 32 deletions py-polars/tests/unit/test_joins.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,38 +74,35 @@ def test_join_same_cat_src() -> None:
}


def test_sorted_merge_joins() -> None:
    """Sorted merge joins must match hash joins for ascending and descending keys."""
    for reverse in [False, True]:
        n = 30
        # Sorted random keys with duplicates (values drawn from [0, n // 2)).
        df_a = pl.DataFrame(
            {"a": np.sort(np.random.randint(0, n // 2, n))}
        ).with_row_count("row_a")

        df_b = pl.DataFrame(
            {"a": np.sort(np.random.randint(0, n // 2, n // 2))}
        ).with_row_count("row_b")

        if reverse:
            # Flip every column so the key column is sorted descending instead.
            df_a = df_a.select(pl.all().reverse())
            df_b = df_b.select(pl.all().reverse())

        join_strategies: list[JoinStrategy] = ["left", "inner"]
        # Exercise each strategy across int, str and float key dtypes.
        for cast_to in [int, str, float]:
            for how in join_strategies:
                df_a_ = df_a.with_columns(pl.col("a").cast(cast_to))
                df_b_ = df_b.with_columns(pl.col("a").cast(cast_to))

                # Baseline: default hash join.
                out_hash_join = df_a_.join(df_b_, on="a", how=how)

                # Marking both sides as sorted selects the merge-join path.
                out_sorted_merge_join = df_a_.with_columns(
                    pl.col("a").set_sorted(reverse)
                ).join(
                    df_b_.with_columns(pl.col("a").set_sorted(reverse)), on="a", how=how
                )

                assert_frame_equal(out_hash_join, out_sorted_merge_join)
@pytest.mark.parametrize("reverse", [False, True])
def test_sorted_merge_joins(reverse: bool) -> None:
    """Sorted merge joins must produce the same result as hash joins."""
    n = 30
    # Sorted random keys with duplicates (values drawn from [0, n // 2)).
    left = pl.DataFrame({"a": np.sort(np.random.randint(0, n // 2, n))}).with_row_count(
        "row_a"
    )
    right = pl.DataFrame(
        {"a": np.sort(np.random.randint(0, n // 2, n // 2))}
    ).with_row_count("row_b")

    if reverse:
        # Flip every column so the key column is sorted descending instead.
        left = left.select(pl.all().reverse())
        right = right.select(pl.all().reverse())

    join_strategies: list[JoinStrategy] = ["left", "inner"]
    # Exercise each strategy across int, str and float key dtypes.
    for cast_to in [int, str, float]:
        for how in join_strategies:
            lhs = left.with_columns(pl.col("a").cast(cast_to))
            rhs = right.with_columns(pl.col("a").cast(cast_to))

            # Baseline: default hash join.
            hash_result = lhs.join(rhs, on="a", how=how)

            # Marking both sides as sorted selects the merge-join path.
            merge_result = lhs.with_columns(pl.col("a").set_sorted(reverse)).join(
                rhs.with_columns(pl.col("a").set_sorted(reverse)), on="a", how=how
            )

            assert_frame_equal(hash_result, merge_result)


def test_join_negative_integers() -> None:
Expand Down
30 changes: 29 additions & 1 deletion py-polars/tests/unit/test_streaming.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import time
from datetime import date
from typing import Any

import numpy as np
import pytest

import polars as pl
from polars.testing import assert_frame_equal
from polars.testing import assert_frame_equal, assert_series_equal


def test_streaming_groupby_types() -> None:
Expand Down Expand Up @@ -215,3 +216,30 @@ def test_streaming_streamable_functions(monkeypatch: Any, capfd: Any) -> None:

(_, err) = capfd.readouterr()
assert "df -> function -> ordered_sink" in err


@pytest.mark.slow()
def test_cross_join_stack() -> None:
    """A streamed cross join followed by head() must short-circuit quickly
    instead of first materializing all matches of a chunk on the stack."""
    a = pl.Series(np.arange(100_000)).to_frame().lazy()
    # perf_counter is a monotonic high-resolution clock; time.time() can jump
    # (e.g. NTP adjustments), which would make the elapsed-time check unreliable.
    t0 = time.perf_counter()
    # this should be instant if directly pushed into sink
    # if not the cross join will first fill the stack with all matches of a single chunk
    assert a.join(a, how="cross").head().collect(streaming=True).shape == (5, 2)
    t1 = time.perf_counter()
    assert (t1 - t0) < 0.5


@pytest.mark.slow()
def test_ooc_sort(monkeypatch: Any) -> None:
    """Forced out-of-core sort must match an ordinary in-memory sort."""
    # Force the out-of-core sort code path regardless of data size.
    monkeypatch.setenv("POLARS_FORCE_OOC_SORT", "1")

    s = pl.arange(0, 100_000, eager=True).rename("idx")
    df = s.shuffle().to_frame()

    # Check both sort directions.
    for reverse in (True, False):
        result = (
            df.lazy().sort("idx", reverse=reverse).collect(streaming=True).to_series()
        )
        assert_series_equal(result, s.sort(reverse=reverse))

0 comments on commit 66dae4d

Please sign in to comment.