Skip to content

Commit

Permalink
test(python): don't run tests that write to disk by default (#7321)
Browse files · Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Mar 3, 2023
1 parent acec4c1 commit 07c7785
Show file tree
Hide file tree
Showing 11 changed files with 36 additions and 1 deletion.
3 changes: 2 additions & 1 deletion py-polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,10 @@ addopts = [
"--strict-markers",
"--import-mode=importlib",
# Default to running fast tests only. To run ALL tests, run: pytest -m ""
"-m not slow and not hypothesis and not benchmark",
"-m not slow and not hypothesis and not benchmark and not write_disk",
]
markers = [
"write_disk: Tests that write to disk.",
"slow: Tests with a longer than average runtime.",
"benchmark: Tests that should be run on a Polars release build.",
]
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/io/test_avro.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def test_from_to_buffer(example_df: pl.DataFrame, compression: AvroCompression)
assert_frame_equal(example_df, read_df)


@pytest.mark.write_disk()
@pytest.mark.parametrize("compression", COMPRESSIONS)
def test_from_to_file(example_df: pl.DataFrame, compression: AvroCompression) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
Expand Down
4 changes: 4 additions & 0 deletions py-polars/tests/unit/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def test_to_from_buffer(df_no_lists: pl.DataFrame) -> None:
assert_frame_equal_local_categoricals(df.select(["time", "cat"]), read_df)


@pytest.mark.write_disk()
def test_to_from_file(df_no_lists: pl.DataFrame) -> None:
df = df_no_lists.drop("strings_nulls")

Expand Down Expand Up @@ -365,6 +366,7 @@ def test_read_csv_buffer_ownership() -> None:
assert buf.read() == bts


@pytest.mark.write_disk()
def test_read_csv_encoding() -> None:
bts = (
b"Value1,Value2,Value3,Value4,Region\n"
Expand Down Expand Up @@ -747,6 +749,7 @@ def test_csv_string_escaping() -> None:
assert_frame_equal(df_read, df)


@pytest.mark.write_disk()
def test_glob_csv(df_no_lists: pl.DataFrame) -> None:
df = df_no_lists.drop("strings_nulls")
with tempfile.TemporaryDirectory() as temp_dir:
Expand Down Expand Up @@ -1171,6 +1174,7 @@ def test_csv_statistics_offset() -> None:
assert pl.read_csv(io.StringIO(csv), n_rows=N).height == 4999


@pytest.mark.write_disk()
@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
def test_csv_scan_categorical() -> None:
N = 5_000
Expand Down
2 changes: 2 additions & 0 deletions py-polars/tests/unit/io/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def create_temp_sqlite_db(test_db: str) -> None:
conn.close()


@pytest.mark.write_disk()
@pytest.mark.parametrize(
("engine", "expected_dtypes", "expected_dates"),
[
Expand Down Expand Up @@ -150,6 +151,7 @@ def test_read_database_exceptions(
)


@pytest.mark.write_disk()
@pytest.mark.parametrize(
("engine", "mode"),
[
Expand Down
4 changes: 4 additions & 0 deletions py-polars/tests/unit/io/test_ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def test_from_to_buffer(df: pl.DataFrame, compression: IpcCompression) -> None:
@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
@pytest.mark.parametrize("compression", COMPRESSIONS)
@pytest.mark.parametrize("path_type", [str, Path])
@pytest.mark.write_disk()
def test_from_to_file(
df: pl.DataFrame, compression: IpcCompression, path_type: type[str] | type[Path]
) -> None:
Expand All @@ -42,6 +43,7 @@ def test_from_to_file(
assert_frame_equal_local_categoricals(df, df_read)


@pytest.mark.write_disk()
@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
def test_select_columns_from_file(df: pl.DataFrame) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
Expand Down Expand Up @@ -100,6 +102,7 @@ def test_ipc_schema(compression: IpcCompression) -> None:
assert pl.read_ipc_schema(f) == expected


@pytest.mark.write_disk()
@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
@pytest.mark.parametrize("compression", COMPRESSIONS)
@pytest.mark.parametrize("path_type", [str, Path])
Expand Down Expand Up @@ -148,6 +151,7 @@ def test_ipc_column_order() -> None:
assert pl.read_ipc(f, columns=columns).columns == columns


@pytest.mark.write_disk()
@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
def test_glob_ipc(df: pl.DataFrame) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/io/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def test_to_from_buffer(df: pl.DataFrame, buf: io.IOBase) -> None:
assert_frame_equal_local_categoricals(df, read_df)


@pytest.mark.write_disk()
def test_to_from_file(df: pl.DataFrame) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
file_path = Path(temp_dir) / "small.json"
Expand Down
2 changes: 2 additions & 0 deletions py-polars/tests/unit/io/test_lazy_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def test_scan_empty_csv(io_files_path: Path) -> None:
assert "empty csv" in str(excinfo.value)


@pytest.mark.write_disk()
def test_invalid_utf8() -> None:
np.random.seed(1)
bts = bytes(np.random.randint(0, 255, 200))
Expand Down Expand Up @@ -102,6 +103,7 @@ def test_scan_slice_streaming(foods_file_path: Path) -> None:
assert df.shape == (5, 4)


@pytest.mark.write_disk()
def test_glob_skip_rows() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
for i in range(2):
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/io/test_lazy_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def test_scan_ndjson(foods_ndjson_path: Path) -> None:
assert df["foo"].to_list() == [10, 16, 21, 23, 24, 30, 35]


@pytest.mark.write_disk()
def test_scan_with_projection() -> None:
json = r"""
{"text": "\"hello", "id": 1}
Expand Down
10 changes: 10 additions & 0 deletions py-polars/tests/unit/io/test_lazy_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def test_row_count(foods_parquet_path: Path) -> None:
assert df["foo"].to_list() == [10, 16, 21, 23, 24, 30, 35]


@pytest.mark.write_disk()
def test_categorical_parquet_statistics() -> None:
df = pl.DataFrame(
{
Expand Down Expand Up @@ -89,6 +90,7 @@ def test_categorical_parquet_statistics() -> None:
assert df.shape == (4, 3)


@pytest.mark.write_disk()
def test_null_parquet() -> None:
df = pl.DataFrame([pl.Series("foo", [], dtype=pl.Int8)])
with tempfile.TemporaryDirectory() as temp_dir:
Expand All @@ -98,6 +100,7 @@ def test_null_parquet() -> None:
assert_frame_equal(out, df)


@pytest.mark.write_disk()
def test_parquet_eq_stats() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
file_path = Path(temp_dir) / "stats.parquet"
Expand All @@ -119,6 +122,7 @@ def test_parquet_eq_stats() -> None:
)


@pytest.mark.write_disk()
def test_parquet_is_in_stats() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
file_path = Path(temp_dir) / "stats.parquet"
Expand Down Expand Up @@ -157,6 +161,7 @@ def test_parquet_is_in_stats() -> None:
).collect().shape == (8, 1)


@pytest.mark.write_disk()
def test_parquet_stats() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
file_path = Path(temp_dir) / "binary_stats.parquet"
Expand Down Expand Up @@ -198,6 +203,7 @@ def test_row_count_schema(parquet_file_path: Path) -> None:
).dtypes == [pl.UInt32, pl.Utf8]


@pytest.mark.write_disk()
def test_parquet_eq_statistics(monkeypatch: Any, capfd: Any) -> None:
monkeypatch.setenv("POLARS_VERBOSE", "1")

Expand Down Expand Up @@ -234,6 +240,7 @@ def test_parquet_eq_statistics(monkeypatch: Any, capfd: Any) -> None:
)


@pytest.mark.write_disk()
def test_parquet_is_in_statistics(monkeypatch: Any, capfd: Any) -> None:
monkeypatch.setenv("POLARS_VERBOSE", "1")

Expand Down Expand Up @@ -269,6 +276,7 @@ def test_parquet_is_in_statistics(monkeypatch: Any, capfd: Any) -> None:
)


@pytest.mark.write_disk()
def test_parquet_statistics(monkeypatch: Any, capfd: Any) -> None:
monkeypatch.setenv("POLARS_VERBOSE", "1")

Expand Down Expand Up @@ -304,6 +312,7 @@ def test_parquet_statistics(monkeypatch: Any, capfd: Any) -> None:


@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
@pytest.mark.write_disk()
def test_streaming_categorical() -> None:
df = pl.DataFrame(
[
Expand Down Expand Up @@ -331,6 +340,7 @@ def test_streaming_categorical() -> None:


@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
@pytest.mark.write_disk()
def test_parquet_struct_categorical() -> None:
df = pl.DataFrame(
[
Expand Down
8 changes: 8 additions & 0 deletions py-polars/tests/unit/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def test_to_from_buffer_lzo(df: pl.DataFrame) -> None:
_ = pl.read_parquet(buf)


@pytest.mark.write_disk()
@pytest.mark.parametrize("compression", COMPRESSIONS)
def test_to_from_file(df: pl.DataFrame, compression: ParquetCompression) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
Expand All @@ -77,6 +78,7 @@ def test_to_from_file(df: pl.DataFrame, compression: ParquetCompression) -> None
assert_frame_equal_local_categoricals(df, read_df)


@pytest.mark.write_disk()
def test_to_from_file_lzo(df: pl.DataFrame) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
file_path = Path(temp_dir) / "small.avro"
Expand Down Expand Up @@ -159,6 +161,7 @@ def test_nested_parquet() -> None:
assert isinstance(read.dtypes[0].inner, pl.datatypes.Struct)


@pytest.mark.write_disk()
def test_glob_parquet(df: pl.DataFrame) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
file_path = Path(temp_dir) / "small.parquet"
Expand All @@ -169,6 +172,7 @@ def test_glob_parquet(df: pl.DataFrame) -> None:
assert pl.scan_parquet(path_glob).collect().shape == (3, 16)


@pytest.mark.write_disk()
def test_streaming_parquet_glob_5900(df: pl.DataFrame) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
file_path = Path(temp_dir) / "small.parquet"
Expand Down Expand Up @@ -203,6 +207,7 @@ def test_chunked_round_trip() -> None:
assert_frame_equal(pl.read_parquet(f), df)


@pytest.mark.write_disk()
def test_lazy_self_join_file_cache_prop_3979(df: pl.DataFrame) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
file_path = Path(temp_dir) / "small.parquet"
Expand Down Expand Up @@ -351,6 +356,7 @@ def test_parquet_nested_dictionaries_6217() -> None:
assert_frame_equal(read, df)


@pytest.mark.write_disk()
@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
def test_sink_parquet(io_files_path: Path) -> None:
file = io_files_path / "small.parquet"
Expand All @@ -367,6 +373,7 @@ def test_sink_parquet(io_files_path: Path) -> None:
assert_frame_equal(result, df_read)


@pytest.mark.write_disk()
@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
def test_sink_ipc(io_files_path: Path) -> None:
file = io_files_path / "small.parquet"
Expand All @@ -383,6 +390,7 @@ def test_sink_ipc(io_files_path: Path) -> None:
assert_frame_equal(result, df_read)


@pytest.mark.write_disk()
@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
def test_fetch_union() -> None:
df1 = pl.DataFrame({"a": [0, 1, 2], "b": [1, 2, 3]})
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/io/test_pyarrow_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def helper_dataset_test(file_path: Path, query) -> None:
assert_frame_equal(out, expected)


@pytest.mark.write_disk()
@pytest.mark.xfail(sys.platform == "win32", reason="Does not work on Windows")
def test_dataset(df: pl.DataFrame) -> None:
with tempfile.TemporaryDirectory() as temp_dir:
Expand Down

0 comments on commit 07c7785

Please sign in to comment.