Skip to content

Commit

Permalink
fix(python): Handle DataFrame.vstack stacking itself (pola-rs#9895)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Jul 15, 2023
1 parent 5810a1d commit c8a98f9
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 39 deletions.
24 changes: 16 additions & 8 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5811,16 +5811,17 @@ def hstack(
else:
return self._from_pydf(self._df.hstack([s._s for s in columns]))

def vstack(self, df: DataFrame, *, in_place: bool = False) -> Self:
@deprecated_alias(df="other")
def vstack(self, other: DataFrame, *, in_place: bool = False) -> Self:
"""
Grow this DataFrame vertically by stacking a DataFrame to it.
Parameters
----------
df
other
DataFrame to stack.
in_place
Modify in place
Modify in place.
Examples
--------
Expand Down Expand Up @@ -5853,12 +5854,19 @@ def vstack(self, df: DataFrame, *, in_place: bool = False) -> Self:
"""
if in_place:
self._df.vstack_mut(df._df)
return self
else:
return self._from_pydf(self._df.vstack(df._df))
try:
self._df.vstack_mut(other._df)
return self
except RuntimeError as exc:
if str(exc) == "Already mutably borrowed":
self._df.vstack_mut(other._df.clone())
return self
else:
raise exc

return self._from_pydf(self._df.vstack(other._df))

def extend(self, other: Self) -> Self:
def extend(self, other: DataFrame) -> Self:
"""
Extend the memory backed by this `DataFrame` with the values from `other`.
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2358,12 +2358,12 @@ def append(self, other: Series, *, append_chunks: bool = True) -> Series:
self._s.append(other._s)
else:
self._s.extend(other._s)
return self
except RuntimeError as exc:
if str(exc) == "Already mutably borrowed":
self.append(other.clone(), append_chunks=append_chunks)
return self.append(other.clone(), append_chunks=append_chunks)
else:
raise exc
return self

def filter(self, predicate: Series | list[bool]) -> Self:
"""
Expand Down
24 changes: 12 additions & 12 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -940,33 +940,33 @@ impl PyDataFrame {
self.df.width()
}

/// Build a new frame by delegating to the inner frame's `hstack` with the
/// given columns, leaving `self` untouched.
///
/// # Errors
/// Any error reported by the inner `hstack` call is wrapped in
/// `PyPolarsErr` and propagated to the caller.
pub fn hstack(&self, columns: Vec<PySeries>) -> PyResult<Self> {
    let series = columns.to_series();
    let stacked = self.df.hstack(&series).map_err(PyPolarsErr::from)?;
    Ok(stacked.into())
}

/// Mutating counterpart of `hstack`: delegates to the inner frame's
/// `hstack_mut` with the given columns and returns nothing on success.
///
/// # Errors
/// Any error reported by the inner `hstack_mut` call is wrapped in
/// `PyPolarsErr` and propagated to the caller.
pub fn hstack_mut(&mut self, columns: Vec<PySeries>) -> PyResult<()> {
    let series = columns.to_series();
    self.df.hstack_mut(&series).map_err(PyPolarsErr::from)?;
    Ok(())
}

pub fn hstack(&self, columns: Vec<PySeries>) -> PyResult<Self> {
let columns = columns.to_series();
let df = self.df.hstack(&columns).map_err(PyPolarsErr::from)?;
pub fn vstack(&self, other: &PyDataFrame) -> PyResult<Self> {
let df = self.df.vstack(&other.df).map_err(PyPolarsErr::from)?;
Ok(df.into())
}

pub fn extend(&mut self, df: &PyDataFrame) -> PyResult<()> {
self.df.extend(&df.df).map_err(PyPolarsErr::from)?;
pub fn vstack_mut(&mut self, other: &PyDataFrame) -> PyResult<()> {
self.df.vstack_mut(&other.df).map_err(PyPolarsErr::from)?;
Ok(())
}

pub fn vstack_mut(&mut self, df: &PyDataFrame) -> PyResult<()> {
self.df.vstack_mut(&df.df).map_err(PyPolarsErr::from)?;
pub fn extend(&mut self, other: &PyDataFrame) -> PyResult<()> {
self.df.extend(&other.df).map_err(PyPolarsErr::from)?;
Ok(())
}

pub fn vstack(&mut self, df: &PyDataFrame) -> PyResult<Self> {
let df = self.df.vstack(&df.df).map_err(PyPolarsErr::from)?;
Ok(df.into())
}

pub fn drop_in_place(&mut self, name: &str) -> PyResult<PySeries> {
let s = self.df.drop_in_place(name).map_err(PyPolarsErr::from)?;
Ok(PySeries { series: s })
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -704,22 +704,6 @@ def test_hstack_dataframe(in_place: bool) -> None:
assert_frame_equal(df_out, expected)


@pytest.mark.parametrize("in_place", [True, False])
def test_vstack(in_place: bool) -> None:
    """Check ``vstack`` for both the in-place and the returning variant."""
    top = pl.DataFrame({"foo": [1, 2], "bar": [6, 7], "ham": ["a", "b"]})
    bottom = pl.DataFrame({"foo": [3, 4], "bar": [8, 9], "ham": ["c", "d"]})

    expected = pl.DataFrame(
        {"foo": [1, 2, 3, 4], "bar": [6, 7, 8, 9], "ham": ["a", "b", "c", "d"]}
    )

    returned = top.vstack(bottom, in_place=in_place)
    # In-place mode is verified on the receiver itself; otherwise the
    # returned frame is the one that must carry the stacked rows.
    actual = top if in_place else returned
    assert_frame_equal(actual, expected)

def test_extend() -> None:
with pl.StringCache():
df1 = pl.DataFrame(
Expand Down
46 changes: 46 additions & 0 deletions py-polars/tests/unit/dataframe/test_vstack.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pytest

import polars as pl
from polars.testing import assert_frame_equal


@pytest.fixture()
def df1() -> pl.DataFrame:
    """Two-row frame with columns ``foo``, ``bar`` and ``ham``; used as the receiver in the tests."""
    data = {"foo": [1, 2], "bar": [6, 7], "ham": ["a", "b"]}
    return pl.DataFrame(data)


@pytest.fixture()
def df2() -> pl.DataFrame:
    """Two-row frame with the same columns as ``df1``; used as the frame being stacked."""
    data = {"foo": [3, 4], "bar": [8, 9], "ham": ["c", "d"]}
    return pl.DataFrame(data)


def test_vstack(df1: pl.DataFrame, df2: pl.DataFrame) -> None:
    """The frame returned by ``df1.vstack(df2)`` holds the rows of ``df1`` followed by those of ``df2``."""
    stacked = df1.vstack(df2)
    expected_data = {
        "foo": [1, 2, 3, 4],
        "bar": [6, 7, 8, 9],
        "ham": ["a", "b", "c", "d"],
    }
    assert_frame_equal(stacked, pl.DataFrame(expected_data))


def test_vstack_in_place(df1: pl.DataFrame, df2: pl.DataFrame) -> None:
    """With ``in_place=True`` the receiver ``df1`` itself ends up holding the stacked rows."""
    # The return value is deliberately ignored: only the mutation is checked.
    df1.vstack(df2, in_place=True)
    expected_data = {
        "foo": [1, 2, 3, 4],
        "bar": [6, 7, 8, 9],
        "ham": ["a", "b", "c", "d"],
    }
    assert_frame_equal(df1, pl.DataFrame(expected_data))


def test_vstack_self(df1: pl.DataFrame) -> None:
    """Stacking a frame onto itself returns a frame with its rows repeated twice."""
    doubled = df1.vstack(df1)
    expected_data = {
        "foo": [1, 2, 1, 2],
        "bar": [6, 7, 6, 7],
        "ham": ["a", "b", "a", "b"],
    }
    assert_frame_equal(doubled, pl.DataFrame(expected_data))


def test_vstack_self_in_place(df1: pl.DataFrame) -> None:
    """In-place stacking of a frame onto itself must succeed and leave its rows repeated twice."""
    # Receiver and argument are the same object here, which is the case
    # this test exists to cover.
    df1.vstack(df1, in_place=True)
    expected_data = {
        "foo": [1, 2, 1, 2],
        "bar": [6, 7, 6, 7],
        "ham": ["a", "b", "a", "b"],
    }
    assert_frame_equal(df1, pl.DataFrame(expected_data))
2 changes: 1 addition & 1 deletion py-polars/tests/unit/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,7 @@ def test_lazy_concat(df: pl.DataFrame) -> None:

out = pl.concat([df.lazy(), df.lazy()]).collect()
assert out.shape == shape
assert_frame_equal(out, df.vstack(df.clone()))
assert_frame_equal(out, df.vstack(df))


def test_self_join() -> None:
Expand Down

0 comments on commit c8a98f9

Please sign in to comment.