Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: Make DataFrame.from_pandas process by column #14483

Merged
merged 8 commits into from
Nov 28, 2023
Merged
16 changes: 12 additions & 4 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1873,10 +1873,18 @@ def from_pandas(cls, index, nan_as_null=no_default):

if not isinstance(index, pd.Index):
raise TypeError("not a pandas.Index")

ind = cudf.Index(column.as_column(index, nan_as_null=nan_as_null))
ind.name = index.name
return ind
if isinstance(index, pd.RangeIndex):
return cudf.RangeIndex(
start=index.start,
stop=index.stop,
step=index.step,
name=index.name,
)
else:
return cudf.Index(
column.as_column(index, nan_as_null=nan_as_null),
name=index.name,
)

@property
def _constructor_expanddim(self):
Expand Down
55 changes: 21 additions & 34 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5245,30 +5245,20 @@ def from_pandas(cls, dataframe, nan_as_null=no_default):
if not dataframe.columns.is_unique:
raise ValueError("Duplicate column names are not allowed")

# Set columns
data = {}
for col_name, col_value in dataframe.items():
# necessary because multi-index can return multiple
# columns for a single key
if len(col_value.shape) == 1:
data[col_name] = column.as_column(
col_value.array, nan_as_null=nan_as_null
)
else:
vals = col_value.values.T
if vals.shape[0] == 1:
data[col_name] = column.as_column(
vals.flatten(), nan_as_null=nan_as_null
)
else:
if isinstance(col_name, tuple):
col_name = str(col_name)
for idx in range(len(vals.shape)):
data[col_name] = column.as_column(
vals[idx], nan_as_null=nan_as_null
)

index = cudf.from_pandas(dataframe.index, nan_as_null=nan_as_null)
data = {
col_name: column.as_column(
col_value.array, nan_as_null=nan_as_null
)
for col_name, col_value in dataframe.items()
}
if isinstance(dataframe.index, pd.MultiIndex):
index = cudf.MultiIndex.from_pandas(
dataframe.index, nan_as_null=nan_as_null
)
else:
index = cudf.Index.from_pandas(
dataframe.index, nan_as_null=nan_as_null
)
df = cls._from_data(data, index)
df._data._level_names = tuple(dataframe.columns.names)

Expand All @@ -5279,13 +5269,14 @@ def from_pandas(cls, dataframe, nan_as_null=no_default):
df.columns = dataframe.columns

return df
elif hasattr(dataframe, "__dataframe__"):
# TODO: Probably should be handled in the constructor as
# this isn't pandas specific
return from_dataframe(dataframe, allow_copy=True)
else:
try:
return from_dataframe(dataframe, allow_copy=True)
except Exception:
raise TypeError(
f"Could not construct DataFrame from {type(dataframe)}"
)
raise TypeError(
f"Could not construct DataFrame from {type(dataframe)}"
)

@classmethod
@_cudf_nvtx_annotate
Expand Down Expand Up @@ -7915,10 +7906,6 @@ def from_pandas(obj, nan_as_null=no_default):
return ret
elif isinstance(obj, pd.MultiIndex):
return MultiIndex.from_pandas(obj, nan_as_null=nan_as_null)
elif isinstance(obj, pd.RangeIndex):
return cudf.core.index.RangeIndex(
start=obj.start, stop=obj.stop, step=obj.step, name=obj.name
)
elif isinstance(obj, pd.Index):
return cudf.Index.from_pandas(obj, nan_as_null=nan_as_null)
elif isinstance(obj, pd.CategoricalDtype):
Expand Down
7 changes: 7 additions & 0 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2954,6 +2954,13 @@ def test_index_getitem_from_nonint_raises(idx):
cudf.Index([1, 2])[idx]


def test_from_pandas_rangeindex_return_rangeindex():
    """Check that Index.from_pandas keeps a pandas RangeIndex as a RangeIndex.

    Builds a named pandas RangeIndex, converts it through
    ``cudf.Index.from_pandas`` and compares the outcome against an
    explicitly constructed ``cudf.RangeIndex`` with the same start, stop,
    step and name, using ``assert_eq(..., exact=True)``.
    """
    # One set of parameters drives both the pandas input and the expectation.
    range_kwargs = {"start": 3, "stop": 9, "step": 3, "name": "a"}
    pandas_range = pd.RangeIndex(**range_kwargs)
    converted = cudf.Index.from_pandas(pandas_range)
    assert_eq(converted, cudf.RangeIndex(**range_kwargs), exact=True)


@pytest.mark.parametrize(
"idx",
[
Expand Down