Access self.index instead of self._index where possible (#15781)

Since `index` is defined as

```python
@property
def index(self):
    return self._index
```

get and set `self.index` instead of `self._index` where possible. Assigning through `self.index` helps ensure that we do not create an invalid `IndexedFrame` with `len(index) != len(columns)`. In some places it was still necessary to set `self._index` directly, because data was being swapped "inplace" and validation had to be avoided. (Hoping to avoid this pattern in the future.)

Authors:
- Matthew Roeschke (https://github.com/mroeschke)

Approvers:
- Vyas Ramasubramani (https://github.com/vyasr)

URL: #15781
rapidsai · May 22, 2024 · 57444ed · 57444ed
1 parent 45dc595
commit 57444ed
Show file tree

Hide file tree

Showing 8 changed files with 253 additions and 231 deletions.
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
@@ -949,7 +949,7 @@ def nth(self, n):
 
         result = result[sizes > n]
 
-        result._index = self.obj.index.take(
+        result.index = self.obj.index.take(
             result._data["__groupbynth_order__"]
         )
         del result._data["__groupbynth_order__"]
@@ -1038,7 +1038,7 @@ def ngroup(self, ascending=True):
         if has_null_group:
             group_ids.iloc[-1] = cudf.NA
 
-        group_ids._index = index
+        group_ids.index = index
         return self._broadcast(group_ids)
 
     def sample(
@@ -1208,7 +1208,7 @@ def deserialize(cls, header, frames):
 
     def _grouped(self, *, include_groups: bool = True):
         offsets, grouped_key_cols, grouped_value_cols = self._groupby.groups(
-            [*self.obj._index._columns, *self.obj._columns]
+            [*self.obj.index._columns, *self.obj._columns]
         )
         grouped_keys = cudf.core.index._index_from_data(
             dict(enumerate(grouped_key_cols))
@@ -2849,8 +2849,8 @@ def _handle_label(self, by):
             self._key_columns.append(self._obj._data[by])
         except KeyError as e:
             # `by` can be index name(label) too.
-            if by in self._obj._index.names:
-                self._key_columns.append(self._obj._index._data[by])
+            if by in self._obj.index.names:
+                self._key_columns.append(self._obj.index._data[by])
             else:
                 raise e
         self.names.append(by)