From ab35f7dca36ee456b909a33fccb7fb27a652620d Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 29 Apr 2021 10:45:01 -0400 Subject: [PATCH 01/14] Add initial Buffer.copy() --- python/cudf/cudf/core/buffer.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py index 9fc5570e35a..293fefc380c 100644 --- a/python/cudf/cudf/core/buffer.py +++ b/python/cudf/cudf/core/buffer.py @@ -141,6 +141,13 @@ def empty(cls, size: int) -> Buffer: dbuf = DeviceBuffer(size=size) return Buffer(dbuf) + def copy(self): + from rmm._lib.device_buffer import copy_device_to_ptr + + out = Buffer(DeviceBuffer(size=self.size)) + copy_device_to_ptr(self.ptr, out.ptr, self.size) + return out + def _buffer_data_from_array_interface(array_interface): ptr = array_interface["data"][0] From 72197a2a6da6ba2a95bb848526ffde97bafa39d8 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 29 Apr 2021 11:21:49 -0400 Subject: [PATCH 02/14] Add Buffer copy tests --- python/cudf/cudf/tests/test_buffer.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/python/cudf/cudf/tests/test_buffer.py b/python/cudf/cudf/tests/test_buffer.py index 241d719f09e..4600d932c6f 100644 --- a/python/cudf/cudf/tests/test_buffer.py +++ b/python/cudf/cudf/tests/test_buffer.py @@ -1,5 +1,6 @@ import cupy as cp import pytest +from cupy.testing import assert_array_equal from cudf.core.buffer import Buffer @@ -44,3 +45,14 @@ def test_buffer_from_cuda_iface_dtype(data, dtype): TypeError, match="Buffer data must be of uint8 type" ): buf = Buffer(data=data, size=data.size) # noqa: F841 + + +@pytest.mark.parametrize("size", [0, 1, 10, 100, 1000, 10_000]) +def test_buffer_copy(size): + data = cp.random.randint(low=0, high=100, size=size, dtype="u1") + buf = Buffer(data=data) + got = buf.copy() + assert got.size == buf.size + if size > 0: + assert got.ptr != buf.ptr + assert_array_equal(cp.asarray(buf), cp.asarray(got)) From 9b4611b6b7833ef3496d91d51082477d03c063fa Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 29 Apr 2021 11:22:18 -0400 Subject: [PATCH 03/14] Docstring --- python/cudf/cudf/core/buffer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py index 293fefc380c..c6875052685 100644 --- a/python/cudf/cudf/core/buffer.py +++ b/python/cudf/cudf/core/buffer.py @@ -142,6 +142,10 @@ def empty(cls, size: int) -> Buffer: return Buffer(dbuf) def copy(self): + """ + Create a new Buffer containing a copy of the data contained + in this Buffer. 
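As an aside, the behaviour pinned down by the new `Buffer.copy()` and its test above can be sketched from the user side roughly as follows. This is illustrative only and assumes a built cudf with CuPy available, mirroring `test_buffer_copy`:

    import cupy as cp
    from cudf.core.buffer import Buffer

    data = cp.arange(8, dtype="u1")     # any uint8 device array works
    buf = Buffer(data=data)
    out = buf.copy()                    # device-to-device copy via RMM

    assert out.size == buf.size
    assert out.ptr != buf.ptr           # separate allocations (size > 0)
    assert (cp.asarray(buf) == cp.asarray(out)).all()   # same bytes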
+ """ from rmm._lib.device_buffer import copy_device_to_ptr out = Buffer(DeviceBuffer(size=self.size)) From 8d64f843b6fb6e4d640d3c61e5e5e56157c88403 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 20 May 2021 11:58:17 -0400 Subject: [PATCH 04/14] Initial refactor of ColumnMethods --- python/cudf/cudf/_lib/strings/__init__.py | 88 ++++ python/cudf/cudf/_lib/strings/combine.pyx | 24 +- python/cudf/cudf/core/column/categorical.py | 440 ++++++++++------ python/cudf/cudf/core/column/lists.py | 33 +- python/cudf/cudf/core/column/methods.py | 17 +- python/cudf/cudf/core/column/string.py | 550 +++++++++----------- python/cudf/cudf/core/column/struct.py | 15 +- python/cudf/cudf/core/frame.py | 9 +- python/cudf/cudf/core/index.py | 16 +- python/cudf/cudf/core/join/_join_helpers.py | 4 +- python/cudf/cudf/core/series.py | 8 +- python/cudf/cudf/core/tools/numeric.py | 22 +- python/cudf/cudf/io/csv.py | 2 +- 13 files changed, 693 insertions(+), 535 deletions(-) diff --git a/python/cudf/cudf/_lib/strings/__init__.py b/python/cudf/cudf/_lib/strings/__init__.py index e69de29bb2d..e942f742c66 100644 --- a/python/cudf/cudf/_lib/strings/__init__.py +++ b/python/cudf/cudf/_lib/strings/__init__.py @@ -0,0 +1,88 @@ +from cudf._lib.nvtext.edit_distance import edit_distance +from cudf._lib.nvtext.generate_ngrams import ( + generate_character_ngrams, + generate_ngrams, +) +from cudf._lib.nvtext.ngrams_tokenize import ngrams_tokenize +from cudf._lib.nvtext.normalize import normalize_characters, normalize_spaces +from cudf._lib.nvtext.replace import filter_tokens, replace_tokens +from cudf._lib.nvtext.stemmer import ( + LetterType, + is_letter, + is_letter_multi, + porter_stemmer_measure, +) +from cudf._lib.nvtext.subword_tokenize import subword_tokenize_vocab_file +from cudf._lib.nvtext.tokenize import ( + _count_tokens_column, + _count_tokens_scalar, + _tokenize_column, + _tokenize_scalar, + character_tokenize, + detokenize, +) +from cudf._lib.strings.attributes import ( + code_points, + count_bytes, + count_characters, +) +from cudf._lib.strings.capitalize import capitalize, title +from cudf._lib.strings.case import swapcase, to_lower, to_upper +from cudf._lib.strings.char_types import ( + filter_alphanum, + is_alnum, + is_alpha, + is_decimal, + is_digit, + is_lower, + is_numeric, + is_space, + is_upper, +) +from cudf._lib.strings.combine import ( + concatenate, + join, + join_lists_with_column, + join_lists_with_scalar, +) +from cudf._lib.strings.contains import contains_re, count_re, match_re +from cudf._lib.strings.convert.convert_fixed_point import to_decimal +from cudf._lib.strings.convert.convert_floats import is_float +from cudf._lib.strings.convert.convert_integers import is_integer +from cudf._lib.strings.convert.convert_urls import url_decode, url_encode +from cudf._lib.strings.extract import extract +from cudf._lib.strings.find import ( + contains, + contains_multiple, + endswith, + endswith_multiple, + find, + rfind, + startswith, + startswith_multiple, +) +from cudf._lib.strings.findall import findall +from cudf._lib.strings.json import get_json_object +from cudf._lib.strings.padding import PadSide, center, ljust, pad, rjust, zfill +from cudf._lib.strings.replace import ( + insert, + replace, + replace_multi, + slice_replace, +) +from cudf._lib.strings.replace_re import ( + replace_multi_re, + replace_re, + replace_with_backrefs, +) +from cudf._lib.strings.split.partition import partition, rpartition +from cudf._lib.strings.split.split import ( + rsplit, + rsplit_record, + split, + 
split_record, +) +from cudf._lib.strings.strip import lstrip, rstrip, strip +from cudf._lib.strings.substring import get, slice_from, slice_strings +from cudf._lib.strings.translate import filter_characters, translate +from cudf._lib.strings.wrap import wrap diff --git a/python/cudf/cudf/_lib/strings/combine.pyx b/python/cudf/cudf/_lib/strings/combine.pyx index 25619de3ed0..4560695f280 100644 --- a/python/cudf/cudf/_lib/strings/combine.pyx +++ b/python/cudf/cudf/_lib/strings/combine.pyx @@ -21,15 +21,15 @@ from cudf._lib.cpp.strings.combine cimport ( def concatenate(Table source_strings, - object py_separator, - object py_narep): + object sep, + object na_rep): """ Returns a Column by concatenating strings column-wise in `source_strings` - with the specified `py_separator` between each column and - `na`/`None` values are replaced by `py_narep` + with the specified `sep` between each column and + `na`/`None` values are replaced by `na_rep` """ - cdef DeviceScalar separator = py_separator.device_value - cdef DeviceScalar narep = py_narep.device_value + cdef DeviceScalar separator = sep.device_value + cdef DeviceScalar narep = na_rep.device_value cdef unique_ptr[column] c_result cdef table_view source_view = source_strings.data_view() @@ -51,16 +51,16 @@ def concatenate(Table source_strings, def join(Column source_strings, - object py_separator, - object py_narep): + object sep, + object na_rep): """ Returns a Column by concatenating strings row-wise in `source_strings` - with the specified `py_separator` between each column and - `na`/`None` values are replaced by `py_narep` + with the specified `sep` between each column and + `na`/`None` values are replaced by `na_rep` """ - cdef DeviceScalar separator = py_separator.device_value - cdef DeviceScalar narep = py_narep.device_value + cdef DeviceScalar separator = sep.device_value + cdef DeviceScalar narep = na_rep.device_value cdef unique_ptr[column] c_result cdef column_view source_view = source_strings.view() diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index c199947d261..aed4425093f 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -53,7 +53,7 @@ class CategoricalAccessor(ColumnMethodsMixin): _column: CategoricalColumn - def __init__(self, column: Any, parent: ParentType = None): + def __init__(self, parent: ParentType): """ Accessor object for categorical properties of the Series values. Be aware that assigning to `categories` is a inplace operation, @@ -107,18 +107,18 @@ def __init__(self, column: Any, parent: ParentType = None): dtype: category Categories (3, int64): [1, 2, 3] """ - if not is_categorical_dtype(column.dtype): + if not is_categorical_dtype(parent.dtype): raise AttributeError( "Can only use .cat accessor with a 'category' dtype" ) - super().__init__(column=column, parent=parent) + super().__init__(parent=parent) @property def categories(self) -> "cudf.Index": """ The categories of this categorical. """ - return cudf.core.index.as_index(self._column.categories) + return cudf.core.index.as_index(self._parent._column.categories) @property def codes(self) -> "cudf.Series": @@ -130,14 +130,14 @@ def codes(self) -> "cudf.Series": if isinstance(self._parent, cudf.Series) else None ) - return cudf.Series(self._column.codes, index=index) + return cudf.Series(self._parent._column.codes, index=index) @property def ordered(self) -> Optional[bool]: """ Whether the categories have an ordered relationship. 
""" - return self._column.ordered + return self._parent._column.ordered def as_ordered(self, inplace: bool = False) -> Optional[ParentType]: """ @@ -192,13 +192,9 @@ def as_ordered(self, inplace: bool = False) -> Optional[ParentType]: dtype: category Categories (3, int64): [1 < 2 < 10] """ - out_col = self._column - if not out_col.ordered: - out_col = self._set_categories( - self._column.categories, self._column.categories, ordered=True, - ) - - return self._return_or_inplace(out_col, inplace=inplace) + return self._return_or_inplace( + self._parent._column.as_ordered(), inplace=inplace + ) def as_unordered(self, inplace: bool = False) -> Optional[ParentType]: """ @@ -264,13 +260,9 @@ def as_unordered(self, inplace: bool = False) -> Optional[ParentType]: dtype: category Categories (3, int64): [1, 2, 10] """ - out_col = self._column - if out_col.ordered: - out_col = self._set_categories( - self._column.categories, self.categories, ordered=False - ) - - return self._return_or_inplace(out_col, inplace=inplace) + return self._return_or_inplace( + self._parent._column.as_unordered(), inplace=inplace + ) def add_categories( self, new_categories: Any, inplace: bool = False @@ -326,7 +318,7 @@ def add_categories( Categories (5, int64): [1, 2, 0, 3, 4] """ - old_categories = self._column.categories + old_categories = self._parent._column.categories new_categories = column.as_column( new_categories, dtype=old_categories.dtype if len(new_categories) == 0 else None, @@ -351,9 +343,9 @@ def add_categories( raise ValueError("new categories must not include old categories") new_categories = old_categories.append(new_categories) - out_col = self._column - if not self._categories_equal(new_categories): - out_col = self._set_categories(old_categories, new_categories) + out_col = self._parent._column + if not out_col._categories_equal(new_categories): + out_col = out_col._set_categories(new_categories) return self._return_or_inplace(out_col, inplace=inplace) @@ -441,11 +433,9 @@ def remove_categories( raise ValueError(f"removals must all be in old categories: {vals}") new_categories = cats[~cats.isin(removals)]._column - out_col = self._column - if not self._categories_equal(new_categories): - out_col = self._set_categories( - self._column.categories, new_categories - ) + out_col = self._parent._column + if not out_col._categories_equal(new_categories): + out_col = out_col._set_categories(new_categories) return self._return_or_inplace(out_col, inplace=inplace) @@ -548,7 +538,7 @@ def set_categories( # categories. if rename: # enforce same length - if len(new_categories) != len(self._column.categories): + if len(new_categories) != len(self._parent._column.categories): raise ValueError( "new_categories must have the same " "number of items as old categories" @@ -556,29 +546,29 @@ def set_categories( out_col = column.build_categorical_column( categories=new_categories, - codes=self._column.base_children[0], - mask=self._column.base_mask, - size=self._column.size, - offset=self._column.offset, + codes=self._parent._column.base_children[0], + mask=self._parent._column.base_mask, + size=self._parent._column.size, + offset=self._parent._column.offset, ordered=ordered, ) else: - out_col = self._column + out_col = self._parent._column if not (type(out_col.categories) is type(new_categories)): # If both categories are of different Column types, # return a column full of Nulls. 
out_col = _create_empty_categorical_column( - self._column, + self._parent._column, CategoricalDtype( categories=new_categories, ordered=ordered ), ) elif ( - not self._categories_equal(new_categories, ordered=ordered) + not out_col._categories_equal(new_categories, ordered=ordered) or not self.ordered == ordered ): - out_col = self._set_categories( - self._column.categories, new_categories, ordered=ordered, + out_col = out_col._set_categories( + new_categories, ordered=ordered, ) return self._return_or_inplace(out_col, inplace=inplace) @@ -658,102 +648,21 @@ def reorder_categories( # Ignore order for comparison because we're only interested # in whether new_categories has all the same values as the # current set of categories. - if not self._categories_equal(new_categories, ordered=False): + if not self._parent._column._categories_equal( + new_categories, ordered=False + ): raise ValueError( "items in new_categories are not the same as in " "old categories" ) - out_col = self._set_categories( - self._column.categories, new_categories, ordered=ordered + out_col = self._parent._column._set_categories( + new_categories, ordered=ordered ) return self._return_or_inplace(out_col, inplace=inplace) - def _categories_equal( - self, new_categories: ColumnBase, ordered=False - ) -> bool: - cur_categories = self._column.categories - if len(new_categories) != len(cur_categories): - return False - if new_categories.dtype != cur_categories.dtype: - return False - # if order doesn't matter, sort before the equals call below - if not ordered: - cur_categories = cudf.Series(cur_categories).sort_values( - ignore_index=True - ) - new_categories = cudf.Series(new_categories).sort_values( - ignore_index=True - ) - return cur_categories.equals(new_categories) - - def _set_categories( - self, - current_categories: Any, - new_categories: Any, - is_unique: bool = False, - ordered: bool = False, - ) -> CategoricalColumn: - """Returns a new CategoricalColumn with the categories set to the - specified *new_categories*. 
- - Notes - ----- - Assumes ``new_categories`` is the same dtype as the current categories - """ - - cur_cats = column.as_column(current_categories) - new_cats = column.as_column(new_categories) - - # Join the old and new categories to build a map from - # old to new codes, inserting na_sentinel for any old - # categories that don't exist in the new categories - - # Ensure new_categories is unique first - if not (is_unique or new_cats.is_unique): - # drop_duplicates() instead of unique() to preserve order - new_cats = ( - cudf.Series(new_cats) - .drop_duplicates(ignore_index=True) - ._column - ) - - cur_codes = self.codes - max_cat_size = ( - len(cur_cats) if len(cur_cats) > len(new_cats) else len(new_cats) - ) - out_code_dtype = min_unsigned_type(max_cat_size) - - cur_order = column.arange(len(cur_codes)) - old_codes = column.arange(len(cur_cats), dtype=out_code_dtype) - new_codes = column.arange(len(new_cats), dtype=out_code_dtype) - - new_df = cudf.DataFrame({"new_codes": new_codes, "cats": new_cats}) - old_df = cudf.DataFrame({"old_codes": old_codes, "cats": cur_cats}) - cur_df = cudf.DataFrame({"old_codes": cur_codes, "order": cur_order}) - - # Join the old and new categories and line up their codes - df = old_df.merge(new_df, on="cats", how="left") - # Join the old and new codes to "recode" the codes data buffer - df = cur_df.merge(df, on="old_codes", how="left") - df = df.sort_values(by="order") - df.reset_index(drop=True, inplace=True) - - ordered = ordered if ordered is not None else self.ordered - new_codes = df["new_codes"]._column - - # codes can't have masks, so take mask out before moving in - return column.build_categorical_column( - categories=new_cats, - codes=column.as_column(new_codes.base_data, dtype=new_codes.dtype), - mask=new_codes.base_mask, - size=new_codes.size, - offset=new_codes.offset, - ordered=ordered, - ) - def _decategorize(self) -> ColumnBase: - return self._column._get_decategorized_column() + return self._parent._column._get_decategorized_column() class CategoricalColumn(column.ColumnBase): @@ -941,9 +850,6 @@ def ordered(self) -> Optional[bool]: def ordered(self, value: bool): self.dtype.ordered = value - def cat(self, parent: ParentType = None): - return CategoricalAccessor(self, parent=parent) - def unary_operator(self, unaryop: str): raise TypeError( f"Series of dtype `category` cannot perform the operation: " @@ -1091,7 +997,7 @@ def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series: col = self signed_dtype = min_signed_type(len(col.categories)) - codes = col.cat().codes.astype(signed_dtype).fillna(-1).to_array() + codes = col.codes.astype(signed_dtype).fillna(-1).to_array() categories = col.categories.dropna(drop_nan=True).to_pandas() data = pd.Categorical.from_codes( codes, categories=categories, ordered=col.ordered @@ -1222,13 +1128,11 @@ def find_and_replace( # named 'index', which came from the filtered categories, # contains the new ints that we need to map to to_replace_col = column.as_column(catmap.index).astype( - self.cat().codes.dtype - ) - replacement_col = catmap["index"]._column.astype( - self.cat().codes.dtype + self.codes.dtype ) + replacement_col = catmap["index"]._column.astype(self.codes.dtype) - replaced = column.as_column(self.cat().codes) + replaced = column.as_column(self.codes) output = libcudf.replace.replace( replaced, to_replace_col, replacement_col ) @@ -1306,10 +1210,8 @@ def fillna( ) # TODO: only required if fill_value has a subset of the # categories: - fill_value = fill_value.cat()._set_categories( - 
fill_value.cat().categories, - self.categories, - is_unique=True, + fill_value = fill_value._set_categories( + self.categories, is_unique=True, ) fill_value = column.as_column(fill_value.codes).astype( self.codes.dtype @@ -1377,8 +1279,8 @@ def as_categorical_column( # return a column full of Nulls. return _create_empty_categorical_column(self, dtype) - return self.cat().set_categories( - new_categories=dtype.categories, ordered=dtype.ordered + return self.set_categories( + new_categories=dtype.categories, ordered=bool(dtype.ordered) ) def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: @@ -1402,8 +1304,8 @@ def as_timedelta_column(self, dtype, **kwargs) -> TimeDeltaColumn: def _get_decategorized_column(self) -> ColumnBase: if self.null_count == len(self): # self.categories is empty; just return codes - return self.cat().codes._column - gather_map = self.cat().codes.astype("int32").fillna(0)._column + return self.codes + gather_map = self.codes.astype("int32").fillna(0) out = self.categories.take(gather_map) out = out.set_mask(self.mask) return out @@ -1436,19 +1338,14 @@ def copy(self, deep: bool = True) -> CategoricalColumn: ) def __sizeof__(self) -> int: - return ( - self.cat().categories.__sizeof__() + self.cat().codes.__sizeof__() - ) + return self.categories.__sizeof__() + self.codes.__sizeof__() def _memory_usage(self, **kwargs) -> int: deep = kwargs.get("deep", False) if deep: return self.__sizeof__() else: - return ( - self.categories._memory_usage() - + self.cat().codes.memory_usage() - ) + return self.categories._memory_usage() + self.codes._memory_usage() def _mimic_inplace( self, other_col: ColumnBase, inplace: bool = False @@ -1475,14 +1372,9 @@ def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: # Combine and de-dupe the categories cats = ( - cudf.concat([o.cat().categories for o in objs]) - .drop_duplicates() - ._column + cudf.concat([o.categories for o in objs]).drop_duplicates()._column ) - objs = [ - o.cat()._set_categories(o.cat().categories, cats, is_unique=True) - for o in objs - ] + objs = [o._set_categories(cats, is_unique=True) for o in objs] codes = [o.codes for o in objs] newsize = sum(map(len, codes)) @@ -1506,6 +1398,238 @@ def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: offset=codes_col.offset, ) + def set_categories( + self, new_categories: Any, ordered: bool = False, rename: bool = False, + ) -> CategoricalColumn: + """ + Set the categories to the specified new_categories. + + + `new_categories` can include new categories (which + will result in unused categories) or remove old categories + (which results in values set to null). If `rename==True`, + the categories will simple be renamed (less or more items + than in old categories will result in values set to null or + in unused categories respectively). + + This method can be used to perform more than one action + of adding, removing, and reordering simultaneously and + is therefore faster than performing the individual steps + via the more specialised methods. + + On the other hand this methods does not do checks + (e.g., whether the old categories are included in the + new categories on a reorder), which can result in + surprising changes. + + Parameters + ---------- + + new_categories : list-like + The categories in new order. + + ordered : bool, default None + Whether or not the categorical is treated as + a ordered categorical. If not given, do + not change the ordered information. 
+ + rename : bool, default False + Whether or not the `new_categories` should be + considered as a rename of the old categories + or as reordered categories. + + Returns + ------- + cat + Categorical with reordered categories + or None if inplace. + + Examples + -------- + >>> import cudf + >>> s = cudf.Series([1, 1, 2, 10, 2, 10], dtype='category') + >>> s + 0 1 + 1 1 + 2 2 + 3 10 + 4 2 + 5 10 + dtype: category + Categories (3, int64): [1, 2, 10] + >>> s.cat.set_categories([1, 10]) + 0 1 + 1 1 + 2 + 3 10 + 4 + 5 10 + dtype: category + Categories (2, int64): [1, 10] + >>> s.cat.set_categories([1, 10], inplace=True) + >>> s + 0 1 + 1 1 + 2 + 3 10 + 4 + 5 10 + dtype: category + Categories (2, int64): [1, 10] + """ + ordered = ordered if ordered is not None else self.ordered + new_categories = column.as_column(new_categories) + + if isinstance(new_categories, CategoricalColumn): + new_categories = new_categories.categories + + # when called with rename=True, the pandas behavior is + # to replace the current category values with the new + # categories. + if rename: + # enforce same length + if len(new_categories) != len(self.categories): + raise ValueError( + "new_categories must have the same " + "number of items as old categories" + ) + + out_col = column.build_categorical_column( + categories=new_categories, + codes=self.base_children[0], + mask=self.base_mask, + size=self.size, + offset=self.offset, + ordered=ordered, + ) + else: + out_col = self + if not (type(out_col.categories) is type(new_categories)): + # If both categories are of different Column types, + # return a column full of Nulls. + out_col = _create_empty_categorical_column( + self, + CategoricalDtype( + categories=new_categories, ordered=ordered + ), + ) + elif ( + not out_col._categories_equal(new_categories, ordered=ordered) + or not self.ordered == ordered + ): + out_col = out_col._set_categories( + new_categories, ordered=ordered, + ) + return out_col + + def _categories_equal( + self, new_categories: ColumnBase, ordered=False + ) -> bool: + cur_categories = self.categories + if len(new_categories) != len(cur_categories): + return False + if new_categories.dtype != cur_categories.dtype: + return False + # if order doesn't matter, sort before the equals call below + if not ordered: + cur_categories = cudf.Series(cur_categories).sort_values( + ignore_index=True + ) + new_categories = cudf.Series(new_categories).sort_values( + ignore_index=True + ) + return cur_categories.equals(new_categories) + + def _set_categories( + self, + new_categories: Any, + is_unique: bool = False, + ordered: bool = False, + ) -> CategoricalColumn: + """Returns a new CategoricalColumn with the categories set to the + specified *new_categories*. 
+ + Notes + ----- + Assumes ``new_categories`` is the same dtype as the current categories + """ + + cur_cats = column.as_column(self.categories) + new_cats = column.as_column(new_categories) + + # Join the old and new categories to build a map from + # old to new codes, inserting na_sentinel for any old + # categories that don't exist in the new categories + + # Ensure new_categories is unique first + if not (is_unique or new_cats.is_unique): + # drop_duplicates() instead of unique() to preserve order + new_cats = ( + cudf.Series(new_cats) + .drop_duplicates(ignore_index=True) + ._column + ) + + cur_codes = self.codes + max_cat_size = ( + len(cur_cats) if len(cur_cats) > len(new_cats) else len(new_cats) + ) + out_code_dtype = min_unsigned_type(max_cat_size) + + cur_order = column.arange(len(cur_codes)) + old_codes = column.arange(len(cur_cats), dtype=out_code_dtype) + new_codes = column.arange(len(new_cats), dtype=out_code_dtype) + + new_df = cudf.DataFrame({"new_codes": new_codes, "cats": new_cats}) + old_df = cudf.DataFrame({"old_codes": old_codes, "cats": cur_cats}) + cur_df = cudf.DataFrame({"old_codes": cur_codes, "order": cur_order}) + + # Join the old and new categories and line up their codes + df = old_df.merge(new_df, on="cats", how="left") + # Join the old and new codes to "recode" the codes data buffer + df = cur_df.merge(df, on="old_codes", how="left") + df = df.sort_values(by="order") + df.reset_index(drop=True, inplace=True) + + ordered = ordered if ordered is not None else self.ordered + new_codes = df["new_codes"]._column + + # codes can't have masks, so take mask out before moving in + return column.build_categorical_column( + categories=new_cats, + codes=column.as_column(new_codes.base_data, dtype=new_codes.dtype), + mask=new_codes.base_mask, + size=new_codes.size, + offset=new_codes.offset, + ordered=ordered, + ) + + def as_ordered(self): + out_col = self + if not out_col.ordered: + out_col = column.build_categorical_column( + categories=self.categories, + codes=self.codes, + mask=self.base_mask, + size=self.base_size, + offset=self.offset, + ordered=True, + ) + return out_col + + def as_unordered(self): + out_col = self + if out_col.ordered: + out_col = column.build_categorical_column( + categories=self.categories, + codes=self.codes, + mask=self.base_mask, + size=self.base_size, + offset=self.offset, + ordered=False, + ) + return out_col + def _create_empty_categorical_column( categorical_column: CategoricalColumn, dtype: "CategoricalDtype" @@ -1516,7 +1640,7 @@ def _create_empty_categorical_column( cudf.utils.utils.scalar_broadcast_to( categorical_column.default_na_value(), categorical_column.size, - np.dtype(categorical_column.cat().codes), + np.dtype(categorical_column.codes), ) ), offset=categorical_column.offset, diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 7ea02c0e878..3d09bafd9ad 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -145,9 +145,6 @@ def offsets(self): """ return self.children[0] - def list(self, parent=None): - return ListMethods(self, parent=parent) - def to_arrow(self): offsets = self.offsets.to_arrow() elements = ( @@ -239,12 +236,12 @@ class ListMethods(ColumnMethodsMixin): List methods for Series """ - def __init__(self, column, parent=None): - if not is_list_dtype(column.dtype): + def __init__(self, parent=None): + if not is_list_dtype(parent.dtype): raise AttributeError( "Can only use .list accessor with a 'list' dtype" ) - 
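The join-based recode inside `_set_categories` above is easiest to see on a toy example. The sketch below uses plain pandas rather than cudf, with made-up categories and codes, purely to illustrate the mapping; the real implementation additionally carries an explicit `order` column so that a final sort restores the original row order:

    import pandas as pd

    cur_cats = pd.Series(["a", "b", "c"])     # current categories
    new_cats = pd.Series(["b", "c", "d"])     # requested categories
    cur_codes = pd.Series([0, 2, 1, 0])       # encoded values: a, c, b, a

    old = pd.DataFrame({"old_codes": range(len(cur_cats)), "cats": cur_cats})
    new = pd.DataFrame({"new_codes": range(len(new_cats)), "cats": new_cats})
    mapping = old.merge(new, on="cats", how="left")   # a -> NaN, b -> 0, c -> 1

    recoded = (
        pd.DataFrame({"old_codes": cur_codes})
        .merge(mapping, on="old_codes", how="left")["new_codes"]
    )
    print(recoded.tolist())   # [nan, 1.0, 0.0, nan]: dropped category 'a' becomes null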
super().__init__(column=column, parent=parent) + super().__init__(parent=parent) def get(self, index): """ @@ -273,7 +270,7 @@ def get(self, index): min_col_list_len = self.len().min() if -min_col_list_len <= index < min_col_list_len: return self._return_or_inplace( - extract_element(self._column, index) + extract_element(self._parent._column, index) ) else: raise IndexError("list index out of range") @@ -302,7 +299,7 @@ def contains(self, search_key): search_key = cudf.Scalar(search_key) try: res = self._return_or_inplace( - contains_scalar(self._column, search_key) + contains_scalar(self._parent._column, search_key) ) except RuntimeError as e: if ( @@ -339,11 +336,11 @@ def leaves(self): 5 6 dtype: int64 """ - if type(self._column.elements) is ListColumn: - return self._column.elements.list(parent=self._parent).leaves + if type(self._parent._column.elements) is ListColumn: + return self._parent._column.elements.elements else: return self._return_or_inplace( - self._column.elements, retain_index=False + self._parent._column.elements, retain_index=False ) def len(self): @@ -368,7 +365,7 @@ def len(self): 2 2 dtype: int32 """ - return self._return_or_inplace(count_elements(self._column)) + return self._return_or_inplace(count_elements(self._parent._column)) def take(self, lists_indices): """ @@ -401,7 +398,7 @@ def take(self, lists_indices): lists_indices_col = as_column(lists_indices) if not isinstance(lists_indices_col, ListColumn): raise ValueError("lists_indices should be list type array.") - if not lists_indices_col.size == self._column.size: + if not lists_indices_col.size == self._parent._column.size: raise ValueError( "lists_indices and list column is of different " "size." ) @@ -416,7 +413,7 @@ def take(self, lists_indices): try: res = self._return_or_inplace( - segmented_gather(self._column, lists_indices_col) + segmented_gather(self._parent._column, lists_indices_col) ) except RuntimeError as e: if "contains nulls" in str(e): @@ -451,12 +448,12 @@ def unique(self): dtype: list """ - if is_list_dtype(self._column.children[1].dtype): + if is_list_dtype(self._parent._column.children[1].dtype): raise NotImplementedError("Nested lists unique is not supported.") return self._return_or_inplace( drop_list_duplicates( - self._column, nulls_equal=True, nans_all_equal=True + self._parent._column, nulls_equal=True, nans_all_equal=True ) ) @@ -506,10 +503,10 @@ def sort_values( raise NotImplementedError("`kind` not currently implemented.") if na_position not in {"first", "last"}: raise ValueError(f"Unknown `na_position` value {na_position}") - if is_list_dtype(self._column.children[1].dtype): + if is_list_dtype(self._parent._column.children[1].dtype): raise NotImplementedError("Nested lists sort is not supported.") return self._return_or_inplace( - sort_lists(self._column, ascending, na_position), + sort_lists(self._parent._column, ascending, na_position), retain_index=not ignore_index, ) diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index eec9c2a7860..e2f4acde8cd 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -2,26 +2,17 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Optional, Union, overload +from typing import Optional, Union, overload from typing_extensions import Literal import cudf -if TYPE_CHECKING: - from cudf.core.column import ColumnBase - class ColumnMethodsMixin: - _column: ColumnBase - _parent: Optional[Union["cudf.Series", "cudf.Index"]] + _parent: 
Union["cudf.Series", "cudf.Index"] - def __init__( - self, - column: ColumnBase, - parent: Union["cudf.Series", "cudf.Index"] = None, - ): - self._column = column + def __init__(self, parent: Union["cudf.Series", "cudf.Index"]): self._parent = parent @overload @@ -69,7 +60,7 @@ def _return_or_inplace( ) return None else: - self._column._mimic_inplace(new_col, inplace=True) + self._parent._column._mimic_inplace(new_col, inplace=True) return None else: if self._parent is None: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 940b38ef5ff..36a7e159dc3 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -15,145 +15,8 @@ import cudf from cudf import _lib as libcudf -from cudf._lib import string_casting as str_cast +from cudf._lib import string_casting as str_cast, strings as libstrings from cudf._lib.column import Column -from cudf._lib.nvtext.edit_distance import edit_distance as cpp_edit_distance -from cudf._lib.nvtext.generate_ngrams import ( - generate_character_ngrams as cpp_generate_character_ngrams, - generate_ngrams as cpp_generate_ngrams, -) -from cudf._lib.nvtext.ngrams_tokenize import ( - ngrams_tokenize as cpp_ngrams_tokenize, -) -from cudf._lib.nvtext.normalize import ( - normalize_characters as cpp_normalize_characters, - normalize_spaces as cpp_normalize_spaces, -) -from cudf._lib.nvtext.replace import ( - filter_tokens as cpp_filter_tokens, - replace_tokens as cpp_replace_tokens, -) -from cudf._lib.nvtext.stemmer import ( - LetterType, - is_letter as cpp_is_letter, - is_letter_multi as cpp_is_letter_multi, - porter_stemmer_measure as cpp_porter_stemmer_measure, -) -from cudf._lib.nvtext.subword_tokenize import ( - subword_tokenize_vocab_file as cpp_subword_tokenize_vocab_file, -) -from cudf._lib.nvtext.tokenize import ( - _count_tokens_column as cpp_count_tokens_column, - _count_tokens_scalar as cpp_count_tokens_scalar, - _tokenize_column as cpp_tokenize_column, - _tokenize_scalar as cpp_tokenize_scalar, - character_tokenize as cpp_character_tokenize, - detokenize as cpp_detokenize, -) -from cudf._lib.strings.attributes import ( - code_points as cpp_code_points, - count_bytes as cpp_count_bytes, - count_characters as cpp_count_characters, -) -from cudf._lib.strings.capitalize import ( - capitalize as cpp_capitalize, - title as cpp_title, -) -from cudf._lib.strings.case import ( - swapcase as cpp_swapcase, - to_lower as cpp_to_lower, - to_upper as cpp_to_upper, -) -from cudf._lib.strings.char_types import ( - filter_alphanum as cpp_filter_alphanum, - is_alnum as cpp_is_alnum, - is_alpha as cpp_is_alpha, - is_decimal as cpp_is_decimal, - is_digit as cpp_is_digit, - is_lower as cpp_is_lower, - is_numeric as cpp_is_numeric, - is_space as cpp_isspace, - is_upper as cpp_is_upper, -) -from cudf._lib.strings.combine import ( - concatenate as cpp_concatenate, - join as cpp_join, - join_lists_with_column as cpp_join_lists_with_column, - join_lists_with_scalar as cpp_join_lists_with_scalar, -) -from cudf._lib.strings.contains import ( - contains_re as cpp_contains_re, - count_re as cpp_count_re, - match_re as cpp_match_re, -) -from cudf._lib.strings.convert.convert_fixed_point import ( - to_decimal as cpp_to_decimal, -) -from cudf._lib.strings.convert.convert_floats import is_float as cpp_is_float -from cudf._lib.strings.convert.convert_integers import ( - is_integer as cpp_is_integer, -) -from cudf._lib.strings.convert.convert_urls import ( - url_decode as cpp_url_decode, - url_encode as 
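The intent of the `ColumnMethodsMixin` change above is that an accessor no longer caches a column of its own; everything is reached through the parent, so replacing the parent's column is always visible to the accessor. A minimal stand-alone sketch of the pattern (the `Wrapper`/`Accessor` names here are hypothetical, not cudf classes):

    class Column:
        def __init__(self, values):
            self.values = list(values)

    class Accessor:
        def __init__(self, parent):
            self._parent = parent            # only the parent is stored

        def first(self):
            # always read through the parent, so the accessor never goes stale
            return self._parent._column.values[0]

    class Wrapper:                           # stands in for Series / Index
        def __init__(self, column):
            self._column = column
            self.cat = Accessor(self)

    s = Wrapper(Column([1, 2, 3]))
    assert s.cat.first() == 1
    s._column = Column([9])                  # e.g. an in-place recategorization
    assert s.cat.first() == 9                # no stale cached column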
cpp_url_encode, -) -from cudf._lib.strings.extract import extract as cpp_extract -from cudf._lib.strings.find import ( - contains as cpp_contains, - contains_multiple as cpp_contains_multiple, - endswith as cpp_endswith, - endswith_multiple as cpp_endswith_multiple, - find as cpp_find, - rfind as cpp_rfind, - startswith as cpp_startswith, - startswith_multiple as cpp_startswith_multiple, -) -from cudf._lib.strings.findall import findall as cpp_findall -from cudf._lib.strings.json import get_json_object as cpp_get_json_object -from cudf._lib.strings.padding import ( - PadSide, - center as cpp_center, - ljust as cpp_ljust, - pad as cpp_pad, - rjust as cpp_rjust, - zfill as cpp_zfill, -) -from cudf._lib.strings.replace import ( - insert as cpp_string_insert, - replace as cpp_replace, - replace_multi as cpp_replace_multi, - slice_replace as cpp_slice_replace, -) -from cudf._lib.strings.replace_re import ( - replace_multi_re as cpp_replace_multi_re, - replace_re as cpp_replace_re, - replace_with_backrefs as cpp_replace_with_backrefs, -) -from cudf._lib.strings.split.partition import ( - partition as cpp_partition, - rpartition as cpp_rpartition, -) -from cudf._lib.strings.split.split import ( - rsplit as cpp_rsplit, - rsplit_record as cpp_rsplit_record, - split as cpp_split, - split_record as cpp_split_record, -) -from cudf._lib.strings.strip import ( - lstrip as cpp_lstrip, - rstrip as cpp_rstrip, - strip as cpp_strip, -) -from cudf._lib.strings.substring import ( - get as cpp_string_get, - slice_from as cpp_slice_from, - slice_strings as cpp_slice_strings, -) -from cudf._lib.strings.translate import ( - filter_characters as cpp_filter_characters, - translate as cpp_translate, -) -from cudf._lib.strings.wrap import wrap as cpp_wrap from cudf._typing import ColumnLike, Dtype, ScalarLike from cudf.core.buffer import Buffer from cudf.core.column import column, datetime @@ -216,7 +79,7 @@ class StringMethods(ColumnMethodsMixin): - def __init__(self, column, parent=None): + def __init__(self, parent=None): """ Vectorized string functions for Series and Index. @@ -226,13 +89,15 @@ def __init__(self, column, parent=None): inspiration from R’s stringr package. 
""" value_type = ( - column.dtype.leaf_type if is_list_dtype(column) else column.dtype + parent.dtype.leaf_type + if is_list_dtype(parent.dtype) + else parent.dtype ) if not is_string_dtype(value_type): raise AttributeError( "Can only use .str accessor with string values" ) - super().__init__(column=column, parent=parent) + super().__init__(parent=parent) def htoi(self) -> ParentType: """ @@ -255,7 +120,7 @@ def htoi(self) -> ParentType: dtype: int64 """ - out = str_cast.htoi(self._column) + out = str_cast.htoi(self._parent._column) return self._return_or_inplace(out, inplace=False) @@ -286,7 +151,7 @@ def ip2int(self) -> ParentType: dtype: int64 """ - out = str_cast.ip2int(self._column) + out = str_cast.ip2int(self._parent._column) return self._return_or_inplace(out, inplace=False) @@ -316,7 +181,9 @@ def len(self) -> ParentType: dtype: int32 """ - return self._return_or_inplace(cpp_count_characters(self._column)) + return self._return_or_inplace( + libstrings.count_characters(self._parent._column) + ) def byte_count(self) -> ParentType: """ @@ -343,7 +210,9 @@ def byte_count(self) -> ParentType: 2 11 dtype: int32 """ - return self._return_or_inplace(cpp_count_bytes(self._column),) + return self._return_or_inplace( + libstrings.count_bytes(self._parent._column), + ) @overload def cat(self, sep: str = None, na_rep: str = None) -> str: @@ -443,13 +312,15 @@ def cat(self, others=None, sep=None, na_rep=None): sep = "" if others is None: - data = cpp_join( - self._column, cudf.Scalar(sep), cudf.Scalar(na_rep, "str"), + data = libstrings.join( + self._parent._column, + cudf.Scalar(sep), + cudf.Scalar(na_rep, "str"), ) else: other_cols = _get_cols_list(self._parent, others) - all_cols = [self._column] + other_cols - data = cpp_concatenate( + all_cols = [self._parent._column] + other_cols + data = libstrings.concatenate( cudf.DataFrame( {index: value for index, value in enumerate(all_cols)} ), @@ -595,15 +466,15 @@ def join( f" of type : {type(string_na_rep)}" ) - if isinstance(self._column, cudf.core.column.ListColumn): - strings_column = self._column + if isinstance(self._parent._column, cudf.core.column.ListColumn): + strings_column = self._parent._column else: - # If self._column is not a ListColumn, we will have to + # If self._parent._column is not a ListColumn, we will have to # split each row by character and create a ListColumn out of it. 
strings_column = self._split_by_character() if is_scalar(sep): - data = cpp_join_lists_with_scalar( + data = libstrings.join_lists_with_scalar( strings_column, cudf.Scalar(sep), cudf.Scalar(string_na_rep) ) elif can_convert_to_column(sep): @@ -619,7 +490,7 @@ def join( f"of type: {type(sep_na_rep)}" ) - data = cpp_join_lists_with_column( + data = libstrings.join_lists_with_column( strings_column, sep_column, cudf.Scalar(string_na_rep), @@ -634,16 +505,16 @@ def join( return self._return_or_inplace(data) def _split_by_character(self): - result_col = cpp_character_tokenize(self._column) + result_col = libstrings.character_tokenize(self._parent._column) - offset_col = self._column.children[0] + offset_col = self._parent._column.children[0] res = cudf.core.column.ListColumn( - size=len(self._column), - dtype=cudf.ListDtype(self._column.dtype), - mask=self._column.mask, + size=len(self._parent._column), + dtype=cudf.ListDtype(self._parent._column.dtype), + mask=self._parent._column.mask, offset=0, - null_count=self._column.null_count, + null_count=self._parent._column.null_count, children=(offset_col, result_col), ) return res @@ -708,7 +579,7 @@ def extract( if flags != 0: raise NotImplementedError("`flags` parameter is not yet supported") - out = cpp_extract(self._column, pat) + out = libstrings.extract(self._parent._column, pat) if out._num_columns == 1 and expand is False: return self._return_or_inplace(out._columns[0], expand=expand) else: @@ -835,18 +706,18 @@ def contains( if pat is None: result_col = column.column_empty( - len(self._column), dtype="bool", masked=True + len(self._parent._column), dtype="bool", masked=True ) elif is_scalar(pat): if regex is True: - result_col = cpp_contains_re(self._column, pat) + result_col = libstrings.contains_re(self._parent._column, pat) else: - result_col = cpp_contains( - self._column, cudf.Scalar(pat, "str") + result_col = libstrings.contains( + self._parent._column, cudf.Scalar(pat, "str") ) else: - result_col = cpp_contains_multiple( - self._column, column.as_column(pat, dtype="str") + result_col = libstrings.contains_multiple( + self._parent._column, column.as_column(pat, dtype="str") ) return self._return_or_inplace(result_col) @@ -934,12 +805,14 @@ def replace( ) return self._return_or_inplace( - cpp_replace_multi_re( - self._column, pat, column.as_column(repl, dtype="str") + libstrings.replace_multi_re( + self._parent._column, + pat, + column.as_column(repl, dtype="str"), ) if regex - else cpp_replace_multi( - self._column, + else libstrings.replace_multi( + self._parent._column, column.as_column(pat, dtype="str"), column.as_column(repl, dtype="str"), ), @@ -950,10 +823,12 @@ def replace( # Pandas forces non-regex replace when pat is a single-character return self._return_or_inplace( - cpp_replace_re(self._column, pat, cudf.Scalar(repl, "str"), n) + libstrings.replace_re( + self._parent._column, pat, cudf.Scalar(repl, "str"), n + ) if regex is True and len(pat) > 1 - else cpp_replace( - self._column, + else libstrings.replace( + self._parent._column, cudf.Scalar(pat, "str"), cudf.Scalar(repl, "str"), n, @@ -987,7 +862,7 @@ def replace_with_backrefs(self, pat: str, repl: str) -> ParentType: dtype: object """ return self._return_or_inplace( - cpp_replace_with_backrefs(self._column, pat, repl) + libstrings.replace_with_backrefs(self._parent._column, pat, repl) ) def slice( @@ -1058,7 +933,7 @@ def slice( """ return self._return_or_inplace( - cpp_slice_strings(self._column, start, stop, step), + libstrings.slice_strings(self._parent._column, 
start, stop, step), ) def isinteger(self) -> ParentType: @@ -1119,7 +994,9 @@ def isinteger(self) -> ParentType: 2 False dtype: bool """ - return self._return_or_inplace(cpp_is_integer(self._column)) + return self._return_or_inplace( + libstrings.is_integer(self._parent._column) + ) def ishex(self) -> ParentType: """ @@ -1158,7 +1035,7 @@ def ishex(self) -> ParentType: 4 True dtype: bool """ - return self._return_or_inplace(str_cast.is_hex(self._column)) + return self._return_or_inplace(str_cast.is_hex(self._parent._column)) def istimestamp(self, format: str) -> ParentType: """ @@ -1181,7 +1058,7 @@ def istimestamp(self, format: str) -> ParentType: dtype: bool """ return self._return_or_inplace( - str_cast.istimestamp(self._column, format) + str_cast.istimestamp(self._parent._column, format) ) def isfloat(self) -> ParentType: @@ -1245,7 +1122,9 @@ def isfloat(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace(cpp_is_float(self._column)) + return self._return_or_inplace( + libstrings.is_float(self._parent._column) + ) def isdecimal(self) -> ParentType: """ @@ -1306,7 +1185,9 @@ def isdecimal(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace(cpp_is_decimal(self._column)) + return self._return_or_inplace( + libstrings.is_decimal(self._parent._column) + ) def isalnum(self) -> ParentType: """ @@ -1375,7 +1256,9 @@ def isalnum(self) -> ParentType: 2 False dtype: bool """ - return self._return_or_inplace(cpp_is_alnum(self._column)) + return self._return_or_inplace( + libstrings.is_alnum(self._parent._column) + ) def isalpha(self) -> ParentType: """ @@ -1431,7 +1314,9 @@ def isalpha(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace(cpp_is_alpha(self._column)) + return self._return_or_inplace( + libstrings.is_alpha(self._parent._column) + ) def isdigit(self) -> ParentType: """ @@ -1493,7 +1378,9 @@ def isdigit(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace(cpp_is_digit(self._column)) + return self._return_or_inplace( + libstrings.is_digit(self._parent._column) + ) def isnumeric(self) -> ParentType: """ @@ -1561,7 +1448,9 @@ def isnumeric(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace(cpp_is_numeric(self._column)) + return self._return_or_inplace( + libstrings.is_numeric(self._parent._column) + ) def isupper(self) -> ParentType: """ @@ -1618,7 +1507,9 @@ def isupper(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace(cpp_is_upper(self._column)) + return self._return_or_inplace( + libstrings.is_upper(self._parent._column) + ) def islower(self) -> ParentType: """ @@ -1675,7 +1566,9 @@ def islower(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace(cpp_is_lower(self._column)) + return self._return_or_inplace( + libstrings.is_lower(self._parent._column) + ) def isipv4(self) -> ParentType: """ @@ -1699,7 +1592,7 @@ def isipv4(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace(str_cast.is_ipv4(self._column)) + return self._return_or_inplace(str_cast.is_ipv4(self._parent._column)) def lower(self) -> ParentType: """ @@ -1738,7 +1631,9 @@ def lower(self) -> ParentType: 3 swapcase dtype: object """ - return self._return_or_inplace(cpp_to_lower(self._column)) + return self._return_or_inplace( + libstrings.to_lower(self._parent._column) + ) def upper(self) -> ParentType: """ @@ -1787,7 +1682,9 @@ def upper(self) -> ParentType: 3 SWAPCASE dtype: object """ - return 
self._return_or_inplace(cpp_to_upper(self._column)) + return self._return_or_inplace( + libstrings.to_upper(self._parent._column) + ) def capitalize(self) -> ParentType: """ @@ -1815,7 +1712,9 @@ def capitalize(self) -> ParentType: 1 Goodbye, friend dtype: object """ - return self._return_or_inplace(cpp_capitalize(self._column)) + return self._return_or_inplace( + libstrings.capitalize(self._parent._column) + ) def swapcase(self) -> ParentType: """ @@ -1860,7 +1759,9 @@ def swapcase(self) -> ParentType: 3 sWaPcAsE dtype: object """ - return self._return_or_inplace(cpp_swapcase(self._column)) + return self._return_or_inplace( + libstrings.swapcase(self._parent._column) + ) def title(self) -> ParentType: """ @@ -1905,7 +1806,7 @@ def title(self) -> ParentType: 3 Swapcase dtype: object """ - return self._return_or_inplace(cpp_title(self._column)) + return self._return_or_inplace(libstrings.title(self._parent._column)) def filter_alphanum( self, repl: str = None, keep: bool = True @@ -1941,7 +1842,9 @@ def filter_alphanum( repl = "" return self._return_or_inplace( - cpp_filter_alphanum(self._column, cudf.Scalar(repl), keep), + libstrings.filter_alphanum( + self._parent._column, cudf.Scalar(repl), keep + ), ) def slice_from( @@ -1984,8 +1887,10 @@ def slice_from( """ return self._return_or_inplace( - cpp_slice_from( - self._column, column.as_column(starts), column.as_column(stops) + libstrings.slice_from( + self._parent._column, + column.as_column(starts), + column.as_column(stops), ), ) @@ -2073,7 +1978,9 @@ def slice_replace( repl = "" return self._return_or_inplace( - cpp_slice_replace(self._column, start, stop, cudf.Scalar(repl)), + libstrings.slice_replace( + self._parent._column, start, stop, cudf.Scalar(repl) + ), ) def insert(self, start: int = 0, repl: str = None) -> ParentType: @@ -2123,7 +2030,7 @@ def insert(self, start: int = 0, repl: str = None) -> ParentType: repl = "" return self._return_or_inplace( - cpp_string_insert(self._column, start, cudf.Scalar(repl)), + libstrings.insert(self._parent._column, start, cudf.Scalar(repl)), ) def get(self, i: int = 0) -> ParentType: @@ -2168,7 +2075,7 @@ def get(self, i: int = 0) -> ParentType: dtype: object """ - return self._return_or_inplace(cpp_string_get(self._column, i)) + return self._return_or_inplace(libstrings.get(self._parent._column, i)) def get_json_object(self, json_path): """ @@ -2221,8 +2128,8 @@ def get_json_object(self, json_path): try: res = self._return_or_inplace( - cpp_get_json_object( - self._column, cudf.Scalar(json_path, "str") + libstrings.get_json_object( + self._parent._column, cudf.Scalar(json_path, "str") ) ) except RuntimeError as e: @@ -2366,18 +2273,22 @@ def split( pat = "" if expand: - if self._column.null_count == len(self._column): - result_table = cudf.core.frame.Frame({0: self._column.copy()}) + if self._parent._column.null_count == len(self._parent._column): + result_table = cudf.core.frame.Frame( + {0: self._parent._column.copy()} + ) else: - result_table = cpp_split( - self._column, cudf.Scalar(pat, "str"), n + result_table = libstrings.split( + self._parent._column, cudf.Scalar(pat, "str"), n ) if len(result_table._data) == 1: - if result_table._data[0].null_count == len(self._column): + if result_table._data[0].null_count == len( + self._parent._column + ): result_table = cudf.core.frame.Frame({}) else: - result_table = cpp_split_record( - self._column, cudf.Scalar(pat, "str"), n + result_table = libstrings.split_record( + self._parent._column, cudf.Scalar(pat, "str"), n ) return 
self._return_or_inplace(result_table, expand=expand) @@ -2521,15 +2432,23 @@ def rsplit( pat = "" if expand: - if self._column.null_count == len(self._column): - result_table = cudf.core.frame.Frame({0: self._column.copy()}) + if self._parent._column.null_count == len(self._parent._column): + result_table = cudf.core.frame.Frame( + {0: self._parent._column.copy()} + ) else: - result_table = cpp_rsplit(self._column, cudf.Scalar(pat), n) + result_table = libstrings.rsplit( + self._parent._column, cudf.Scalar(pat), n + ) if len(result_table._data) == 1: - if result_table._data[0].null_count == len(self._column): + if result_table._data[0].null_count == len( + self._parent._column + ): result_table = cudf.core.frame.Frame({}) else: - result_table = cpp_rsplit_record(self._column, cudf.Scalar(pat), n) + result_table = libstrings.rsplit_record( + self._parent._column, cudf.Scalar(pat), n + ) return self._return_or_inplace(result_table, expand=expand) @@ -2610,7 +2529,8 @@ def partition(self, sep: str = " ", expand: bool = True) -> ParentType: sep = " " return self._return_or_inplace( - cpp_partition(self._column, cudf.Scalar(sep)), expand=expand + libstrings.partition(self._parent._column, cudf.Scalar(sep)), + expand=expand, ) def rpartition(self, sep: str = " ", expand: bool = True) -> ParentType: @@ -2674,7 +2594,8 @@ def rpartition(self, sep: str = " ", expand: bool = True) -> ParentType: sep = " " return self._return_or_inplace( - cpp_rpartition(self._column, cudf.Scalar(sep)), expand=expand + libstrings.rpartition(self._parent._column, cudf.Scalar(sep)), + expand=expand, ) def pad( @@ -2754,14 +2675,14 @@ def pad( raise TypeError(msg) try: - side = PadSide[side.upper()] + side = libstrings.PadSide[side.upper()] except KeyError: raise ValueError( "side has to be either one of {‘left’, ‘right’, ‘both’}" ) return self._return_or_inplace( - cpp_pad(self._column, width, fillchar, side) + libstrings.pad(self._parent._column, width, fillchar, side) ) def zfill(self, width: int) -> ParentType: @@ -2835,7 +2756,9 @@ def zfill(self, width: int) -> ParentType: msg = f"width must be of integer type, not {type(width).__name__}" raise TypeError(msg) - return self._return_or_inplace(cpp_zfill(self._column, width)) + return self._return_or_inplace( + libstrings.zfill(self._parent._column, width) + ) def center(self, width: int, fillchar: str = " ") -> ParentType: """ @@ -2906,7 +2829,7 @@ def center(self, width: int, fillchar: str = " ") -> ParentType: raise TypeError(msg) return self._return_or_inplace( - cpp_center(self._column, width, fillchar) + libstrings.center(self._parent._column, width, fillchar) ) def ljust(self, width: int, fillchar: str = " ") -> ParentType: @@ -2960,7 +2883,7 @@ def ljust(self, width: int, fillchar: str = " ") -> ParentType: raise TypeError(msg) return self._return_or_inplace( - cpp_ljust(self._column, width, fillchar) + libstrings.ljust(self._parent._column, width, fillchar) ) def rjust(self, width: int, fillchar: str = " ") -> ParentType: @@ -3014,7 +2937,7 @@ def rjust(self, width: int, fillchar: str = " ") -> ParentType: raise TypeError(msg) return self._return_or_inplace( - cpp_rjust(self._column, width, fillchar) + libstrings.rjust(self._parent._column, width, fillchar) ) def strip(self, to_strip: str = None) -> ParentType: @@ -3073,7 +2996,7 @@ def strip(self, to_strip: str = None) -> ParentType: to_strip = "" return self._return_or_inplace( - cpp_strip(self._column, cudf.Scalar(to_strip)) + libstrings.strip(self._parent._column, cudf.Scalar(to_strip)) ) def 
lstrip(self, to_strip: str = None) -> ParentType: @@ -3120,7 +3043,7 @@ def lstrip(self, to_strip: str = None) -> ParentType: to_strip = "" return self._return_or_inplace( - cpp_lstrip(self._column, cudf.Scalar(to_strip)) + libstrings.lstrip(self._parent._column, cudf.Scalar(to_strip)) ) def rstrip(self, to_strip: str = None) -> ParentType: @@ -3175,7 +3098,7 @@ def rstrip(self, to_strip: str = None) -> ParentType: to_strip = "" return self._return_or_inplace( - cpp_rstrip(self._column, cudf.Scalar(to_strip)) + libstrings.rstrip(self._parent._column, cudf.Scalar(to_strip)) ) def wrap(self, width: int, **kwargs) -> ParentType: @@ -3270,7 +3193,9 @@ def wrap(self, width: int, **kwargs) -> ParentType: "`break_on_hyphens`=False" ) - return self._return_or_inplace(cpp_wrap(self._column, width)) + return self._return_or_inplace( + libstrings.wrap(self._parent._column, width) + ) def count(self, pat: str, flags: int = 0) -> ParentType: """ @@ -3330,7 +3255,9 @@ def count(self, pat: str, flags: int = 0) -> ParentType: if flags != 0: raise NotImplementedError("`flags` parameter is not yet supported") - return self._return_or_inplace(cpp_count_re(self._column, pat)) + return self._return_or_inplace( + libstrings.count_re(self._parent._column, pat) + ) def findall( self, pat: str, flags: int = 0, expand: bool = True @@ -3399,7 +3326,7 @@ def findall( raise NotImplementedError("`flags` parameter is not yet supported") return self._return_or_inplace( - cpp_findall(self._column, pat), expand=expand + libstrings.findall(self._parent._column, pat), expand=expand ) def isempty(self) -> ParentType: @@ -3422,7 +3349,9 @@ def isempty(self) -> ParentType: 4 False dtype: bool """ - return self._return_or_inplace((self._column == "").fillna(False)) + return self._return_or_inplace( + (self._parent._column == "").fillna(False) + ) def isspace(self) -> ParentType: """ @@ -3478,7 +3407,9 @@ def isspace(self) -> ParentType: 2 False dtype: bool """ - return self._return_or_inplace(cpp_isspace(self._column)) + return self._return_or_inplace( + libstrings.is_space(self._parent._column) + ) def endswith(self, pat: str) -> ParentType: """ @@ -3523,13 +3454,15 @@ def endswith(self, pat: str) -> ParentType: """ if pat is None: result_col = column.column_empty( - len(self._column), dtype="bool", masked=True + len(self._parent._column), dtype="bool", masked=True ) elif is_scalar(pat): - result_col = cpp_endswith(self._column, cudf.Scalar(pat, "str")) + result_col = libstrings.endswith( + self._parent._column, cudf.Scalar(pat, "str") + ) else: - result_col = cpp_endswith_multiple( - self._column, column.as_column(pat, dtype="str") + result_col = libstrings.endswith_multiple( + self._parent._column, column.as_column(pat, dtype="str") ) return self._return_or_inplace(result_col) @@ -3583,13 +3516,15 @@ def startswith(self, pat: Union[str, Sequence]) -> ParentType: """ if pat is None: result_col = column.column_empty( - len(self._column), dtype="bool", masked=True + len(self._parent._column), dtype="bool", masked=True ) elif is_scalar(pat): - result_col = cpp_startswith(self._column, cudf.Scalar(pat, "str")) + result_col = libstrings.startswith( + self._parent._column, cudf.Scalar(pat, "str") + ) else: - result_col = cpp_startswith_multiple( - self._column, column.as_column(pat, dtype="str") + result_col = libstrings.startswith_multiple( + self._parent._column, column.as_column(pat, dtype="str") ) return self._return_or_inplace(result_col) @@ -3643,8 +3578,8 @@ def find(self, sub: str, start: int = 0, end: int = None) -> 
ParentType: if end is None: end = -1 - result_col = cpp_find( - self._column, cudf.Scalar(sub, "str"), start, end + result_col = libstrings.find( + self._parent._column, cudf.Scalar(sub, "str"), start, end ) return self._return_or_inplace(result_col) @@ -3702,8 +3637,8 @@ def rfind(self, sub: str, start: int = 0, end: int = None) -> ParentType: if end is None: end = -1 - result_col = cpp_rfind( - self._column, cudf.Scalar(sub, "str"), start, end + result_col = libstrings.rfind( + self._parent._column, cudf.Scalar(sub, "str"), start, end ) return self._return_or_inplace(result_col) @@ -3757,8 +3692,8 @@ def index(self, sub: str, start: int = 0, end: int = None) -> ParentType: if end is None: end = -1 - result_col = cpp_find( - self._column, cudf.Scalar(sub, "str"), start, end + result_col = libstrings.find( + self._parent._column, cudf.Scalar(sub, "str"), start, end ) result = self._return_or_inplace(result_col) @@ -3817,8 +3752,8 @@ def rindex(self, sub: str, start: int = 0, end: int = None) -> ParentType: if end is None: end = -1 - result_col = cpp_rfind( - self._column, cudf.Scalar(sub, "str"), start, end + result_col = libstrings.rfind( + self._parent._column, cudf.Scalar(sub, "str"), start, end ) result = self._return_or_inplace(result_col) @@ -3871,7 +3806,9 @@ def match(self, pat: str, case: bool = True, flags: int = 0) -> ParentType: if flags != 0: raise NotImplementedError("`flags` parameter is not yet supported") - return self._return_or_inplace(cpp_match_re(self._column, pat)) + return self._return_or_inplace( + libstrings.match_re(self._parent._column, pat) + ) def url_decode(self) -> ParentType: """ @@ -3901,7 +3838,9 @@ def url_decode(self) -> ParentType: dtype: object """ - return self._return_or_inplace(cpp_url_decode(self._column)) + return self._return_or_inplace( + libstrings.url_decode(self._parent._column) + ) def url_encode(self) -> ParentType: """ @@ -3932,7 +3871,9 @@ def url_encode(self) -> ParentType: 1 https%3A%2F%2Fmedium.com%2Frapids-ai dtype: object """ - return self._return_or_inplace(cpp_url_encode(self._column)) + return self._return_or_inplace( + libstrings.url_encode(self._parent._column) + ) def code_points(self) -> ParentType: """ @@ -3966,7 +3907,7 @@ def code_points(self) -> ParentType: dtype: int32 """ - new_col = cpp_code_points(self._column) + new_col = libstrings.code_points(self._parent._column) if isinstance(self._parent, cudf.Series): return cudf.Series(new_col, name=self._parent.name) elif isinstance(self._parent, cudf.Index): @@ -4015,7 +3956,9 @@ def translate(self, table: dict) -> ParentType: dtype: object """ table = str.maketrans(table) - return self._return_or_inplace(cpp_translate(self._column, table)) + return self._return_or_inplace( + libstrings.translate(self._parent._column, table) + ) def filter_characters( self, table: dict, keep: bool = True, repl: str = None @@ -4065,8 +4008,8 @@ def filter_characters( repl = "" table = str.maketrans(table) return self._return_or_inplace( - cpp_filter_characters( - self._column, table, keep, cudf.Scalar(repl) + libstrings.filter_characters( + self._parent._column, table, keep, cudf.Scalar(repl) ), ) @@ -4088,7 +4031,9 @@ def normalize_spaces(self) -> ParentType: 1 test string dtype: object """ - return self._return_or_inplace(cpp_normalize_spaces(self._column)) + return self._return_or_inplace( + libstrings.normalize_spaces(self._parent._column) + ) def normalize_characters(self, do_lower: bool = True) -> ParentType: """ @@ -4136,7 +4081,7 @@ def normalize_characters(self, do_lower: bool = True) 
-> ParentType: dtype: object """ return self._return_or_inplace( - cpp_normalize_characters(self._column, do_lower) + libstrings.normalize_characters(self._parent._column, do_lower) ) def tokenize(self, delimiter: str = " ") -> ParentType: @@ -4172,12 +4117,12 @@ def tokenize(self, delimiter: str = " ") -> ParentType: if isinstance(delimiter, Column): return self._return_or_inplace( - cpp_tokenize_column(self._column, delimiter), + libstrings._tokenize_column(self._parent._column, delimiter), retain_index=False, ) elif isinstance(delimiter, cudf.Scalar): return self._return_or_inplace( - cpp_tokenize_scalar(self._column, delimiter), + libstrings._tokenize_scalar(self._parent._column, delimiter), retain_index=False, ) else: @@ -4219,7 +4164,9 @@ def detokenize( """ separator = _massage_string_arg(separator, "separator") return self._return_or_inplace( - cpp_detokenize(self._column, indices._column, separator), + libstrings.detokenize( + self._parent._column, indices._column, separator + ), retain_index=False, ) @@ -4270,7 +4217,7 @@ def character_tokenize(self) -> ParentType: 29 . dtype: object """ - result_col = cpp_character_tokenize(self._column) + result_col = libstrings.character_tokenize(self._parent._column) if isinstance(self._parent, cudf.Series): return cudf.Series(result_col, name=self._parent.name) elif isinstance(self._parent, cudf.Index): @@ -4307,12 +4254,16 @@ def token_count(self, delimiter: str = " ") -> ParentType: delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True) if isinstance(delimiter, Column): return self._return_or_inplace( - cpp_count_tokens_column(self._column, delimiter) + libstrings._count_tokens_column( + self._parent._column, delimiter + ) ) elif isinstance(delimiter, cudf.Scalar): return self._return_or_inplace( - cpp_count_tokens_scalar(self._column, delimiter) + libstrings._count_tokens_scalar( + self._parent._column, delimiter + ) ) else: raise TypeError( @@ -4354,7 +4305,8 @@ def ngrams(self, n: int = 2, separator: str = "_") -> ParentType: """ separator = _massage_string_arg(separator, "separator") return self._return_or_inplace( - cpp_generate_ngrams(self._column, n, separator), retain_index=False + libstrings.generate_ngrams(self._parent._column, n, separator), + retain_index=False, ) def character_ngrams(self, n: int = 2) -> ParentType: @@ -4390,7 +4342,8 @@ def character_ngrams(self, n: int = 2) -> ParentType: dtype: object """ return self._return_or_inplace( - cpp_generate_character_ngrams(self._column, n), retain_index=False + libstrings.generate_character_ngrams(self._parent._column, n), + retain_index=False, ) def ngrams_tokenize( @@ -4427,7 +4380,9 @@ def ngrams_tokenize( delimiter = _massage_string_arg(delimiter, "delimiter") separator = _massage_string_arg(separator, "separator") return self._return_or_inplace( - cpp_ngrams_tokenize(self._column, n, delimiter, separator), + libstrings.ngrams_tokenize( + self._parent._column, n, delimiter, separator + ), retain_index=False, ) @@ -4506,8 +4461,8 @@ def replace_tokens( ) return self._return_or_inplace( - cpp_replace_tokens( - self._column, + libstrings.replace_tokens( + self._parent._column, targets_column, replacements_column, cudf.Scalar(delimiter, dtype="str"), @@ -4577,8 +4532,8 @@ def filter_tokens( ) return self._return_or_inplace( - cpp_filter_tokens( - self._column, + libstrings.filter_tokens( + self._parent._column, min_token_length, cudf.Scalar(replacement, dtype="str"), cudf.Scalar(delimiter, dtype="str"), @@ -4673,8 +4628,8 @@ def subword_tokenize( array([[0, 0, 
2], [1, 0, 1]], dtype=uint32) """ - tokens, masks, metadata = cpp_subword_tokenize_vocab_file( - self._column, + tokens, masks, metadata = libstrings.subword_tokenize_vocab_file( + self._parent._column, hash_file, max_length, stride, @@ -4708,7 +4663,7 @@ def porter_stemmer_measure(self) -> ParentType: dtype: int32 """ return self._return_or_inplace( - cpp_porter_stemmer_measure(self._column) + libstrings.porter_stemmer_measure(self._parent._column) ) def is_consonant(self, position) -> ParentType: @@ -4742,17 +4697,17 @@ def is_consonant(self, position) -> ParentType: 1 False dtype: bool """ - ltype = LetterType.CONSONANT + ltype = libstrings.LetterType.CONSONANT if can_convert_to_column(position): return self._return_or_inplace( - cpp_is_letter_multi( - self._column, ltype, column.as_column(position) + libstrings.is_letter_multi( + self._parent._column, ltype, column.as_column(position) ), ) return self._return_or_inplace( - cpp_is_letter(self._column, ltype, position) + libstrings.is_letter(self._parent._column, ltype, position) ) def is_vowel(self, position) -> ParentType: @@ -4786,17 +4741,17 @@ def is_vowel(self, position) -> ParentType: 1 True dtype: bool """ - ltype = LetterType.VOWEL + ltype = libstrings.LetterType.VOWEL if can_convert_to_column(position): return self._return_or_inplace( - cpp_is_letter_multi( - self._column, ltype, column.as_column(position) + libstrings.is_letter_multi( + self._parent._column, ltype, column.as_column(position) ), ) return self._return_or_inplace( - cpp_is_letter(self._column, ltype, position) + libstrings.is_letter(self._parent._column, ltype, position) ) def edit_distance(self, targets) -> ParentType: @@ -4845,7 +4800,7 @@ def edit_distance(self, targets) -> ParentType: ) return self._return_or_inplace( - cpp_edit_distance(self._column, targets_column) + libstrings.edit_distance(self._parent._column, targets_column) ) @@ -5034,7 +4989,11 @@ def sum( skipna=skipna, min_count=min_count ) if isinstance(result_col, type(self)): - return result_col.str().cat() + return libstrings.join( + result_col, + sep=cudf.Scalar(""), + na_rep=cudf.Scalar(None, "str"), + )[0] else: return result_col @@ -5055,10 +5014,7 @@ def set_base_children(self, value: Tuple["column.ColumnBase", ...]): super().set_base_children(value) def __contains__(self, item: ScalarLike) -> bool: - return True in self.str().contains(f"^{item}$") - - def str(self, parent: ParentType = None) -> StringMethods: - return StringMethods(self, parent=parent) + return True in libstrings.contains_re(self, f"^{item}$") def as_numerical_column( self, dtype: Dtype @@ -5066,13 +5022,13 @@ def as_numerical_column( out_dtype = np.dtype(dtype) if out_dtype.kind in {"i", "u"}: - if not cpp_is_integer(self).all(): + if not libstrings.is_integer(self).all(): raise ValueError( "Could not convert strings to integer " "type due to presence of non-integer values." ) elif out_dtype.kind == "f": - if not cpp_is_float(self).all(): + if not libstrings.is_float(self).all(): raise ValueError( "Could not convert strings to float " "type due to presence of non-floating values." 
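The hunks above swap the function-by-function cpp_* imports for the single libstrings namespace, so the same Cython helpers are now reached as attributes of cudf._lib.strings. The short sketch below is illustrative only and not part of the patch; it assumes a build of cudf with this refactor applied, and the sample values are made up.

    # Sketch of the namespace-style calls used in as_numerical_column above.
    import cudf
    from cudf._lib import strings as libstrings

    col = cudf.Series(["1", "2", "x"])._column   # a StringColumn
    libstrings.is_integer(col)                   # boolean column: True, True, False
    libstrings.is_float(col)                     # boolean column: True, True, False
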
@@ -5136,7 +5092,7 @@ def as_timedelta_column( def as_decimal_column( self, dtype: Dtype, **kwargs ) -> "cudf.core.column.DecimalColumn": - return cpp_to_decimal(self, dtype) + return libstrings.to_decimal(self, dtype) def as_string_column(self, dtype: Dtype, format=None) -> StringColumn: return self @@ -5243,9 +5199,12 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool: if self.dtype == to_dtype: return True - elif to_dtype.kind in {"i", "u"} and not cpp_is_integer(self).all(): + elif ( + to_dtype.kind in {"i", "u"} + and not libstrings.is_integer(self).all() + ): return False - elif to_dtype.kind == "f" and not cpp_is_float(self).all(): + elif to_dtype.kind == "f" and not libstrings.is_float(self).all(): return False else: return True @@ -5305,7 +5264,7 @@ def fillna( return super().fillna(method=method) def _find_first_and_last(self, value: ScalarLike) -> Tuple[int, int]: - found_indices = self.str().contains(f"^{value}$") + found_indices = libstrings.contains(self, f"^{value}$") found_indices = libcudf.unary.cast(found_indices, dtype=np.int32) first = column.as_column(found_indices).find_first_value(1) last = column.as_column(found_indices).find_last_value(1) @@ -5349,10 +5308,17 @@ def binary_operator( lhs, rhs = rhs, lhs if isinstance(rhs, (StringColumn, str, cudf.Scalar)): if op == "add": - return cast("column.ColumnBase", lhs.str().cat(others=rhs)) + return cast( + "column.ColumnBase", + libstrings.concatenate( + cudf.DataFrame({0: lhs, 1: rhs}), + sep=cudf.Scalar(""), + na_rep=cudf.Scalar(None, "str"), + ), + ) elif op in ("eq", "ne", "gt", "lt", "ge", "le", "NULL_EQUALS"): return libcudf.binaryop.binaryop( - lhs=self, rhs=rhs, op=op, dtype="bool" + lhs=lhs, rhs=rhs, op=op, dtype="bool" ) raise TypeError( diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index c2b820d0b43..6c47d94d6e7 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -84,9 +84,6 @@ def copy(self, deep=True): result = result._rename_fields(self.dtype.fields.keys()) return result - def struct(self, parent=None): - return StructMethods(self, parent=parent) - def _rename_fields(self, names): """ Return a StructColumn with the same field values as this StructColumn, @@ -117,12 +114,12 @@ class StructMethods(ColumnMethodsMixin): Struct methods for Series """ - def __init__(self, column, parent=None): - if not is_struct_dtype(column.dtype): + def __init__(self, parent=None): + if not is_struct_dtype(parent.dtype): raise AttributeError( "Can only use .struct accessor with a 'struct' dtype" ) - super().__init__(column=column, parent=parent) + super().__init__(parent=parent) def field(self, key): """ @@ -151,9 +148,9 @@ def field(self, key): 1 3 dtype: int64 """ - fields = list(self._column.dtype.fields.keys()) + fields = list(self._parent._column.dtype.fields.keys()) if key in fields: pos = fields.index(key) - return self._return_or_inplace(self._column.children[pos]) + return self._return_or_inplace(self._parent._column.children[pos]) else: - return self._return_or_inplace(self._column.children[key]) + return self._return_or_inplace(self._parent._column.children[key]) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index f59954aaf08..3d44d137754 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -4037,7 +4037,7 @@ def _find_common_dtypes_and_categories(non_null_columns, dtypes): ): # Combine and de-dupe the categories categories[idx] = ( - 
cudf.concat([col.cat().categories for col in cols]) + cudf.concat([col.categories for col in cols]) .to_series() .drop_duplicates(ignore_index=True) ._column @@ -4079,12 +4079,7 @@ def _cast_cols_to_common_dtypes(col_idxs, list_of_columns, dtypes, categories): if idx in categories: cols[idx] = ( cols[idx] - .cat() - ._set_categories( - cols[idx].cat().categories, - categories[idx], - is_unique=True, - ) + ._set_categories(categories[idx], is_unique=True,) .codes ) cols[idx] = cols[idx].astype(dtype) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 3b977a8ced6..bcc77f17b5c 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2474,17 +2474,15 @@ def __new__( dtype = None if categories is not None: - data.cat().set_categories( - categories, ordered=ordered, inplace=True - ) + data.set_categories(categories, ordered=ordered, inplace=True) elif isinstance(dtype, (pd.CategoricalDtype, cudf.CategoricalDtype)): - data.cat().set_categories( + data.set_categories( dtype.categories, ordered=ordered, inplace=True ) elif ordered is True and data.ordered is False: - data.cat().as_ordered(inplace=True) + data = data.as_ordered() elif ordered is False and data.ordered is True: - data.cat().as_unordered(inplace=True) + data = data.as_unordered() out._initialize(data, **kwargs) @@ -2495,14 +2493,14 @@ def codes(self): """ The category codes of this categorical. """ - return self._values.cat().codes + return as_index(self._values.codes) @property def categories(self): """ The categories of this categorical. """ - return self._values.cat().categories + return cudf.Series(self._values.categories) def interval_range( @@ -2782,7 +2780,7 @@ def __repr__(self): @copy_docstring(StringMethods.__init__) # type: ignore @property def str(self): - return StringMethods(column=self._values, parent=self) + return StringMethods(parent=self) def _clean_nulls_from_index(self): """ diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py index 5e15ddfc359..78fc7a863d6 100644 --- a/python/cudf/cudf/core/join/_join_helpers.py +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -188,7 +188,9 @@ def _match_categorical_dtypes_both( if how == "inner": # cast to category types -- we must cast them back later return _match_join_keys( - lcol.cat()._decategorize(), rcol.cat()._decategorize(), how, + lcol._get_decategorized_column(), + rcol._get_decategorized_column(), + how, ) elif how in {"left", "leftanti", "leftsemi"}: # always cast to left type diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index d812214caf8..f57c3bc931b 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2329,22 +2329,22 @@ def __invert__(self): @copy_docstring(CategoricalAccessor.__init__) # type: ignore @property def cat(self): - return CategoricalAccessor(column=self._column, parent=self) + return CategoricalAccessor(parent=self) @copy_docstring(StringMethods.__init__) # type: ignore @property def str(self): - return StringMethods(column=self._column, parent=self) + return StringMethods(parent=self) @copy_docstring(ListMethods.__init__) # type: ignore @property def list(self): - return ListMethods(column=self._column, parent=self) + return ListMethods(parent=self) @copy_docstring(StructMethods.__init__) # type: ignore @property def struct(self): - return StructMethods(column=self._column, parent=self) + return StructMethods(parent=self) @property def dtype(self): diff --git 
a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index 74f7d16e4ff..180ab9ad6b8 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -6,20 +6,20 @@ import pandas as pd import cudf +from cudf import _lib as libcudf +from cudf._lib import strings as libstrings from cudf.core.column import as_column from cudf.utils.dtypes import ( can_convert_to_column, - is_numerical_dtype, - is_datetime_dtype, - is_timedelta_dtype, is_categorical_dtype, - is_string_dtype, + is_datetime_dtype, is_list_dtype, + is_numerical_dtype, + is_string_dtype, is_struct_dtype, + is_timedelta_dtype, ) -import cudf._lib as libcudf - def to_numeric(arg, errors="raise", downcast=None): """ @@ -195,13 +195,13 @@ def _convert_str_col(col, errors, _downcast=None): if not is_string_dtype(col): raise TypeError("col must be string dtype.") - is_integer = col.str().isinteger() + is_integer = libstrings.isinteger(col) if is_integer.all(): return col.as_numerical_column(dtype=np.dtype("i8")) col = _proc_inf_empty_strings(col) - is_float = col.str().isfloat() + is_float = libstrings.isfloat(col) if is_float.all(): if _downcast in {"unsigned", "signed", "integer"}: warnings.warn( @@ -226,7 +226,7 @@ def _convert_str_col(col, errors, _downcast=None): def _proc_inf_empty_strings(col): """Handles empty and infinity strings """ - col = col.str().lower() + col = libstrings.lower(col) col = _proc_empty_strings(col) col = _proc_inf_strings(col) return col @@ -244,7 +244,7 @@ def _proc_inf_strings(col): """Convert "inf/infinity" strings into "Inf", the native string representing infinity in libcudf """ - col = col.str().replace( - ["+", "inf", "inity"], ["", "Inf", ""], regex=False, + col = libstrings.replace.replace_muli( + col, as_column(["+", "inf", "inity"]), as_column(["", "Inf", ""]), ) return col diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index e2c7ca7dca1..582c5324b8f 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -177,7 +177,7 @@ def to_csv( df = df.copy(deep=False) for col_name, col in df._data.items(): if isinstance(col, cudf.core.column.CategoricalColumn): - df._data[col_name] = col.astype(col.cat().categories.dtype) + df._data[col_name] = col.astype(col.categories.dtype) if isinstance(df.index, cudf.CategoricalIndex): df.index = df.index.astype(df.index.categories.dtype) From 7d9fcc5ec5214e8ac3973f825a633b30c05c8d46 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 20 May 2021 15:29:43 -0400 Subject: [PATCH 05/14] More refactoring --- python/cudf/cudf/_lib/transpose.pyx | 7 +++---- python/cudf/cudf/core/column/__init__.py | 5 +++-- python/cudf/cudf/core/column/categorical.py | 6 ++---- python/cudf/cudf/core/column/column.py | 4 ++-- python/cudf/cudf/core/column/string.py | 2 +- python/cudf/cudf/core/frame.py | 6 +++--- python/cudf/cudf/core/index.py | 12 +++++------- python/cudf/cudf/core/series.py | 4 ++-- python/cudf/cudf/core/tools/numeric.py | 8 ++++---- python/cudf/cudf/testing/testing.py | 4 ++-- python/cudf/cudf/tests/test_dataframe.py | 4 ++-- 11 files changed, 29 insertions(+), 33 deletions(-) diff --git a/python/cudf/cudf/_lib/transpose.pyx b/python/cudf/cudf/_lib/transpose.pyx index d2b053789cd..708f5013cd3 100644 --- a/python/cudf/cudf/_lib/transpose.pyx +++ b/python/cudf/cudf/_lib/transpose.pyx @@ -36,11 +36,10 @@ def transpose(Table source): if is_categorical_dtype(dtype): if any(not is_categorical_dtype(c.dtype) for c in source._columns): raise ValueError('Columns must all 
have the same dtype') - cats = list(c.cat().categories for c in source._columns) - cats = cudf.Series(cudf.concat(cats)).drop_duplicates()._column + cats = list(c.categories for c in source._columns) + cats = cudf.core.column.concat_columns(cats).unique() source = Table(index=source._index, data=[ - (name, col.cat()._set_categories( - col.cat().categories, cats, is_unique=True).codes) + (name, col._set_categories(cats, is_unique=True).codes) for name, col in source._data.items() ]) elif dtype.kind in 'OU': diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py index 32cb557548f..841829302a1 100644 --- a/python/cudf/cudf/core/column/__init__.py +++ b/python/cudf/cudf/core/column/__init__.py @@ -10,15 +10,16 @@ column_empty, column_empty_like, column_empty_like_same_mask, + concat_columns, deserialize_columns, full, serialize_columns, ) from cudf.core.column.datetime import DatetimeColumn # noqa: F401 +from cudf.core.column.decimal import DecimalColumn # noqa: F401 +from cudf.core.column.interval import IntervalColumn # noqa: F401 from cudf.core.column.lists import ListColumn # noqa: F401 from cudf.core.column.numerical import NumericalColumn # noqa: F401 from cudf.core.column.string import StringColumn # noqa: F401 from cudf.core.column.struct import StructColumn # noqa: F401 from cudf.core.column.timedelta import TimeDeltaColumn # noqa: F401 -from cudf.core.column.interval import IntervalColumn # noqa: F401 -from cudf.core.column.decimal import DecimalColumn # noqa: F401 diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index aed4425093f..9b01e80a805 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1363,7 +1363,7 @@ def view(self, dtype: Dtype) -> ColumnBase: @staticmethod def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: # TODO: This function currently assumes it is being called from - # column._concat_columns, at least to the extent that all the + # column.concat_columns, at least to the extent that all the # preprocessing in that function has already been done. That should be # improved as the concatenation API is solidified. 
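This patch also promotes the private _concat_columns helper to a public concat_columns, re-exported from cudf.core.column, which is what the updated TODO comment above now points at. The sketch below is illustrative rather than part of the patch; it assumes the refactored build and uses made-up values.

    # Sketch of the renamed public helper.
    from cudf.core.column import as_column, concat_columns

    left = as_column(["a", "b"])
    right = as_column(["c"])
    merged = concat_columns([left, right])   # one column containing a, b, c
    assert len(merged) == 3
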
@@ -1371,9 +1371,7 @@ def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: head = next((obj for obj in objs if obj.valid_count), objs[0]) # Combine and de-dupe the categories - cats = ( - cudf.concat([o.categories for o in objs]).drop_duplicates()._column - ) + cats = column.concat_columns([o.categories for o in objs]).unique() objs = [o._set_categories(cats, is_unique=True) for o in objs] codes = [o.codes for o in objs] diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 20f302f7e59..815a895fba2 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -688,7 +688,7 @@ def find_last_value(self, value: ScalarLike, closest: bool = False) -> int: return indices[-1] def append(self, other: ColumnBase) -> ColumnBase: - return _concat_columns([self, as_column(other)]) + return concat_columns([self, as_column(other)]) def quantile( self, @@ -2230,7 +2230,7 @@ def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: return ColumnBase.from_scalar(cudf.Scalar(fill_value, dtype), size) -def _concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: +def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: """Concatenate a sequence of columns.""" if len(objs) == 0: dtype = pd.api.types.pandas_dtype(None) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 36a7e159dc3..c72268d7491 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5264,7 +5264,7 @@ def fillna( return super().fillna(method=method) def _find_first_and_last(self, value: ScalarLike) -> Tuple[int, int]: - found_indices = libstrings.contains(self, f"^{value}$") + found_indices = libstrings.contains_re(self, f"^{value}$") found_indices = libcudf.unary.cast(found_indices, dtype=np.int32) first = column.as_column(found_indices).find_first_value(1) last = column.as_column(found_indices).find_last_value(1) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 3d44d137754..53cbc2d80fb 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -23,6 +23,7 @@ as_column, build_categorical_column, column_empty, + concat_columns, ) from cudf.core.join import merge from cudf.utils.dtypes import ( @@ -37,7 +38,7 @@ T = TypeVar("T", bound="Frame") if TYPE_CHECKING: - from cudf.core.column_accessor import ColumnAccessor + from cudf.core.columnn_accessor import ColumnAccessor class Frame(libcudf.table.Table): @@ -4037,8 +4038,7 @@ def _find_common_dtypes_and_categories(non_null_columns, dtypes): ): # Combine and de-dupe the categories categories[idx] = ( - cudf.concat([col.categories for col in cols]) - .to_series() + cudf.Series(concat_columns([col.categories for col in cols])) .drop_duplicates(ignore_index=True) ._column ) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index bcc77f17b5c..735fa305b53 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -27,7 +27,7 @@ arange, column, ) -from cudf.core.column.column import _concat_columns +from cudf.core.column.column import concat_columns from cudf.core.column.string import StringMethods as StringMethods from cudf.core.dtypes import IntervalDtype from cudf.core.frame import SingleColumnFrame @@ -639,7 +639,7 @@ def sum(self): @classmethod def _concat(cls, objs): - data = _concat_columns([o._values for o in objs]) + data = 
concat_columns([o._values for o in objs]) names = {obj.name for obj in objs} if len(names) == 1: [name] = names @@ -2474,11 +2474,9 @@ def __new__( dtype = None if categories is not None: - data.set_categories(categories, ordered=ordered, inplace=True) + data = data.set_categories(categories, ordered=ordered) elif isinstance(dtype, (pd.CategoricalDtype, cudf.CategoricalDtype)): - data.set_categories( - dtype.categories, ordered=ordered, inplace=True - ) + data = data.set_categories(dtype.categories, ordered=ordered) elif ordered is True and data.ordered is False: data = data.as_ordered() elif ordered is False and data.ordered is True: @@ -2500,7 +2498,7 @@ def categories(self): """ The categories of this categorical. """ - return cudf.Series(self._values.categories) + return as_index(self._values.categories) def interval_range( diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index f57c3bc931b..205a5370ee3 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -32,7 +32,7 @@ from cudf.core.column.categorical import ( CategoricalAccessor as CategoricalAccessor, ) -from cudf.core.column.column import _concat_columns +from cudf.core.column.column import concat_columns from cudf.core.column.lists import ListMethods from cudf.core.column.string import StringMethods from cudf.core.column.struct import StructMethods @@ -2407,7 +2407,7 @@ def _concat(cls, objs, axis=0, index=True): else: objs = numeric_normalize_types(*objs) - col = _concat_columns([o._column for o in objs]) + col = concat_columns([o._column for o in objs]) if isinstance(col, cudf.core.column.DecimalColumn): col = objs[0]._column._copy_type_metadata(col) diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index 180ab9ad6b8..068ae46b69c 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -195,13 +195,13 @@ def _convert_str_col(col, errors, _downcast=None): if not is_string_dtype(col): raise TypeError("col must be string dtype.") - is_integer = libstrings.isinteger(col) + is_integer = libstrings.is_integer(col) if is_integer.all(): return col.as_numerical_column(dtype=np.dtype("i8")) col = _proc_inf_empty_strings(col) - is_float = libstrings.isfloat(col) + is_float = libstrings.is_float(col) if is_float.all(): if _downcast in {"unsigned", "signed", "integer"}: warnings.warn( @@ -226,7 +226,7 @@ def _convert_str_col(col, errors, _downcast=None): def _proc_inf_empty_strings(col): """Handles empty and infinity strings """ - col = libstrings.lower(col) + col = libstrings.to_lower(col) col = _proc_empty_strings(col) col = _proc_inf_strings(col) return col @@ -244,7 +244,7 @@ def _proc_inf_strings(col): """Convert "inf/infinity" strings into "Inf", the native string representing infinity in libcudf """ - col = libstrings.replace.replace_muli( + col = libstrings.replace_multi( col, as_column(["+", "inf", "inity"]), as_column(["", "Inf", ""]), ) return col diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index bacab24a6f3..96a4ea31986 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -162,8 +162,8 @@ def assert_column_equal( if check_exact and check_categorical: if is_categorical_dtype(left) and is_categorical_dtype(right): - left_cat = left.cat().categories - right_cat = right.cat().categories + left_cat = left.categories + right_cat = right.categories if check_category_order: assert_index_equal( diff --git 
a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index e5e36ba7e21..a5ae5154ec7 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -5206,8 +5206,8 @@ def test_memory_usage_cat(): gdf = cudf.from_pandas(df) expected = ( - gdf.B._column.cat().categories.__sizeof__() - + gdf.B._column.cat().codes.__sizeof__() + gdf.B._column.categories.__sizeof__() + + gdf.B._column.codes.__sizeof__() ) # Check cat column From 8911feaa17f676219ac45d800a5e12d40d361d1f Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 20 May 2021 17:49:19 -0400 Subject: [PATCH 06/14] More refactoring --- python/cudf/cudf/_typing.py | 1 + python/cudf/cudf/core/column/__init__.py | 8 +- python/cudf/cudf/core/column/categorical.py | 60 ++- python/cudf/cudf/core/column/lists.py | 24 +- python/cudf/cudf/core/column/methods.py | 3 +- python/cudf/cudf/core/column/string.py | 470 +++++++++----------- python/cudf/cudf/core/column/struct.py | 6 +- 7 files changed, 261 insertions(+), 311 deletions(-) diff --git a/python/cudf/cudf/_typing.py b/python/cudf/cudf/_typing.py index 034b18ec9e0..2025fd1ecf8 100644 --- a/python/cudf/cudf/_typing.py +++ b/python/cudf/cudf/_typing.py @@ -28,3 +28,4 @@ BinaryOperand = Union["cudf.Scalar", "cudf.core.column.ColumnBase"] DataFrameOrSeries = Union["cudf.Series", "cudf.DataFrame"] +SeriesOrIndex = Union["cudf.Series", "cudf.Index"] diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py index 841829302a1..1f35da4c134 100644 --- a/python/cudf/cudf/core/column/__init__.py +++ b/python/cudf/cudf/core/column/__init__.py @@ -1,3 +1,7 @@ +""" +isort: skip_file +""" + # Copyright (c) 2020-2021, NVIDIA CORPORATION. from cudf.core.column.categorical import CategoricalColumn @@ -16,10 +20,10 @@ serialize_columns, ) from cudf.core.column.datetime import DatetimeColumn # noqa: F401 -from cudf.core.column.decimal import DecimalColumn # noqa: F401 -from cudf.core.column.interval import IntervalColumn # noqa: F401 from cudf.core.column.lists import ListColumn # noqa: F401 from cudf.core.column.numerical import NumericalColumn # noqa: F401 from cudf.core.column.string import StringColumn # noqa: F401 from cudf.core.column.struct import StructColumn # noqa: F401 from cudf.core.column.timedelta import TimeDeltaColumn # noqa: F401 +from cudf.core.column.decimal import DecimalColumn # noqa: F401 +from cudf.core.column.interval import IntervalColumn # noqa: F401 diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 9b01e80a805..6b32bf908ef 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -12,7 +12,6 @@ Optional, Sequence, Tuple, - Union, cast, ) @@ -38,6 +37,7 @@ ) if TYPE_CHECKING: + from cudf._typing import SeriesOrIndex from cudf.core.column import ( ColumnBase, DatetimeColumn, @@ -47,13 +47,10 @@ ) -ParentType = Union["cudf.Series", "cudf.Index"] - - class CategoricalAccessor(ColumnMethodsMixin): _column: CategoricalColumn - def __init__(self, parent: ParentType): + def __init__(self, parent: SeriesOrIndex): """ Accessor object for categorical properties of the Series values. Be aware that assigning to `categories` is a inplace operation, @@ -118,7 +115,7 @@ def categories(self) -> "cudf.Index": """ The categories of this categorical. 
""" - return cudf.core.index.as_index(self._parent._column.categories) + return cudf.core.index.as_index(self._column.categories) @property def codes(self) -> "cudf.Series": @@ -130,16 +127,16 @@ def codes(self) -> "cudf.Series": if isinstance(self._parent, cudf.Series) else None ) - return cudf.Series(self._parent._column.codes, index=index) + return cudf.Series(self._column.codes, index=index) @property def ordered(self) -> Optional[bool]: """ Whether the categories have an ordered relationship. """ - return self._parent._column.ordered + return self._column.ordered - def as_ordered(self, inplace: bool = False) -> Optional[ParentType]: + def as_ordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: """ Set the Categorical to be ordered. @@ -193,10 +190,10 @@ def as_ordered(self, inplace: bool = False) -> Optional[ParentType]: Categories (3, int64): [1 < 2 < 10] """ return self._return_or_inplace( - self._parent._column.as_ordered(), inplace=inplace + self._column.as_ordered(), inplace=inplace ) - def as_unordered(self, inplace: bool = False) -> Optional[ParentType]: + def as_unordered(self, inplace: bool = False) -> Optional[SeriesOrIndex]: """ Set the Categorical to be unordered. @@ -261,12 +258,12 @@ def as_unordered(self, inplace: bool = False) -> Optional[ParentType]: Categories (3, int64): [1, 2, 10] """ return self._return_or_inplace( - self._parent._column.as_unordered(), inplace=inplace + self._column.as_unordered(), inplace=inplace ) def add_categories( self, new_categories: Any, inplace: bool = False - ) -> Optional[ParentType]: + ) -> Optional[SeriesOrIndex]: """ Add new categories. @@ -318,7 +315,7 @@ def add_categories( Categories (5, int64): [1, 2, 0, 3, 4] """ - old_categories = self._parent._column.categories + old_categories = self._column.categories new_categories = column.as_column( new_categories, dtype=old_categories.dtype if len(new_categories) == 0 else None, @@ -343,7 +340,7 @@ def add_categories( raise ValueError("new categories must not include old categories") new_categories = old_categories.append(new_categories) - out_col = self._parent._column + out_col = self._column if not out_col._categories_equal(new_categories): out_col = out_col._set_categories(new_categories) @@ -351,7 +348,7 @@ def add_categories( def remove_categories( self, removals: Any, inplace: bool = False, - ) -> Optional[ParentType]: + ) -> Optional[SeriesOrIndex]: """ Remove the specified categories. @@ -433,7 +430,7 @@ def remove_categories( raise ValueError(f"removals must all be in old categories: {vals}") new_categories = cats[~cats.isin(removals)]._column - out_col = self._parent._column + out_col = self._column if not out_col._categories_equal(new_categories): out_col = out_col._set_categories(new_categories) @@ -445,7 +442,7 @@ def set_categories( ordered: bool = False, rename: bool = False, inplace: bool = False, - ) -> Optional[ParentType]: + ) -> Optional[SeriesOrIndex]: """ Set the categories to the specified new_categories. @@ -538,7 +535,7 @@ def set_categories( # categories. 
if rename: # enforce same length - if len(new_categories) != len(self._parent._column.categories): + if len(new_categories) != len(self._column.categories): raise ValueError( "new_categories must have the same " "number of items as old categories" @@ -546,19 +543,19 @@ def set_categories( out_col = column.build_categorical_column( categories=new_categories, - codes=self._parent._column.base_children[0], - mask=self._parent._column.base_mask, - size=self._parent._column.size, - offset=self._parent._column.offset, + codes=self._column.base_children[0], + mask=self._column.base_mask, + size=self._column.size, + offset=self._column.offset, ordered=ordered, ) else: - out_col = self._parent._column + out_col = self._column if not (type(out_col.categories) is type(new_categories)): # If both categories are of different Column types, # return a column full of Nulls. out_col = _create_empty_categorical_column( - self._parent._column, + self._column, CategoricalDtype( categories=new_categories, ordered=ordered ), @@ -577,7 +574,7 @@ def reorder_categories( new_categories: Any, ordered: bool = False, inplace: bool = False, - ) -> Optional[ParentType]: + ) -> Optional[SeriesOrIndex]: """ Reorder categories as specified in new_categories. @@ -648,22 +645,15 @@ def reorder_categories( # Ignore order for comparison because we're only interested # in whether new_categories has all the same values as the # current set of categories. - if not self._parent._column._categories_equal( - new_categories, ordered=False - ): + if not self._column._categories_equal(new_categories, ordered=False): raise ValueError( "items in new_categories are not the same as in " "old categories" ) - out_col = self._parent._column._set_categories( - new_categories, ordered=ordered - ) + out_col = self._column._set_categories(new_categories, ordered=ordered) return self._return_or_inplace(out_col, inplace=inplace) - def _decategorize(self) -> ColumnBase: - return self._parent._column._get_decategorized_column() - class CategoricalColumn(column.ColumnBase): """Implements operations for Columns of Categorical type diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 3d09bafd9ad..db180f6f9e1 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -270,7 +270,7 @@ def get(self, index): min_col_list_len = self.len().min() if -min_col_list_len <= index < min_col_list_len: return self._return_or_inplace( - extract_element(self._parent._column, index) + extract_element(self._column, index) ) else: raise IndexError("list index out of range") @@ -299,7 +299,7 @@ def contains(self, search_key): search_key = cudf.Scalar(search_key) try: res = self._return_or_inplace( - contains_scalar(self._parent._column, search_key) + contains_scalar(self._column, search_key) ) except RuntimeError as e: if ( @@ -336,11 +336,11 @@ def leaves(self): 5 6 dtype: int64 """ - if type(self._parent._column.elements) is ListColumn: - return self._parent._column.elements.elements + if type(self._column.elements) is ListColumn: + return self._column.elements.elements else: return self._return_or_inplace( - self._parent._column.elements, retain_index=False + self._column.elements, retain_index=False ) def len(self): @@ -365,7 +365,7 @@ def len(self): 2 2 dtype: int32 """ - return self._return_or_inplace(count_elements(self._parent._column)) + return self._return_or_inplace(count_elements(self._column)) def take(self, lists_indices): """ @@ -398,7 +398,7 @@ def take(self, lists_indices): 
lists_indices_col = as_column(lists_indices) if not isinstance(lists_indices_col, ListColumn): raise ValueError("lists_indices should be list type array.") - if not lists_indices_col.size == self._parent._column.size: + if not lists_indices_col.size == self._column.size: raise ValueError( "lists_indices and list column is of different " "size." ) @@ -413,7 +413,7 @@ def take(self, lists_indices): try: res = self._return_or_inplace( - segmented_gather(self._parent._column, lists_indices_col) + segmented_gather(self._column, lists_indices_col) ) except RuntimeError as e: if "contains nulls" in str(e): @@ -448,12 +448,12 @@ def unique(self): dtype: list """ - if is_list_dtype(self._parent._column.children[1].dtype): + if is_list_dtype(self._column.children[1].dtype): raise NotImplementedError("Nested lists unique is not supported.") return self._return_or_inplace( drop_list_duplicates( - self._parent._column, nulls_equal=True, nans_all_equal=True + self._column, nulls_equal=True, nans_all_equal=True ) ) @@ -503,10 +503,10 @@ def sort_values( raise NotImplementedError("`kind` not currently implemented.") if na_position not in {"first", "last"}: raise ValueError(f"Unknown `na_position` value {na_position}") - if is_list_dtype(self._parent._column.children[1].dtype): + if is_list_dtype(self._column.children[1].dtype): raise NotImplementedError("Nested lists sort is not supported.") return self._return_or_inplace( - sort_lists(self._parent._column, ascending, na_position), + sort_lists(self._column, ascending, na_position), retain_index=not ignore_index, ) diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index e2f4acde8cd..4b8ab7758fa 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -14,6 +14,7 @@ class ColumnMethodsMixin: def __init__(self, parent: Union["cudf.Series", "cudf.Index"]): self._parent = parent + self._column = self._parent._column @overload def _return_or_inplace( @@ -60,7 +61,7 @@ def _return_or_inplace( ) return None else: - self._parent._column._mimic_inplace(new_col, inplace=True) + self._column._mimic_inplace(new_col, inplace=True) return None else: if self._parent is None: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index c72268d7491..9c5ae0a31ce 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5,7 +5,17 @@ import builtins import pickle import warnings -from typing import Any, Dict, Optional, Sequence, Tuple, Union, cast, overload +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Optional, + Sequence, + Tuple, + Union, + cast, + overload, +) import cupy import numpy as np @@ -17,7 +27,6 @@ from cudf import _lib as libcudf from cudf._lib import string_casting as str_cast, strings as libstrings from cudf._lib.column import Column -from cudf._typing import ColumnLike, Dtype, ScalarLike from cudf.core.buffer import Buffer from cudf.core.column import column, datetime from cudf.core.column.methods import ColumnMethodsMixin @@ -30,6 +39,10 @@ is_string_dtype, ) +if TYPE_CHECKING: + from cudf._typing import ColumnLike, Dtype, ScalarLike, SeriesOrIndex + + _str_to_numeric_typecast_functions = { np.dtype("int8"): str_cast.stoi8, np.dtype("int16"): str_cast.stoi16, @@ -75,10 +88,9 @@ } -ParentType = Union["cudf.Series", "cudf.Index"] - - class StringMethods(ColumnMethodsMixin): + _column: StringColumn + def __init__(self, parent=None): """ Vectorized string functions for Series and 
Index. @@ -99,7 +111,7 @@ def __init__(self, parent=None): ) super().__init__(parent=parent) - def htoi(self) -> ParentType: + def htoi(self) -> SeriesOrIndex: """ Returns integer value represented by each hex string. String is interpretted to have hex (base-16) characters. @@ -120,11 +132,11 @@ def htoi(self) -> ParentType: dtype: int64 """ - out = str_cast.htoi(self._parent._column) + out = str_cast.htoi(self._column) return self._return_or_inplace(out, inplace=False) - def ip2int(self) -> ParentType: + def ip2int(self) -> SeriesOrIndex: """ This converts ip strings to integers @@ -151,7 +163,7 @@ def ip2int(self) -> ParentType: dtype: int64 """ - out = str_cast.ip2int(self._parent._column) + out = str_cast.ip2int(self._column) return self._return_or_inplace(out, inplace=False) @@ -161,7 +173,7 @@ def __getitem__(self, key): else: return self.get(key) - def len(self) -> ParentType: + def len(self) -> SeriesOrIndex: """ Computes the length of each element in the Series/Index. @@ -182,10 +194,10 @@ def len(self) -> ParentType: """ return self._return_or_inplace( - libstrings.count_characters(self._parent._column) + libstrings.count_characters(self._column) ) - def byte_count(self) -> ParentType: + def byte_count(self) -> SeriesOrIndex: """ Computes the number of bytes of each string in the Series/Index. @@ -210,9 +222,7 @@ def byte_count(self) -> ParentType: 2 11 dtype: int32 """ - return self._return_or_inplace( - libstrings.count_bytes(self._parent._column), - ) + return self._return_or_inplace(libstrings.count_bytes(self._column),) @overload def cat(self, sep: str = None, na_rep: str = None) -> str: @@ -221,7 +231,7 @@ def cat(self, sep: str = None, na_rep: str = None) -> str: @overload def cat( self, others, sep: str = None, na_rep: str = None - ) -> Union[ParentType, "cudf.core.column.string.StringColumn"]: + ) -> Union[SeriesOrIndex, "cudf.core.column.string.StringColumn"]: ... def cat(self, others=None, sep=None, na_rep=None): @@ -313,13 +323,11 @@ def cat(self, others=None, sep=None, na_rep=None): if others is None: data = libstrings.join( - self._parent._column, - cudf.Scalar(sep), - cudf.Scalar(na_rep, "str"), + self._column, cudf.Scalar(sep), cudf.Scalar(na_rep, "str"), ) else: other_cols = _get_cols_list(self._parent, others) - all_cols = [self._parent._column] + other_cols + all_cols = [self._column] + other_cols data = libstrings.concatenate( cudf.DataFrame( {index: value for index, value in enumerate(all_cols)} @@ -340,7 +348,7 @@ def cat(self, others=None, sep=None, na_rep=None): def join( self, sep=None, string_na_rep=None, sep_na_rep=None - ) -> ParentType: + ) -> SeriesOrIndex: """ Join lists contained as elements in the Series/Index with passed delimiter. @@ -466,10 +474,10 @@ def join( f" of type : {type(string_na_rep)}" ) - if isinstance(self._parent._column, cudf.core.column.ListColumn): - strings_column = self._parent._column + if isinstance(self._column, cudf.core.column.ListColumn): + strings_column = self._column else: - # If self._parent._column is not a ListColumn, we will have to + # If self._column is not a ListColumn, we will have to # split each row by character and create a ListColumn out of it. 
strings_column = self._split_by_character() @@ -505,23 +513,23 @@ def join( return self._return_or_inplace(data) def _split_by_character(self): - result_col = libstrings.character_tokenize(self._parent._column) + result_col = libstrings.character_tokenize(self._column) - offset_col = self._parent._column.children[0] + offset_col = self._column.children[0] res = cudf.core.column.ListColumn( - size=len(self._parent._column), - dtype=cudf.ListDtype(self._parent._column.dtype), - mask=self._parent._column.mask, + size=len(self._column), + dtype=cudf.ListDtype(self._column.dtype), + mask=self._column.mask, offset=0, - null_count=self._parent._column.null_count, + null_count=self._column.null_count, children=(offset_col, result_col), ) return res def extract( self, pat: str, flags: int = 0, expand: bool = True - ) -> ParentType: + ) -> SeriesOrIndex: """ Extract capture groups in the regex `pat` as columns in a DataFrame. @@ -579,7 +587,7 @@ def extract( if flags != 0: raise NotImplementedError("`flags` parameter is not yet supported") - out = libstrings.extract(self._parent._column, pat) + out = libstrings.extract(self._column, pat) if out._num_columns == 1 and expand is False: return self._return_or_inplace(out._columns[0], expand=expand) else: @@ -592,7 +600,7 @@ def contains( flags: int = 0, na=np.nan, regex: bool = True, - ) -> ParentType: + ) -> SeriesOrIndex: """ Test if pattern or regex is contained within a string of a Series or Index. @@ -706,18 +714,18 @@ def contains( if pat is None: result_col = column.column_empty( - len(self._parent._column), dtype="bool", masked=True + len(self._column), dtype="bool", masked=True ) elif is_scalar(pat): if regex is True: - result_col = libstrings.contains_re(self._parent._column, pat) + result_col = libstrings.contains_re(self._column, pat) else: result_col = libstrings.contains( - self._parent._column, cudf.Scalar(pat, "str") + self._column, cudf.Scalar(pat, "str") ) else: result_col = libstrings.contains_multiple( - self._parent._column, column.as_column(pat, dtype="str") + self._column, column.as_column(pat, dtype="str") ) return self._return_or_inplace(result_col) @@ -729,7 +737,7 @@ def replace( case=None, flags: int = 0, regex: bool = True, - ) -> ParentType: + ) -> SeriesOrIndex: """ Replace occurrences of pattern/regex in the Series/Index with some other string. Equivalent to `str.replace() @@ -806,13 +814,11 @@ def replace( return self._return_or_inplace( libstrings.replace_multi_re( - self._parent._column, - pat, - column.as_column(repl, dtype="str"), + self._column, pat, column.as_column(repl, dtype="str"), ) if regex else libstrings.replace_multi( - self._parent._column, + self._column, column.as_column(pat, dtype="str"), column.as_column(repl, dtype="str"), ), @@ -824,18 +830,18 @@ def replace( # Pandas forces non-regex replace when pat is a single-character return self._return_or_inplace( libstrings.replace_re( - self._parent._column, pat, cudf.Scalar(repl, "str"), n + self._column, pat, cudf.Scalar(repl, "str"), n ) if regex is True and len(pat) > 1 else libstrings.replace( - self._parent._column, + self._column, cudf.Scalar(pat, "str"), cudf.Scalar(repl, "str"), n, ), ) - def replace_with_backrefs(self, pat: str, repl: str) -> ParentType: + def replace_with_backrefs(self, pat: str, repl: str) -> SeriesOrIndex: """ Use the ``repl`` back-ref template to create a new string with the extracted elements found using the ``pat`` expression. 
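The StringMethods changes in this patch are mechanical: the accessor is now built from the parent object alone and reads the self._column attribute set up by ColumnMethodsMixin, so the public .str API behaves as before. A hypothetical usage sketch, not part of the patch, with made-up data:

    # Sketch: user-facing accessor calls are unchanged by the refactor.
    import cudf

    s = cudf.Series(["a_b", None, "c_d"])
    s.str.replace("_", "-")    # Series: ["a-b", <NA>, "c-d"]
    s.str.contains("_")        # Series: [True, <NA>, True]
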
@@ -862,12 +868,12 @@ def replace_with_backrefs(self, pat: str, repl: str) -> ParentType: dtype: object """ return self._return_or_inplace( - libstrings.replace_with_backrefs(self._parent._column, pat, repl) + libstrings.replace_with_backrefs(self._column, pat, repl) ) def slice( self, start: int = None, stop: int = None, step: int = None - ) -> ParentType: + ) -> SeriesOrIndex: """ Slice substrings from each element in the Series or Index. @@ -933,10 +939,10 @@ def slice( """ return self._return_or_inplace( - libstrings.slice_strings(self._parent._column, start, stop, step), + libstrings.slice_strings(self._column, start, stop, step), ) - def isinteger(self) -> ParentType: + def isinteger(self) -> SeriesOrIndex: """ Check whether all characters in each string form integer. @@ -994,11 +1000,9 @@ def isinteger(self) -> ParentType: 2 False dtype: bool """ - return self._return_or_inplace( - libstrings.is_integer(self._parent._column) - ) + return self._return_or_inplace(libstrings.is_integer(self._column)) - def ishex(self) -> ParentType: + def ishex(self) -> SeriesOrIndex: """ Check whether all characters in each string form a hex integer. @@ -1035,9 +1039,9 @@ def ishex(self) -> ParentType: 4 True dtype: bool """ - return self._return_or_inplace(str_cast.is_hex(self._parent._column)) + return self._return_or_inplace(str_cast.is_hex(self._column)) - def istimestamp(self, format: str) -> ParentType: + def istimestamp(self, format: str) -> SeriesOrIndex: """ Check whether all characters in each string can be converted to a timestamp using the given format. @@ -1058,10 +1062,10 @@ def istimestamp(self, format: str) -> ParentType: dtype: bool """ return self._return_or_inplace( - str_cast.istimestamp(self._parent._column, format) + str_cast.istimestamp(self._column, format) ) - def isfloat(self) -> ParentType: + def isfloat(self) -> SeriesOrIndex: """ Check whether all characters in each string form floating value. @@ -1122,11 +1126,9 @@ def isfloat(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace( - libstrings.is_float(self._parent._column) - ) + return self._return_or_inplace(libstrings.is_float(self._column)) - def isdecimal(self) -> ParentType: + def isdecimal(self) -> SeriesOrIndex: """ Check whether all characters in each string are decimal. @@ -1185,11 +1187,9 @@ def isdecimal(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace( - libstrings.is_decimal(self._parent._column) - ) + return self._return_or_inplace(libstrings.is_decimal(self._column)) - def isalnum(self) -> ParentType: + def isalnum(self) -> SeriesOrIndex: """ Check whether all characters in each string are alphanumeric. @@ -1256,11 +1256,9 @@ def isalnum(self) -> ParentType: 2 False dtype: bool """ - return self._return_or_inplace( - libstrings.is_alnum(self._parent._column) - ) + return self._return_or_inplace(libstrings.is_alnum(self._column)) - def isalpha(self) -> ParentType: + def isalpha(self) -> SeriesOrIndex: """ Check whether all characters in each string are alphabetic. @@ -1314,11 +1312,9 @@ def isalpha(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace( - libstrings.is_alpha(self._parent._column) - ) + return self._return_or_inplace(libstrings.is_alpha(self._column)) - def isdigit(self) -> ParentType: + def isdigit(self) -> SeriesOrIndex: """ Check whether all characters in each string are digits. 
@@ -1378,11 +1374,9 @@ def isdigit(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace( - libstrings.is_digit(self._parent._column) - ) + return self._return_or_inplace(libstrings.is_digit(self._column)) - def isnumeric(self) -> ParentType: + def isnumeric(self) -> SeriesOrIndex: """ Check whether all characters in each string are numeric. @@ -1448,11 +1442,9 @@ def isnumeric(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace( - libstrings.is_numeric(self._parent._column) - ) + return self._return_or_inplace(libstrings.is_numeric(self._column)) - def isupper(self) -> ParentType: + def isupper(self) -> SeriesOrIndex: """ Check whether all characters in each string are uppercase. @@ -1507,11 +1499,9 @@ def isupper(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace( - libstrings.is_upper(self._parent._column) - ) + return self._return_or_inplace(libstrings.is_upper(self._column)) - def islower(self) -> ParentType: + def islower(self) -> SeriesOrIndex: """ Check whether all characters in each string are lowercase. @@ -1566,11 +1556,9 @@ def islower(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace( - libstrings.is_lower(self._parent._column) - ) + return self._return_or_inplace(libstrings.is_lower(self._column)) - def isipv4(self) -> ParentType: + def isipv4(self) -> SeriesOrIndex: """ Check whether all characters in each string form an IPv4 address. @@ -1592,9 +1580,9 @@ def isipv4(self) -> ParentType: 3 False dtype: bool """ - return self._return_or_inplace(str_cast.is_ipv4(self._parent._column)) + return self._return_or_inplace(str_cast.is_ipv4(self._column)) - def lower(self) -> ParentType: + def lower(self) -> SeriesOrIndex: """ Converts all characters to lowercase. @@ -1631,11 +1619,9 @@ def lower(self) -> ParentType: 3 swapcase dtype: object """ - return self._return_or_inplace( - libstrings.to_lower(self._parent._column) - ) + return self._return_or_inplace(libstrings.to_lower(self._column)) - def upper(self) -> ParentType: + def upper(self) -> SeriesOrIndex: """ Convert each string to uppercase. This only applies to ASCII characters at this time. @@ -1682,11 +1668,9 @@ def upper(self) -> ParentType: 3 SWAPCASE dtype: object """ - return self._return_or_inplace( - libstrings.to_upper(self._parent._column) - ) + return self._return_or_inplace(libstrings.to_upper(self._column)) - def capitalize(self) -> ParentType: + def capitalize(self) -> SeriesOrIndex: """ Convert strings in the Series/Index to be capitalized. This only applies to ASCII characters at this time. @@ -1712,11 +1696,9 @@ def capitalize(self) -> ParentType: 1 Goodbye, friend dtype: object """ - return self._return_or_inplace( - libstrings.capitalize(self._parent._column) - ) + return self._return_or_inplace(libstrings.capitalize(self._column)) - def swapcase(self) -> ParentType: + def swapcase(self) -> SeriesOrIndex: """ Change each lowercase character to uppercase and vice versa. This only applies to ASCII characters at this time. @@ -1759,11 +1741,9 @@ def swapcase(self) -> ParentType: 3 sWaPcAsE dtype: object """ - return self._return_or_inplace( - libstrings.swapcase(self._parent._column) - ) + return self._return_or_inplace(libstrings.swapcase(self._column)) - def title(self) -> ParentType: + def title(self) -> SeriesOrIndex: """ Uppercase the first letter of each letter after a space and lowercase the rest. 
@@ -1806,11 +1786,11 @@ def title(self) -> ParentType: 3 Swapcase dtype: object """ - return self._return_or_inplace(libstrings.title(self._parent._column)) + return self._return_or_inplace(libstrings.title(self._column)) def filter_alphanum( self, repl: str = None, keep: bool = True - ) -> ParentType: + ) -> SeriesOrIndex: """ Remove non-alphanumeric characters from strings in this column. @@ -1842,14 +1822,12 @@ def filter_alphanum( repl = "" return self._return_or_inplace( - libstrings.filter_alphanum( - self._parent._column, cudf.Scalar(repl), keep - ), + libstrings.filter_alphanum(self._column, cudf.Scalar(repl), keep), ) def slice_from( self, starts: "cudf.Series", stops: "cudf.Series" - ) -> ParentType: + ) -> SeriesOrIndex: """ Return substring of each string using positions for each string. @@ -1888,7 +1866,7 @@ def slice_from( return self._return_or_inplace( libstrings.slice_from( - self._parent._column, + self._column, column.as_column(starts), column.as_column(stops), ), @@ -1896,7 +1874,7 @@ def slice_from( def slice_replace( self, start: int = None, stop: int = None, repl: str = None - ) -> ParentType: + ) -> SeriesOrIndex: """ Replace the specified section of each string with a new string. @@ -1979,11 +1957,11 @@ def slice_replace( return self._return_or_inplace( libstrings.slice_replace( - self._parent._column, start, stop, cudf.Scalar(repl) + self._column, start, stop, cudf.Scalar(repl) ), ) - def insert(self, start: int = 0, repl: str = None) -> ParentType: + def insert(self, start: int = 0, repl: str = None) -> SeriesOrIndex: """ Insert the specified string into each string in the specified position. @@ -2030,10 +2008,10 @@ def insert(self, start: int = 0, repl: str = None) -> ParentType: repl = "" return self._return_or_inplace( - libstrings.insert(self._parent._column, start, cudf.Scalar(repl)), + libstrings.insert(self._column, start, cudf.Scalar(repl)), ) - def get(self, i: int = 0) -> ParentType: + def get(self, i: int = 0) -> SeriesOrIndex: """ Extract element from each component at specified position. @@ -2075,7 +2053,7 @@ def get(self, i: int = 0) -> ParentType: dtype: object """ - return self._return_or_inplace(libstrings.get(self._parent._column, i)) + return self._return_or_inplace(libstrings.get(self._column, i)) def get_json_object(self, json_path): """ @@ -2129,7 +2107,7 @@ def get_json_object(self, json_path): try: res = self._return_or_inplace( libstrings.get_json_object( - self._parent._column, cudf.Scalar(json_path, "str") + self._column, cudf.Scalar(json_path, "str") ) ) except RuntimeError as e: @@ -2145,7 +2123,7 @@ def get_json_object(self, json_path): def split( self, pat: str = None, n: int = -1, expand: bool = None - ) -> ParentType: + ) -> SeriesOrIndex: """ Split strings around given separator/delimiter. 
@@ -2273,29 +2251,25 @@ def split( pat = "" if expand: - if self._parent._column.null_count == len(self._parent._column): - result_table = cudf.core.frame.Frame( - {0: self._parent._column.copy()} - ) + if self._column.null_count == len(self._column): + result_table = cudf.core.frame.Frame({0: self._column.copy()}) else: result_table = libstrings.split( - self._parent._column, cudf.Scalar(pat, "str"), n + self._column, cudf.Scalar(pat, "str"), n ) if len(result_table._data) == 1: - if result_table._data[0].null_count == len( - self._parent._column - ): + if result_table._data[0].null_count == len(self._column): result_table = cudf.core.frame.Frame({}) else: result_table = libstrings.split_record( - self._parent._column, cudf.Scalar(pat, "str"), n + self._column, cudf.Scalar(pat, "str"), n ) return self._return_or_inplace(result_table, expand=expand) def rsplit( self, pat: str = None, n: int = -1, expand: bool = None - ) -> ParentType: + ) -> SeriesOrIndex: """ Split strings around given separator/delimiter. @@ -2432,27 +2406,23 @@ def rsplit( pat = "" if expand: - if self._parent._column.null_count == len(self._parent._column): - result_table = cudf.core.frame.Frame( - {0: self._parent._column.copy()} - ) + if self._column.null_count == len(self._column): + result_table = cudf.core.frame.Frame({0: self._column.copy()}) else: result_table = libstrings.rsplit( - self._parent._column, cudf.Scalar(pat), n + self._column, cudf.Scalar(pat), n ) if len(result_table._data) == 1: - if result_table._data[0].null_count == len( - self._parent._column - ): + if result_table._data[0].null_count == len(self._column): result_table = cudf.core.frame.Frame({}) else: result_table = libstrings.rsplit_record( - self._parent._column, cudf.Scalar(pat), n + self._column, cudf.Scalar(pat), n ) return self._return_or_inplace(result_table, expand=expand) - def partition(self, sep: str = " ", expand: bool = True) -> ParentType: + def partition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: """ Split the string at the first occurrence of sep. @@ -2529,11 +2499,11 @@ def partition(self, sep: str = " ", expand: bool = True) -> ParentType: sep = " " return self._return_or_inplace( - libstrings.partition(self._parent._column, cudf.Scalar(sep)), + libstrings.partition(self._column, cudf.Scalar(sep)), expand=expand, ) - def rpartition(self, sep: str = " ", expand: bool = True) -> ParentType: + def rpartition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: """ Split the string at the last occurrence of sep. @@ -2594,13 +2564,13 @@ def rpartition(self, sep: str = " ", expand: bool = True) -> ParentType: sep = " " return self._return_or_inplace( - libstrings.rpartition(self._parent._column, cudf.Scalar(sep)), + libstrings.rpartition(self._column, cudf.Scalar(sep)), expand=expand, ) def pad( self, width: int, side: str = "left", fillchar: str = " " - ) -> ParentType: + ) -> SeriesOrIndex: """ Pad strings in the Series/Index up to width. @@ -2682,10 +2652,10 @@ def pad( ) return self._return_or_inplace( - libstrings.pad(self._parent._column, width, fillchar, side) + libstrings.pad(self._column, width, fillchar, side) ) - def zfill(self, width: int) -> ParentType: + def zfill(self, width: int) -> SeriesOrIndex: """ Pad strings in the Series/Index by prepending ‘0’ characters. 
@@ -2756,11 +2726,9 @@ def zfill(self, width: int) -> ParentType: msg = f"width must be of integer type, not {type(width).__name__}" raise TypeError(msg) - return self._return_or_inplace( - libstrings.zfill(self._parent._column, width) - ) + return self._return_or_inplace(libstrings.zfill(self._column, width)) - def center(self, width: int, fillchar: str = " ") -> ParentType: + def center(self, width: int, fillchar: str = " ") -> SeriesOrIndex: """ Filling left and right side of strings in the Series/Index with an additional character. @@ -2829,10 +2797,10 @@ def center(self, width: int, fillchar: str = " ") -> ParentType: raise TypeError(msg) return self._return_or_inplace( - libstrings.center(self._parent._column, width, fillchar) + libstrings.center(self._column, width, fillchar) ) - def ljust(self, width: int, fillchar: str = " ") -> ParentType: + def ljust(self, width: int, fillchar: str = " ") -> SeriesOrIndex: """ Filling right side of strings in the Series/Index with an additional character. Equivalent to `str.ljust() @@ -2883,10 +2851,10 @@ def ljust(self, width: int, fillchar: str = " ") -> ParentType: raise TypeError(msg) return self._return_or_inplace( - libstrings.ljust(self._parent._column, width, fillchar) + libstrings.ljust(self._column, width, fillchar) ) - def rjust(self, width: int, fillchar: str = " ") -> ParentType: + def rjust(self, width: int, fillchar: str = " ") -> SeriesOrIndex: """ Filling left side of strings in the Series/Index with an additional character. Equivalent to `str.rjust() @@ -2937,10 +2905,10 @@ def rjust(self, width: int, fillchar: str = " ") -> ParentType: raise TypeError(msg) return self._return_or_inplace( - libstrings.rjust(self._parent._column, width, fillchar) + libstrings.rjust(self._column, width, fillchar) ) - def strip(self, to_strip: str = None) -> ParentType: + def strip(self, to_strip: str = None) -> SeriesOrIndex: """ Remove leading and trailing characters. @@ -2996,10 +2964,10 @@ def strip(self, to_strip: str = None) -> ParentType: to_strip = "" return self._return_or_inplace( - libstrings.strip(self._parent._column, cudf.Scalar(to_strip)) + libstrings.strip(self._column, cudf.Scalar(to_strip)) ) - def lstrip(self, to_strip: str = None) -> ParentType: + def lstrip(self, to_strip: str = None) -> SeriesOrIndex: """ Remove leading and trailing characters. @@ -3043,10 +3011,10 @@ def lstrip(self, to_strip: str = None) -> ParentType: to_strip = "" return self._return_or_inplace( - libstrings.lstrip(self._parent._column, cudf.Scalar(to_strip)) + libstrings.lstrip(self._column, cudf.Scalar(to_strip)) ) - def rstrip(self, to_strip: str = None) -> ParentType: + def rstrip(self, to_strip: str = None) -> SeriesOrIndex: """ Remove leading and trailing characters. @@ -3098,10 +3066,10 @@ def rstrip(self, to_strip: str = None) -> ParentType: to_strip = "" return self._return_or_inplace( - libstrings.rstrip(self._parent._column, cudf.Scalar(to_strip)) + libstrings.rstrip(self._column, cudf.Scalar(to_strip)) ) - def wrap(self, width: int, **kwargs) -> ParentType: + def wrap(self, width: int, **kwargs) -> SeriesOrIndex: """ Wrap long strings in the Series/Index to be formatted in paragraphs with length less than a given width. 
@@ -3193,11 +3161,9 @@ def wrap(self, width: int, **kwargs) -> ParentType: "`break_on_hyphens`=False" ) - return self._return_or_inplace( - libstrings.wrap(self._parent._column, width) - ) + return self._return_or_inplace(libstrings.wrap(self._column, width)) - def count(self, pat: str, flags: int = 0) -> ParentType: + def count(self, pat: str, flags: int = 0) -> SeriesOrIndex: """ Count occurrences of pattern in each string of the Series/Index. @@ -3255,13 +3221,11 @@ def count(self, pat: str, flags: int = 0) -> ParentType: if flags != 0: raise NotImplementedError("`flags` parameter is not yet supported") - return self._return_or_inplace( - libstrings.count_re(self._parent._column, pat) - ) + return self._return_or_inplace(libstrings.count_re(self._column, pat)) def findall( self, pat: str, flags: int = 0, expand: bool = True - ) -> ParentType: + ) -> SeriesOrIndex: """ Find all occurrences of pattern or regular expression in the Series/Index. @@ -3326,10 +3290,10 @@ def findall( raise NotImplementedError("`flags` parameter is not yet supported") return self._return_or_inplace( - libstrings.findall(self._parent._column, pat), expand=expand + libstrings.findall(self._column, pat), expand=expand ) - def isempty(self) -> ParentType: + def isempty(self) -> SeriesOrIndex: """ Check whether each string is an empty string. @@ -3349,11 +3313,9 @@ def isempty(self) -> ParentType: 4 False dtype: bool """ - return self._return_or_inplace( - (self._parent._column == "").fillna(False) - ) + return self._return_or_inplace((self._column == "").fillna(False)) - def isspace(self) -> ParentType: + def isspace(self) -> SeriesOrIndex: """ Check whether all characters in each string are whitespace. @@ -3407,11 +3369,9 @@ def isspace(self) -> ParentType: 2 False dtype: bool """ - return self._return_or_inplace( - libstrings.is_space(self._parent._column) - ) + return self._return_or_inplace(libstrings.is_space(self._column)) - def endswith(self, pat: str) -> ParentType: + def endswith(self, pat: str) -> SeriesOrIndex: """ Test if the end of each string element matches a pattern. @@ -3454,20 +3414,20 @@ def endswith(self, pat: str) -> ParentType: """ if pat is None: result_col = column.column_empty( - len(self._parent._column), dtype="bool", masked=True + len(self._column), dtype="bool", masked=True ) elif is_scalar(pat): result_col = libstrings.endswith( - self._parent._column, cudf.Scalar(pat, "str") + self._column, cudf.Scalar(pat, "str") ) else: result_col = libstrings.endswith_multiple( - self._parent._column, column.as_column(pat, dtype="str") + self._column, column.as_column(pat, dtype="str") ) return self._return_or_inplace(result_col) - def startswith(self, pat: Union[str, Sequence]) -> ParentType: + def startswith(self, pat: Union[str, Sequence]) -> SeriesOrIndex: """ Test if the start of each string element matches a pattern. 
@@ -3516,20 +3476,20 @@ def startswith(self, pat: Union[str, Sequence]) -> ParentType: """ if pat is None: result_col = column.column_empty( - len(self._parent._column), dtype="bool", masked=True + len(self._column), dtype="bool", masked=True ) elif is_scalar(pat): result_col = libstrings.startswith( - self._parent._column, cudf.Scalar(pat, "str") + self._column, cudf.Scalar(pat, "str") ) else: result_col = libstrings.startswith_multiple( - self._parent._column, column.as_column(pat, dtype="str") + self._column, column.as_column(pat, dtype="str") ) return self._return_or_inplace(result_col) - def find(self, sub: str, start: int = 0, end: int = None) -> ParentType: + def find(self, sub: str, start: int = 0, end: int = None) -> SeriesOrIndex: """ Return lowest indexes in each strings in the Series/Index where the substring is fully contained between ``[start:end]``. @@ -3579,12 +3539,14 @@ def find(self, sub: str, start: int = 0, end: int = None) -> ParentType: end = -1 result_col = libstrings.find( - self._parent._column, cudf.Scalar(sub, "str"), start, end + self._column, cudf.Scalar(sub, "str"), start, end ) return self._return_or_inplace(result_col) - def rfind(self, sub: str, start: int = 0, end: int = None) -> ParentType: + def rfind( + self, sub: str, start: int = 0, end: int = None + ) -> SeriesOrIndex: """ Return highest indexes in each strings in the Series/Index where the substring is fully contained between ``[start:end]``. @@ -3638,12 +3600,14 @@ def rfind(self, sub: str, start: int = 0, end: int = None) -> ParentType: end = -1 result_col = libstrings.rfind( - self._parent._column, cudf.Scalar(sub, "str"), start, end + self._column, cudf.Scalar(sub, "str"), start, end ) return self._return_or_inplace(result_col) - def index(self, sub: str, start: int = 0, end: int = None) -> ParentType: + def index( + self, sub: str, start: int = 0, end: int = None + ) -> SeriesOrIndex: """ Return lowest indexes in each strings where the substring is fully contained between ``[start:end]``. This is the same @@ -3693,7 +3657,7 @@ def index(self, sub: str, start: int = 0, end: int = None) -> ParentType: end = -1 result_col = libstrings.find( - self._parent._column, cudf.Scalar(sub, "str"), start, end + self._column, cudf.Scalar(sub, "str"), start, end ) result = self._return_or_inplace(result_col) @@ -3703,7 +3667,9 @@ def index(self, sub: str, start: int = 0, end: int = None) -> ParentType: else: return result - def rindex(self, sub: str, start: int = 0, end: int = None) -> ParentType: + def rindex( + self, sub: str, start: int = 0, end: int = None + ) -> SeriesOrIndex: """ Return highest indexes in each strings where the substring is fully contained between ``[start:end]``. This is the same @@ -3753,7 +3719,7 @@ def rindex(self, sub: str, start: int = 0, end: int = None) -> ParentType: end = -1 result_col = libstrings.rfind( - self._parent._column, cudf.Scalar(sub, "str"), start, end + self._column, cudf.Scalar(sub, "str"), start, end ) result = self._return_or_inplace(result_col) @@ -3763,7 +3729,9 @@ def rindex(self, sub: str, start: int = 0, end: int = None) -> ParentType: else: return result - def match(self, pat: str, case: bool = True, flags: int = 0) -> ParentType: + def match( + self, pat: str, case: bool = True, flags: int = 0 + ) -> SeriesOrIndex: """ Determine if each string matches a regular expression. 
@@ -3806,11 +3774,9 @@ def match(self, pat: str, case: bool = True, flags: int = 0) -> ParentType: if flags != 0: raise NotImplementedError("`flags` parameter is not yet supported") - return self._return_or_inplace( - libstrings.match_re(self._parent._column, pat) - ) + return self._return_or_inplace(libstrings.match_re(self._column, pat)) - def url_decode(self) -> ParentType: + def url_decode(self) -> SeriesOrIndex: """ Returns a URL-decoded format of each string. No format checking is performed. All characters @@ -3838,11 +3804,9 @@ def url_decode(self) -> ParentType: dtype: object """ - return self._return_or_inplace( - libstrings.url_decode(self._parent._column) - ) + return self._return_or_inplace(libstrings.url_decode(self._column)) - def url_encode(self) -> ParentType: + def url_encode(self) -> SeriesOrIndex: """ Returns a URL-encoded format of each string. No format checking is performed. @@ -3871,11 +3835,9 @@ def url_encode(self) -> ParentType: 1 https%3A%2F%2Fmedium.com%2Frapids-ai dtype: object """ - return self._return_or_inplace( - libstrings.url_encode(self._parent._column) - ) + return self._return_or_inplace(libstrings.url_encode(self._column)) - def code_points(self) -> ParentType: + def code_points(self) -> SeriesOrIndex: """ Returns an array by filling it with the UTF-8 code point values for each character of each string. @@ -3907,7 +3869,7 @@ def code_points(self) -> ParentType: dtype: int32 """ - new_col = libstrings.code_points(self._parent._column) + new_col = libstrings.code_points(self._column) if isinstance(self._parent, cudf.Series): return cudf.Series(new_col, name=self._parent.name) elif isinstance(self._parent, cudf.Index): @@ -3915,7 +3877,7 @@ def code_points(self) -> ParentType: else: return new_col - def translate(self, table: dict) -> ParentType: + def translate(self, table: dict) -> SeriesOrIndex: """ Map all characters in the string through the given mapping table. @@ -3957,12 +3919,12 @@ def translate(self, table: dict) -> ParentType: """ table = str.maketrans(table) return self._return_or_inplace( - libstrings.translate(self._parent._column, table) + libstrings.translate(self._column, table) ) def filter_characters( self, table: dict, keep: bool = True, repl: str = None - ) -> ParentType: + ) -> SeriesOrIndex: """ Remove characters from each string using the character ranges in the given mapping table. @@ -4009,11 +3971,11 @@ def filter_characters( table = str.maketrans(table) return self._return_or_inplace( libstrings.filter_characters( - self._parent._column, table, keep, cudf.Scalar(repl) + self._column, table, keep, cudf.Scalar(repl) ), ) - def normalize_spaces(self) -> ParentType: + def normalize_spaces(self) -> SeriesOrIndex: """ Remove extra whitespace between tokens and trim whitespace from the beginning and the end of each string. @@ -4032,10 +3994,10 @@ def normalize_spaces(self) -> ParentType: dtype: object """ return self._return_or_inplace( - libstrings.normalize_spaces(self._parent._column) + libstrings.normalize_spaces(self._column) ) - def normalize_characters(self, do_lower: bool = True) -> ParentType: + def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex: """ Normalizes strings characters for tokenizing. 
@@ -4081,10 +4043,10 @@ def normalize_characters(self, do_lower: bool = True) -> ParentType: dtype: object """ return self._return_or_inplace( - libstrings.normalize_characters(self._parent._column, do_lower) + libstrings.normalize_characters(self._column, do_lower) ) - def tokenize(self, delimiter: str = " ") -> ParentType: + def tokenize(self, delimiter: str = " ") -> SeriesOrIndex: """ Each string is split into tokens using the provided delimiter(s). The sequence returned contains the tokens in the order @@ -4117,12 +4079,12 @@ def tokenize(self, delimiter: str = " ") -> ParentType: if isinstance(delimiter, Column): return self._return_or_inplace( - libstrings._tokenize_column(self._parent._column, delimiter), + libstrings._tokenize_column(self._column, delimiter), retain_index=False, ) elif isinstance(delimiter, cudf.Scalar): return self._return_or_inplace( - libstrings._tokenize_scalar(self._parent._column, delimiter), + libstrings._tokenize_scalar(self._column, delimiter), retain_index=False, ) else: @@ -4133,7 +4095,7 @@ def tokenize(self, delimiter: str = " ") -> ParentType: def detokenize( self, indices: "cudf.Series", separator: str = " " - ) -> ParentType: + ) -> SeriesOrIndex: """ Combines tokens into strings by concatenating them in the order in which they appear in the ``indices`` column. The ``separator`` is @@ -4164,13 +4126,11 @@ def detokenize( """ separator = _massage_string_arg(separator, "separator") return self._return_or_inplace( - libstrings.detokenize( - self._parent._column, indices._column, separator - ), + libstrings.detokenize(self._column, indices._column, separator), retain_index=False, ) - def character_tokenize(self) -> ParentType: + def character_tokenize(self) -> SeriesOrIndex: """ Each string is split into individual characters. The sequence returned contains each character as an individual string. @@ -4217,7 +4177,7 @@ def character_tokenize(self) -> ParentType: 29 . dtype: object """ - result_col = libstrings.character_tokenize(self._parent._column) + result_col = libstrings.character_tokenize(self._column) if isinstance(self._parent, cudf.Series): return cudf.Series(result_col, name=self._parent.name) elif isinstance(self._parent, cudf.Index): @@ -4225,7 +4185,7 @@ def character_tokenize(self) -> ParentType: else: return result_col - def token_count(self, delimiter: str = " ") -> ParentType: + def token_count(self, delimiter: str = " ") -> SeriesOrIndex: """ Each string is split into tokens using the provided delimiter. The returned integer sequence is the number of tokens in each string. @@ -4254,16 +4214,12 @@ def token_count(self, delimiter: str = " ") -> ParentType: delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True) if isinstance(delimiter, Column): return self._return_or_inplace( - libstrings._count_tokens_column( - self._parent._column, delimiter - ) + libstrings._count_tokens_column(self._column, delimiter) ) elif isinstance(delimiter, cudf.Scalar): return self._return_or_inplace( - libstrings._count_tokens_scalar( - self._parent._column, delimiter - ) + libstrings._count_tokens_scalar(self._column, delimiter) ) else: raise TypeError( @@ -4271,7 +4227,7 @@ def token_count(self, delimiter: str = " ") -> ParentType: for delimiters, but got {type(delimiter)}" ) - def ngrams(self, n: int = 2, separator: str = "_") -> ParentType: + def ngrams(self, n: int = 2, separator: str = "_") -> SeriesOrIndex: """ Generate the n-grams from a set of tokens, each record in series is treated a token. 
@@ -4305,11 +4261,11 @@ def ngrams(self, n: int = 2, separator: str = "_") -> ParentType: """ separator = _massage_string_arg(separator, "separator") return self._return_or_inplace( - libstrings.generate_ngrams(self._parent._column, n, separator), + libstrings.generate_ngrams(self._column, n, separator), retain_index=False, ) - def character_ngrams(self, n: int = 2) -> ParentType: + def character_ngrams(self, n: int = 2) -> SeriesOrIndex: """ Generate the n-grams from characters in a column of strings. @@ -4342,13 +4298,13 @@ def character_ngrams(self, n: int = 2) -> ParentType: dtype: object """ return self._return_or_inplace( - libstrings.generate_character_ngrams(self._parent._column, n), + libstrings.generate_character_ngrams(self._column, n), retain_index=False, ) def ngrams_tokenize( self, n: int = 2, delimiter: str = " ", separator: str = "_" - ) -> ParentType: + ) -> SeriesOrIndex: """ Generate the n-grams using tokens from each string. This will tokenize each string and then generate ngrams for each @@ -4380,15 +4336,13 @@ def ngrams_tokenize( delimiter = _massage_string_arg(delimiter, "delimiter") separator = _massage_string_arg(separator, "separator") return self._return_or_inplace( - libstrings.ngrams_tokenize( - self._parent._column, n, delimiter, separator - ), + libstrings.ngrams_tokenize(self._column, n, delimiter, separator), retain_index=False, ) def replace_tokens( self, targets, replacements, delimiter: str = None - ) -> ParentType: + ) -> SeriesOrIndex: """ The targets tokens are searched for within each string in the series and replaced with the corresponding replacements if found. @@ -4462,7 +4416,7 @@ def replace_tokens( return self._return_or_inplace( libstrings.replace_tokens( - self._parent._column, + self._column, targets_column, replacements_column, cudf.Scalar(delimiter, dtype="str"), @@ -4474,7 +4428,7 @@ def filter_tokens( min_token_length: int, replacement: str = None, delimiter: str = None, - ) -> ParentType: + ) -> SeriesOrIndex: """ Remove tokens from within each string in the series that are smaller than min_token_length and optionally replace them @@ -4533,7 +4487,7 @@ def filter_tokens( return self._return_or_inplace( libstrings.filter_tokens( - self._parent._column, + self._column, min_token_length, cudf.Scalar(replacement, dtype="str"), cudf.Scalar(delimiter, dtype="str"), @@ -4629,7 +4583,7 @@ def subword_tokenize( [1, 0, 1]], dtype=uint32) """ tokens, masks, metadata = libstrings.subword_tokenize_vocab_file( - self._parent._column, + self._column, hash_file, max_length, stride, @@ -4643,7 +4597,7 @@ def subword_tokenize( cupy.asarray(metadata), ) - def porter_stemmer_measure(self) -> ParentType: + def porter_stemmer_measure(self) -> SeriesOrIndex: """ Compute the Porter Stemmer measure for each string. The Porter Stemmer algorithm is described `here @@ -4663,10 +4617,10 @@ def porter_stemmer_measure(self) -> ParentType: dtype: int32 """ return self._return_or_inplace( - libstrings.porter_stemmer_measure(self._parent._column) + libstrings.porter_stemmer_measure(self._column) ) - def is_consonant(self, position) -> ParentType: + def is_consonant(self, position) -> SeriesOrIndex: """ Return true for strings where the character at ``position`` is a consonant. 
The ``position`` parameter may also be a list of integers @@ -4702,15 +4656,15 @@ def is_consonant(self, position) -> ParentType: if can_convert_to_column(position): return self._return_or_inplace( libstrings.is_letter_multi( - self._parent._column, ltype, column.as_column(position) + self._column, ltype, column.as_column(position) ), ) return self._return_or_inplace( - libstrings.is_letter(self._parent._column, ltype, position) + libstrings.is_letter(self._column, ltype, position) ) - def is_vowel(self, position) -> ParentType: + def is_vowel(self, position) -> SeriesOrIndex: """ Return true for strings where the character at ``position`` is a vowel -- not a consonant. The ``position`` parameter may also be @@ -4746,15 +4700,15 @@ def is_vowel(self, position) -> ParentType: if can_convert_to_column(position): return self._return_or_inplace( libstrings.is_letter_multi( - self._parent._column, ltype, column.as_column(position) + self._column, ltype, column.as_column(position) ), ) return self._return_or_inplace( - libstrings.is_letter(self._parent._column, ltype, position) + libstrings.is_letter(self._column, ltype, position) ) - def edit_distance(self, targets) -> ParentType: + def edit_distance(self, targets) -> SeriesOrIndex: """ The ``targets`` strings are measured against the strings in this instance using the Levenshtein edit distance algorithm. @@ -4800,7 +4754,7 @@ def edit_distance(self, targets) -> ParentType: ) return self._return_or_inplace( - libstrings.edit_distance(self._parent._column, targets_column) + libstrings.edit_distance(self._column, targets_column) ) diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index 6c47d94d6e7..7a35354ace6 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -148,9 +148,9 @@ def field(self, key): 1 3 dtype: int64 """ - fields = list(self._parent._column.dtype.fields.keys()) + fields = list(self._column.dtype.fields.keys()) if key in fields: pos = fields.index(key) - return self._return_or_inplace(self._parent._column.children[pos]) + return self._return_or_inplace(self._column.children[pos]) else: - return self._return_or_inplace(self._parent._column.children[key]) + return self._return_or_inplace(self._column.children[key]) From 2a32405c453334bf0e3b64b18031228db42ec19a Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 20 May 2021 17:53:47 -0400 Subject: [PATCH 07/14] More refactoring --- python/cudf/cudf/core/column/categorical.py | 4 ++-- python/cudf/cudf/core/column/lists.py | 4 ++-- python/cudf/cudf/core/column/methods.py | 4 +++- python/cudf/cudf/core/column/string.py | 4 ++-- python/cudf/cudf/core/column/struct.py | 4 ++-- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 6b32bf908ef..6412844b73e 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -27,7 +27,7 @@ from cudf._typing import ColumnLike, Dtype, ScalarLike from cudf.core.buffer import Buffer from cudf.core.column import column -from cudf.core.column.methods import ColumnMethodsMixin +from cudf.core.column.methods import ColumnMethods from cudf.core.dtypes import CategoricalDtype from cudf.utils.dtypes import ( is_categorical_dtype, @@ -47,7 +47,7 @@ ) -class CategoricalAccessor(ColumnMethodsMixin): +class CategoricalAccessor(ColumnMethods): _column: CategoricalColumn def __init__(self, parent: SeriesOrIndex): diff 
--git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index db180f6f9e1..f3dbcac4ee2 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -19,7 +19,7 @@ from cudf._typing import BinaryOperand from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column, column -from cudf.core.column.methods import ColumnMethodsMixin +from cudf.core.column.methods import ColumnMethods from cudf.core.dtypes import ListDtype from cudf.utils.dtypes import is_list_dtype, is_numerical_dtype @@ -231,7 +231,7 @@ def __cuda_array_interface__(self): ) -class ListMethods(ColumnMethodsMixin): +class ListMethods(ColumnMethods): """ List methods for Series """ diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index 4b8ab7758fa..e6795713853 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -9,7 +9,9 @@ import cudf -class ColumnMethodsMixin: +class ColumnMethods: + # Encapsulates common behaviour for Series/Index accessor classes + _parent: Union["cudf.Series", "cudf.Index"] def __init__(self, parent: Union["cudf.Series", "cudf.Index"]): diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 9c5ae0a31ce..b9f3950cd5c 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -29,7 +29,7 @@ from cudf._lib.column import Column from cudf.core.buffer import Buffer from cudf.core.column import column, datetime -from cudf.core.column.methods import ColumnMethodsMixin +from cudf.core.column.methods import ColumnMethods from cudf.utils import utils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( @@ -88,7 +88,7 @@ } -class StringMethods(ColumnMethodsMixin): +class StringMethods(ColumnMethods): _column: StringColumn def __init__(self, parent=None): diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index 7a35354ace6..dd9bddeed82 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -5,7 +5,7 @@ import cudf from cudf.core.column import ColumnBase -from cudf.core.column.methods import ColumnMethodsMixin +from cudf.core.column.methods import ColumnMethods from cudf.utils.dtypes import is_struct_dtype @@ -109,7 +109,7 @@ def __cuda_array_interface__(self): ) -class StructMethods(ColumnMethodsMixin): +class StructMethods(ColumnMethods): """ Struct methods for Series """ From b713e13bce3a8012a2fb62d51e42ced8ceeed223 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 20 May 2021 18:03:14 -0400 Subject: [PATCH 08/14] parent can never be None --- python/cudf/cudf/core/column/methods.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py index e6795713853..0afa93d84aa 100644 --- a/python/cudf/cudf/core/column/methods.py +++ b/python/cudf/cudf/core/column/methods.py @@ -54,20 +54,14 @@ def _return_or_inplace( of the owner (Series or Index) to mimic an inplace operation """ if inplace: - if self._parent is not None: - self._parent._mimic_inplace( - self._parent.__class__._from_table( - cudf._lib.table.Table({self._parent.name: new_col}) - ), - inplace=True, - ) - return None - else: - self._column._mimic_inplace(new_col, inplace=True) - return None + self._parent._mimic_inplace( + self._parent.__class__._from_table( + 
cudf._lib.table.Table({self._parent.name: new_col}) + ), + inplace=True, + ) + return None else: - if self._parent is None: - return new_col if expand or isinstance( self._parent, (cudf.DataFrame, cudf.MultiIndex) ): From 50e6fa3700ad4e0091cefd0a161a57bdd94db386 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 20 May 2021 18:09:23 -0400 Subject: [PATCH 09/14] Redundant docstring --- python/cudf/cudf/core/column/categorical.py | 76 +-------------------- 1 file changed, 1 insertion(+), 75 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 6412844b73e..a02163cd463 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1389,82 +1389,8 @@ def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: def set_categories( self, new_categories: Any, ordered: bool = False, rename: bool = False, ) -> CategoricalColumn: - """ - Set the categories to the specified new_categories. - - - `new_categories` can include new categories (which - will result in unused categories) or remove old categories - (which results in values set to null). If `rename==True`, - the categories will simple be renamed (less or more items - than in old categories will result in values set to null or - in unused categories respectively). - - This method can be used to perform more than one action - of adding, removing, and reordering simultaneously and - is therefore faster than performing the individual steps - via the more specialised methods. - - On the other hand this methods does not do checks - (e.g., whether the old categories are included in the - new categories on a reorder), which can result in - surprising changes. - - Parameters - ---------- - - new_categories : list-like - The categories in new order. + # See CategoricalAccessor.set_categories. - ordered : bool, default None - Whether or not the categorical is treated as - a ordered categorical. If not given, do - not change the ordered information. - - rename : bool, default False - Whether or not the `new_categories` should be - considered as a rename of the old categories - or as reordered categories. - - Returns - ------- - cat - Categorical with reordered categories - or None if inplace. 
- - Examples - -------- - >>> import cudf - >>> s = cudf.Series([1, 1, 2, 10, 2, 10], dtype='category') - >>> s - 0 1 - 1 1 - 2 2 - 3 10 - 4 2 - 5 10 - dtype: category - Categories (3, int64): [1, 2, 10] - >>> s.cat.set_categories([1, 10]) - 0 1 - 1 1 - 2 - 3 10 - 4 - 5 10 - dtype: category - Categories (2, int64): [1, 10] - >>> s.cat.set_categories([1, 10], inplace=True) - >>> s - 0 1 - 1 1 - 2 - 3 10 - 4 - 5 10 - dtype: category - Categories (2, int64): [1, 10] - """ ordered = ordered if ordered is not None else self.ordered new_categories = column.as_column(new_categories) From 9d912461fad10885be190a83588f99702c9e7b0d Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Thu, 20 May 2021 18:22:29 -0400 Subject: [PATCH 10/14] MyPy fix --- python/cudf/cudf/core/column/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index a02163cd463..7d9fb7f94dc 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1554,7 +1554,7 @@ def _create_empty_categorical_column( cudf.utils.utils.scalar_broadcast_to( categorical_column.default_na_value(), categorical_column.size, - np.dtype(categorical_column.codes), + categorical_column.codes.dtype, ) ), offset=categorical_column.offset, From a2bd07a3a48924f77b268cff0e541b76b2a7e7c5 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Wed, 26 May 2021 08:47:54 -0400 Subject: [PATCH 11/14] Fix leaves method --- python/cudf/cudf/core/column/lists.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index f3dbcac4ee2..0efbbab14b9 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -1,6 +1,7 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. import pickle +from typing import Optional import numpy as np import pyarrow as pa @@ -16,7 +17,7 @@ sort_lists, ) from cudf._lib.table import Table -from cudf._typing import BinaryOperand +from cudf._typing import BinaryOperand, SeriesOrIndex from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase, as_column, column from cudf.core.column.methods import ColumnMethods @@ -230,6 +231,12 @@ def __cuda_array_interface__(self): "Lists are not yet supported via `__cuda_array_interface__`" ) + def leaves(self): + if isinstance(self.elements, ListColumn): + return self.elements.leaves() + else: + return self.elements + class ListMethods(ColumnMethods): """ @@ -315,7 +322,7 @@ def contains(self, search_key): return res @property - def leaves(self): + def leaves(self) -> Optional[SeriesOrIndex]: """ From a Series of (possibly nested) lists, obtain the elements from the innermost lists as a flat Series (one value per row). 
@@ -336,12 +343,9 @@ def leaves(self): 5 6 dtype: int64 """ - if type(self._column.elements) is ListColumn: - return self._column.elements.elements - else: - return self._return_or_inplace( - self._column.elements, retain_index=False - ) + return self._return_or_inplace( + self._column.leaves(), retain_index=False + ) def len(self): """ From 487378c7ef41915b03a7cc27aa279734951b14a3 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Tue, 13 Jul 2021 10:50:03 -0400 Subject: [PATCH 12/14] Move copyright --- python/cudf/cudf/core/column/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/__init__.py b/python/cudf/cudf/core/column/__init__.py index b26daa3fb8d..5a44d7c58a6 100644 --- a/python/cudf/cudf/core/column/__init__.py +++ b/python/cudf/cudf/core/column/__init__.py @@ -1,8 +1,8 @@ +# Copyright (c) 2020-2021, NVIDIA CORPORATION. """ isort: skip_file """ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. from cudf.core.column.categorical import CategoricalColumn from cudf.core.column.column import ( From 8cafbd63968628e5475e463fd1abf1ab75ac233f Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Tue, 13 Jul 2021 17:13:05 -0400 Subject: [PATCH 13/14] Parent is not optional --- python/cudf/cudf/core/column/string.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 7b23ffa9f4b..d00fca8b1c5 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -100,7 +100,7 @@ def str_to_boolean(column: StringColumn): class StringMethods(ColumnMethods): _column: StringColumn - def __init__(self, parent=None): + def __init__(self, parent): """ Vectorized string functions for Series and Index. From f38dc491be763c86d3c0dd32f85535473569960c Mon Sep 17 00:00:00 2001 From: Ashwin Srinath Date: Wed, 14 Jul 2021 16:52:41 -0400 Subject: [PATCH 14/14] Remove code re-introduced by bad merge --- python/cudf/cudf/core/column/column.py | 55 -------------------------- 1 file changed, 55 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index fa9fe17a65d..7bc036587af 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2299,61 +2299,6 @@ def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: return ColumnBase.from_scalar(cudf.Scalar(fill_value, dtype), size) -def _copy_type_metadata_from_arrow( - arrow_array: pa.array, cudf_column: ColumnBase -) -> ColumnBase: - """ - Similar to `Column._copy_type_metadata`, except copies type metadata - from arrow array into a cudf column. Recursive for every level. - * When `arrow_array` is struct type and `cudf_column` is StructDtype, copy - field names. - * When `arrow_array` is decimal type and `cudf_column` is - Decimal64Dtype, copy precisions. 
- """ - if pa.types.is_decimal(arrow_array.type) and isinstance( - cudf_column, - (cudf.core.column.Decimal32Column, cudf.core.column.Decimal64Column), - ): - cudf_column.dtype.precision = arrow_array.type.precision - elif pa.types.is_struct(arrow_array.type) and isinstance( - cudf_column, cudf.core.column.StructColumn - ): - base_children = tuple( - _copy_type_metadata_from_arrow(arrow_array.field(i), col_child) - for i, col_child in enumerate(cudf_column.base_children) - ) - cudf_column.set_base_children(base_children) - return cudf.core.column.StructColumn( - data=None, - size=cudf_column.base_size, - dtype=StructDtype.from_arrow(arrow_array.type), - mask=cudf_column.base_mask, - offset=cudf_column.offset, - null_count=cudf_column.null_count, - children=base_children, - ) - elif pa.types.is_list(arrow_array.type) and isinstance( - cudf_column, cudf.core.column.ListColumn - ): - if arrow_array.values and cudf_column.base_children: - base_children = ( - cudf_column.base_children[0], - _copy_type_metadata_from_arrow( - arrow_array.values, cudf_column.base_children[1] - ), - ) - return cudf.core.column.ListColumn( - size=cudf_column.base_size, - dtype=ListDtype.from_arrow(arrow_array.type), - mask=cudf_column.base_mask, - offset=cudf_column.offset, - null_count=cudf_column.null_count, - children=base_children, - ) - - return cudf_column - - def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: """Concatenate a sequence of columns.""" if len(objs) == 0: