From e24fa8f0b0cca2c9a441002623fdbc40631ed369 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 14 Jan 2022 16:35:43 -0800
Subject: [PATCH] Run doctests. (#9815)

This PR adds doctests and resolves #9513. Several issues were found by running doctests that have now been resolved:

- [x] #9821
- [x] #9822
- [x] #9823
- [x] #9824
- [x] #9825
- [x] #9826
- [x] #9827
- [x] #9828 (workaround by deleting doctests)
- [x] #9829

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/9815
---
 python/cudf/cudf/__init__.py                |  63 ++++++++++
 python/cudf/cudf/api/__init__.py            |   4 +-
 python/cudf/cudf/api/extensions/__init__.py |   6 +
 python/cudf/cudf/core/_base_index.py        |  14 ++-
 python/cudf/cudf/core/column/categorical.py |   1 -
 python/cudf/cudf/core/dataframe.py          | 122 ++++++++++++--------
 python/cudf/cudf/core/groupby/groupby.py    |  11 +-
 python/cudf/cudf/core/index.py              |  72 ++++++------
 python/cudf/cudf/core/multiindex.py         |  16 +--
 python/cudf/cudf/core/reshape.py            |   3 +-
 python/cudf/cudf/core/scalar.py             |   2 +-
 python/cudf/cudf/core/series.py             |  42 ++++---
 python/cudf/cudf/core/tools/datetimes.py    |   8 +-
 python/cudf/cudf/tests/test_dataframe.py    |  81 ++++++++-----
 python/cudf/cudf/tests/test_doctests.py     | 102 ++++++++++++++++
 python/cudf/cudf/utils/docutils.py          |  14 +--
 python/cudf/cudf/utils/ioutils.py           |   6 +-
 17 files changed, 402 insertions(+), 165 deletions(-)
 create mode 100644 python/cudf/cudf/tests/test_doctests.py

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index 2461e7b09bc..4dadf6a1869 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -126,3 +126,66 @@
 
 __version__ = get_versions()["version"]
 del get_versions
+
+__all__ = [
+    "BaseIndex",
+    "CategoricalDtype",
+    "CategoricalIndex",
+    "DataFrame",
+    "DateOffset",
+    "DatetimeIndex",
+    "Decimal32Dtype",
+    "Decimal64Dtype",
+    "Float32Index",
+    "Float64Index",
+    "GenericIndex",
+    "Grouper",
+    "Index",
+    "Int16Index",
+    "Int32Index",
+    "Int64Index",
+    "Int8Index",
+    "IntervalDtype",
+    "IntervalIndex",
+    "ListDtype",
+    "MultiIndex",
+    "NA",
+    "RangeIndex",
+    "Scalar",
+    "Series",
+    "StringIndex",
+    "StructDtype",
+    "TimedeltaIndex",
+    "UInt16Index",
+    "UInt32Index",
+    "UInt64Index",
+    "UInt8Index",
+    "api",
+    "concat",
+    "cut",
+    "date_range",
+    "factorize",
+    "from_dataframe",
+    "from_dlpack",
+    "from_pandas",
+    "get_dummies",
+    "interval_range",
+    "isclose",
+    "melt",
+    "merge",
+    "merge_sorted",
+    "pivot",
+    "read_avro",
+    "read_csv",
+    "read_feather",
+    "read_hdf",
+    "read_json",
+    "read_orc",
+    "read_parquet",
+    "read_text",
+    "set_allocator",
+    "testing",
+    "to_datetime",
+    "to_numeric",
+    "unstack",
+]
diff --git a/python/cudf/cudf/api/__init__.py b/python/cudf/cudf/api/__init__.py
index 21c24015e41..c66bfb4efeb 100644
--- a/python/cudf/cudf/api/__init__.py
+++ b/python/cudf/cudf/api/__init__.py
@@ -1,3 +1,5 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
 
-from cudf.api import types
+from cudf.api import extensions, types
+
+__all__ = ["extensions", "types"]
diff --git a/python/cudf/cudf/api/extensions/__init__.py b/python/cudf/cudf/api/extensions/__init__.py
index c971e6f7731..eeb5dcdb32a 100644
--- a/python/cudf/cudf/api/extensions/__init__.py
+++ b/python/cudf/cudf/api/extensions/__init__.py
@@ -5,3 +5,9 @@
     register_index_accessor,
     register_series_accessor,
 )
+
+__all__ = [
+    "register_dataframe_accessor",
+    "register_index_accessor",
+    "register_series_accessor",
+]
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 683f3fefe1c..4f2614e843f 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -495,7 +495,7 @@ def fillna(self, value, downcast=None):
         >>> import cudf
         >>> index = cudf.Index([1, 2, None, 4])
         >>> index
-        Int64Index([1, 2, null, 4], dtype='int64')
+        Int64Index([1, 2, <NA>, 4], dtype='int64')
         >>> index.fillna(3)
         Int64Index([1, 2, 3, 4], dtype='int64')
         """
@@ -553,7 +553,7 @@ def to_pandas(self):
         >>> type(idx.to_pandas())
         <class 'pandas.core.indexes.numeric.Int64Index'>
         >>> type(idx)
-        <class 'cudf.core.index.GenericIndex'>
+        <class 'cudf.core.index.Int64Index'>
         """
         return pd.Index(self._values.to_pandas(), name=self.name)
 
@@ -942,6 +942,7 @@ def is_interval(self):
         Examples
         --------
         >>> import cudf
+        >>> import pandas as pd
         >>> idx = cudf.from_pandas(
         ...     pd.Index([pd.Interval(left=0, right=5),
         ...               pd.Interval(left=5, right=10)])
@@ -1105,15 +1106,16 @@ def join(
         Examples
         --------
         >>> import cudf
-        >>> lhs = cudf.DataFrame(
-        ...     {"a":[2, 3, 1], "b":[3, 4, 2]}).set_index(['a', 'b']
-        ... ).index
+        >>> lhs = cudf.DataFrame({
+        ...     "a": [2, 3, 1],
+        ...     "b": [3, 4, 2],
+        ... }).set_index(['a', 'b']).index
         >>> lhs
         MultiIndex([(2, 3),
                     (3, 4),
                     (1, 2)],
                    names=['a', 'b'])
-        >>> rhs = cudf.DataFrame({"a":[1, 4, 3]}).set_index('a').index
+        >>> rhs = cudf.DataFrame({"a": [1, 4, 3]}).set_index('a').index
         >>> rhs
         Int64Index([1, 4, 3], dtype='int64', name='a')
         >>> lhs.join(rhs, how='inner')
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index 4be7a422de0..de06e62cbb1 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -61,7 +61,6 @@ class CategoricalAccessor(ColumnMethods):
     --------
     >>> s = cudf.Series([1,2,3], dtype='category')
     >>> s
-    >>> s
     0    1
     1    2
     2    3
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 6bbb2fca77c..69600426ec0 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -463,12 +463,12 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin):
     ...     [(t0+ timedelta(seconds=x)) for x in range(n)])
     ... })
     >>> df
-        id                datetimes
-    0    0  2018-10-07T12:00:00.000
-    1    1  2018-10-07T12:00:01.000
-    2    2  2018-10-07T12:00:02.000
-    3    3  2018-10-07T12:00:03.000
-    4    4  2018-10-07T12:00:04.000
+        id            datetimes
+    0    0  2018-10-07 12:00:00
+    1    1  2018-10-07 12:00:01
+    2    2  2018-10-07 12:00:02
+    3    3  2018-10-07 12:00:03
+    4    4  2018-10-07 12:00:04
 
     Build DataFrame via list of rows as tuples:
 
@@ -984,23 +984,34 @@ def __getitem__(self, arg):
 
         Examples
         --------
-        >>> df = DataFrame([('a', list(range(20))),
-        ...                 ('b', list(range(20))),
-        ...                 ('c', list(range(20)))])
-        >>> df[:4]    # get first 4 rows of all columns
+        >>> df = cudf.DataFrame({
+        ...     'a': list(range(10)),
+        ...     'b': list(range(10)),
+        ...     'c': list(range(10)),
+        ... })
+
+        Get first 4 rows of all columns.
+
+        >>> df[:4]
            a  b  c
         0  0  0  0
         1  1  1  1
         2  2  2  2
         3  3  3  3
-        >>> df[-5:]  # get last 5 rows of all columns
-             a   b   c
-        15  15  15  15
-        16  16  16  16
-        17  17  17  17
-        18  18  18  18
-        19  19  19  19
-        >>> df[['a', 'c']] # get columns a and c
+
+        Get last 5 rows of all columns.
+
+        >>> df[-5:]
+           a  b  c
+        5  5  5  5
+        6  6  6  6
+        7  7  7  7
+        8  8  8  8
+        9  9  9  9
+
+        Get columns a and c.
+
+        >>> df[['a', 'c']]
            a  c
         0  0  0
         1  1  1
@@ -1012,8 +1023,17 @@ def __getitem__(self, arg):
         7  7  7
         8  8  8
         9  9  9
-        >>> df[[True, False, True, False]] # mask the entire dataframe,
-        # returning the rows specified in the boolean mask
+
+        Return the rows specified in the boolean mask.
+
+        >>> df[[True, False, True, False, True,
+        ...     False, True, False, True, False]]
+           a  b  c
+        0  0  0  0
+        2  2  2  2
+        4  4  4  4
+        6  6  6  6
+        8  8  8  8
         """
         if _is_scalar_or_zero_d_array(arg) or isinstance(arg, tuple):
             return self._get_columns_by_label(arg, downcast=True)
@@ -1261,10 +1281,12 @@ def memory_usage(self, index=True, deep=False):
         object     40000
         bool        5000
         dtype: int64
+
         Use a Categorical for efficient storage of an object-dtype column with
         many repeated values.
+
         >>> df['object'].astype('category').memory_usage(deep=True)
-        5048
+        5008
         """
         if deep:
             warnings.warn(
@@ -2225,11 +2247,11 @@ def reindex(
         3    3  13.0
         4    4  14.0
         >>> df_new
-           key   val  sum
-        0    0  10.0  NaN
-        3    3  13.0  NaN
-        4    4  14.0  NaN
-        5   -1   NaN  NaN
+           key   val   sum
+        0     0  10.0  <NA>
+        3     3  13.0  <NA>
+        4     4  14.0  <NA>
+        5  <NA>  <NA>  <NA>
         """
 
         if labels is None and index is None and columns is None:
@@ -3701,10 +3723,10 @@ def query(self, expr, local_dict=None):
 
         Examples
         --------
-        >>> import cudf
-        >>> a = ('a', [1, 2, 2])
-        >>> b = ('b', [3, 4, 5])
-        >>> df = cudf.DataFrame([a, b])
+        >>> df = cudf.DataFrame({
+        ...     "a": [1, 2, 2],
+        ...     "b": [3, 4, 5],
+        ... })
         >>> expr = "(a == 2 and b == 4) or (b == 3)"
         >>> df.query(expr)
            a  b
@@ -3720,8 +3742,8 @@ def query(self, expr, local_dict=None):
         >>> df['datetimes'] = data
         >>> search_date = datetime.datetime.strptime('2018-10-08', '%Y-%m-%d')
         >>> df.query('datetimes==@search_date')
-                        datetimes
-        1 2018-10-08T00:00:00.000
+           datetimes
+        1 2018-10-08
 
         Using local_dict:
 
@@ -3732,9 +3754,9 @@ def query(self, expr, local_dict=None):
         >>> df['datetimes'] = data
         >>> search_date2 = datetime.datetime.strptime('2018-10-08', '%Y-%m-%d')
         >>> df.query('datetimes==@search_date',
-        ...         local_dict={'search_date':search_date2})
-                        datetimes
-        1 2018-10-08T00:00:00.000
+        ...          local_dict={'search_date': search_date2})
+           datetimes
+        1 2018-10-08
         """
         # can't use `annotate` decorator here as we inspect the calling
         # environment.
@@ -4189,18 +4211,23 @@ def info(
         dtypes: float64(1), int64(1), object(1)
         memory usage: 130.0+ bytes
 
-        Pipe output of DataFrame.info to buffer instead of sys.stdout,
-        get buffer content and writes to a text file:
+        Pipe output of DataFrame.info to a buffer instead of sys.stdout and
+        print buffer contents:
 
         >>> import io
         >>> buffer = io.StringIO()
         >>> df.info(buf=buffer)
-        >>> s = buffer.getvalue()
-        >>> with open("df_info.txt", "w",
-        ...           encoding="utf-8") as f:
-        ...     f.write(s)
-        ...
-        369
+        >>> print(buffer.getvalue())
+        <class 'cudf.core.dataframe.DataFrame'>
+        RangeIndex: 5 entries, 0 to 4
+        Data columns (total 3 columns):
+         #   Column     Non-Null Count  Dtype
+        ---  ------     --------------  -----
+         0   int_col    5 non-null      int64
+         1   text_col   5 non-null      object
+         2   float_col  5 non-null      float64
+        dtypes: float64(1), int64(1), object(1)
+        memory usage: 130.0+ bytes
 
         The `memory_usage` parameter allows deep introspection mode, specially
         useful for big DataFrames and fine-tune memory optimization:
@@ -5761,7 +5788,7 @@ def stack(self, level=-1, dropna=True):
         Examples
         --------
         >>> import cudf
-        >>> df = cudf.DataFrame({'a':[0,1,3], 'b':[1,2,4]})
+        >>> df = cudf.DataFrame({'a': [0, 1, 3], 'b': [1, 2, 4]})
         >>> df.stack()
         0  a    0
            b    1
@@ -6084,8 +6111,11 @@ def explode(self, column, ignore_index=False):
         Examples
         --------
         >>> import cudf
-        >>> cudf.DataFrame(
-                {"a": [[1, 2, 3], [], None, [4, 5]], "b": [11, 22, 33, 44]})
+        >>> df = cudf.DataFrame({
+        ...     "a": [[1, 2, 3], [], None, [4, 5]],
+        ...     "b": [11, 22, 33, 44],
+        ... })
+        >>> df
                    a   b
         0  [1, 2, 3]  11
         1         []  22
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 08ef3f07776..5b041ba53b9 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1256,9 +1256,10 @@ class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
     --------
     >>> import cudf
     >>> import pandas as pd
-    >>> df = cudf.DataFrame({'Animal': ['Falcon', 'Falcon',
-    ...                               'Parrot', 'Parrot'],
-    ...                    'Max Speed': [380., 370., 24., 26.]})
+    >>> df = cudf.DataFrame({
+    ...     'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],
+    ...     'Max Speed': [380., 370., 24., 26.],
+    ... })
     >>> df
        Animal  Max Speed
     0  Falcon      380.0
@@ -1272,10 +1273,10 @@ class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
     Parrot       25.0
 
     >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
-    ... ['Captive', 'Wild', 'Captive', 'Wild']]
+    ...           ['Captive', 'Wild', 'Captive', 'Wild']]
     >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
     >>> df = cudf.DataFrame({'Max Speed': [390., 350., 30., 20.]},
-            index=index)
+    ...     index=index)
     >>> df
                     Max Speed
     Animal Type
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 859a81bc5f4..1e493708415 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1535,9 +1535,11 @@ class DatetimeIndex(GenericIndex):
     --------
     >>> import cudf
     >>> cudf.DatetimeIndex([1, 2, 3, 4], name="a")
-    DatetimeIndex(['1970-01-01 00:00:00.001000', '1970-01-01 00:00:00.002000',
-                   '1970-01-01 00:00:00.003000', '1970-01-01 00:00:00.004000'],
-                  dtype='datetime64[ms]', name='a')
+    DatetimeIndex(['1970-01-01 00:00:00.000000001',
+                   '1970-01-01 00:00:00.000000002',
+                   '1970-01-01 00:00:00.000000003',
+                   '1970-01-01 00:00:00.000000004'],
+                  dtype='datetime64[ns]', name='a')
     """
 
     def __init__(
@@ -1899,12 +1901,13 @@ def ceil(self, freq):
         Examples
         --------
         >>> import cudf
-        >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:00:00",
-        ... "1999-12-31 18:40:00"])
+        >>> gIndex = cudf.DatetimeIndex([
+        ...     "2020-05-31 08:05:42",
+        ...     "1999-12-31 18:40:30",
+        ... ])
         >>> gIndex.ceil("T")
-        DatetimeIndex(['2020-05-31 08:00:00', '1999-12-31 18:40:00'],
-        dtype='datetime64[ns]', freq=None)
-        """
+        DatetimeIndex(['2020-05-31 08:06:00', '1999-12-31 18:41:00'], dtype='datetime64[ns]')
+        """  # noqa: E501
         out_column = self._values.ceil(freq)
 
         return self.__class__._from_data({self.name: out_column})
@@ -1930,12 +1933,13 @@ def floor(self, freq):
         Examples
         --------
         >>> import cudf
-        >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:59:59"
-        ... ,"1999-12-31 18:44:59"])
+        >>> gIndex = cudf.DatetimeIndex([
+        ...     "2020-05-31 08:59:59",
+        ...     "1999-12-31 18:44:59",
+        ... ])
         >>> gIndex.floor("T")
-        DatetimeIndex(['2020-05-31 08:59:00', '1999-12-31 18:44:00'],
-        dtype='datetime64[ns]', freq=None)
-        """
+        DatetimeIndex(['2020-05-31 08:59:00', '1999-12-31 18:44:00'], dtype='datetime64[ns]')
+        """  # noqa: E501
         out_column = self._values.floor(freq)
 
         return self.__class__._from_data({self.name: out_column})
@@ -1967,21 +1971,14 @@ def round(self, freq):
         ...     "2001-01-01 00:05:04",
         ... ], dtype="datetime64[ns]")
         >>> dt_idx
-        DatetimeIndex(['2001-01-01 00:04:45',
-                '2001-01-01 00:05:04',
-                '2001-01-01 00:04:58'],
-                dtype='datetime64[ns]', freq=None)
+        DatetimeIndex(['2001-01-01 00:04:45', '2001-01-01 00:04:58',
+                       '2001-01-01 00:05:04'],
+                      dtype='datetime64[ns]')
         >>> dt_idx.round('H')
-        DatetimeIndex(['2001-01-01',
-                    '2001-01-01',
-                    '2001-01-01'],
-                    dtype='datetime64[ns]', freq=None)
+        DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01'], dtype='datetime64[ns]')
         >>> dt_idx.round('T')
-        DatetimeIndex(['2001-01-01 00:05:00',
-                    '2001-01-01 00:05:00',
-                    '2001-01-01 00:05:00'],
-                    dtype='datetime64[ns]', freq=None)
-        """
+        DatetimeIndex(['2001-01-01 00:05:00', '2001-01-01 00:05:00', '2001-01-01 00:05:00'], dtype='datetime64[ns]')
+        """  # noqa: E501
         out_column = self._values.round(freq)
 
         return self.__class__._from_data({self.name: out_column})
@@ -2018,14 +2015,15 @@ class TimedeltaIndex(GenericIndex):
     --------
     >>> import cudf
     >>> cudf.TimedeltaIndex([1132223, 2023232, 342234324, 4234324],
-    ...     dtype='timedelta64[ns]')
-    TimedeltaIndex(['00:00:00.001132', '00:00:00.002023', '00:00:00.342234',
-                    '00:00:00.004234'],
-                dtype='timedelta64[ns]')
-    >>> cudf.TimedeltaIndex([1, 2, 3, 4], dtype='timedelta64[s]',
+    ...     dtype="timedelta64[ns]")
+    TimedeltaIndex(['0 days 00:00:00.001132223', '0 days 00:00:00.002023232',
+                    '0 days 00:00:00.342234324', '0 days 00:00:00.004234324'],
+                  dtype='timedelta64[ns]')
+    >>> cudf.TimedeltaIndex([1, 2, 3, 4], dtype="timedelta64[s]",
     ...     name="delta-index")
-    TimedeltaIndex(['00:00:01', '00:00:02', '00:00:03', '00:00:04'],
-                dtype='timedelta64[s]', name='delta-index')
+    TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03',
+                    '0 days 00:00:04'],
+                  dtype='timedelta64[s]', name='delta-index')
     """
 
     def __init__(
@@ -2154,11 +2152,11 @@ class CategoricalIndex(GenericIndex):
     >>> import pandas as pd
     >>> cudf.CategoricalIndex(
     ... data=[1, 2, 3, 4], categories=[1, 2], ordered=False, name="a")
-    CategoricalIndex([1, 2, <NA>, <NA>], categories=[1, 2], ordered=False, name='a', dtype='category', name='a')
+    CategoricalIndex([1, 2, <NA>, <NA>], categories=[1, 2], ordered=False, dtype='category', name='a')
 
     >>> cudf.CategoricalIndex(
     ... data=[1, 2, 3, 4], dtype=pd.CategoricalDtype([1, 2, 3]), name="a")
-    CategoricalIndex([1, 2, 3, <NA>], categories=[1, 2, 3], ordered=False, name='a', dtype='category', name='a')
+    CategoricalIndex([1, 2, 3, <NA>], categories=[1, 2, 3], ordered=False, dtype='category', name='a')
     """  # noqa: E501
 
     def __init__(
@@ -2449,9 +2447,7 @@ def from_breaks(breaks, closed="right", name=None, copy=False, dtype=None):
         >>> import cudf
         >>> import pandas as pd
         >>> cudf.IntervalIndex.from_breaks([0, 1, 2, 3])
-        IntervalIndex([(0, 1], (1, 2], (2, 3]],
-                    closed='right',
-                    dtype='interval[int64]')
+        IntervalIndex([(0, 1], (1, 2], (2, 3]], dtype='interval')
         """
         if copy:
             breaks = column.as_column(breaks, dtype=dtype).copy()
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index b333c862f21..3acc947c649 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -190,7 +190,7 @@ def rename(self, names, inplace=False):
         Renaming each levels of a MultiIndex to specified name:
 
         >>> midx = cudf.MultiIndex.from_product(
-                [('A', 'B'), (2020, 2021)], names=['c1', 'c2'])
+        ...     [('A', 'B'), (2020, 2021)], names=['c1', 'c2'])
         >>> midx.rename(['lv1', 'lv2'])
         MultiIndex([('A', 2020),
                     ('A', 2021),
@@ -1086,7 +1086,7 @@ def values(self):
             [4, 2],
             [5, 1]])
         >>> type(midx.values)
-        <class 'cupy.core.core.ndarray'>
+        <class 'cupy._core.core.ndarray'>
         """
         return self.to_frame(index=False).values
 
@@ -1587,13 +1587,13 @@ def get_loc(self, key, method=None, tolerance=None):
         --------
         >>> import cudf
         >>> mi = cudf.MultiIndex.from_tuples(
-            [('a', 'd'), ('b', 'e'), ('b', 'f')])
+        ...     [('a', 'd'), ('b', 'e'), ('b', 'f')])
         >>> mi.get_loc('b')
         slice(1, 3, None)
         >>> mi.get_loc(('b', 'e'))
         1
         >>> non_monotonic_non_unique_idx = cudf.MultiIndex.from_tuples(
-            [('c', 'd'), ('b', 'e'), ('a', 'f'), ('b', 'e')])
+        ...     [('c', 'd'), ('b', 'e'), ('a', 'f'), ('b', 'e')])
         >>> non_monotonic_non_unique_idx.get_loc('b') # differ from pandas
         slice(1, 4, 2)
 
@@ -1609,10 +1609,10 @@ def get_loc(self, key, method=None, tolerance=None):
 
                 >>> import pandas as pd
                 >>> import cudf
-                >>> x = pd.MultiIndex.from_tuples(
-                            [(2, 1, 1), (1, 2, 3), (1, 2, 1),
-                                (1, 1, 1), (1, 1, 1), (2, 2, 1)]
-                        )
+                >>> x = pd.MultiIndex.from_tuples([
+                ...     (2, 1, 1), (1, 2, 3), (1, 2, 1),
+                ...     (1, 1, 1), (1, 1, 1), (2, 2, 1),
+                ... ])
                 >>> x.get_loc(1)
                 array([False,  True,  True,  True,  True, False])
                 >>> cudf.from_pandas(x).get_loc(1)
diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index 1733a6c0b9a..68113cfdca9 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -891,7 +891,7 @@ def pivot(data, index=None, columns=None, values=None):
     Examples
     --------
     >>> a = cudf.DataFrame()
-    >>> a['a'] = [1, 1, 2, 2],
+    >>> a['a'] = [1, 1, 2, 2]
     >>> a['b'] = ['a', 'b', 'a', 'b']
     >>> a['c'] = [1, 2, 3, 4]
     >>> a.pivot(index='a', columns='b')
@@ -973,6 +973,7 @@ def unstack(df, level, fill_value=None):
 
     Examples
     --------
+    >>> df = cudf.DataFrame()
     >>> df['a'] = [1, 1, 1, 2, 2]
     >>> df['b'] = [1, 2, 3, 1, 2]
     >>> df['c'] = [5, 6, 7, 8, 9]
diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py
index 787b28e213c..37bb8e32c5a 100644
--- a/python/cudf/cudf/core/scalar.py
+++ b/python/cudf/cudf/core/scalar.py
@@ -32,7 +32,7 @@ class Scalar(object):
     >>> cudf.Scalar(42, dtype='int64') + np.int8(21)
     Scalar(63, dtype=int64)
     >>> x = cudf.Scalar(42, dtype='datetime64[s]')
-    >>> y = cudf.Scalar(21, dtype='timedelta64[ns])
+    >>> y = cudf.Scalar(21, dtype='timedelta64[ns]')
     >>> x - y
     Scalar(1970-01-01T00:00:41.999999979, dtype=datetime64[ns])
     >>> cudf.Series([1,2,3]) + cudf.Scalar(1)
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 7da3bdbb31e..6842a05a505 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -512,13 +512,26 @@ def from_pandas(cls, s, nan_as_null=None):
     @property
     def dt(self):
         """
-        Accessor object for datetimelike properties of the Series values.
+        Accessor object for datetime-like properties of the Series values.
 
         Examples
         --------
+        >>> s = cudf.Series(cudf.date_range(
+        ...   start='2001-02-03 12:00:00',
+        ...   end='2001-02-03 14:00:00',
+        ...   freq='1H'))
         >>> s.dt.hour
+        0    12
+        1    13
+        dtype: int16
         >>> s.dt.second
+        0    0
+        1    0
+        dtype: int16
         >>> s.dt.day
+        0    3
+        1    3
+        dtype: int16
 
         Returns
         -------
@@ -674,10 +687,12 @@ def drop(
            y    3
         2  x    4
            y    5
+        dtype: int64
         >>> s.drop(labels='y', level=1)
         0  x    0
         1  x    2
         2  x    4
+        Name: 2, dtype: int64
         """
         if labels is not None:
             if index is not None or columns is not None:
@@ -1032,7 +1047,7 @@ def memory_usage(self, index=True, deep=False):
         --------
         >>> s = cudf.Series(range(3), index=['a','b','c'])
         >>> s.memory_usage()
-        48
+        43
 
         Not including the index gives the size of the rest of the data, which
         is necessarily smaller:
@@ -1539,7 +1554,7 @@ def dropna(self, axis=0, inplace=False, how=None):
         >>> ser
         0       1
         1       2
-        2    null
+        2    <NA>
         dtype: int64
 
         Drop null values from a Series.
@@ -1800,7 +1815,7 @@ def data(self):
         3    4
         dtype: int64
         >>> series.data
-        <cudf.core.buffer.Buffer object at 0x7f23c192d110>
+        <cudf.core.buffer.Buffer object at 0x...>
         >>> series.data.to_host_array()
         array([1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
                0, 0, 4, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)
@@ -1824,14 +1839,7 @@ def as_mask(self):
         >>> import cudf
         >>> s = cudf.Series([True, False, True])
         >>> s.as_mask()
-        <cudf.core.buffer.Buffer object at 0x7f23c3eed0d0>
-        >>> s.as_mask().to_host_array()
-        array([  5,   0,   0,   0,   0,   0,   0,   0,   1,   0,   0,   0,   0,
-                 0,   0,   0,   2,   0,   0,   0,   0,   0,   0,   0, 181, 164,
-               188,   1,   0,   0,   0,   0, 255, 255, 255, 255, 255, 255, 255,
-               127, 253, 214,  62, 241,   1,   0,   0,   0,   0,   0,   0,   0,
-                 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
-             dtype=uint8)
+        <cudf.core.buffer.Buffer object at 0x...>
         """
         if not is_bool_dtype(self.dtype):
             raise TypeError(
@@ -2805,11 +2813,11 @@ def autocorr(self, lag=1):
         Examples
         --------
         >>> import cudf
-        >>> s = cudf.Series([0.25, 0.5, 0.2, -0.05])
+        >>> s = cudf.Series([0.25, 0.5, 0.2, -0.05, 0.17])
         >>> s.autocorr()
-        0.10355263309024071
+        0.1438853844...
         >>> s.autocorr(lag=2)
-        -0.9999999999999999
+        -0.9647548490...
         """
         return self.corr(self.shift(lag))
 
@@ -3584,7 +3592,7 @@ def keys(self):
         dtype: int64
 
         >>> sr.keys()
-        RangeIndex(start=0, stop=6)
+        RangeIndex(start=0, stop=6, step=1)
         >>> sr = cudf.Series(['a', 'b', 'c'])
         >>> sr
         0    a
@@ -3592,7 +3600,7 @@ def keys(self):
         2    c
         dtype: object
         >>> sr.keys()
-        RangeIndex(start=0, stop=3)
+        RangeIndex(start=0, stop=3, step=1)
         >>> sr = cudf.Series([1, 2, 3], index=['a', 'b', 'c'])
         >>> sr
         a    1
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 15426d0173a..62c31691ac1 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -397,10 +397,10 @@ class DateOffset:
     --------
     >>> from cudf import DateOffset
     >>> ts = cudf.Series([
-        "2000-01-01 00:00:00.012345678",
-        "2000-01-31 00:00:00.012345678",
-        "2000-02-29 00:00:00.012345678",
-    ], dtype='datetime64[ns])
+    ...     "2000-01-01 00:00:00.012345678",
+    ...     "2000-01-31 00:00:00.012345678",
+    ...     "2000-02-29 00:00:00.012345678",
+    ... ], dtype='datetime64[ns]')
     >>> ts + DateOffset(months=3)
     0   2000-04-01 00:00:00.012345678
     1   2000-04-30 00:00:00.012345678
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 372587ba677..6171f20929d 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -746,18 +746,31 @@ def test_index_astype(nelem):
     np.testing.assert_equal(df.index.to_numpy(), df["a"].to_numpy())
 
 
-def test_dataframe_to_string():
-    pd.options.display.max_rows = 5
-    pd.options.display.max_columns = 8
-    # Test basic
+def test_dataframe_to_string_with_skipped_rows():
+    # Test skipped rows
     df = cudf.DataFrame(
         {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}
     )
-    string = str(df)
 
-    assert string.splitlines()[-1] == "[6 rows x 2 columns]"
+    with pd.option_context("display.max_rows", 5):
+        got = df.to_string()
+
+    expect = textwrap.dedent(
+        """\
+            a   b
+        0   1  11
+        1   2  12
+        .. ..  ..
+        4   5  15
+        5   6  16
+
+        [6 rows x 2 columns]"""
+    )
+    assert got == expect
 
-    # Test skipped columns
+
+def test_dataframe_to_string_with_skipped_rows_and_columns():
+    # Test skipped rows and skipped columns
     df = cudf.DataFrame(
         {
             "a": [1, 2, 3, 4, 5, 6],
@@ -766,11 +779,26 @@ def test_dataframe_to_string():
             "d": [11, 12, 13, 14, 15, 16],
         }
     )
-    string = df.to_string()
 
-    assert string.splitlines()[-1] == "[6 rows x 4 columns]"
+    with pd.option_context("display.max_rows", 5, "display.max_columns", 3):
+        got = df.to_string()
+
+    expect = textwrap.dedent(
+        """\
+            a  ...   d
+        0   1  ...  11
+        1   2  ...  12
+        .. ..  ...  ..
+        4   5  ...  15
+        5   6  ...  16
+
+        [6 rows x 4 columns]"""
+    )
+    assert got == expect
+
 
-    # Test masked
+def test_dataframe_to_string_with_masked_data():
+    # Test masked data
     df = cudf.DataFrame(
         {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}
     )
@@ -783,34 +811,33 @@ def test_dataframe_to_string():
     assert masked.null_count == 2
     df["c"] = masked
 
-    # check data
+    # Check data
     values = masked.copy()
     validids = [0, 2, 3, 5]
     densearray = masked.dropna().to_numpy()
     np.testing.assert_equal(data[validids], densearray)
-    # valid position is correct
-
+    # Valid position is correct
     for i in validids:
         assert data[i] == values[i]
-    # null position is correct
+    # Null position is correct
     for i in range(len(values)):
         if i not in validids:
             assert values[i] is cudf.NA
 
-    pd.options.display.max_rows = 10
-    got = df.to_string()
+    with pd.option_context("display.max_rows", 10):
+        got = df.to_string()
 
-    expect = """
-a b  c
-0 1 11 0
-1 2 12 <NA>
-2 3 13 2
-3 4 14 3
-4 5 15 <NA>
-5 6 16 5
-"""
-    # values should match despite whitespace difference
-    assert got.split() == expect.split()
+    expect = textwrap.dedent(
+        """\
+           a   b     c
+        0  1  11     0
+        1  2  12  <NA>
+        2  3  13     2
+        3  4  14     3
+        4  5  15  <NA>
+        5  6  16     5"""
+    )
+    assert got == expect
 
 
 def test_dataframe_to_string_wide(monkeypatch):
diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
new file mode 100644
index 00000000000..05d6886c297
--- /dev/null
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -0,0 +1,102 @@
+import contextlib
+import doctest
+import inspect
+import io
+import os
+
+import numpy as np
+import pytest
+
+import cudf
+
+
+def _name_in_all(parent, name):
+    return name in getattr(parent, "__all__", [])
+
+
+def _is_public_name(parent, name):
+    return not name.startswith("_")
+
+
+def _find_doctests_in_obj(obj, finder=None, criteria=None):
+    """Find all doctests in an object.
+
+    Parameters
+    ----------
+    obj : module or class
+        The object to search for docstring examples.
+    finder : doctest.DocTestFinder, optional
+        The DocTestFinder object to use. If not provided, a DocTestFinder is
+        constructed.
+    criteria : callable, optional
+        Callable indicating whether to recurse over members of the provided
+        object. If not provided, names not defined in the object's ``__all__``
+        property are ignored.
+
+    Yields
+    ------
+    doctest.DocTest
+        The next doctest found in the object.
+    """
+    if finder is None:
+        finder = doctest.DocTestFinder()
+    if criteria is None:
+        criteria = _name_in_all
+    for docstring in finder.find(obj):
+        if docstring.examples:
+            yield docstring
+    for name, member in inspect.getmembers(obj):
+        # Only recurse over members matching the criteria
+        if not criteria(obj, name):
+            continue
+        # Recurse over the public API of modules (objects defined in the
+        # module's __all__)
+        if inspect.ismodule(member):
+            yield from _find_doctests_in_obj(
+                member, finder, criteria=_name_in_all
+            )
+        # Recurse over the public API of classes (attributes not prefixed with
+        # an underscore)
+        if inspect.isclass(member):
+            yield from _find_doctests_in_obj(
+                member, finder, criteria=_is_public_name
+            )
+
+
+class TestDoctests:
+    @pytest.fixture(autouse=True)
+    def chdir_to_tmp_path(cls, tmp_path):
+        # Some doctests generate files, so this fixture runs the tests in a
+        # temporary directory.
+        original_directory = os.getcwd()
+        os.chdir(tmp_path)
+        yield
+        os.chdir(original_directory)
+
+    @pytest.mark.parametrize(
+        "docstring",
+        _find_doctests_in_obj(cudf),
+        ids=lambda docstring: docstring.name,
+    )
+    def test_docstring(self, docstring):
+        # We ignore differences in whitespace in the doctest output, and enable
+        # the use of an ellipsis "..." to match any string in the doctest
+        # output. An ellipsis is useful for, e.g., memory addresses or
+        # imprecise floating point values.
+        optionflags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
+        runner = doctest.DocTestRunner(optionflags=optionflags)
+
+        # These global names are pre-defined and can be used in doctests
+        # without first importing them.
+        globals = dict(cudf=cudf, np=np,)
+        docstring.globs = globals
+
+        # Capture stdout and include failing outputs in the traceback.
+        doctest_stdout = io.StringIO()
+        with contextlib.redirect_stdout(doctest_stdout):
+            runner.run(docstring)
+            results = runner.summarize()
+        assert not results.failed, (
+            f"{results.failed} of {results.attempted} doctests failed for "
+            f"{docstring.name}:\n{doctest_stdout.getvalue()}"
+        )
diff --git a/python/cudf/cudf/utils/docutils.py b/python/cudf/cudf/utils/docutils.py
index 7a4a2673f9b..2fcf996b641 100644
--- a/python/cudf/cudf/utils/docutils.py
+++ b/python/cudf/cudf/utils/docutils.py
@@ -225,13 +225,13 @@ def wrapper(func):
         2   2010-01-01
         dtype: datetime64[s]
         >>> s.describe()
-        count                                3
-        mean     2006-09-01 08:00:00.000000000
-        min      2000-01-01 00:00:00.000000000
-        25%      2004-12-31 12:00:00.000000000
-        50%      2010-01-01 00:00:00.000000000
-        75%      2010-01-01 00:00:00.000000000
-        max      2010-01-01 00:00:00.000000000
+        count                     3
+        mean    2006-09-01 08:00:00
+        min     2000-01-01 00:00:00
+        25%     2004-12-31 12:00:00
+        50%     2010-01-01 00:00:00
+        75%     2010-01-01 00:00:00
+        max     2010-01-01 00:00:00
         dtype: object
 
         Describing a ``DataFrame``. By default only numeric fields are
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index b881f9372bc..6f958860dad 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -972,9 +972,9 @@
 >>> import cudf
 >>> filename = 'foo.csv'
 >>> df = cudf.DataFrame({'x': [0, 1, 2, 3],
-                         'y': [1.0, 3.3, 2.2, 4.4],
-                         'z': ['a', 'b', 'c', 'd']})
->>> df = df.set_index([3, 2, 1, 0])
+...                      'y': [1.0, 3.3, 2.2, 4.4],
+...                      'z': ['a', 'b', 'c', 'd']})
+>>> df = df.set_index(cudf.Series([3, 2, 1, 0]))
 >>> df.to_csv(filename)
 
 """