diff --git a/docs/cudf/source/user_guide/api_docs/index.rst b/docs/cudf/source/user_guide/api_docs/index.rst
index 5f26a921012..d05501f4a4a 100644
--- a/docs/cudf/source/user_guide/api_docs/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/index.rst
@@ -26,3 +26,4 @@ This page provides a list of all publicly accessible modules, methods and classe
     options
     extension_dtypes
     pylibcudf/index.rst
+    performance_tracking
diff --git a/docs/cudf/source/user_guide/api_docs/performance_tracking.rst b/docs/cudf/source/user_guide/api_docs/performance_tracking.rst
new file mode 100644
index 00000000000..9da79e69fb2
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/performance_tracking.rst
@@ -0,0 +1,12 @@
+.. _api.performance_tracking:
+
+====================
+Performance Tracking
+====================
+
+.. currentmodule:: cudf.utils.performance_tracking
+.. autosummary::
+   :toctree: api/
+
+   get_memory_records
+   print_memory_report
diff --git a/docs/cudf/source/user_guide/index.md b/docs/cudf/source/user_guide/index.md
index 486368c3b8b..df4e4795a08 100644
--- a/docs/cudf/source/user_guide/index.md
+++ b/docs/cudf/source/user_guide/index.md
@@ -16,5 +16,6 @@ options
 performance-comparisons/index
 PandasCompat
 copy-on-write
+memory-profiling
 pandas-2.0-breaking-changes
 ```
diff --git a/docs/cudf/source/user_guide/memory-profiling.md b/docs/cudf/source/user_guide/memory-profiling.md
new file mode 100644
index 00000000000..ab5433685e6
--- /dev/null
+++ b/docs/cudf/source/user_guide/memory-profiling.md
@@ -0,0 +1,44 @@
+(memory-profiling-user-doc)=
+
+# Memory Profiling
+
+Peak memory usage is a common concern in GPU programming because GPU memory is typically smaller than available CPU memory. To easily identify memory hotspots, cuDF provides a memory profiler. Profiling comes with an overhead, so avoid using it in performance-sensitive code.
+
+## Enabling Memory Profiling
+
+First, enable memory profiling in RMM by calling {py:func}`rmm.statistics.enable_statistics()`. This adds a statistics resource adaptor to the current RMM memory resource, which enables cuDF to access memory profiling information. See the [RMM documentation](https://docs.rapids.ai/api/rmm/stable/guide/#memory-statistics-and-profiling) for more details.
+
+Second, enable memory profiling in cuDF by setting the `memory_profiling` option to `True`. Either call {py:func}`cudf.set_option` or set the environment variable `CUDF_MEMORY_PROFILING=1` before launching the Python interpreter.
+
+To get the result of the profiling, use {py:func}`cudf.utils.performance_tracking.print_memory_report`, or access the raw profiling data with {py:func}`cudf.utils.performance_tracking.get_memory_records`.
+
+### Example
+In the following, we enable profiling, do some work, and then print the profiling results:
+
+```python
+>>> import cudf
+>>> from cudf.utils.performance_tracking import print_memory_report
+>>> from rmm.statistics import enable_statistics
+>>> enable_statistics()
+>>> cudf.set_option("memory_profiling", True)
+>>> cudf.DataFrame({"a": [1, 2, 3]})  # Some work
+   a
+0  1
+1  2
+2  3
+>>> print_memory_report()  # Pretty print the result of the profiling
+Memory Profiling
+================
+
+Legends:
+ncalls       - number of times the function or code block was called
+memory_peak  - peak memory allocated in function or code block (in bytes)
+memory_total - total memory allocated in function or code block (in bytes)
+
+Ordered by: memory_peak
+
+ncalls memory_peak memory_total filename:lineno(function)
+     1          32           32 cudf/core/dataframe.py:690(DataFrame.__init__)
+     2           0            0 cudf/core/index.py:214(RangeIndex.__init__)
+     6           0            0 cudf/core/index.py:424(RangeIndex.__len__)
+```
diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py
index 762cd7f9e86..ed351a6b107 100644
--- a/python/cudf/cudf/core/buffer/spill_manager.py
+++ b/python/cudf/cudf/core/buffer/spill_manager.py
@@ -18,14 +18,14 @@
 import rmm.mr
 
 from cudf.options import get_option
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.string import format_bytes
 
 if TYPE_CHECKING:
     from cudf.core.buffer.spillable_buffer import SpillableBufferOwner
 
 _spill_cudf_nvtx_annotate = partial(
-    _cudf_nvtx_annotate, domain="cudf_python-spill"
+    _performance_tracking, domain="cudf_python-spill"
 )
 
 
diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py
index eb57a371965..4c9e524ee05 100644
--- a/python/cudf/cudf/core/buffer/spillable_buffer.py
+++ b/python/cudf/cudf/core/buffer/spillable_buffer.py
@@ -10,6 +10,7 @@
 from typing import TYPE_CHECKING, Any, Literal
 
 import numpy
+import nvtx
 from typing_extensions import Self
 
 import rmm
@@ -21,7 +22,7 @@
     host_memory_allocation,
 )
 from cudf.core.buffer.exposure_tracked_buffer import ExposureTrackedBuffer
-from cudf.utils.nvtx_annotation import _get_color_for_nvtx, annotate
+from cudf.utils.performance_tracking import _get_color_for_nvtx
 from cudf.utils.string import format_bytes
 
 if TYPE_CHECKING:
@@ -200,7 +201,7 @@ def spill(self, target: str = "cpu") -> None:
                 )
 
             if (ptr_type, target) == ("gpu", "cpu"):
-                with annotate(
+                with nvtx.annotate(
                     message="SpillDtoH",
                     color=_get_color_for_nvtx("SpillDtoH"),
                     domain="cudf_python-spill",
@@ -218,7 +219,7 @@ def spill(self, target: str = "cpu") -> None:
                 # trigger a new call to this buffer's `spill()`.
                 # Therefore, it is important that spilling-on-demand doesn't
                 # try to unspill an already locked buffer!
-                with annotate(
+                with nvtx.annotate(
                     message="SpillHtoD",
                     color=_get_color_for_nvtx("SpillHtoD"),
                     domain="cudf_python-spill",
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index f7f5ef792d6..3fc29582c4c 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -83,7 +83,7 @@
     min_scalar_type,
     numeric_normalize_types,
 )
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.utils import GetAttrGetItemMixin, _external_only_api
 
 if TYPE_CHECKING:
@@ -145,7 +145,7 @@ def __setitem__(self, key, value):
             key = (key, slice(None))
         return self._setitem_tuple_arg(key, value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _can_downcast_to_series(self, df, arg):
         """
         This method encapsulates the logic used
@@ -188,7 +188,7 @@ def _can_downcast_to_series(self, df, arg):
                 return True
         return False
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _downcast_to_series(self, df, arg):
         """
         "Downcast" from a DataFrame to a Series
@@ -233,11 +233,11 @@ class _DataFrameLocIndexer(_DataFrameIndexer):
     For selection by label.
     """
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _getitem_scalar(self, arg):
         return self._frame[arg[1]].loc[arg[0]]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _getitem_tuple_arg(self, arg):
         from uuid import uuid4
 
@@ -363,7 +363,7 @@ def _getitem_tuple_arg(self, arg):
             return self._downcast_to_series(df, arg)
         return df
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _setitem_tuple_arg(self, key, value):
         if (
             isinstance(self._frame.index, MultiIndex)
@@ -532,7 +532,7 @@ def __getitem__(self, arg):
             return frame._empty_like(keep_index=True)
         assert_never(row_spec)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _setitem_tuple_arg(self, key, value):
         columns_df = self._frame._from_data(
             self._frame._data.select_by_index(key[1]), self._frame.index
@@ -677,7 +677,7 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin):
     _groupby = DataFrameGroupBy
     _resampler = DataFrameResampler
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __init__(
         self,
         data=None,
@@ -859,7 +859,7 @@ def __init__(
             columns, pd.MultiIndex
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _init_from_series_list(self, data, columns, index):
         if index is None:
             # When `index` is `None`, the final index of
@@ -972,7 +972,7 @@ def _init_from_series_list(self, data, columns, index):
         else:
             self._data.rangeindex = True
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _init_from_list_like(self, data, index=None, columns=None):
         if index is None:
             index = RangeIndex(start=0, stop=len(data))
@@ -1030,7 +1030,7 @@ def _init_from_list_like(self, data, index=None, columns=None):
             )
             self._data.label_dtype = getattr(columns, "dtype", None)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _init_from_dict_like(
         self, data, index=None, columns=None, nan_as_null=None
     ):
@@ -1119,7 +1119,7 @@ def _from_data(
         return out
 
     @staticmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _align_input_series_indices(data, index):
         input_series = [
             Series(val)
@@ -1187,7 +1187,7 @@ def deserialize(cls, header, frames):
         return obj
 
     @property
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def shape(self):
         """Returns a tuple representing the dimensionality of the DataFrame."""
         return self._num_rows, self._num_columns
@@ -1270,7 +1270,7 @@ def __setattr__(self, key, col):
         else:
             super().__setattr__(key, col)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __getitem__(self, arg):
         """
         If *arg* is a ``str`` or ``int`` type, return the column Series.
@@ -1364,7 +1364,7 @@ def __getitem__(self, arg):
                 f"__getitem__ on type {type(arg)} is not supported"
             )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __setitem__(self, arg, value):
         """Add/set column by *arg or DataFrame*"""
         if isinstance(arg, DataFrame):
@@ -1482,7 +1482,7 @@ def __setitem__(self, arg, value):
     def __delitem__(self, name):
         self._drop_column(name)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def memory_usage(self, index=True, deep=False):
         mem_usage = [col.memory_usage for col in self._data.columns]
         names = [str(name) for name in self._data.names]
@@ -1494,7 +1494,7 @@ def memory_usage(self, index=True, deep=False):
             index=as_index(names),
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __array_function__(self, func, types, args, kwargs):
         if "out" in kwargs or not all(
             issubclass(t, (Series, DataFrame)) for t in types
@@ -1528,7 +1528,7 @@ def __array_function__(self, func, types, args, kwargs):
         return NotImplemented
 
     # The _get_numeric_data method is necessary for dask compatibility.
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _get_numeric_data(self):
         """Return a dataframe with only numeric data types"""
         columns = [
@@ -1538,7 +1538,7 @@ def _get_numeric_data(self):
         ]
         return self[columns]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def assign(self, **kwargs: Callable[[Self], Any] | Any):
         """
         Assign columns to DataFrame from keyword arguments.
@@ -1571,7 +1571,7 @@ def assign(self, **kwargs: Callable[[Self], Any] | Any):
         return new_df
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _concat(
         cls, objs, axis=0, join="outer", ignore_index=False, sort=False
     ):
@@ -1963,12 +1963,12 @@ def _get_renderable_dataframe(self):
 
         return output
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __repr__(self):
         output = self._get_renderable_dataframe()
         return self._clean_renderable_dataframe(output)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _repr_html_(self):
         lines = (
             self._get_renderable_dataframe()
@@ -1984,7 +1984,7 @@ def _repr_html_(self):
             lines.append("</div>")
         return "\n".join(lines)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _repr_latex_(self):
         return self._get_renderable_dataframe().to_pandas()._repr_latex_()
 
@@ -2098,7 +2098,7 @@ def _make_operands_and_index_for_binop(
         return operands, index, can_use_self_column_name
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_dict(
         cls,
         data: dict,
@@ -2233,7 +2233,7 @@ def from_dict(
                 f"parameter. Got '{orient}' instead"
             )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_dict(
         self,
         orient: str = "dict",
@@ -2354,7 +2354,7 @@ def to_dict(
 
         return self.to_pandas().to_dict(orient=orient, into=into)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def scatter_by_map(
         self, map_index, map_size=None, keep_index=True, debug: bool = False
     ):
@@ -2447,7 +2447,7 @@ def scatter_by_map(
 
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def update(
         self,
         other,
@@ -2542,23 +2542,23 @@ def update(
 
         self._mimic_inplace(source_df, inplace=True)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __iter__(self):
         return iter(self._column_names)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __contains__(self, item):
         # This must check against containment in the pandas Index and not
         # self._column_names to handle NA, None, nan, etc. correctly.
         return item in self._data.to_pandas_index()
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def items(self):
         """Iterate over column names and series pairs"""
         for k in self:
             yield (k, self[k])
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def equals(self, other) -> bool:
         ret = super().equals(other)
         # If all other checks matched, validate names.
@@ -2591,13 +2591,13 @@ def at(self):
         "index is absolutely necessary. For checking if the columns are a "
         "MultiIndex, use _data.multiindex."
     )
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def columns(self):
         """Returns a tuple of columns"""
         return self._data.to_pandas_index()
 
     @columns.setter  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def columns(self, columns):
         multiindex = False
         rangeindex = False
@@ -2665,7 +2665,7 @@ def _set_columns_like(self, other: ColumnAccessor) -> None:
             verify=False,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def reindex(
         self,
         labels=None,
@@ -2813,7 +2813,7 @@ def reindex(
             fill_value=fill_value,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def set_index(
         self,
         keys,
@@ -2980,7 +2980,7 @@ def set_index(
         df.index = idx
         return df if not inplace else None
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def fillna(
         self, value=None, method=None, axis=None, inplace=False, limit=None
     ):  # noqa: D102
@@ -3006,7 +3006,7 @@ def fillna(
             value=value, method=method, axis=axis, inplace=inplace, limit=limit
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def where(self, cond, other=None, inplace=False):
         from cudf.core._internals.where import (
             _check_and_cast_columns_with_other,
@@ -3163,7 +3163,7 @@ def reset_index(
             inplace=inplace,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def insert(self, loc, name, value, nan_as_null=no_default):
         """Add a column to DataFrame at the index specified by loc.
 
@@ -3189,7 +3189,7 @@ def insert(self, loc, name, value, nan_as_null=no_default):
             ignore_index=False,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True):
         """
         Same as `insert`, with additional `ignore_index` param.
@@ -3271,7 +3271,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True):
         self._data.insert(name, value, loc=loc)
 
     @property  # type:ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def axes(self):
         """
         Return a list representing the axes of the DataFrame.
@@ -3363,7 +3363,7 @@ def diff(self, periods=1, axis=0):
 
         return self - self.shift(periods=periods)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def drop_duplicates(
         self,
         subset=None,
@@ -3451,14 +3451,14 @@ def drop_duplicates(
 
         return self._mimic_inplace(outdf, inplace=inplace)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def pop(self, item):
         """Return a column and drop it from the DataFrame."""
         popped = self[item]
         del self[item]
         return popped
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def rename(
         self,
         mapper=None,
@@ -3616,7 +3616,7 @@ def rename(
 
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def add_prefix(self, prefix):
         # TODO: Change to deep=False when copy-on-write is default
         out = self.copy(deep=True)
@@ -3625,7 +3625,7 @@ def add_prefix(self, prefix):
         ]
         return out
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def add_suffix(self, suffix):
         # TODO: Change to deep=False when copy-on-write is default
         out = self.copy(deep=True)
@@ -3634,7 +3634,7 @@ def add_suffix(self, suffix):
         ]
         return out
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def agg(self, aggs, axis=None):
         """
         Aggregate using one or more operations over the specified axis.
@@ -3770,7 +3770,7 @@ def agg(self, aggs, axis=None):
         else:
             raise ValueError("argument must be a string, list or dict")
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nlargest(self, n, columns, keep="first"):
         """Return the first *n* rows ordered by *columns* in descending order.
 
@@ -3910,7 +3910,7 @@ def nsmallest(self, n, columns, keep="first"):
         """
         return self._n_largest_or_smallest(False, n, columns, keep)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def swaplevel(self, i=-2, j=-1, axis=0):
         """
         Swap level i with level j.
@@ -3977,7 +3977,7 @@ def swaplevel(self, i=-2, j=-1, axis=0):
 
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def transpose(self):
         """Transpose index and columns.
 
@@ -4041,7 +4041,7 @@ def transpose(self):
 
     T = property(transpose, doc=transpose.__doc__)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def melt(self, **kwargs):
         """Unpivots a DataFrame from wide format to long format,
         optionally leaving identifier variables set.
@@ -4071,7 +4071,7 @@ def melt(self, **kwargs):
 
         return melt(self, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def merge(
         self,
         right,
@@ -4224,7 +4224,7 @@ def merge(
             suffixes=suffixes,
         ).perform_merge()
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def join(
         self,
         other,
@@ -4273,7 +4273,7 @@ def join(
         )
         return df
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         groupby_doc_template.format(
             ret=textwrap.dedent(
@@ -4407,7 +4407,7 @@ def query(self, expr, local_dict=None):
                 BooleanMask.from_column_unchecked(boolmask)
             )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def apply(
         self, func, axis=1, raw=False, result_type=None, args=(), **kwargs
     ):
@@ -4691,7 +4691,7 @@ def _func(x):  # pragma: no cover
 
         return DataFrame._from_data(result, index=self.index)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @applyutils.doc_apply()
     def apply_rows(
         self,
@@ -4770,7 +4770,7 @@ def apply_rows(
             cache_key=cache_key,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @applyutils.doc_applychunks()
     def apply_chunks(
         self,
@@ -4837,7 +4837,7 @@ def apply_chunks(
             tpb=tpb,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def partition_by_hash(self, columns, nparts, keep_index=True):
         """Partition the dataframe by the hashed value of data in *columns*.
 
@@ -5181,7 +5181,7 @@ def _sizeof_fmt(num, size_qualifier):
 
         cudf.utils.ioutils.buffer_write_lines(buf, lines)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_describe()
     def describe(
         self,
@@ -5243,7 +5243,7 @@ def describe(
                 )
             return res
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_pandas(
         self, *, nullable: bool = False, arrow_type: bool = False
     ) -> pd.DataFrame:
@@ -5333,7 +5333,7 @@ def to_pandas(
         return out_df
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_pandas(cls, dataframe, nan_as_null=no_default):
         """
         Convert from a Pandas DataFrame.
@@ -5406,7 +5406,7 @@ def from_pandas(cls, dataframe, nan_as_null=no_default):
             )
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_arrow(cls, table):
         """
         Convert from PyArrow Table to DataFrame.
@@ -5492,7 +5492,7 @@ def from_arrow(cls, table):
 
         return out
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_arrow(self, preserve_index=None):
         """
         Convert to a PyArrow Table.
@@ -5582,7 +5582,7 @@ def to_arrow(self, preserve_index=None):
 
         return out.replace_schema_metadata(metadata)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_records(self, index=True):
         """Convert to a numpy recarray
 
@@ -5606,7 +5606,7 @@ def to_records(self, index=True):
         return ret
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_records(cls, data, index=None, columns=None, nan_as_null=False):
         """
         Convert structured or record ndarray to DataFrame.
@@ -5685,7 +5685,7 @@ def from_records(cls, data, index=None, columns=None, nan_as_null=False):
         return df
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False):
         """Convert a numpy/cupy array to DataFrame.
 
@@ -5763,7 +5763,7 @@ def _from_arrays(cls, data, index=None, columns=None, nan_as_null=False):
             index=index,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def interpolate(
         self,
         method="linear",
@@ -5793,7 +5793,7 @@ def interpolate(
             **kwargs,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def quantile(
         self,
         q=0.5,
@@ -5936,7 +5936,7 @@ def quantile(
         result.index = cudf.Index(list(map(float, qs)), dtype="float64")
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def isin(self, values):
         """
         Whether each element in the DataFrame is contained in values.
@@ -6080,7 +6080,7 @@ def make_false_column_like_self():
     #
     # Stats
     #
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _prepare_for_rowwise_op(self, method, skipna, numeric_only):
         """Prepare a DataFrame for CuPy-based row-wise operations."""
 
@@ -6132,7 +6132,7 @@ def _prepare_for_rowwise_op(self, method, skipna, numeric_only):
             coerced = coerced.astype("int64", copy=False)
         return coerced, mask, common_dtype
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def count(self, axis=0, numeric_only=False):
         """
         Count ``non-NA`` cells for each column or row.
@@ -6184,7 +6184,7 @@ def count(self, axis=0, numeric_only=False):
         "columns": 1,
     }
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _reduce(
         self,
         op,
@@ -6308,7 +6308,7 @@ def _reduce(
         else:
             raise ValueError(f"Invalid value of {axis=} received for {op}")
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _scan(
         self,
         op,
@@ -6325,7 +6325,7 @@ def _scan(
         elif axis == 1:
             return self._apply_cupy_method_axis_1(op, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def mode(self, axis=0, numeric_only=False, dropna=True):
         """
         Get the mode(s) of each element along the selected axis.
@@ -6432,17 +6432,17 @@ def mode(self, axis=0, numeric_only=False, dropna=True):
 
         return df
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def all(self, axis=0, bool_only=None, skipna=True, **kwargs):
         obj = self.select_dtypes(include="bool") if bool_only else self
         return super(DataFrame, obj).all(axis, skipna, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def any(self, axis=0, bool_only=None, skipna=True, **kwargs):
         obj = self.select_dtypes(include="bool") if bool_only else self
         return super(DataFrame, obj).any(axis, skipna, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _apply_cupy_method_axis_1(self, method, *args, **kwargs):
         # This method uses cupy to perform scans and reductions along rows of a
         # DataFrame. Since cuDF is designed around columnar storage and
@@ -6542,7 +6542,7 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs):
             result_df._set_columns_like(prepared._data)
             return result_df
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _columns_view(self, columns):
         """
         Return a subset of the DataFrame's columns as a view.
@@ -6551,7 +6551,7 @@ def _columns_view(self, columns):
             {col: self._data[col] for col in columns}, index=self.index
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def select_dtypes(self, include=None, exclude=None):
         """Return a subset of the DataFrame's columns based on the column dtypes.
 
@@ -6816,7 +6816,7 @@ def to_orc(
             index=index,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def stack(self, level=-1, dropna=no_default, future_stack=False):
         """Stack the prescribed level(s) from columns to index
 
@@ -7161,7 +7161,7 @@ def unnamed_group_generator():
         else:
             return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def cov(self, **kwargs):
         """Compute the covariance matrix of a DataFrame.
 
@@ -7216,7 +7216,7 @@ def corr(self, method="pearson", min_periods=None):
         df._set_columns_like(self._data)
         return df
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_struct(self, name=None):
         """
         Return a struct Series composed of the columns of the DataFrame.
@@ -7250,7 +7250,7 @@ def to_struct(self, name=None):
             name=name,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def keys(self):
         """
         Get the columns.
@@ -7310,14 +7310,14 @@ def iterrows(self):
             "if you wish to iterate over each row."
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @copy_docstring(reshape.pivot)
     def pivot(self, *, columns, index=no_default, values=no_default):
         return cudf.core.reshape.pivot(
             self, index=index, columns=columns, values=values
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @copy_docstring(reshape.pivot_table)
     def pivot_table(
         self,
@@ -7346,14 +7346,14 @@ def pivot_table(
             sort=sort,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @copy_docstring(reshape.unstack)
     def unstack(self, level=-1, fill_value=None):
         return cudf.core.reshape.unstack(
             self, level=level, fill_value=fill_value
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def explode(self, column, ignore_index=False):
         """
         Transform each element of a list-like to a row, replicating index
@@ -7549,7 +7549,7 @@ def _from_columns_like_self(
         result._set_columns_like(self._data)
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def interleave_columns(self):
         """
         Interleave Series columns of a table into a single column.
@@ -7597,7 +7597,7 @@ def interleave_columns(self):
             {None: libcudf.reshape.interleave_columns([*self._columns])}
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def eval(self, expr: str, inplace: bool = False, **kwargs):
         """Evaluate a string describing operations on DataFrame columns.
 
@@ -7953,7 +7953,7 @@ def func(left, right, output):
     )
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def from_pandas(obj, nan_as_null=no_default):
     """
     Convert certain Pandas objects into the cudf equivalent.
@@ -8080,7 +8080,7 @@ def from_pandas(obj, nan_as_null=no_default):
         )
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def merge(left, right, *args, **kwargs):
     if isinstance(left, Series):
         left = left.to_frame()
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 8ca71180c00..9bac75dc6ac 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -32,7 +32,7 @@
 from cudf.core.mixins import BinaryOperand, Scannable
 from cudf.utils import ioutils
 from cudf.utils.dtypes import find_common_type
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.utils import _array_ufunc, _warn_no_dask_cudf
 
 if TYPE_CHECKING:
@@ -86,7 +86,7 @@ def _dtypes(self) -> abc.Iterable:
     def ndim(self) -> int:
         raise NotImplementedError()
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def serialize(self):
         # TODO: See if self._data can be serialized outright
         header = {
@@ -101,7 +101,7 @@ def serialize(self):
         return header, frames
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def deserialize(cls, header, frames):
         cls_deserialize = pickle.loads(header["type-serialized"])
         column_names = pickle.loads(header["column_names"])
@@ -122,7 +122,7 @@ def deserialize(cls, header, frames):
         return cls_deserialize._from_data(col_accessor)
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _from_data(cls, data: MutableMapping) -> Self:
         """
         Construct cls from a ColumnAccessor-like mapping.
@@ -131,7 +131,7 @@ def _from_data(cls, data: MutableMapping) -> Self:
         Frame.__init__(obj, data)
         return obj
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _from_data_like_self(self, data: MutableMapping) -> Self:
         """
         Return type(self) from a ColumnAccessor-like mapping but
@@ -139,7 +139,7 @@ def _from_data_like_self(self, data: MutableMapping) -> Self:
         """
         return self._from_data(data)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _from_columns_like_self(
         self,
         columns: list[ColumnBase],
@@ -155,7 +155,7 @@ def _from_columns_like_self(
         frame = self.__class__._from_data(data)
         return frame._copy_type_metadata(self)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _mimic_inplace(
         self, result: Self, inplace: bool = False
     ) -> Self | None:
@@ -171,7 +171,7 @@ def _mimic_inplace(
             return result
 
     @property
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def size(self) -> int:
         """
         Return the number of elements in the underlying data.
@@ -263,11 +263,11 @@ def memory_usage(self, deep=False):
         """
         raise NotImplementedError
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __len__(self) -> int:
         return self._num_rows
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def astype(self, dtype: dict[Any, Dtype], copy: bool = False) -> Self:
         casted = (
             col.astype(dtype.get(col_name, col.dtype), copy=copy)
@@ -276,7 +276,7 @@ def astype(self, dtype: dict[Any, Dtype], copy: bool = False) -> Self:
         ca = self._data._from_columns_like_self(casted, verify=False)
         return self._from_data_like_self(ca)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def equals(self, other) -> bool:
         """
         Test whether two objects contain the same elements.
@@ -347,7 +347,7 @@ def equals(self, other) -> bool:
             )
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _get_columns_by_label(self, labels) -> Self:
         """
         Returns columns of the Frame specified by `labels`.
@@ -357,7 +357,7 @@ def _get_columns_by_label(self, labels) -> Self:
         return self._from_data_like_self(self._data.select_by_label(labels))
 
     @property
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def values(self) -> cupy.ndarray:
         """
         Return a CuPy representation of the DataFrame.
@@ -373,7 +373,7 @@ def values(self) -> cupy.ndarray:
         return self.to_cupy()
 
     @property
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def values_host(self) -> np.ndarray:
         """
         Return a NumPy representation of the data.
@@ -388,7 +388,7 @@ def values_host(self) -> np.ndarray:
         """
         return self.to_numpy()
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __array__(self, dtype=None):
         raise TypeError(
             "Implicit conversion to a host NumPy array via __array__ is not "
@@ -397,14 +397,14 @@ def __array__(self, dtype=None):
             "using .to_numpy()."
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __arrow_array__(self, type=None):
         raise TypeError(
             "Implicit conversion to a host PyArrow object via __arrow_array__ "
             "is not allowed. Consider using .to_arrow()"
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _to_array(
         self,
         get_array: Callable,
@@ -468,7 +468,7 @@ def to_array(
     # particular, we need to benchmark how much of the overhead is coming from
     # (potentially unavoidable) local copies in to_cupy and how much comes from
     # inefficiencies in the implementation.
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_cupy(
         self,
         dtype: Dtype | None = None,
@@ -502,7 +502,7 @@ def to_cupy(
             na_value,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_numpy(
         self,
         dtype: Dtype | None = None,
@@ -537,7 +537,7 @@ def to_numpy(
             lambda col: col.values_host, numpy, copy, dtype, na_value
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def where(self, cond, other=None, inplace: bool = False) -> Self | None:
         """
         Replace values where the condition is False.
@@ -610,7 +610,7 @@ def where(self, cond, other=None, inplace: bool = False) -> Self | None:
         """
         raise NotImplementedError
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def fillna(
         self,
         value: None | ScalarLike | cudf.Series = None,
@@ -767,14 +767,14 @@ def fillna(
             inplace=inplace,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _drop_column(self, name):
         """Drop a column by *name*"""
         if name not in self._data:
             raise KeyError(f"column '{name}' does not exist")
         del self._data[name]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _quantile_table(
         self,
         q: float,
@@ -808,7 +808,7 @@ def _quantile_table(
         )
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_arrow(cls, data: pa.Table) -> Self:
         """Convert from PyArrow Table to Frame
 
@@ -968,7 +968,7 @@ def from_arrow(cls, data: pa.Table) -> Self:
 
         return cls._from_data({name: result[name] for name in column_names})
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_arrow(self):
         """
         Convert to arrow Table
@@ -992,7 +992,7 @@ def to_arrow(self):
             {str(name): col.to_arrow() for name, col in self._data.items()}
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _positions_from_column_names(self, column_names) -> list[int]:
         """Map each column name into their positions in the frame.
 
@@ -1005,7 +1005,7 @@ def _positions_from_column_names(self, column_names) -> list[int]:
             if name in set(column_names)
         ]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _copy_type_metadata(self: Self, other: Self) -> Self:
         """
         Copy type metadata from each column of `other` to the corresponding
@@ -1020,7 +1020,7 @@ def _copy_type_metadata(self: Self, other: Self) -> Self:
 
         return self
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def isna(self):
         """
         Identify missing values.
@@ -1101,7 +1101,7 @@ def isna(self):
     # Alias for isna
     isnull = isna
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def notna(self):
         """
         Identify non-missing values.
@@ -1182,7 +1182,7 @@ def notna(self):
     # Alias for notna
     notnull = notna
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def searchsorted(
         self,
         values,
@@ -1296,7 +1296,7 @@ def searchsorted(
         else:
             return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def argsort(
         self,
         by=None,
@@ -1383,7 +1383,7 @@ def argsort(
             by=by, ascending=ascending, na_position=na_position
         ).values
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _get_sorted_inds(
         self,
         by=None,
@@ -1411,7 +1411,7 @@ def _get_sorted_inds(
             stable=True,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _split(self, splits):
         """Split a frame with split points in ``splits``. Returns a list of
         Frames of length `len(splits) + 1`.
@@ -1426,13 +1426,13 @@ def _split(self, splits):
             for split_idx in range(len(splits) + 1)
         ]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _encode(self):
         columns, indices = libcudf.transform.table_encode([*self._columns])
         keys = self._from_columns_like_self(columns)
         return keys, indices
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _unaryop(self, op):
         data_columns = (col.unary_operator(op) for col in self._columns)
         return self._from_data_like_self(
@@ -1440,7 +1440,7 @@ def _unaryop(self, op):
         )
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _colwise_binop(
         cls,
         operands: dict[str | None, tuple[ColumnBase, Any, bool, Any]],
@@ -1519,11 +1519,11 @@ def _colwise_binop(
 
         return output
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         return _array_ufunc(self, ufunc, method, inputs, kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @acquire_spill_lock()
     def _apply_cupy_ufunc_to_operands(
         self, ufunc, cupy_func, operands, **kwargs
@@ -1565,7 +1565,7 @@ def _apply_cupy_ufunc_to_operands(
         return data
 
     # Unary logical operators
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __neg__(self):
         """Negate for integral dtypes, logical NOT for bools."""
         return self._from_data_like_self(
@@ -1579,30 +1579,30 @@ def __neg__(self):
             )
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __pos__(self):
         return self.copy(deep=True)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __abs__(self):
         return self._unaryop("abs")
 
     # Reductions
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _get_axis_from_axis_arg(cls, axis):
         try:
             return cls._SUPPORT_AXIS_LOOKUP[axis]
         except KeyError:
             raise ValueError(f"No axis named {axis} for object type {cls}")
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _reduce(self, *args, **kwargs):
         raise NotImplementedError(
             f"Reductions are not supported for objects of type {type(self)}."
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def min(
         self,
         axis=0,
@@ -1653,7 +1653,7 @@ def min(
             **kwargs,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def max(
         self,
         axis=0,
@@ -1701,7 +1701,7 @@ def max(
             **kwargs,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def all(self, axis=0, skipna=True, **kwargs):
         """
         Return whether all elements are True in DataFrame.
@@ -1754,7 +1754,7 @@ def all(self, axis=0, skipna=True, **kwargs):
             **kwargs,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def any(self, axis=0, skipna=True, **kwargs):
         """
         Return whether any elements is True in DataFrame.
@@ -1807,26 +1807,26 @@ def any(self, axis=0, skipna=True, **kwargs):
             **kwargs,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @ioutils.doc_to_dlpack()
     def to_dlpack(self):
         """{docstring}"""
 
         return cudf.io.dlpack.to_dlpack(self)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __str__(self):
         return repr(self)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __deepcopy__(self, memo):
         return self.copy(deep=True)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __copy__(self):
         return self.copy(deep=False)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __invert__(self):
         """Bitwise invert (~) for integral dtypes, logical NOT for bools."""
         return self._from_data_like_self(
@@ -1835,7 +1835,7 @@ def __invert__(self):
             )
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nunique(self, dropna: bool = True):
         """
         Returns a per column mapping with counts of unique values for
@@ -1856,7 +1856,7 @@ def nunique(self, dropna: bool = True):
         )
 
     @staticmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _repeat(
         columns: list[ColumnBase], repeats, axis=None
     ) -> list[ColumnBase]:
@@ -1870,7 +1870,7 @@ def _repeat(
 
         return libcudf.filling.repeat(columns, repeats)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @_warn_no_dask_cudf
     def __dask_tokenize__(self):
         from dask.base import normalize_token
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 77b54a583d3..eccb3acabf6 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -31,7 +31,7 @@
 from cudf.core.mixins import Reducible, Scannable
 from cudf.core.multiindex import MultiIndex
 from cudf.core.udf.groupby_utils import _can_be_jitted, jit_groupby_apply
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.utils import GetAttrGetItemMixin
 
 if TYPE_CHECKING:
@@ -392,7 +392,7 @@ def indices(self):
             zip(index.to_pandas(), cp.split(indices.values, offsets[1:-1]))
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def get_group(self, name, obj=None):
         """
         Construct DataFrame from group with provided name.
@@ -436,7 +436,7 @@ def get_group(self, name, obj=None):
             )
         return obj.iloc[self.indices[name]]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def size(self):
         """
         Return the size of each group.
@@ -451,7 +451,7 @@ def size(self):
             .agg("size")
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def cumcount(self):
         """
         Return the cumulative count of keys in each group.
@@ -467,7 +467,7 @@ def cumcount(self):
             .agg("cumcount")
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def rank(
         self,
         method="average",
@@ -521,7 +521,7 @@ def _groupby(self):
             [*self.grouping.keys._columns], dropna=self._dropna
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def agg(self, func):
         """
         Apply aggregation(s) to the groups.
@@ -821,7 +821,7 @@ def _head_tail(self, n, *, take_head: bool, preserve_order: bool):
         else:
             return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def head(self, n: int = 5, *, preserve_order: bool = True):
         """Return first n rows of each group
 
@@ -874,7 +874,7 @@ def head(self, n: int = 5, *, preserve_order: bool = True):
             n, take_head=True, preserve_order=preserve_order
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def tail(self, n: int = 5, *, preserve_order: bool = True):
         """Return last n rows of each group
 
@@ -928,7 +928,7 @@ def tail(self, n: int = 5, *, preserve_order: bool = True):
             n, take_head=False, preserve_order=preserve_order
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nth(self, n):
         """
         Return the nth row from each group.
@@ -949,7 +949,7 @@ def nth(self, n):
         del self.obj._data["__groupbynth_order__"]
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def ngroup(self, ascending=True):
         """
         Number each group from 0 to the number of groups - 1.
@@ -1261,7 +1261,7 @@ def _normalize_aggs(
         ]
         return column_names, columns, normalized_aggs
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def pipe(self, func, *args, **kwargs):
         """
         Apply a function `func` with arguments to this GroupBy
@@ -1316,7 +1316,7 @@ def pipe(self, func, *args, **kwargs):
         """
         return cudf.core.common.pipe(self, func, *args, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _jit_groupby_apply(
         self, function, group_names, offsets, group_keys, grouped_values, *args
     ):
@@ -1327,7 +1327,7 @@ def _jit_groupby_apply(
             chunk_results, group_names, group_keys, grouped_values
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _iterative_groupby_apply(
         self, function, group_names, offsets, group_keys, grouped_values, *args
     ):
@@ -1415,7 +1415,7 @@ def _post_process_chunk_results(
                 result.index = cudf.MultiIndex._from_data(index_data)
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def apply(
         self, function, *args, engine="auto", include_groups: bool = True
     ):
@@ -1573,7 +1573,7 @@ def mult(df):
             result = result.reset_index()
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def apply_grouped(self, function, **kwargs):
         """Apply a transformation function over the grouped chunk.
 
@@ -1712,7 +1712,7 @@ def rolling_avg(val, avg):
         kwargs.update({"chunks": offsets})
         return grouped_values.apply_chunks(function, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _broadcast(self, values):
         """
         Broadcast the results of an aggregation to the group
@@ -1736,7 +1736,7 @@ def _broadcast(self, values):
             values.index = self.obj.index
         return values
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def transform(self, function):
         """Apply an aggregation, then broadcast the result to the group size.
 
@@ -1801,7 +1801,7 @@ def rolling(self, *args, **kwargs):
         """
         return cudf.core.window.rolling.RollingGroupby(self, *args, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def count(self, dropna=True):
         """Compute the number of values in each column.
 
@@ -1816,7 +1816,7 @@ def func(x):
 
         return self.agg(func)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def describe(self, include=None, exclude=None):
         """
         Generate descriptive statistics that summarizes the central tendency,
@@ -1888,7 +1888,7 @@ def describe(self, include=None, exclude=None):
         )
         return res
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def corr(self, method="pearson", min_periods=1):
         """
         Compute pairwise correlation of columns, excluding NA/null values.
@@ -1950,7 +1950,7 @@ def corr(self, method="pearson", min_periods=1):
             lambda x: x.corr(method, min_periods), "Correlation"
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def cov(self, min_periods=0, ddof=1):
         """
         Compute the pairwise covariance among the columns of a DataFrame,
@@ -2129,7 +2129,7 @@ def _cov_or_corr(self, func, method_name):
 
         return res
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def var(self, ddof=1):
         """Compute the column-wise variance of the values in each group.
 
@@ -2145,7 +2145,7 @@ def func(x):
 
         return self.agg(func)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def std(self, ddof=1):
         """Compute the column-wise std of the values in each group.
 
@@ -2161,7 +2161,7 @@ def func(x):
 
         return self.agg(func)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def quantile(self, q=0.5, interpolation="linear"):
         """Compute the column-wise quantiles of the values in each group.
 
@@ -2179,18 +2179,18 @@ def func(x):
 
         return self.agg(func)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def collect(self):
         """Get a list of all the values for each column in each group."""
         _deprecate_collect()
         return self.agg(list)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def unique(self):
         """Get a list of the unique values for each column in each group."""
         return self.agg("unique")
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def diff(self, periods=1, axis=0):
         """Get the difference between the values in each group.
 
@@ -2258,7 +2258,7 @@ def bfill(self, limit=None):
 
         return self._scan_fill("bfill", limit)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def fillna(
         self,
         value=None,
@@ -2325,7 +2325,7 @@ def fillna(
             value=value, inplace=inplace, axis=axis, limit=limit
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         """
         Shift each group by ``periods`` positions.
@@ -2388,7 +2388,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         result = self._mimic_pandas_order(result)
         return result._copy_type_metadata(values)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def pct_change(
         self,
         periods=1,
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 71658695b80..e069f8d0ea6 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -58,7 +58,7 @@
     is_mixed_with_object_dtype,
     numeric_normalize_types,
 )
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.utils import _warn_no_dask_cudf, search_range
 
 if TYPE_CHECKING:
@@ -204,7 +204,7 @@ class RangeIndex(BaseIndex, BinaryOperand):
 
     _range: range
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __init__(
         self, start, stop=None, step=1, dtype=None, copy=False, name=None
     ):
@@ -259,17 +259,17 @@ def factorize(self, sort: bool = False, use_na_sentinel: bool = True):
         return codes, uniques
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def name(self):
         return self._name
 
     @name.setter  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def name(self, value):
         self._name = value
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def start(self) -> int:
         """
         The value of the `start` parameter (0 if this was not supplied).
@@ -277,7 +277,7 @@ def start(self) -> int:
         return self._range.start
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def stop(self) -> int:
         """
         The value of the stop parameter.
@@ -285,7 +285,7 @@ def stop(self) -> int:
         return self._range.stop
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def step(self) -> int:
         """
         The value of the step parameter.
@@ -293,12 +293,12 @@ def step(self) -> int:
         return self._range.step
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _num_rows(self) -> int:
         return len(self)
 
     @cached_property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _values(self):
         if len(self) > 0:
             return column.as_column(self._range, dtype=self.dtype)
@@ -330,18 +330,18 @@ def _is_interval(self) -> bool:
         return False
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def hasnans(self) -> bool:
         return False
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _data(self):
         return cudf.core.column_accessor.ColumnAccessor(
             {self.name: self._values}
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __contains__(self, item):
         hash(item)
         if isinstance(item, bool) or not isinstance(
@@ -357,7 +357,7 @@ def __contains__(self, item):
         except (ValueError, OverflowError):
             return False
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def copy(self, name=None, deep=False):
         """
         Make a copy of this object.
@@ -377,7 +377,7 @@ def copy(self, name=None, deep=False):
 
         return RangeIndex(self._range, name=name)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def astype(self, dtype, copy: bool = True):
         if is_dtype_equal(dtype, self.dtype):
             return self
@@ -386,15 +386,15 @@ def astype(self, dtype, copy: bool = True):
     def fillna(self, value, downcast=None):
         return self.copy()
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def drop_duplicates(self, keep="first"):
         return self
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def duplicated(self, keep="first") -> cupy.ndarray:
         return cupy.zeros(len(self), dtype=bool)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __repr__(self):
         return (
             f"{self.__class__.__name__}(start={self.start}, stop={self.stop}"
@@ -408,15 +408,15 @@ def __repr__(self):
         )
 
     @property
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def size(self) -> int:
         return len(self)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __len__(self):
         return len(self._range)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __getitem__(self, index):
         if isinstance(index, slice):
             sl_start, sl_stop, sl_step = index.indices(len(self))
@@ -435,13 +435,13 @@ def __getitem__(self, index):
             return self.start + index * self.step
         return self._as_int_index()[index]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def equals(self, other) -> bool:
         if isinstance(other, RangeIndex):
             return self._range == other._range
         return self._as_int_index().equals(other)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def serialize(self):
         header = {}
         header["index_column"] = {}
@@ -462,7 +462,7 @@ def serialize(self):
         return header, frames
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def deserialize(cls, header, frames):
         h = header["index_column"]
         name = pickle.loads(header["name"])
@@ -472,7 +472,7 @@ def deserialize(cls, header, frames):
         return RangeIndex(start=start, stop=stop, step=step, name=name)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def dtype(self):
         """
         `dtype` of the range of values in RangeIndex.
@@ -487,7 +487,7 @@ def dtype(self):
     def _dtypes(self) -> Iterable:
         return [(self.name, self.dtype)]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_pandas(
         self, *, nullable: bool = False, arrow_type: bool = False
     ) -> pd.RangeIndex:
@@ -508,16 +508,16 @@ def is_unique(self) -> bool:
         return True
 
     @cached_property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_monotonic_increasing(self) -> bool:
         return self.step > 0 or len(self) <= 1
 
     @cached_property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_monotonic_decreasing(self):
         return self.step < 0 or len(self) <= 1
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def memory_usage(self, deep: bool = False) -> int:
         if deep:
             warnings.warn(
@@ -530,7 +530,7 @@ def unique(self) -> Self:
         # RangeIndex always has unique values
         return self.copy()
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __mul__(self, other):
         # Multiplication by raw ints must return a RangeIndex to match pandas.
         if isinstance(other, cudf.Scalar) and other.dtype.kind in "iu":
@@ -547,24 +547,24 @@ def __mul__(self, other):
             )
         return self._as_int_index().__mul__(other)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __rmul__(self, other):
         # Multiplication is commutative.
         return self.__mul__(other)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _as_int_index(self):
         # Convert self to an integer index. This method is used to perform ops
         # that are not defined directly on RangeIndex.
         return cudf.Index._from_data(self._data)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         return self._as_int_index().__array_ufunc__(
             ufunc, method, *inputs, **kwargs
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def get_indexer(self, target, limit=None, method=None, tolerance=None):
         target_col = cudf.core.column.as_column(target)
         if method is not None or not isinstance(
@@ -594,7 +594,7 @@ def get_indexer(self, target, limit=None, method=None, tolerance=None):
             locs[valid] = len(self) - 1 - locs[valid]
         return locs
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def get_loc(self, key):
         if not is_scalar(key):
             raise TypeError("Should be a scalar-like")
@@ -608,7 +608,7 @@ def get_loc(self, key):
             raise KeyError(key)
         return idx_int
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _union(self, other, sort=None):
         if isinstance(other, RangeIndex):
             # Variable suffixes are of the
@@ -685,7 +685,7 @@ def _union(self, other, sort=None):
             self._as_int_index()._union(other, sort=sort)
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _intersection(self, other, sort=None):
         if not isinstance(other, RangeIndex):
             return self._try_reconstruct_range_index(
@@ -733,7 +733,7 @@ def _intersection(self, other, sort=None):
 
         return self._try_reconstruct_range_index(new_index)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def difference(self, other, sort=None):
         if isinstance(other, RangeIndex) and self.equals(other):
             return self[:0]._get_reconciled_name_object(other)
@@ -785,14 +785,14 @@ def sort_values(
         else:
             return sorted_index
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _gather(self, gather_map, nullify=False, check_bounds=True):
         gather_map = cudf.core.column.as_column(gather_map)
         return cudf.Index._from_data(
             {self.name: self._values.take(gather_map, nullify, check_bounds)}
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _apply_boolean_mask(self, boolean_mask):
         return cudf.Index._from_data(
             {self.name: self._values.apply_boolean_mask(boolean_mask)}
@@ -838,21 +838,21 @@ def join(
         )
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _column(self):
         return self._as_int_index()._column
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _columns(self):
         return self._as_int_index()._columns
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def values_host(self) -> np.ndarray:
         return np.arange(start=self.start, stop=self.stop, step=self.step)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def argsort(
         self,
         ascending=True,
@@ -865,19 +865,19 @@ def argsort(
         else:
             return cupy.arange(len(self))
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def where(self, cond, other=None, inplace=False):
         return self._as_int_index().where(cond, other, inplace)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_numpy(self) -> np.ndarray:
         return self.values_host
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_cupy(self) -> cupy.ndarray:
         return self.values
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_arrow(self) -> pa.Array:
         return pa.array(self._range, type=pa.from_numpy_dtype(self.dtype))
 
@@ -889,23 +889,23 @@ def __array__(self, dtype=None):
             "using .to_numpy()."
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nunique(self, dropna: bool = True) -> int:
         return len(self)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def isna(self) -> cupy.ndarray:
         return cupy.zeros(len(self), dtype=bool)
 
     isnull = isna
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def notna(self) -> cupy.ndarray:
         return cupy.ones(len(self), dtype=bool)
 
     notnull = isna
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _minmax(self, meth: str):
         no_steps = len(self) - 1
         if no_steps == -1:
@@ -1004,12 +1004,12 @@ class Index(SingleColumnFrame, BaseIndex, metaclass=IndexMeta):
         Column's, the data Column will be cloned to adopt this name.
     """
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __init__(self, data, **kwargs):
         name = _getdefault_name(data, name=kwargs.get("name"))
         super().__init__({name: data})
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         ret = super().__array_ufunc__(ufunc, method, *inputs, **kwargs)
 
@@ -1046,7 +1046,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         return NotImplemented
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _from_data(cls, data: MutableMapping, name: Any = no_default) -> Self:
         out = super()._from_data(data=data)
         if name is not no_default:
@@ -1054,7 +1054,7 @@ def _from_data(cls, data: MutableMapping, name: Any = no_default) -> Self:
         return out
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _from_data_like_self(
         cls, data: MutableMapping, name: Any = no_default
     ) -> Self:
@@ -1064,7 +1064,7 @@ def _from_data_like_self(
         return out
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_arrow(cls, obj):
         try:
             return cls(ColumnBase.from_arrow(obj))
@@ -1118,12 +1118,12 @@ def _binaryop(
         return ret
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _values(self):
         return self._column
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _concat(cls, objs):
         non_empties = [index for index in objs if len(index)]
         if len(objs) != len(non_empties):
@@ -1166,16 +1166,16 @@ def _concat(cls, objs):
         result.name = name
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def memory_usage(self, deep=False):
         return self._column.memory_usage
 
     @cached_property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_unique(self):
         return self._column.is_unique
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def equals(self, other) -> bool:
         if not isinstance(other, BaseIndex) or len(self) != len(other):
             return False
@@ -1198,7 +1198,7 @@ def equals(self, other) -> bool:
         except TypeError:
             return False
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def copy(self, name=None, deep=False):
         """
         Make a copy of this object.
@@ -1221,11 +1221,11 @@ def copy(self, name=None, deep=False):
             {name: self._values.copy(True) if deep else self._values}
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def astype(self, dtype, copy: bool = True):
         return super().astype({self.name: dtype}, copy)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def get_indexer(self, target, method=None, limit=None, tolerance=None):
         if is_scalar(target):
             raise TypeError("Should be a sequence")
@@ -1297,7 +1297,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
 
         return _return_get_indexer_result(result_series.to_cupy())
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def get_loc(self, key):
         if not is_scalar(key):
             raise TypeError("Should be a scalar-like")
@@ -1333,7 +1333,7 @@ def get_loc(self, key):
         mask[true_inds] = True
         return mask
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __repr__(self):
         max_seq_items = pd.get_option("max_seq_items") or len(self)
         mr = 0
@@ -1419,7 +1419,7 @@ def __repr__(self):
         lines.append(f"{prior_to_dtype} {keywords})")
         return "\n".join(lines)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __getitem__(self, index):
         res = self._get_elements_from_column(index)
         if isinstance(res, ColumnBase):
@@ -1427,20 +1427,20 @@ def __getitem__(self, index):
         return res
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def dtype(self):
         """
         `dtype` of the underlying values in Index.
         """
         return self._values.dtype
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def isna(self):
         return self._column.isnull().values
 
     isnull = isna
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def notna(self):
         return self._column.notnull().values
 
@@ -1470,11 +1470,11 @@ def _is_interval(self):
         return False
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def hasnans(self):
         return self._column.has_nulls(include_nan=True)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def argsort(
         self,
         axis=0,
@@ -1518,7 +1518,7 @@ def repeat(self, repeats, axis=None):
             Frame._repeat([*self._columns], repeats, axis), self._column_names
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def where(self, cond, other=None, inplace=False):
         result_col = super().where(cond, other, inplace)
         return self._mimic_inplace(
@@ -1615,7 +1615,7 @@ def _indices_of(self, value):
 
     @copy_docstring(StringMethods)  # type: ignore
     @property
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def str(self):
         if is_string_dtype(self.dtype):
             return StringMethods(parent=self)
@@ -1698,7 +1698,7 @@ class DatetimeIndex(Index):
                   dtype='datetime64[ns]', name='a')
     """
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __init__(
         self,
         data=None,
@@ -1761,7 +1761,7 @@ def __init__(
             ):
                 raise ValueError("No unique frequency found")
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _copy_type_metadata(self: Self, other: Self) -> Self:
         super()._copy_type_metadata(other)
         self._freq = _validate_freq(other._freq)
@@ -1783,7 +1783,7 @@ def __getitem__(self, index):
             return pd.Timestamp(value)
         return value
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def copy(self, name=None, deep=False):
         idx_copy = super().copy(name=name, deep=deep)
         return idx_copy._copy_type_metadata(self)
@@ -1801,7 +1801,7 @@ def searchsorted(
         )
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def year(self):
         """
         The year of the datetime.
@@ -1820,7 +1820,7 @@ def year(self):
         return self._get_dt_field("year")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def month(self):
         """
         The month as January=1, December=12.
@@ -1839,7 +1839,7 @@ def month(self):
         return self._get_dt_field("month")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def day(self):
         """
         The day of the datetime.
@@ -1858,7 +1858,7 @@ def day(self):
         return self._get_dt_field("day")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def hour(self):
         """
         The hours of the datetime.
@@ -1879,7 +1879,7 @@ def hour(self):
         return self._get_dt_field("hour")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def minute(self):
         """
         The minutes of the datetime.
@@ -1900,7 +1900,7 @@ def minute(self):
         return self._get_dt_field("minute")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def second(self):
         """
         The seconds of the datetime.
@@ -1921,7 +1921,7 @@ def second(self):
         return self._get_dt_field("second")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def microsecond(self):
         """
         The microseconds of the datetime.
@@ -1952,7 +1952,7 @@ def microsecond(self):
         )
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nanosecond(self):
         """
         The nanoseconds of the datetime.
@@ -1974,7 +1974,7 @@ def nanosecond(self):
         return self._get_dt_field("nanosecond")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def weekday(self):
         """
         The day of the week with Monday=0, Sunday=6.
@@ -1996,7 +1996,7 @@ def weekday(self):
         return self._get_dt_field("weekday")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def dayofweek(self):
         """
         The day of the week with Monday=0, Sunday=6.
@@ -2018,7 +2018,7 @@ def dayofweek(self):
         return self._get_dt_field("weekday")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def dayofyear(self):
         """
         The day of the year, from 1-365 in non-leap years and
@@ -2041,7 +2041,7 @@ def dayofyear(self):
         return self._get_dt_field("day_of_year")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def day_of_year(self):
         """
         The day of the year, from 1-365 in non-leap years and
@@ -2064,7 +2064,7 @@ def day_of_year(self):
         return self._get_dt_field("day_of_year")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_leap_year(self):
         """
         Boolean indicator if the date belongs to a leap year.
@@ -2083,7 +2083,7 @@ def is_leap_year(self):
         return cupy.asarray(res)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def quarter(self):
         """
         Integer indicator for which quarter of the year the date belongs in.
@@ -2108,7 +2108,7 @@ def quarter(self):
         res = extract_quarter(self._values)
         return Index(res, dtype="int8")
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def day_name(self, locale: str | None = None) -> Index:
         """
         Return the day names. Currently supports English locale only.
@@ -2128,7 +2128,7 @@ def day_name(self, locale: str | None = None) -> Index:
         day_names = self._column.get_day_names(locale)
         return Index._from_data({self.name: day_names})
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def month_name(self, locale: str | None = None) -> Index:
         """
         Return the month names. Currently supports English locale only.
@@ -2147,7 +2147,7 @@ def month_name(self, locale: str | None = None) -> Index:
         month_names = self._column.get_month_names(locale)
         return Index._from_data({self.name: month_names})
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def isocalendar(self) -> cudf.DataFrame:
         """
         Returns a DataFrame with the year, week, and day
@@ -2172,7 +2172,7 @@ def isocalendar(self) -> cudf.DataFrame:
         )
         return cudf.DataFrame._from_data(ca, index=self)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_pandas(
         self, *, nullable: bool = False, arrow_type: bool = False
     ) -> pd.DatetimeIndex:
@@ -2181,7 +2181,7 @@ def to_pandas(
             result.freq = self._freq._maybe_as_fast_pandas_offset()
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _get_dt_field(self, field):
         out_column = self._values.get_dt_field(field)
         # column.column_empty_like always returns a Column object
@@ -2198,7 +2198,7 @@ def _get_dt_field(self, field):
     def _is_boolean(self):
         return False
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def ceil(self, freq):
         """
         Perform ceil operation on the data to the specified freq.
@@ -2231,7 +2231,7 @@ def ceil(self, freq):
 
         return self.__class__._from_data({self.name: out_column})
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def floor(self, freq):
         """
         Perform floor operation on the data to the specified freq.
@@ -2264,7 +2264,7 @@ def floor(self, freq):
 
         return self.__class__._from_data({self.name: out_column})
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def round(self, freq):
         """
         Perform round operation on the data to the specified freq.
@@ -2452,7 +2452,7 @@ class TimedeltaIndex(Index):
                   dtype='timedelta64[s]', name='delta-index')
     """
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __init__(
         self,
         data=None,
@@ -2500,7 +2500,7 @@ def __getitem__(self, index):
         return value
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def days(self):
         """
         Number of days for each element.
@@ -2509,7 +2509,7 @@ def days(self):
         return Index(self._values.days, name=self.name, dtype="int64")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def seconds(self):
         """
         Number of seconds (>= 0 and less than 1 day) for each element.
@@ -2517,7 +2517,7 @@ def seconds(self):
         return Index(self._values.seconds, name=self.name, dtype="int32")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def microseconds(self):
         """
         Number of microseconds (>= 0 and less than 1 second) for each element.
@@ -2525,7 +2525,7 @@ def microseconds(self):
         return Index(self._values.microseconds, name=self.name, dtype="int32")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nanoseconds(self):
         """
         Number of nanoseconds (>= 0 and less than 1 microsecond) for each
@@ -2534,7 +2534,7 @@ def nanoseconds(self):
         return Index(self._values.nanoseconds, name=self.name, dtype="int32")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def components(self):
         """
         Return a dataframe of the components (days, hours, minutes,
@@ -2612,7 +2612,7 @@ class CategoricalIndex(Index):
     CategoricalIndex([1, 2, 3, <NA>], categories=[1, 2, 3], ordered=False, dtype='category', name='a')
     """  # noqa: E501
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __init__(
         self,
         data=None,
@@ -2667,7 +2667,7 @@ def __init__(
         super().__init__(data, name=name)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def codes(self):
         """
         The category codes of this categorical.
@@ -2675,7 +2675,7 @@ def codes(self):
         return Index(self._values.codes)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def categories(self):
         """
         The categories of this categorical.
@@ -2689,7 +2689,7 @@ def _is_categorical(self):
         return True
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def interval_range(
     start=None,
     end=None,
@@ -2841,7 +2841,7 @@ class IntervalIndex(Index):
     IntervalIndex
     """
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __init__(
         self,
         data,
@@ -2900,7 +2900,7 @@ def closed(self):
         return self.dtype.closed
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_breaks(
         cls,
         breaks,
@@ -2975,7 +2975,7 @@ def _clean_nulls_from_index(self):
         return self
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def as_index(
     arbitrary, nan_as_null=no_default, copy=False, name=no_default, dtype=None
 ) -> BaseIndex:
@@ -3090,7 +3090,7 @@ def _getdefault_name(values, name):
     return name
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex:
     """
     An internal Utility function to concat RangeIndex objects.
@@ -3131,7 +3131,7 @@ def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex:
     return RangeIndex(start, stop, step)
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _extended_gcd(a: int, b: int) -> tuple[int, int, int]:
     """
     Extended Euclidean algorithms to solve Bezout's identity:
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 280a6e92eab..72bd3c45fa6 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -56,7 +56,7 @@
 from cudf.utils import docutils, ioutils
 from cudf.utils._numba import _CUDFNumbaConfig
 from cudf.utils.docutils import copy_docstring
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.utils import _warn_no_dask_cudf
 
 if TYPE_CHECKING:
@@ -301,13 +301,13 @@ def _from_data(
         out._index = RangeIndex(out._data.nrows) if index is None else index
         return out
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _from_data_like_self(self, data: MutableMapping):
         out = super()._from_data_like_self(data)
         out.index = self.index
         return out
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _from_columns_like_self(
         self,
         columns: list[ColumnBase],
@@ -363,7 +363,7 @@ def _mimic_inplace(
             self._index = result.index
         return super()._mimic_inplace(result, inplace)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _scan(self, op, axis=None, skipna=True):
         """
         Return {op_name} of the {cls}.
@@ -439,7 +439,7 @@ def _check_data_index_length_match(self) -> None:
             )
 
     @property
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def empty(self):
         """
         Indicator whether DataFrame or Series is empty.
@@ -501,7 +501,7 @@ def empty(self):
         """
         return self.size == 0
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @ioutils.doc_to_json()
     def to_json(self, path_or_buf=None, *args, **kwargs):
         """{docstring}"""
@@ -510,14 +510,14 @@ def to_json(self, path_or_buf=None, *args, **kwargs):
             self, path_or_buf=path_or_buf, *args, **kwargs
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @ioutils.doc_to_hdf()
     def to_hdf(self, path_or_buf, key, *args, **kwargs):
         """{docstring}"""
 
         cudf.io.hdf.to_hdf(path_or_buf, key, self, *args, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_string(self):
         r"""
         Convert to string
@@ -606,7 +606,7 @@ def copy(self, deep: bool = True) -> Self:
             self.index.copy(deep=False),
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def equals(self, other) -> bool:  # noqa: D102
         return super().equals(other) and self.index.equals(other.index)
 
@@ -632,7 +632,7 @@ def index(self, value):
 
         self._index = value
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def replace(
         self,
         to_replace=None,
@@ -900,7 +900,7 @@ def replace(
 
         return self._mimic_inplace(result, inplace=inplace)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def clip(self, lower=None, upper=None, inplace=False, axis=1):
         """
         Trim values at input threshold(s).
@@ -1026,7 +1026,7 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1):
         )
         return self._mimic_inplace(output, inplace=inplace)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def abs(self):
         """
         Return a Series/DataFrame with absolute numeric value of each element.
@@ -1052,7 +1052,7 @@ def abs(self):
         """
         return self._unaryop("abs")
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def dot(self, other, reflect=False):
         """
         Get dot product of frame and other, (binary operator `dot`).
@@ -1159,15 +1159,15 @@ def dot(self, other, reflect=False):
             )
         return result.item()
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __matmul__(self, other):
         return self.dot(other)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __rmatmul__(self, other):
         return self.dot(other, reflect=True)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def head(self, n=5):
         """
         Return the first `n` rows.
@@ -1246,7 +1246,7 @@ def head(self, n=5):
         """
         return self.iloc[:n]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def tail(self, n=5):
         """
         Returns the last n rows as a new DataFrame or Series
@@ -1277,7 +1277,7 @@ def tail(self, n=5):
 
         return self.iloc[-n:]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def pipe(self, func, *args, **kwargs):
         """
         Apply ``func(self, *args, **kwargs)``.
@@ -1324,7 +1324,7 @@ def pipe(self, func, *args, **kwargs):
         """
         return cudf.core.common.pipe(self, func, *args, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def sum(
         self,
         axis=no_default,
@@ -1385,7 +1385,7 @@ def sum(
             **kwargs,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def product(
         self,
         axis=no_default,
@@ -1452,7 +1452,7 @@ def product(
     # Alias for pandas compatibility.
     prod = product
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def mean(self, axis=0, skipna=True, numeric_only=False, **kwargs):
         """
         Return the mean of the values for the requested axis.
@@ -1541,7 +1541,7 @@ def median(
             **kwargs,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def std(
         self,
         axis=no_default,
@@ -1600,7 +1600,7 @@ def std(
             **kwargs,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def var(
         self,
         axis=no_default,
@@ -1658,7 +1658,7 @@ def var(
             **kwargs,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def kurtosis(self, axis=0, skipna=True, numeric_only=False, **kwargs):
         """
         Return Fisher's unbiased kurtosis of a sample.
@@ -1718,7 +1718,7 @@ def kurtosis(self, axis=0, skipna=True, numeric_only=False, **kwargs):
     # Alias for kurtosis.
     kurt = kurtosis
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs):
         """
         Return unbiased Fisher-Pearson skew of a sample.
@@ -1777,7 +1777,7 @@ def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs):
             **kwargs,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def mask(self, cond, other=None, inplace: bool = False) -> Self | None:
         """
         Replace values where the condition is True.
@@ -1839,7 +1839,7 @@ def mask(self, cond, other=None, inplace: bool = False) -> Self | None:
 
         return self.where(cond=~cond, other=other, inplace=inplace)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @copy_docstring(Rolling)
     def rolling(
         self, window, min_periods=None, center=False, axis=0, win_type=None
@@ -1879,7 +1879,7 @@ def ewm(
             times=times,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nans_to_nulls(self):
         """
         Convert nans (if any) to nulls
@@ -1935,7 +1935,7 @@ def nans_to_nulls(self):
             self._data._from_columns_like_self(result)
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def interpolate(
         self,
         method="linear",
@@ -2034,7 +2034,7 @@ def interpolate(
             )
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         """Shift values by `periods` positions."""
         axis = self._get_axis_from_axis_arg(axis)
@@ -2050,7 +2050,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
             self._data._from_columns_like_self(data_columns)
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def truncate(self, before=None, after=None, axis=0, copy=True):
         """
         Truncate a Series or DataFrame before and after some index value.
@@ -2398,7 +2398,7 @@ def iloc(self):
         return self._iloc_indexer_type(self)
 
     @property  # type:ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def axes(self):
         """
         Return a list representing the axes of the Series.
@@ -2530,7 +2530,7 @@ def squeeze(self, axis: Literal["index", "columns", 0, 1, None] = None):
         )
         return self.iloc[indexer]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def scale(self):
         """
         Scale values to [0, 1] in float64
@@ -2565,7 +2565,7 @@ def scale(self):
         scaled.index = self.index.copy(deep=False)
         return scaled
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def sort_index(
         self,
         axis=0,
@@ -3070,7 +3070,7 @@ def drop_duplicates(
             self.index.names if not ignore_index else None,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def duplicated(self, subset=None, keep="first"):
         """
         Return boolean Series denoting duplicate rows.
@@ -3180,7 +3180,7 @@ def duplicated(self, subset=None, keep="first"):
         )
         return cudf.Series(result, index=self.index)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _empty_like(self, keep_index=True) -> Self:
         result = self._from_columns_like_self(
             libcudf.copying.columns_empty_like(
@@ -3217,7 +3217,7 @@ def _split(self, splits, keep_index=True):
             for i in range(len(splits) + 1)
         ]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def bfill(self, value=None, axis=None, inplace=None, limit=None):
         """
         Synonym for :meth:`Series.fillna` with ``method='bfill'``.
@@ -3236,7 +3236,7 @@ def bfill(self, value=None, axis=None, inplace=None, limit=None):
                 limit=limit,
             )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def backfill(self, value=None, axis=None, inplace=None, limit=None):
         """
         Synonym for :meth:`Series.fillna` with ``method='bfill'``.
@@ -3256,7 +3256,7 @@ def backfill(self, value=None, axis=None, inplace=None, limit=None):
         )
         return self.bfill(value=value, axis=axis, inplace=inplace, limit=limit)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def ffill(self, value=None, axis=None, inplace=None, limit=None):
         """
         Synonym for :meth:`Series.fillna` with ``method='ffill'``.
@@ -3275,7 +3275,7 @@ def ffill(self, value=None, axis=None, inplace=None, limit=None):
                 limit=limit,
             )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def pad(self, value=None, axis=None, inplace=None, limit=None):
         """
         Synonym for :meth:`Series.fillna` with ``method='ffill'``.
@@ -3415,7 +3415,7 @@ def add_suffix(self, suffix):
         raise NotImplementedError
 
     @acquire_spill_lock()
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _apply(self, func, kernel_getter, *args, **kwargs):
         """Apply `func` across the rows of the frame."""
         if kwargs:
@@ -3626,7 +3626,7 @@ def _align_to_index(
         out.index.names = self.index.names
         return out
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _reindex(
         self,
         column_names,
@@ -4154,7 +4154,7 @@ def dropna(
 
         return self._mimic_inplace(result, inplace=inplace)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _drop_na_columns(self, how="any", subset=None, thresh=None):
         """
         Drop columns containing nulls
@@ -4471,7 +4471,7 @@ def last(self, offset):
             slice_func=lambda i: self.iloc[i:],
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def sample(
         self,
         n=None,
@@ -4751,7 +4751,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
 
         return NotImplemented
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def repeat(self, repeats, axis=None):
         """Repeats elements consecutively.
 
@@ -4949,7 +4949,7 @@ def astype(
                 raise e
             return self
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def drop(
         self,
         labels=None,
@@ -5161,7 +5161,7 @@ def drop(
         if not inplace:
             return out
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _explode(self, explode_column: Any, ignore_index: bool):
         # Helper function for `explode` in `Series` and `Dataframe`, explodes a
         # specified nested column. Other columns' corresponding rows are
@@ -5200,7 +5200,7 @@ def _explode(self, explode_column: Any, ignore_index: bool):
             self.index.names if not ignore_index else None,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def tile(self, count):
         """Repeats the rows `count` times to form a new Frame.
 
@@ -5233,7 +5233,7 @@ def tile(self, count):
             index_names=self._index_names,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def groupby(
         self,
         by=None,
@@ -5283,7 +5283,7 @@ def groupby(
             )
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Addition",
@@ -5324,7 +5324,7 @@ def add(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
         return self._binaryop(other, "__add__", fill_value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Addition",
@@ -5365,7 +5365,7 @@ def radd(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
         return self._binaryop(other, "__radd__", fill_value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Subtraction",
@@ -5408,7 +5408,7 @@ def subtract(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
     sub = subtract
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Subtraction",
@@ -5449,7 +5449,7 @@ def rsub(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
         return self._binaryop(other, "__rsub__", fill_value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Multiplication",
@@ -5492,7 +5492,7 @@ def multiply(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
     mul = multiply
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Multiplication",
@@ -5533,7 +5533,7 @@ def rmul(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
         return self._binaryop(other, "__rmul__", fill_value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Modulo",
@@ -5574,7 +5574,7 @@ def mod(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
         return self._binaryop(other, "__mod__", fill_value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Modulo",
@@ -5615,7 +5615,7 @@ def rmod(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
         return self._binaryop(other, "__rmod__", fill_value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Exponential",
@@ -5656,7 +5656,7 @@ def pow(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
         return self._binaryop(other, "__pow__", fill_value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Exponential",
@@ -5697,7 +5697,7 @@ def rpow(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
         return self._binaryop(other, "__rpow__", fill_value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Integer division",
@@ -5738,7 +5738,7 @@ def floordiv(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
         return self._binaryop(other, "__floordiv__", fill_value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Integer division",
@@ -5779,7 +5779,7 @@ def rfloordiv(self, other, axis, level=None, fill_value=None):  # noqa: D102
 
         return self._binaryop(other, "__rfloordiv__", fill_value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Floating division",
@@ -5824,7 +5824,7 @@ def truediv(self, other, axis, level=None, fill_value=None):  # noqa: D102
     div = truediv
     divide = truediv
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Floating division",
@@ -5868,7 +5868,7 @@ def rtruediv(self, other, axis, level=None, fill_value=None):  # noqa: D102
     # Alias for rtruediv
     rdiv = rtruediv
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Equal to",
@@ -5908,7 +5908,7 @@ def eq(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
             other=other, op="__eq__", fill_value=fill_value, can_reindex=True
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Not equal to",
@@ -5948,7 +5948,7 @@ def ne(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
             other=other, op="__ne__", fill_value=fill_value, can_reindex=True
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Less than",
@@ -5988,7 +5988,7 @@ def lt(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
             other=other, op="__lt__", fill_value=fill_value, can_reindex=True
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Less than or equal to",
@@ -6028,7 +6028,7 @@ def le(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
             other=other, op="__le__", fill_value=fill_value, can_reindex=True
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Greater than",
@@ -6068,7 +6068,7 @@ def gt(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
             other=other, op="__gt__", fill_value=fill_value, can_reindex=True
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_binop_template.format(
             operation="Greater than or equal to",
@@ -6123,7 +6123,7 @@ def _preprocess_subset(self, subset):
             raise KeyError(f"columns {diff} do not exist")
         return subset
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def rank(
         self,
         axis=0,
@@ -6291,7 +6291,7 @@ def _check_duplicate_level_names(specified, level_names):
         )
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _get_replacement_values_for_columns(
     to_replace: Any, value: Any, columns_dtype_map: dict[Any, Any]
 ) -> tuple[dict[Any, bool], dict[Any, Any], dict[Any, Any]]:
@@ -6458,7 +6458,7 @@ def _is_series(obj):
     return isinstance(obj, Frame) and obj.ndim == 1 and obj.index is not None
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _drop_rows_by_labels(
     obj: DataFrameOrSeries,
     labels: ColumnLike | abc.Iterable | str,
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 547c14cdc99..7657fa9e234 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -32,7 +32,7 @@
 )
 from cudf.core.join._join_helpers import _match_join_keys
 from cudf.utils.dtypes import is_column_like
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.utils import NotIterable, _external_only_api, _is_same_name
 
 if TYPE_CHECKING:
@@ -126,7 +126,7 @@ class MultiIndex(Frame, BaseIndex, NotIterable):
                )
     """
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __init__(
         self,
         levels=None,
@@ -211,12 +211,12 @@ def __init__(
         self.names = names
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def names(self):
         return self._names
 
     @names.setter  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def names(self, value):
         if value is None:
             value = [None] * self.nlevels
@@ -242,13 +242,13 @@ def names(self, value):
             )
         self._names = pd.core.indexes.frozen.FrozenList(value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_series(self, index=None, name=None):
         raise NotImplementedError(
             "MultiIndex.to_series isn't implemented yet."
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def astype(self, dtype, copy: bool = True):
         if not is_object_dtype(dtype):
             raise TypeError(
@@ -257,7 +257,7 @@ def astype(self, dtype, copy: bool = True):
             )
         return self
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def rename(self, names, inplace=False):
         """
         Alter MultiIndex level names
@@ -304,7 +304,7 @@ def rename(self, names, inplace=False):
         """
         return self.set_names(names, level=None, inplace=inplace)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def set_names(self, names, level=None, inplace=False):
         names_is_list_like = is_list_like(names)
         level_is_list_like = is_list_like(level)
@@ -342,7 +342,7 @@ def set_names(self, names, level=None, inplace=False):
         return self._set_names(names=names, inplace=inplace)
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _from_data(
         cls,
         data: MutableMapping,
@@ -354,16 +354,16 @@ def _from_data(
         return obj
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def name(self):
         return self._name
 
     @name.setter  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def name(self, value):
         self._name = value
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def copy(
         self,
         names=None,
@@ -432,7 +432,7 @@ def copy(
 
         return mi
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __repr__(self):
         max_seq_items = pd.get_option("display.max_seq_items") or len(self)
 
@@ -484,7 +484,7 @@ def _codes_frame(self):
 
     @property  # type: ignore
     @_external_only_api("Use ._codes_frame instead")
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def codes(self):
         """
         Returns the codes of the underlying MultiIndex.
@@ -510,13 +510,13 @@ def get_slice_bound(self, label, side, kind=None):
         raise NotImplementedError()
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nlevels(self):
         """Integer number of levels in this MultiIndex."""
         return self._num_columns
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def levels(self):
         """
         Returns list of levels in the MultiIndex
@@ -548,12 +548,12 @@ def levels(self):
         return self._levels
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def ndim(self) -> int:
         """Dimension of the data. For MultiIndex ndim is always 2."""
         return 2
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _get_level_label(self, level):
         """Get name of the level.
 
@@ -570,7 +570,7 @@ def _get_level_label(self, level):
         else:
             return self._data.names[level]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def isin(self, values, level=None):
         """Return a boolean array where the index values are in values.
 
@@ -669,7 +669,7 @@ def where(self, cond, other=None, inplace=False):
             ".where is not supported for MultiIndex operations"
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _compute_levels_and_codes(self):
         levels = []
 
@@ -683,7 +683,7 @@ def _compute_levels_and_codes(self):
         self._levels = levels
         self._codes = cudf.DataFrame._from_data(codes)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _compute_validity_mask(self, index, row_tuple, max_length):
         """Computes the valid set of indices of values in the lookup"""
         lookup = cudf.DataFrame()
@@ -731,7 +731,7 @@ def _compute_validity_mask(self, index, row_tuple, max_length):
                     raise KeyError(row)
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _get_valid_indices_by_tuple(self, index, row_tuple, max_length):
         # Instructions for Slicing
         # if tuple, get first and last elements of tuple
@@ -761,7 +761,7 @@ def _get_valid_indices_by_tuple(self, index, row_tuple, max_length):
             return row_tuple
         return self._compute_validity_mask(index, row_tuple, max_length)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _index_and_downcast(self, result, index, index_key):
         if isinstance(index_key, (numbers.Number, slice)):
             index_key = [index_key]
@@ -829,7 +829,7 @@ def _index_and_downcast(self, result, index, index_key):
             result.index = index
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _get_row_major(
         self,
         df: DataFrameOrSeries,
@@ -856,7 +856,7 @@ def _get_row_major(
         final = self._index_and_downcast(result, result.index, row_tuple)
         return final
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _validate_indexer(
         self,
         indexer: numbers.Number
@@ -884,7 +884,7 @@ def _validate_indexer(
             for i in indexer:
                 self._validate_indexer(i)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __eq__(self, other):
         if isinstance(other, MultiIndex):
             return np.array(
@@ -898,12 +898,12 @@ def __eq__(self, other):
         return NotImplemented
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def size(self):
         # The size of a MultiIndex is only dependent on the number of rows.
         return self._num_rows
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def take(self, indices):
         if isinstance(indices, cudf.Series) and indices.has_nulls:
             raise ValueError("Column must have no nulls.")
@@ -911,7 +911,7 @@ def take(self, indices):
         obj.names = self.names
         return obj
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def serialize(self):
         header, frames = super().serialize()
         # Overwrite the names in _data with the true names.
@@ -919,7 +919,7 @@ def serialize(self):
         return header, frames
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def deserialize(cls, header, frames):
         # Spoof the column names to construct the frame, then set manually.
         column_names = pickle.loads(header["column_names"])
@@ -927,7 +927,7 @@ def deserialize(cls, header, frames):
         obj = super().deserialize(header, frames)
         return obj._set_names(column_names)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __getitem__(self, index):
         flatten = isinstance(index, int)
 
@@ -954,7 +954,7 @@ def __getitem__(self, index):
             result._levels = self._levels
         return result
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_frame(self, index=True, name=no_default, allow_duplicates=False):
         """
         Create a DataFrame with the levels of the MultiIndex as columns.
@@ -1031,7 +1031,7 @@ def to_frame(self, index=True, name=no_default, allow_duplicates=False):
             data=ca, index=self if index else None
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def get_level_values(self, level):
         """
         Return the values at the requested level
@@ -1087,7 +1087,7 @@ def _is_interval(self):
         return False
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _concat(cls, objs):
         source_data = [o.to_frame(index=False) for o in objs]
 
@@ -1107,7 +1107,7 @@ def _concat(cls, objs):
         return cudf.MultiIndex.from_frame(source_data, names=names)
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_tuples(cls, tuples, names=None):
         """
         Convert list of tuples to MultiIndex.
@@ -1145,12 +1145,12 @@ def from_tuples(cls, tuples, names=None):
         pdi = pd.MultiIndex.from_tuples(tuples, names=names)
         return cls.from_pandas(pdi)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_numpy(self):
         return self.values_host
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def values_host(self):
         """
         Return a numpy representation of the MultiIndex.
@@ -1178,7 +1178,7 @@ def values_host(self):
         return self.to_pandas().values
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def values(self):
         """
         Return a CuPy representation of the MultiIndex.
@@ -1214,7 +1214,7 @@ def values(self):
         return self.to_frame(index=False).values
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_frame(cls, df: pd.DataFrame | cudf.DataFrame, names=None):
         """
         Make a MultiIndex from a DataFrame.
@@ -1289,7 +1289,7 @@ def from_frame(cls, df: pd.DataFrame | cudf.DataFrame, names=None):
         return obj
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_product(cls, arrays, names=None):
         """
         Make a MultiIndex from the cartesian product of multiple iterables.
@@ -1331,7 +1331,7 @@ def from_product(cls, arrays, names=None):
         return cls.from_pandas(pdi)
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_arrays(
         cls,
         arrays,
@@ -1390,7 +1390,7 @@ def from_arrays(
             codes=codes, levels=levels, sortorder=sortorder, names=names
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _poplevels(self, level):
         """
         Remove and return the specified levels from self.
@@ -1441,7 +1441,7 @@ def _poplevels(self, level):
 
         return popped
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def swaplevel(self, i=-2, j=-1):
         """
         Swap level i with level j.
@@ -1492,7 +1492,7 @@ def swaplevel(self, i=-2, j=-1):
             midx = midx.set_names(self.names)
         return midx
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def droplevel(self, level=-1):
         """
         Removes the specified levels from the MultiIndex.
@@ -1555,7 +1555,7 @@ def droplevel(self, level=-1):
         else:
             return mi
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_pandas(
         self, *, nullable: bool = False, arrow_type: bool = False
     ) -> pd.MultiIndex:
@@ -1572,7 +1572,7 @@ def to_pandas(
         )
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default):
         """
         Convert from a Pandas MultiIndex
@@ -1607,7 +1607,7 @@ def from_pandas(cls, multiindex: pd.MultiIndex, nan_as_null=no_default):
         )
 
     @cached_property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_unique(self):
         return len(self) == len(self.unique())
 
@@ -1615,7 +1615,7 @@ def is_unique(self):
     def dtype(self):
         return np.dtype("O")
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _is_sorted(self, ascending=None, null_position=None) -> bool:
         """
         Returns a boolean indicating whether the data of the MultiIndex are sorted
@@ -1661,7 +1661,7 @@ def _is_sorted(self, ascending=None, null_position=None) -> bool:
         )
 
     @cached_property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_monotonic_increasing(self) -> bool:
         """
         Return if the index is monotonic increasing
@@ -1670,7 +1670,7 @@ def is_monotonic_increasing(self) -> bool:
         return self._is_sorted(ascending=None, null_position=None)
 
     @cached_property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_monotonic_decreasing(self) -> bool:
         """
         Return if the index is monotonic decreasing
@@ -1680,7 +1680,7 @@ def is_monotonic_decreasing(self) -> bool:
             ascending=[False] * len(self.levels), null_position=None
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def fillna(self, value):
         """
         Fill null values with the specified value.
@@ -1721,11 +1721,11 @@ def fillna(self, value):
 
         return super().fillna(value=value)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def unique(self):
         return self.drop_duplicates(keep="first")
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nunique(self, dropna: bool = True) -> int:
         mi = self.dropna(how="all") if dropna else self
         return len(mi.unique())
@@ -1740,7 +1740,7 @@ def _clean_nulls_from_index(self):
             index_df._clean_nulls_from_dataframe(index_df), names=self.names
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def memory_usage(self, deep=False):
         usage = sum(col.memory_usage for col in self._data.columns)
         if self.levels:
@@ -1751,13 +1751,13 @@ def memory_usage(self, deep=False):
                 usage += col.memory_usage
         return usage
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def difference(self, other, sort=None):
         if hasattr(other, "to_pandas"):
             other = other.to_pandas()
         return cudf.from_pandas(self.to_pandas().difference(other, sort))
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def append(self, other):
         """
         Append a collection of MultiIndex objects together
@@ -1820,7 +1820,7 @@ def append(self, other):
 
         return MultiIndex._concat(to_concat)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __array_function__(self, func, types, args, kwargs):
         cudf_df_module = MultiIndex
 
@@ -1867,7 +1867,7 @@ def _level_index_from_level(self, level):
                 ) from None
             return level
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def get_indexer(self, target, method=None, limit=None, tolerance=None):
         if tolerance is not None:
             raise NotImplementedError(
@@ -1926,7 +1926,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
 
         return _return_get_indexer_result(result_series.to_cupy())
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def get_loc(self, key):
         is_sorted = (
             self.is_monotonic_increasing or self.is_monotonic_decreasing
@@ -2000,7 +2000,7 @@ def _maybe_match_names(self, other):
             for self_name, other_name in zip(self.names, other.names)
         ]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def union(self, other, sort=None):
         if not isinstance(other, MultiIndex):
             msg = "other must be a MultiIndex or a list of tuples"
@@ -2024,7 +2024,7 @@ def union(self, other, sort=None):
 
         return self._union(other, sort=sort)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _union(self, other, sort=None):
         # TODO: When to_frame is refactored to return a
         # deep copy in future, we should push most of the common
@@ -2050,7 +2050,7 @@ def _union(self, other, sort=None):
             return midx.sort_values()
         return midx
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _intersection(self, other, sort=None):
         if self.names != other.names:
             deep = True
@@ -2073,14 +2073,14 @@ def _intersection(self, other, sort=None):
             return midx.sort_values()
         return midx
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _copy_type_metadata(self: Self, other: Self) -> Self:
         res = super()._copy_type_metadata(other)
         if isinstance(other, MultiIndex):
             res._names = other._names
         return res
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _split_columns_by_levels(
         self, levels: tuple, *, in_levels: bool
     ) -> Generator[tuple[Any, column.ColumnBase], None, None]:
@@ -2099,7 +2099,7 @@ def _split_columns_by_levels(
             elif not in_levels and i not in level_indices:
                 yield name, col
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _new_index_for_reset_index(
         self, levels: tuple | None, name
     ) -> None | BaseIndex:
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index ea25d482578..9acf5294b72 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -68,7 +68,7 @@
     is_mixed_with_object_dtype,
     to_cudf_compatible_scalar,
 )
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 
 if TYPE_CHECKING:
     from cudf._typing import (
@@ -179,7 +179,7 @@ class _SeriesIlocIndexer(_FrameIndexer):
 
     _frame: cudf.Series
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __getitem__(self, arg):
         indexing_spec = indexing_utils.parse_row_iloc_indexer(
             indexing_utils.destructure_series_iloc_indexer(arg, self._frame),
@@ -187,7 +187,7 @@ def __getitem__(self, arg):
         )
         return self._frame._getitem_preprocessed(indexing_spec)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __setitem__(self, key, value):
         if isinstance(key, tuple):
             key = list(key)
@@ -274,7 +274,7 @@ class _SeriesLocIndexer(_FrameIndexer):
     Label-based selection
     """
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __getitem__(self, arg: Any) -> ScalarLike | DataFrameOrSeries:
         if isinstance(arg, pd.MultiIndex):
             arg = cudf.from_pandas(arg)
@@ -301,7 +301,7 @@ def __getitem__(self, arg: Any) -> ScalarLike | DataFrameOrSeries:
 
         return self._frame.iloc[arg]
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __setitem__(self, key, value):
         try:
             key = self._loc_to_iloc(key)
@@ -476,7 +476,7 @@ def _constructor_expanddim(self):
         return cudf.DataFrame
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_categorical(cls, categorical, codes=None):
         """Creates from a pandas.Categorical
 
@@ -517,7 +517,7 @@ def from_categorical(cls, categorical, codes=None):
         return Series(data=col)
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_masked_array(cls, data, mask, null_count=None):
         """Create a Series with null-mask.
         This is equivalent to:
@@ -566,7 +566,7 @@ def from_masked_array(cls, data, mask, null_count=None):
         col = as_column(data).set_mask(mask)
         return cls(data=col)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __init__(
         self,
         data=None,
@@ -663,7 +663,7 @@ def __init__(
         self._check_data_index_length_match()
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _from_data(
         cls,
         data: MutableMapping,
@@ -675,18 +675,18 @@ def _from_data(
             out.name = name
         return out
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _from_data_like_self(self, data: MutableMapping):
         out = super()._from_data_like_self(data)
         out.name = self.name
         return out
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __contains__(self, item):
         return item in self.index
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_pandas(cls, s: pd.Series, nan_as_null=no_default):
         """
         Convert from a Pandas Series.
@@ -735,7 +735,7 @@ def from_pandas(cls, s: pd.Series, nan_as_null=no_default):
         return result
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_unique(self):
         """Return boolean if values in the object are unique.
 
@@ -746,7 +746,7 @@ def is_unique(self):
         return self._column.is_unique
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def dt(self):
         """
         Accessor object for datetime-like properties of the Series values.
@@ -788,7 +788,7 @@ def dt(self):
             )
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def hasnans(self):
         """
         Return True if there are any NaNs or nulls.
@@ -829,7 +829,7 @@ def hasnans(self):
         """
         return self._column.has_nulls(include_nan=True)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def serialize(self):
         header, frames = super().serialize()
 
@@ -842,7 +842,7 @@ def serialize(self):
         return header, frames
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def deserialize(cls, header, frames):
         index_nframes = header["index_frame_count"]
         obj = super().deserialize(
@@ -855,7 +855,7 @@ def deserialize(cls, header, frames):
 
         return obj
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def drop(
         self,
         labels=None,
@@ -884,7 +884,7 @@ def tolist(self):  # noqa: D102
 
     to_list = tolist
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_dict(self, into: type[dict] = dict) -> dict:
         """
         Convert Series to {label -> value} dict or dict-like object.
@@ -923,7 +923,7 @@ def to_dict(self, into: type[dict] = dict) -> dict:
         """
         return self.to_pandas().to_dict(into=into)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def reindex(self, *args, **kwargs):
         """
         Conform Series to new index.
@@ -996,7 +996,7 @@ def reindex(self, *args, **kwargs):
         series.name = self.name
         return series
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         doc_reset_index_template.format(
             klass="Series",
@@ -1081,7 +1081,7 @@ def reset_index(
             inplace=inplace,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_frame(self, name=None):
         """Convert Series into a DataFrame
 
@@ -1124,13 +1124,13 @@ def to_frame(self, name=None):
 
         return cudf.DataFrame({col: self._column}, index=self.index)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def memory_usage(self, index=True, deep=False):
         return self._column.memory_usage + (
             self.index.memory_usage() if index else 0
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __array_function__(self, func, types, args, kwargs):
         if "out" in kwargs or not all(issubclass(t, Series) for t in types):
             return NotImplemented
@@ -1191,7 +1191,7 @@ def __array_function__(self, func, types, args, kwargs):
 
         return NotImplemented
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def map(self, arg, na_action=None) -> "Series":
         """
         Map values of Series according to input correspondence.
@@ -1333,7 +1333,7 @@ def _getitem_preprocessed(
             return self._empty_like(keep_index=True)
         assert_never(spec)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __getitem__(self, arg):
         if isinstance(arg, slice):
             return self.iloc[arg]
@@ -1344,7 +1344,7 @@ def __getitem__(self, arg):
 
     items = SingleColumnFrame.__iter__
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __setitem__(self, key, value):
         if isinstance(key, slice):
             self.iloc[key] = value
@@ -1495,36 +1495,36 @@ def _make_operands_and_index_for_binop(
 
     @copy_docstring(CategoricalAccessor)  # type: ignore
     @property
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def cat(self):
         return CategoricalAccessor(parent=self)
 
     @copy_docstring(StringMethods)  # type: ignore
     @property
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def str(self):
         return StringMethods(parent=self)
 
     @copy_docstring(ListMethods)  # type: ignore
     @property
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def list(self):
         return ListMethods(parent=self)
 
     @copy_docstring(StructMethods)  # type: ignore
     @property
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def struct(self):
         return StructMethods(parent=self)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def dtype(self):
         """The dtype of the Series."""
         return self._column.dtype
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _concat(cls, objs, axis=0, index=True):
         # Concatenate index if not provided
         if index is True:
@@ -1590,25 +1590,25 @@ def _concat(cls, objs, axis=0, index=True):
         return cls(data=col, index=index, name=name)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def valid_count(self):
         """Number of non-null values"""
         return len(self) - self._column.null_count
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def null_count(self):
         """Number of null values"""
         return self._column.null_count
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nullable(self):
         """A boolean indicating whether a null-mask is needed"""
         return self._column.nullable
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def has_nulls(self):
         """
         Indicator whether Series contains null values.
@@ -1637,7 +1637,7 @@ def has_nulls(self):
         """
         return self._column.has_nulls()
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def dropna(self, axis=0, inplace=False, how=None):
         """
         Return a Series with null values removed.
@@ -1717,7 +1717,7 @@ def dropna(self, axis=0, inplace=False, how=None):
 
         return self._mimic_inplace(result, inplace=inplace)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def drop_duplicates(self, keep="first", inplace=False, ignore_index=False):
         """
         Return Series with duplicate values removed.
@@ -1791,7 +1791,7 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False):
 
         return self._mimic_inplace(result, inplace=inplace)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def fillna(
         self, value=None, method=None, axis=None, inplace=False, limit=None
     ):
@@ -1896,7 +1896,7 @@ def between(self, left, right, inclusive="both") -> Series:
             )
         return self._from_data({self.name: lmask & rmask}, self.index)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def all(self, axis=0, bool_only=None, skipna=True, **kwargs):
         if bool_only not in (None, True):
             raise NotImplementedError(
@@ -1904,7 +1904,7 @@ def all(self, axis=0, bool_only=None, skipna=True, **kwargs):
             )
         return super().all(axis, skipna, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def any(self, axis=0, bool_only=None, skipna=True, **kwargs):
         if bool_only not in (None, True):
             raise NotImplementedError(
@@ -1912,7 +1912,7 @@ def any(self, axis=0, bool_only=None, skipna=True, **kwargs):
             )
         return super().any(axis, skipna, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_pandas(
         self,
         *,
@@ -2004,7 +2004,7 @@ def to_pandas(
         )
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def data(self):
         """The gpu buffer for the data
 
@@ -2029,12 +2029,12 @@ def data(self):
         return self._column.data
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nullmask(self):
         """The gpu buffer for the null-mask"""
         return cudf.Series(self._column.nullmask)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def astype(
         self,
         dtype,
@@ -2051,13 +2051,13 @@ def astype(
             dtype = {self.name: dtype}
         return super().astype(dtype, copy, errors)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def sort_index(self, axis=0, *args, **kwargs):
         if axis not in (0, "index"):
             raise ValueError("Only axis=0 is valid for Series.")
         return super().sort_index(axis=axis, *args, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def sort_values(
         self,
         axis=0,
@@ -2112,7 +2112,7 @@ def sort_values(
             ignore_index=ignore_index,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nlargest(self, n=5, keep="first"):
         """Returns a new Series of the *n* largest element.
 
@@ -2175,7 +2175,7 @@ def nlargest(self, n=5, keep="first"):
         """
         return self._n_largest_or_smallest(True, n, [self.name], keep)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nsmallest(self, n=5, keep="first"):
         """
         Returns a new Series of the *n* smallest element.
@@ -2251,7 +2251,7 @@ def nsmallest(self, n=5, keep="first"):
         """
         return self._n_largest_or_smallest(False, n, [self.name], keep)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def argsort(
         self,
         axis=0,
@@ -2274,7 +2274,7 @@ def argsort(
         obj.name = self.name
         return obj
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def replace(self, to_replace=None, value=no_default, *args, **kwargs):
         if is_dict_like(to_replace) and value not in {None, no_default}:
             raise ValueError(
@@ -2284,7 +2284,7 @@ def replace(self, to_replace=None, value=no_default, *args, **kwargs):
 
         return super().replace(to_replace, value, *args, **kwargs)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def update(self, other):
         """
         Modify Series in place using values from passed Series.
@@ -2390,7 +2390,7 @@ def update(self, other):
         self.mask(mask, other, inplace=True)
 
     # UDF related
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def apply(self, func, convert_dtype=True, args=(), **kwargs):
         """
         Apply a scalar function to the values of a Series.
@@ -2535,7 +2535,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
     #
     # Stats
     #
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def count(self):
         """
         Return number of non-NA/null observations in the Series
@@ -2559,7 +2559,7 @@ def count(self):
         """
         return self.valid_count
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def mode(self, dropna=True):
         """
         Return the mode(s) of the dataset.
@@ -2630,7 +2630,7 @@ def mode(self, dropna=True):
             {self.name: val_counts.index.sort_values()}, name=self.name
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def round(self, decimals=0, how="half_even"):
         if not is_integer(decimals):
             raise ValueError(
@@ -2639,7 +2639,7 @@ def round(self, decimals=0, how="half_even"):
         decimals = int(decimals)
         return super().round(decimals, how)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def cov(self, other, min_periods=None):
         """
         Compute covariance with Series, excluding missing values.
@@ -2690,7 +2690,7 @@ def cov(self, other, min_periods=None):
                 f"{other.dtype}"
             )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def transpose(self):
         """Return the transpose, which is by definition self."""
 
@@ -2698,7 +2698,7 @@ def transpose(self):
 
     T = property(transpose, doc=transpose.__doc__)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def duplicated(self, keep="first"):
         """
         Indicate duplicate Series values.
@@ -2778,7 +2778,7 @@ def duplicated(self, keep="first"):
         """
         return super().duplicated(keep=keep)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def corr(self, other, method="pearson", min_periods=None):
         """Calculates the sample correlation between two Series,
         excluding missing values.
@@ -2830,7 +2830,7 @@ def corr(self, other, method="pearson", min_periods=None):
                 f"cannot perform corr with types {self.dtype}, {other.dtype}"
             )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def autocorr(self, lag=1):
         """Compute the lag-N autocorrelation. This method computes the Pearson
         correlation between the Series and its shifted self.
@@ -2856,7 +2856,7 @@ def autocorr(self, lag=1):
         """
         return self.corr(self.shift(lag))
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def isin(self, values):
         """Check whether values are contained in Series.
 
@@ -2926,7 +2926,7 @@ def isin(self, values):
             {self.name: self._column.isin(values)}, index=self.index
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def unique(self):
         """
         Returns unique values of this Series.
@@ -2961,7 +2961,7 @@ def unique(self):
             return res.values
         return Series(res, name=self.name)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def value_counts(
         self,
         normalize=False,
@@ -3116,7 +3116,7 @@ def value_counts(
         res.name = result_name
         return res
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def quantile(
         self, q=0.5, interpolation="linear", exact=True, quant_index=True
     ):
@@ -3195,7 +3195,7 @@ def quantile(
         )
 
     @docutils.doc_describe()
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def describe(
         self,
         percentiles=None,
@@ -3240,7 +3240,7 @@ def describe(
             name=self.name,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def digitize(self, bins, right=False):
         """Return the indices of the bins to which each value belongs.
 
@@ -3276,7 +3276,7 @@ def digitize(self, bins, right=False):
             cudf.core.column.numerical.digitize(self._column, bins, right)
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def diff(self, periods=1):
         """First discrete difference of element.
 
@@ -3347,7 +3347,7 @@ def diff(self, periods=1):
 
         return self - self.shift(periods=periods)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     @docutils.doc_apply(
         groupby_doc_template.format(
             ret=textwrap.dedent(
@@ -3385,7 +3385,7 @@ def groupby(
             dropna,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def rename(self, index=None, copy=True):
         """
         Alter Series name
@@ -3431,7 +3431,7 @@ def rename(self, index=None, copy=True):
         out_data = self._data.copy(deep=copy)
         return Series._from_data(out_data, self.index, name=index)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def add_prefix(self, prefix):
         return Series._from_data(
             # TODO: Change to deep=False when copy-on-write is default
@@ -3439,7 +3439,7 @@ def add_prefix(self, prefix):
             index=prefix + self.index.astype(str),
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def add_suffix(self, suffix):
         return Series._from_data(
             # TODO: Change to deep=False when copy-on-write is default
@@ -3447,7 +3447,7 @@ def add_suffix(self, suffix):
             index=self.index.astype(str) + suffix,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def keys(self):
         """
         Return alias for index.
@@ -3491,7 +3491,7 @@ def keys(self):
         """
         return self.index
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def explode(self, ignore_index=False):
         """
         Transform each element of a list-like to a row, replicating index
@@ -3528,7 +3528,7 @@ def explode(self, ignore_index=False):
         """
         return super()._explode(self.name, ignore_index)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def pct_change(
         self, periods=1, fill_method=no_default, limit=no_default, freq=None
     ):
@@ -3602,7 +3602,7 @@ def pct_change(
         change = diff / data.shift(periods=periods, freq=freq)
         return change
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def where(self, cond, other=None, inplace=False):
         result_col = super().where(cond, other, inplace)
         return self._mimic_inplace(
@@ -3736,7 +3736,7 @@ class DatetimeProperties(BaseDatelikeProperties):
     """
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def year(self) -> Series:
         """
         The year of the datetime.
@@ -3761,7 +3761,7 @@ def year(self) -> Series:
         return self._get_dt_field("year")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def month(self) -> Series:
         """
         The month as January=1, December=12.
@@ -3786,7 +3786,7 @@ def month(self) -> Series:
         return self._get_dt_field("month")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def day(self) -> Series:
         """
         The day of the datetime.
@@ -3811,7 +3811,7 @@ def day(self) -> Series:
         return self._get_dt_field("day")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def hour(self) -> Series:
         """
         The hours of the datetime.
@@ -3836,7 +3836,7 @@ def hour(self) -> Series:
         return self._get_dt_field("hour")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def minute(self) -> Series:
         """
         The minutes of the datetime.
@@ -3861,7 +3861,7 @@ def minute(self) -> Series:
         return self._get_dt_field("minute")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def second(self) -> Series:
         """
         The seconds of the datetime.
@@ -3886,7 +3886,7 @@ def second(self) -> Series:
         return self._get_dt_field("second")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def microsecond(self) -> Series:
         """
         The microseconds of the datetime.
@@ -3918,7 +3918,7 @@ def microsecond(self) -> Series:
         return self._return_result_like_self(micro + extra)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nanosecond(self) -> Series:
         """
         The nanoseconds of the datetime.
@@ -3943,7 +3943,7 @@ def nanosecond(self) -> Series:
         return self._get_dt_field("nanosecond")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def weekday(self) -> Series:
         """
         The day of the week with Monday=0, Sunday=6.
@@ -3980,7 +3980,7 @@ def weekday(self) -> Series:
         return self._get_dt_field("weekday")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def dayofweek(self) -> Series:
         """
         The day of the week with Monday=0, Sunday=6.
@@ -4017,7 +4017,7 @@ def dayofweek(self) -> Series:
         return self._get_dt_field("weekday")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def dayofyear(self) -> Series:
         """
         The day of the year, from 1-365 in non-leap years and
@@ -4055,7 +4055,7 @@ def dayofyear(self) -> Series:
         return self._get_dt_field("day_of_year")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def day_of_year(self) -> Series:
         """
         The day of the year, from 1-365 in non-leap years and
@@ -4093,7 +4093,7 @@ def day_of_year(self) -> Series:
         return self._get_dt_field("day_of_year")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_leap_year(self) -> Series:
         """
         Boolean indicator if the date belongs to a leap year.
@@ -4148,7 +4148,7 @@ def is_leap_year(self) -> Series:
         return self._return_result_like_self(res)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def quarter(self) -> Series:
         """
         Integer indicator for which quarter of the year the date belongs in.
@@ -4177,7 +4177,7 @@ def quarter(self) -> Series:
         )
         return self._return_result_like_self(res)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def day_name(self, locale: str | None = None) -> Series:
         """
         Return the day names. Currently supports English locale only.
@@ -4213,7 +4213,7 @@ def day_name(self, locale: str | None = None) -> Series:
             self.series._column.get_day_names(locale)
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def month_name(self, locale: str | None = None) -> Series:
         """
         Return the month names. Currently supports English locale only.
@@ -4243,7 +4243,7 @@ def month_name(self, locale: str | None = None) -> Series:
             self.series._column.get_month_names(locale)
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def isocalendar(self) -> cudf.DataFrame:
         """
         Returns a DataFrame with the year, week, and day
@@ -4291,7 +4291,7 @@ def isocalendar(self) -> cudf.DataFrame:
         )
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_month_start(self) -> Series:
         """
         Booleans indicating if dates are the first day of the month.
@@ -4299,7 +4299,7 @@ def is_month_start(self) -> Series:
         return (self.day == 1).fillna(False)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def days_in_month(self) -> Series:
         """
         Get the total number of days in the month that the date falls on.
@@ -4348,7 +4348,7 @@ def days_in_month(self) -> Series:
         )
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_month_end(self) -> Series:
         """
         Boolean indicator if the date is the last day of the month.
@@ -4391,7 +4391,7 @@ def is_month_end(self) -> Series:
         return (self.day == last_day.dt.day).fillna(False)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_quarter_start(self) -> Series:
         """
         Boolean indicator if the date is the first day of a quarter.
@@ -4436,7 +4436,7 @@ def is_quarter_start(self) -> Series:
         return self._return_result_like_self(result)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_quarter_end(self) -> Series:
         """
         Boolean indicator if the date is the last day of a quarter.
@@ -4483,7 +4483,7 @@ def is_quarter_end(self) -> Series:
         return self._return_result_like_self(result)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_year_start(self) -> Series:
         """
         Boolean indicator if the date is the first day of the year.
@@ -4514,7 +4514,7 @@ def is_year_start(self) -> Series:
         return self._return_result_like_self(outcol.fillna(False))
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_year_end(self) -> Series:
         """
         Boolean indicator if the date is the last day of the year.
@@ -4547,13 +4547,13 @@ def is_year_end(self) -> Series:
         result = cudf._lib.copying.copy_if_else(leap, non_leap, leap_dates)
         return self._return_result_like_self(result.fillna(False))
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _get_dt_field(self, field: str) -> Series:
         return self._return_result_like_self(
             self.series._column.get_dt_field(field)
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def ceil(self, freq: str) -> Series:
         """
         Perform ceil operation on the data to the specified freq.
@@ -4586,7 +4586,7 @@ def ceil(self, freq: str) -> Series:
         """
         return self._return_result_like_self(self.series._column.ceil(freq))
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def floor(self, freq: str) -> Series:
         """
         Perform floor operation on the data to the specified freq.
@@ -4619,7 +4619,7 @@ def floor(self, freq: str) -> Series:
         """
         return self._return_result_like_self(self.series._column.floor(freq))
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def round(self, freq: str) -> Series:
         """
         Perform round operation on the data to the specified freq.
@@ -4655,7 +4655,7 @@ def round(self, freq: str) -> Series:
         """
         return self._return_result_like_self(self.series._column.round(freq))
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def strftime(self, date_format: str, *args, **kwargs) -> Series:
         """
         Convert to Series using specified ``date_format``.
@@ -4832,7 +4832,7 @@ class TimedeltaProperties(BaseDatelikeProperties):
     """
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def days(self) -> Series:
         """
         Number of days.
@@ -4864,7 +4864,7 @@ def days(self) -> Series:
         return self._get_td_field("days")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def seconds(self) -> Series:
         """
         Number of seconds (>= 0 and less than 1 day).
@@ -4903,7 +4903,7 @@ def seconds(self) -> Series:
         return self._get_td_field("seconds")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def microseconds(self) -> Series:
         """
         Number of microseconds (>= 0 and less than 1 second).
@@ -4935,7 +4935,7 @@ def microseconds(self) -> Series:
         return self._get_td_field("microseconds")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nanoseconds(self) -> Series:
         """
         Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.
@@ -4967,7 +4967,7 @@ def nanoseconds(self) -> Series:
         return self._get_td_field("nanoseconds")
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def components(self) -> cudf.DataFrame:
         """
         Return a Dataframe of the components of the Timedeltas.
@@ -4999,14 +4999,14 @@ def components(self) -> cudf.DataFrame:
             ca, index=self.series.index
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _get_td_field(self, field: str) -> Series:
         return self._return_result_like_self(
             getattr(self.series._column, field)
         )
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _align_indices(series_list, how="outer", allow_non_unique=False):
     """
     Internal util to align the indices of a list of Series objects
@@ -5069,7 +5069,7 @@ def _align_indices(series_list, how="outer", allow_non_unique=False):
 
 
 @acquire_spill_lock()
-@_cudf_nvtx_annotate
+@_performance_tracking
 def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
     r"""Returns a boolean array where two arrays are equal within a tolerance.
 
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
index 23a2c828a04..f9555aee6a2 100644
--- a/python/cudf/cudf/core/single_column_frame.py
+++ b/python/cudf/cudf/core/single_column_frame.py
@@ -18,7 +18,7 @@
 )
 from cudf.core.column import ColumnBase, as_column
 from cudf.core.frame import Frame
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.utils import NotIterable
 
 if TYPE_CHECKING:
@@ -41,7 +41,7 @@ class SingleColumnFrame(Frame, NotIterable):
         "index": 0,
     }
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _reduce(
         self,
         op,
@@ -62,7 +62,7 @@ def _reduce(
         except AttributeError:
             raise TypeError(f"cannot perform {op} with type {self.dtype}")
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _scan(self, op, axis=None, *args, **kwargs):
         if axis not in (None, 0):
             raise NotImplementedError("axis parameter is not implemented yet")
@@ -70,24 +70,24 @@ def _scan(self, op, axis=None, *args, **kwargs):
         return super()._scan(op, axis=axis, *args, **kwargs)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def name(self):
         """Get the name of this object."""
         return next(iter(self._column_names))
 
     @name.setter  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def name(self, value):
         self._data[value] = self._data.pop(self.name)
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def ndim(self) -> int:  # noqa: D401
         """Number of dimensions of the underlying data, by definition 1."""
         return 1
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def shape(self) -> tuple[int]:
         """Get a tuple representing the dimensionality of the Index."""
         return (len(self),)
@@ -99,27 +99,27 @@ def __bool__(self):
         )
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _num_columns(self) -> int:
         return 1
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _column(self) -> ColumnBase:
         return next(iter(self._columns))
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def values(self) -> cupy.ndarray:  # noqa: D102
         return self._column.values
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def values_host(self) -> numpy.ndarray:  # noqa: D102
         return self._column.values_host
 
     @classmethod
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def from_arrow(cls, array) -> Self:
         """Create from PyArrow Array/ChunkedArray.
 
@@ -150,7 +150,7 @@ def from_arrow(cls, array) -> Self:
         """
         return cls(ColumnBase.from_arrow(array))
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def to_arrow(self) -> pa.Array:
         """
         Convert to a PyArrow Array.
@@ -182,7 +182,7 @@ def to_arrow(self) -> pa.Array:
         return self._column.to_arrow()
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_unique(self) -> bool:
         """Return boolean if values in the object are unique.
 
@@ -193,7 +193,7 @@ def is_unique(self) -> bool:
         return self._column.is_unique
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_monotonic_increasing(self) -> bool:
         """Return boolean if values in the object are monotonically increasing.
 
@@ -204,7 +204,7 @@ def is_monotonic_increasing(self) -> bool:
         return self._column.is_monotonic_increasing
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def is_monotonic_decreasing(self) -> bool:
         """Return boolean if values in the object are monotonically decreasing.
 
@@ -215,7 +215,7 @@ def is_monotonic_decreasing(self) -> bool:
         return self._column.is_monotonic_decreasing
 
     @property  # type: ignore
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __cuda_array_interface__(self):
         # While the parent column class has a `__cuda_array_interface__` method
         # defined, it is not implemented for all column types. When it is not
@@ -229,7 +229,7 @@ def __cuda_array_interface__(self):
                 "'__cuda_array_interface__'"
             )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def factorize(
         self, sort: bool = False, use_na_sentinel: bool = True
     ) -> tuple[cupy.ndarray, cudf.Index]:
@@ -268,7 +268,7 @@ def factorize(
             use_na_sentinel=use_na_sentinel,
         )
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def _make_operands_for_binop(
         self,
         other: Any,
@@ -323,7 +323,7 @@ def _make_operands_for_binop(
 
         return {result_name: (self._column, other, reflect, fill_value)}
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def nunique(self, dropna: bool = True) -> int:
         """
         Return count of unique values for the column.
@@ -369,7 +369,7 @@ def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase:
                 return self._column.apply_boolean_mask(arg)
             raise NotImplementedError(f"Unknown indexer {type(arg)}")
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def where(self, cond, other=None, inplace=False):
         from cudf.core._internals.where import (
             _check_and_cast_columns_with_other,
diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py
index 06d9296ca0f..265b87350ae 100644
--- a/python/cudf/cudf/core/udf/groupby_utils.py
+++ b/python/cudf/cudf/core/udf/groupby_utils.py
@@ -30,7 +30,7 @@
     _supported_dtypes_from_frame,
 )
 from cudf.utils._numba import _CUDFNumbaConfig
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 
 
 def _get_frame_groupby_type(dtype, index_dtype):
@@ -126,7 +126,7 @@ def _get_groupby_apply_kernel(frame, func, args):
     return kernel, return_type
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def jit_groupby_apply(offsets, grouped_values, function, *args):
     """
     Main entrypoint for JIT Groupby.apply via Numba.
diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
index f1704e4ea78..d616761cb3b 100644
--- a/python/cudf/cudf/core/udf/utils.py
+++ b/python/cudf/cudf/core/udf/utils.py
@@ -38,7 +38,7 @@
     STRING_TYPES,
     TIMEDELTA_TYPES,
 )
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 from cudf.utils.utils import initfunc
 
 # Maximum size of a string column is 2 GiB
@@ -71,7 +71,7 @@ def _ptx_file():
     )
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _get_udf_return_type(argty, func: Callable, args=()):
     """
     Get the return type of a masked UDF for a given set of argument dtypes. It
@@ -236,7 +236,7 @@ def _generate_cache_key(frame, func: Callable, args, suffix="__APPLY_UDF"):
     )
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _compile_or_get(
     frame, func, args, kernel_getter=None, suffix="__APPLY_UDF"
 ):
diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py
index f07764e2ce4..e909d96309e 100644
--- a/python/cudf/cudf/io/csv.py
+++ b/python/cudf/cudf/io/csv.py
@@ -12,10 +12,10 @@
 from cudf.api.types import is_scalar
 from cudf.utils import ioutils
 from cudf.utils.dtypes import _maybe_convert_to_default_type
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 @ioutils.doc_read_csv()
 def read_csv(
     filepath_or_buffer,
@@ -151,7 +151,7 @@ def read_csv(
     return df
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 @ioutils.doc_to_csv()
 def to_csv(
     df,
diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py
index 2a838ca7417..7733e770d99 100644
--- a/python/cudf/cudf/io/parquet.py
+++ b/python/cudf/cudf/io/parquet.py
@@ -22,7 +22,7 @@
 from cudf.api.types import is_list_like
 from cudf.core.column import as_column, build_categorical_column, column_empty
 from cudf.utils import ioutils
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 
 BYTE_SIZES = {
     "kb": 1000,
@@ -50,7 +50,7 @@
 }
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _write_parquet(
     df,
     paths,
@@ -130,7 +130,7 @@ def _write_parquet(
 
 # Logic chosen to match: https://arrow.apache.org/
 # docs/_modules/pyarrow/parquet.html#write_to_dataset
-@_cudf_nvtx_annotate
+@_performance_tracking
 def write_to_dataset(
     df,
     root_path,
@@ -318,7 +318,7 @@ def write_to_dataset(
 
 
 @ioutils.doc_read_parquet_metadata()
-@_cudf_nvtx_annotate
+@_performance_tracking
 def read_parquet_metadata(filepath_or_buffer):
     """{docstring}"""
     # Multiple sources are passed as a list. If a single source is passed,
@@ -360,7 +360,7 @@ def read_parquet_metadata(filepath_or_buffer):
     return libparquet.read_parquet_metadata(filepaths_or_buffers)
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _process_dataset(
     paths,
     fs,
@@ -515,7 +515,7 @@ def _process_dataset(
 
 
 @ioutils.doc_read_parquet()
-@_cudf_nvtx_annotate
+@_performance_tracking
 def read_parquet(
     filepath_or_buffer,
     engine="cudf",
@@ -785,7 +785,7 @@ def _handle_is(column: cudf.Series, value, *, negate) -> cudf.Series:
         return df
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _parquet_to_frame(
     paths_or_buffers,
     *args,
@@ -885,7 +885,7 @@ def _parquet_to_frame(
         return dfs[0]
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _read_parquet(
     filepaths_or_buffers,
     engine,
@@ -941,7 +941,7 @@ def _read_parquet(
 
 
 @ioutils.doc_to_parquet()
-@_cudf_nvtx_annotate
+@_performance_tracking
 def to_parquet(
     df,
     path,
@@ -1107,7 +1107,7 @@ def _get_estimated_file_size(df):
     return file_size
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _get_partitioned(
     df,
     root_path,
@@ -1145,7 +1145,7 @@ def _get_partitioned(
     return full_paths, metadata_file_paths, grouped_df, part_offsets, filename
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 def _get_groups_and_offsets(
     df,
     partition_cols,
@@ -1305,7 +1305,7 @@ class ParquetDatasetWriter:
 
     """
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def __init__(
         self,
         path,
@@ -1355,7 +1355,7 @@ def __init__(
 
         self._file_sizes: dict[str, int] = {}
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def write_table(self, df):
         """
         Write a dataframe to the file/dataset
@@ -1486,7 +1486,7 @@ def write_table(self, df):
             self.path_cw_map.update({k: new_cw_idx for k in new_paths})
             self._chunked_writers[-1][0].write_table(grouped_df, part_info)
 
-    @_cudf_nvtx_annotate
+    @_performance_tracking
     def close(self, return_metadata=False):
         """
         Close all open files and optionally return footer metadata as a binary
diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py
index 0e19972f6e0..4329480bb2c 100644
--- a/python/cudf/cudf/io/text.py
+++ b/python/cudf/cudf/io/text.py
@@ -1,14 +1,14 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 from io import BytesIO, StringIO
 
 import cudf
 from cudf._lib import text as libtext
 from cudf.utils import ioutils
-from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _performance_tracking
 
 
-@_cudf_nvtx_annotate
+@_performance_tracking
 @ioutils.doc_read_text()
 def read_text(
     filepath_or_buffer,
diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py
index fb5a963f008..1f539e7f266 100644
--- a/python/cudf/cudf/options.py
+++ b/python/cudf/cudf/options.py
@@ -311,6 +311,20 @@ def _integer_and_none_validator(val):
     _make_contains_validator([False, True]),
 )
 
+_register_option(
+    "memory_profiling",
+    _env_get_bool("CUDF_MEMORY_PROFILING", False),
+    textwrap.dedent(
+        """
+        If set to `False`, disables memory profiling.
+        If set to `True`, enables memory profiling.
+        Read more at: :ref:`memory-profiling-user-doc`
+        \tValid values are True or False. Default is False.
+    """
+    ),
+    _make_contains_validator([False, True]),
+)
+
 
 class option_context(ContextDecorator):
     """
diff --git a/python/cudf/cudf/tests/test_performance_tracking.py b/python/cudf/cudf/tests/test_performance_tracking.py
new file mode 100644
index 00000000000..e886b77af3f
--- /dev/null
+++ b/python/cudf/cudf/tests/test_performance_tracking.py
@@ -0,0 +1,41 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from io import StringIO
+
+import pytest
+
+import rmm.mr
+import rmm.statistics
+
+import cudf
+from cudf.utils.performance_tracking import (
+    get_memory_records,
+    print_memory_report,
+)
+
+
+@pytest.fixture
+def rmm_reset():
+    """Run the test on a fresh CUDA resource, then restore the previous one"""
+    previous_mr = rmm.mr.get_current_device_resource()
+    try:
+        rmm.mr.set_current_device_resource(rmm.mr.CudaMemoryResource())
+        yield
+    finally:
+        rmm.mr.set_current_device_resource(previous_mr)
+
+
+def test_memory_profiling(rmm_reset):
+    """Check that profiling a merge yields memory records and a report."""
+    df1 = cudf.DataFrame({"a": [1, 2, 3]})
+    assert len(get_memory_records()) == 0
+
+    rmm.statistics.enable_statistics()
+    # Scope the option so it cannot leak into later tests.
+    with cudf.option_context("memory_profiling", True):
+        df1.merge(df1)
+    assert len(get_memory_records()) > 0
+
+    out = StringIO()
+    print_memory_report(file=out)
+    assert "DataFrame.merge" in out.getvalue()
diff --git a/python/cudf/cudf/utils/nvtx_annotation.py b/python/cudf/cudf/utils/nvtx_annotation.py
deleted file mode 100644
index a4404e51232..00000000000
--- a/python/cudf/cudf/utils/nvtx_annotation.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
-
-import hashlib
-from functools import partial
-
-from nvtx import annotate
-
-_NVTX_COLORS = ["green", "blue", "purple", "rapids"]
-
-
-def _get_color_for_nvtx(name):
-    m = hashlib.sha256()
-    m.update(name.encode())
-    hash_value = int(m.hexdigest(), 16)
-    idx = hash_value % len(_NVTX_COLORS)
-    return _NVTX_COLORS[idx]
-
-
-def _cudf_nvtx_annotate(func, domain="cudf_python"):
-    """Decorator for applying nvtx annotations to methods in cudf."""
-    return annotate(
-        message=func.__qualname__,
-        color=_get_color_for_nvtx(func.__qualname__),
-        domain=domain,
-    )(func)
-
-
-_dask_cudf_nvtx_annotate = partial(
-    _cudf_nvtx_annotate, domain="dask_cudf_python"
-)
diff --git a/python/cudf/cudf/utils/performance_tracking.py b/python/cudf/cudf/utils/performance_tracking.py
new file mode 100644
index 00000000000..30c891d0d5a
--- /dev/null
+++ b/python/cudf/cudf/utils/performance_tracking.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from __future__ import annotations
+
+import contextlib
+import functools
+import hashlib
+import sys
+
+import nvtx
+
+import rmm.statistics
+
+from cudf.options import get_option
+
+_NVTX_COLORS = ["green", "blue", "purple", "rapids"]
+
+
+def _get_color_for_nvtx(name):
+    """Pick a stable color from ``_NVTX_COLORS`` by hashing ``name``."""
+    digest = hashlib.sha256(name.encode()).hexdigest()
+    # Interpret the hex digest as one big integer and wrap it onto
+    # the palette, so the same name always maps to the same color.
+    return _NVTX_COLORS[int(digest, 16) % len(_NVTX_COLORS)]
+
+
+def _performance_tracking(func, domain="cudf_python"):
+    """Decorator for applying performance tracking (if enabled).
+
+    Each call of the wrapped ``func`` runs inside an RMM statistics profiler
+    when the ``memory_profiling`` option is set, and inside an NVTX range
+    (in ``domain``) when ``nvtx.enabled()`` is true.
+    """
+    # Both depend only on ``func``: compute once here, not on every call.
+    message = func.__qualname__
+    color = _get_color_for_nvtx(message)
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        with contextlib.ExitStack() as stack:
+            if get_option("memory_profiling"):
+                # NB: the user still needs to call
+                #     `rmm.statistics.enable_statistics()`
+                #     to enable memory profiling.
+                name = rmm.statistics._get_descriptive_name_of_object(func)
+                stack.enter_context(rmm.statistics.profiler(name=name))
+            if nvtx.enabled():
+                stack.enter_context(
+                    nvtx.annotate(message=message, color=color, domain=domain)
+                )
+            return func(*args, **kwargs)
+
+    return wrapper
+
+
+_dask_cudf_performance_tracking = functools.partial(
+    _performance_tracking, domain="dask_cudf_python"
+)  # Same decorator, but NVTX ranges go to the dask_cudf domain.
+
+
+def get_memory_records() -> (
+    dict[str, rmm.statistics.ProfilerRecords.MemoryRecord]
+):
+    """Get the memory records from the memory profiling
+
+    Returns
+    -------
+    dict
+        Maps function names to memory records. Empty if profiling is disabled.
+    """
+    return rmm.statistics.default_profiler_records.records
+
+
+def print_memory_report(file=None) -> None:
+    """Pretty print the result of the memory profiling
+
+    Parameters
+    ----------
+    file : output stream, optional (``None``: ``sys.stdout`` at call time)
+    """
+    stream = sys.stdout if file is None else file
+    print(rmm.statistics.default_profiler_records.report(), file=stream)
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index 2e4dfc4bb14..7347ec7866a 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -159,8 +159,9 @@ def _external_only_api(func, alternative=""):
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
         # Check the immediately preceding frame to see if it's in cudf.
-        frame, lineno = next(traceback.walk_stack(None))
-        fn = frame.f_code.co_filename
+        pre_frame = traceback.extract_stack(limit=2)[0]
+        fn = pre_frame.filename
+        lineno = pre_frame.lineno
         if _cudf_root in fn and _tests_root not in fn:
             raise RuntimeError(
                 f"External-only API called in {fn} at line {lineno}. "
diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index d250589e389..1f55a59ea55 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -43,7 +43,7 @@
 
 import cudf
 from cudf.api.types import is_string_dtype
-from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _dask_cudf_performance_tracking
 
 from .core import DataFrame, Index, Series
 
@@ -53,7 +53,7 @@
 
 
 @meta_nonempty.register(cudf.BaseIndex)
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _nonempty_index(idx):
     if isinstance(idx, cudf.core.index.RangeIndex):
         return cudf.core.index.RangeIndex(2, name=idx.name)
@@ -100,7 +100,7 @@ def _nest_list_data(data, leaf_type):
     return data
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _get_non_empty_data(s):
     if isinstance(s, cudf.core.column.CategoricalColumn):
         categories = (
@@ -147,7 +147,7 @@ def _get_non_empty_data(s):
 
 
 @meta_nonempty.register(cudf.Series)
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _nonempty_series(s, idx=None):
     if idx is None:
         idx = _nonempty_index(s.index)
@@ -157,7 +157,7 @@ def _nonempty_series(s, idx=None):
 
 
 @meta_nonempty.register(cudf.DataFrame)
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def meta_nonempty_cudf(x):
     idx = meta_nonempty(x.index)
     columns_with_dtype = dict()
@@ -182,18 +182,18 @@ def meta_nonempty_cudf(x):
 
 
 @make_meta_dispatch.register((cudf.Series, cudf.DataFrame))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def make_meta_cudf(x, index=None):
     return x.head(0)
 
 
 @make_meta_dispatch.register(cudf.BaseIndex)
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def make_meta_cudf_index(x, index=None):
     return x[:0]
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _empty_series(name, dtype, index=None):
     if isinstance(dtype, str) and dtype == "category":
         return cudf.Series(
@@ -203,7 +203,7 @@ def _empty_series(name, dtype, index=None):
 
 
 @make_meta_obj.register(object)
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def make_meta_object_cudf(x, index=None):
     """Create an empty cudf object containing the desired metadata.
 
@@ -274,7 +274,7 @@ def make_meta_object_cudf(x, index=None):
 
 
 @concat_dispatch.register((cudf.DataFrame, cudf.Series, cudf.BaseIndex))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def concat_cudf(
     dfs,
     axis=0,
@@ -299,13 +299,13 @@ def concat_cudf(
 @categorical_dtype_dispatch.register(
     (cudf.DataFrame, cudf.Series, cudf.BaseIndex)
 )
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def categorical_dtype_cudf(categories=None, ordered=False):
     return cudf.CategoricalDtype(categories=categories, ordered=ordered)
 
 
 @tolist_dispatch.register((cudf.Series, cudf.BaseIndex))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def tolist_cudf(obj):
     return obj.to_pandas().tolist()
 
@@ -313,7 +313,7 @@ def tolist_cudf(obj):
 @is_categorical_dtype_dispatch.register(
     (cudf.Series, cudf.BaseIndex, cudf.CategoricalDtype, Series)
 )
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def is_categorical_dtype_cudf(obj):
     return cudf.api.types._is_categorical_dtype(obj)
 
@@ -324,7 +324,7 @@ def get_grouper_cudf(obj):
 
 
 @percentile_lookup.register((cudf.Series, cp.ndarray, cudf.BaseIndex))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def percentile_cudf(a, q, interpolation="linear"):
     # Cudf dispatch to the equivalent of `np.percentile`:
     # https://numpy.org/doc/stable/reference/generated/numpy.percentile.html
@@ -400,7 +400,7 @@ def _table_to_cudf(obj, table, self_destruct=None, **kwargs):
 
 
 @union_categoricals_dispatch.register((cudf.Series, cudf.BaseIndex))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def union_categoricals_cudf(
     to_union, sort_categories=False, ignore_order=False
 ):
@@ -410,7 +410,7 @@ def union_categoricals_cudf(
 
 
 @hash_object_dispatch.register((cudf.DataFrame, cudf.Series))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def hash_object_cudf(frame, index=True):
     if index:
         frame = frame.reset_index()
@@ -418,7 +418,7 @@ def hash_object_cudf(frame, index=True):
 
 
 @hash_object_dispatch.register(cudf.BaseIndex)
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def hash_object_cudf_index(ind, index=None):
     if isinstance(ind, cudf.MultiIndex):
         return ind.to_frame(index=False).hash_values()
@@ -428,7 +428,7 @@ def hash_object_cudf_index(ind, index=None):
 
 
 @group_split_dispatch.register((cudf.Series, cudf.DataFrame))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def group_split_cudf(df, c, k, ignore_index=False):
     return dict(
         zip(
@@ -443,7 +443,7 @@ def group_split_cudf(df, c, k, ignore_index=False):
 
 
 @sizeof_dispatch.register(cudf.DataFrame)
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def sizeof_cudf_dataframe(df):
     return int(
         sum(col.memory_usage for col in df._data.columns)
@@ -452,7 +452,7 @@ def sizeof_cudf_dataframe(df):
 
 
 @sizeof_dispatch.register((cudf.Series, cudf.BaseIndex))
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def sizeof_cudf_series_index(obj):
     return obj.memory_usage()
 
diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py
index 3bd455a3a57..aab56e3a1b0 100644
--- a/python/dask_cudf/dask_cudf/core.py
+++ b/python/dask_cudf/dask_cudf/core.py
@@ -22,7 +22,7 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _dask_cudf_performance_tracking
 
 from dask_cudf import sorting
 from dask_cudf.accessors import ListMethods, StructMethods
@@ -53,7 +53,7 @@ def __repr__(self):
         s = "<dask_cudf.%s | %d tasks | %d npartitions>"
         return s % (type(self).__name__, len(self.dask), self.npartitions)
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def to_dask_dataframe(self, **kwargs):
         """Create a dask.dataframe object from a dask_cudf object
 
@@ -92,7 +92,7 @@ class DataFrame(_Frame, dd.core.DataFrame):
 
     _partition_type = cudf.DataFrame
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def _assign_column(self, k, v):
         def assigner(df, k, v):
             out = df.copy()
@@ -102,7 +102,7 @@ def assigner(df, k, v):
         meta = assigner(self._meta, k, dask_make_meta(v))
         return self.map_partitions(assigner, k, v, meta=meta)
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def apply_rows(self, func, incols, outcols, kwargs=None, cache_key=None):
         import uuid
 
@@ -123,7 +123,7 @@ def do_apply_rows(df, func, incols, outcols, kwargs):
         )
 
     @_deprecate_shuffle_kwarg
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def merge(self, other, shuffle_method=None, **kwargs):
         on = kwargs.pop("on", None)
         if isinstance(on, tuple):
@@ -136,7 +136,7 @@ def merge(self, other, shuffle_method=None, **kwargs):
         )
 
     @_deprecate_shuffle_kwarg
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def join(self, other, shuffle_method=None, **kwargs):
         # CuDF doesn't support "right" join yet
         how = kwargs.pop("how", "left")
@@ -155,7 +155,7 @@ def join(self, other, shuffle_method=None, **kwargs):
         )
 
     @_deprecate_shuffle_kwarg
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def set_index(
         self,
         other,
@@ -237,7 +237,7 @@ def set_index(
         )
 
     @_deprecate_shuffle_kwarg
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def sort_values(
         self,
         by,
@@ -275,14 +275,14 @@ def sort_values(
             return df.reset_index(drop=True)
         return df
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def to_parquet(self, path, *args, **kwargs):
         """Calls dask.dataframe.io.to_parquet with CudfEngine backend"""
         from dask_cudf.io import to_parquet
 
         return to_parquet(self, path, *args, **kwargs)
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def to_orc(self, path, **kwargs):
         """Calls dask_cudf.io.to_orc"""
         from dask_cudf.io import to_orc
@@ -290,7 +290,7 @@ def to_orc(self, path, **kwargs):
         return to_orc(self, path, **kwargs)
 
     @derived_from(pd.DataFrame)
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def var(
         self,
         axis=None,
@@ -324,28 +324,28 @@ def var(
             return _parallel_var(self, meta, skipna, split_every, out)
 
     @_deprecate_shuffle_kwarg
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def shuffle(self, *args, shuffle_method=None, **kwargs):
         """Wraps dask.dataframe DataFrame.shuffle method"""
         return super().shuffle(
             *args, shuffle_method=_get_shuffle_method(shuffle_method), **kwargs
         )
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def groupby(self, by=None, **kwargs):
         from .groupby import CudfDataFrameGroupBy
 
         return CudfDataFrameGroupBy(self, by=by, **kwargs)
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def sum_of_squares(x):
     x = x.astype("f8")._column
     outcol = libcudf.reduce.reduce("sum_of_squares", x)
     return cudf.Series(outcol)
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def var_aggregate(x2, x, n, ddof):
     try:
         with warnings.catch_warnings(record=True):
@@ -358,12 +358,12 @@ def var_aggregate(x2, x, n, ddof):
         return np.float64(np.nan)
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def nlargest_agg(x, **kwargs):
     return cudf.concat(x).nlargest(**kwargs)
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def nsmallest_agg(x, **kwargs):
     return cudf.concat(x).nsmallest(**kwargs)
 
@@ -371,7 +371,7 @@ def nsmallest_agg(x, **kwargs):
 class Series(_Frame, dd.core.Series):
     _partition_type = cudf.Series
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def count(self, split_every=False):
         return reduction(
             [self],
@@ -381,14 +381,14 @@ def count(self, split_every=False):
             meta="i8",
         )
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def mean(self, split_every=False):
         sum = self.sum(split_every=split_every)
         n = self.count(split_every=split_every)
         return sum / n
 
     @derived_from(pd.DataFrame)
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def var(
         self,
         axis=None,
@@ -417,19 +417,19 @@ def var(
         else:
             return _parallel_var(self, meta, skipna, split_every, out)
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def groupby(self, *args, **kwargs):
         from .groupby import CudfSeriesGroupBy
 
         return CudfSeriesGroupBy(self, *args, **kwargs)
 
     @property  # type: ignore
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def list(self):
         return ListMethods(self)
 
     @property  # type: ignore
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def struct(self):
         return StructMethods(self)
 
@@ -438,7 +438,7 @@ class Index(Series, dd.core.Index):
     _partition_type = cudf.Index  # type: ignore
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _naive_var(ddf, meta, skipna, ddof, split_every, out):
     num = ddf._get_numeric_data()
     x = 1.0 * num.sum(skipna=skipna, split_every=split_every)
@@ -453,7 +453,7 @@ def _naive_var(ddf, meta, skipna, ddof, split_every, out):
     return handle_out(out, result)
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _parallel_var(ddf, meta, skipna, split_every, out):
     def _local_var(x, skipna):
         if skipna:
@@ -520,7 +520,7 @@ def _finalize_var(vals):
     return handle_out(out, result)
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _extract_meta(x):
     """
     Extract internal cache data (``_meta``) from dask_cudf objects
@@ -536,7 +536,7 @@ def _extract_meta(x):
     return x
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _emulate(func, *args, **kwargs):
     """
     Apply a function using args / kwargs. If arguments contain dd.DataFrame /
@@ -546,7 +546,7 @@ def _emulate(func, *args, **kwargs):
         return func(*_extract_meta(args), **_extract_meta(kwargs))
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def align_partitions(args):
     """Align partitions between dask_cudf objects.
 
@@ -563,7 +563,7 @@ def align_partitions(args):
     return args
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def reduction(
     args,
     chunk=None,
@@ -702,7 +702,7 @@ def reduction(
     return dd.core.new_dd_object(graph, b, meta, (None, None))
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None):
     from dask_cudf import QUERY_PLANNING_ON
 
@@ -746,7 +746,7 @@ def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None):
 )
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def from_dask_dataframe(df):
     """
     Convert a Dask :class:`dask.dataframe.DataFrame` to a Dask-cuDF
diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py
index 2e72461b43d..bbbcde17b51 100644
--- a/python/dask_cudf/dask_cudf/groupby.py
+++ b/python/dask_cudf/dask_cudf/groupby.py
@@ -16,7 +16,7 @@
 
 import cudf
 from cudf.core.groupby.groupby import _deprecate_collect
-from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _dask_cudf_performance_tracking
 
 from dask_cudf.sorting import _deprecate_shuffle_kwarg
 
@@ -56,13 +56,13 @@ def wrapper(*args, **kwargs):
 
 
 class CudfDataFrameGroupBy(DataFrameGroupBy):
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def __init__(self, *args, sort=None, **kwargs):
         self.sep = kwargs.pop("sep", "___")
         self.as_index = kwargs.pop("as_index", True)
         super().__init__(*args, sort=sort, **kwargs)
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def __getitem__(self, key):
         if isinstance(key, list):
             g = CudfDataFrameGroupBy(
@@ -84,7 +84,7 @@ def __getitem__(self, key):
         g._meta = g._meta[key]
         return g
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def _make_groupby_method_aggs(self, agg_name):
         """Create aggs dictionary for aggregation methods"""
 
@@ -92,7 +92,7 @@ def _make_groupby_method_aggs(self, agg_name):
             return {c: agg_name for c in self.obj.columns if c not in self.by}
         return {c: agg_name for c in self.obj.columns if c != self.by}
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def count(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -102,7 +102,7 @@ def count(self, split_every=None, split_out=1):
             split_out,
         )
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def mean(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -112,7 +112,7 @@ def mean(self, split_every=None, split_out=1):
             split_out,
         )
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def std(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -122,7 +122,7 @@ def std(self, split_every=None, split_out=1):
             split_out,
         )
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def var(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -132,7 +132,7 @@ def var(self, split_every=None, split_out=1):
             split_out,
         )
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def sum(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -142,7 +142,7 @@ def sum(self, split_every=None, split_out=1):
             split_out,
         )
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def min(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -152,7 +152,7 @@ def min(self, split_every=None, split_out=1):
             split_out,
         )
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def max(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -162,7 +162,7 @@ def max(self, split_every=None, split_out=1):
             split_out,
         )
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def collect(self, split_every=None, split_out=1):
         _deprecate_collect()
@@ -173,7 +173,7 @@ def collect(self, split_every=None, split_out=1):
             split_out,
         )
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def first(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -183,7 +183,7 @@ def first(self, split_every=None, split_out=1):
             split_out,
         )
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def last(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -194,7 +194,7 @@ def last(self, split_every=None, split_out=1):
         )
 
     @_deprecate_shuffle_kwarg
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def aggregate(
         self, arg, split_every=None, split_out=1, shuffle_method=None
     ):
@@ -231,13 +231,13 @@ def aggregate(
 
 
 class CudfSeriesGroupBy(SeriesGroupBy):
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def __init__(self, *args, sort=None, **kwargs):
         self.sep = kwargs.pop("sep", "___")
         self.as_index = kwargs.pop("as_index", True)
         super().__init__(*args, sort=sort, **kwargs)
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def count(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -247,7 +247,7 @@ def count(self, split_every=None, split_out=1):
             split_out,
         )[self._slice]
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def mean(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -257,7 +257,7 @@ def mean(self, split_every=None, split_out=1):
             split_out,
         )[self._slice]
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def std(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -267,7 +267,7 @@ def std(self, split_every=None, split_out=1):
             split_out,
         )[self._slice]
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def var(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -277,7 +277,7 @@ def var(self, split_every=None, split_out=1):
             split_out,
         )[self._slice]
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def sum(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -287,7 +287,7 @@ def sum(self, split_every=None, split_out=1):
             split_out,
         )[self._slice]
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def min(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -297,7 +297,7 @@ def min(self, split_every=None, split_out=1):
             split_out,
         )[self._slice]
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def max(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -307,7 +307,7 @@ def max(self, split_every=None, split_out=1):
             split_out,
         )[self._slice]
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def collect(self, split_every=None, split_out=1):
         _deprecate_collect()
@@ -318,7 +318,7 @@ def collect(self, split_every=None, split_out=1):
             split_out,
         )[self._slice]
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def first(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -328,7 +328,7 @@ def first(self, split_every=None, split_out=1):
             split_out,
         )[self._slice]
 
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     @_check_groupby_optimized
     def last(self, split_every=None, split_out=1):
         return _make_groupby_agg_call(
@@ -339,7 +339,7 @@ def last(self, split_every=None, split_out=1):
         )[self._slice]
 
     @_deprecate_shuffle_kwarg
-    @_dask_cudf_nvtx_annotate
+    @_dask_cudf_performance_tracking
     def aggregate(
         self, arg, split_every=None, split_out=1, shuffle_method=None
     ):
@@ -429,7 +429,7 @@ def _shuffle_aggregate(
     return result
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def groupby_agg(
     ddf,
     gb_cols,
@@ -641,7 +641,7 @@ def groupby_agg(
     )
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _make_groupby_agg_call(
     gb, aggs, split_every, split_out, shuffle_method=None
 ):
@@ -663,7 +663,7 @@ def _make_groupby_agg_call(
     )
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _redirect_aggs(arg):
     """Redirect aggregations to their corresponding name in cuDF"""
     redirects = {
@@ -690,7 +690,7 @@ def _redirect_aggs(arg):
     return redirects.get(arg, arg)
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _aggs_optimized(arg, supported: set):
     """Check that aggregations in `arg` are a subset of `supported`"""
     if isinstance(arg, (list, dict)):
@@ -712,7 +712,7 @@ def _aggs_optimized(arg, supported: set):
     return False
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _groupby_optimized(gb):
     """Check that groupby input can use dask-cudf optimized codepath"""
     return isinstance(gb.obj, DaskDataFrame) and (
@@ -730,7 +730,7 @@ def _make_name(col_name, sep="_"):
     return sep.join(name for name in col_name if name != "")
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _groupby_partition_agg(df, gb_cols, aggs, columns, dropna, sort, sep):
     """Initial partition-level aggregation task.
 
@@ -768,7 +768,7 @@ def _groupby_partition_agg(df, gb_cols, aggs, columns, dropna, sort, sep):
     return gb[sorted(output_columns)]
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _tree_node_agg(df, gb_cols, dropna, sort, sep):
     """Node in groupby-aggregation reduction tree.
 
@@ -807,7 +807,7 @@ def _tree_node_agg(df, gb_cols, dropna, sort, sep):
     return gb[sorted(output_columns)]
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _var_agg(df, col, count_name, sum_name, pow2_sum_name, ddof=1):
     """Calculate variance (given count, sum, and sum-squared columns)."""
 
@@ -829,7 +829,7 @@ def _var_agg(df, col, count_name, sum_name, pow2_sum_name, ddof=1):
     return var
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _finalize_gb_agg(
     gb_in,
     gb_cols,
diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py
index f3774e20d32..a2ba4d1878e 100644
--- a/python/dask_cudf/dask_cudf/sorting.py
+++ b/python/dask_cudf/dask_cudf/sorting.py
@@ -18,7 +18,7 @@
 
 import cudf
 from cudf.api.types import _is_categorical_dtype
-from cudf.utils.nvtx_annotation import _dask_cudf_nvtx_annotate
+from cudf.utils.performance_tracking import _dask_cudf_performance_tracking
 
 _SHUFFLE_SUPPORT = ("tasks", "p2p")  # "disk" not supported
 
@@ -48,14 +48,14 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def set_index_post(df, index_name, drop, column_dtype):
     df2 = df.set_index(index_name, drop=drop)
     df2.columns = df2.columns.astype(column_dtype)
     return df2
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _set_partitions_pre(s, divisions, ascending=True, na_position="last"):
     if ascending:
         partitions = divisions.searchsorted(s, side="right") - 1
@@ -72,7 +72,7 @@ def _set_partitions_pre(s, divisions, ascending=True, na_position="last"):
     return partitions
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _quantile(a, q):
     n = len(a)
     if not len(a):
@@ -83,7 +83,7 @@ def _quantile(a, q):
     )
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def merge_quantiles(finalq, qs, vals):
     """Combine several quantile calculations of different data.
     [NOTE: Same logic as dask.array merge_percentiles]
@@ -146,7 +146,7 @@ def _append_counts(val, count):
     return rv.reset_index(drop=True)
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def _approximate_quantile(df, q):
     """Approximate quantiles of DataFrame or Series.
     [NOTE: Same logic as dask.dataframe Series quantile]
@@ -220,7 +220,7 @@ def set_quantile_index(df):
     return df
 
 
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def quantile_divisions(df, by, npartitions):
     qn = np.linspace(0.0, 1.0, npartitions + 1).tolist()
     divisions = _approximate_quantile(df[by], qn).compute()
@@ -257,7 +257,7 @@ def quantile_divisions(df, by, npartitions):
 
 
 @_deprecate_shuffle_kwarg
-@_dask_cudf_nvtx_annotate
+@_dask_cudf_performance_tracking
 def sort_values(
     df,
     by,