diff --git a/cpp/src/strings/split/split.cu b/cpp/src/strings/split/split.cu
index c87c36ba3b9..fbab5220383 100644
--- a/cpp/src/strings/split/split.cu
+++ b/cpp/src/strings/split/split.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,7 +19,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -123,7 +122,7 @@ std::unique_ptr<table> split_fn(strings_column_view const& input,
   // builds the offsets and the vector of all tokens
   auto [offsets, tokens] = split_helper(input, tokenizer, stream, mr);
-  auto const d_offsets   = offsets->view().template data<size_type>();
+  auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets->view());
   auto const d_tokens    = tokens.data();
 
   // compute the maximum number of tokens for any string
@@ -132,7 +131,7 @@ std::unique_ptr<table> split_fn(strings_column_view const& input,
     thrust::make_counting_iterator(0),
     thrust::make_counting_iterator(input.size()),
     cuda::proclaim_return_type<size_type>([d_offsets] __device__(auto idx) -> size_type {
-      return d_offsets[idx + 1] - d_offsets[idx];
+      return static_cast<size_type>(d_offsets[idx + 1] - d_offsets[idx]);
     }),
     0,
     thrust::maximum{});
@@ -144,7 +143,7 @@ std::unique_ptr<table> split_fn(strings_column_view const& input,
       cuda::proclaim_return_type<string_index_pair>(
         [d_tokens, d_offsets, col] __device__(size_type idx) {
           auto const offset      = d_offsets[idx];
-          auto const token_count = d_offsets[idx + 1] - offset;
+          auto const token_count = static_cast<size_type>(d_offsets[idx + 1] - offset);
           return (col < token_count) ? d_tokens[offset + col] : string_index_pair{nullptr, 0};
         }));
     results.emplace_back(make_strings_column(itr, itr + input.size(), stream, mr));
@@ -360,12 +359,11 @@ std::unique_ptr<table> whitespace_split_fn(size_type strings_count,
   }
 
   // get the positions for every token
-  rmm::device_uvector<string_index_pair> tokens(columns_count * strings_count, stream);
+  rmm::device_uvector<string_index_pair> tokens(
+    static_cast<int64_t>(columns_count) * static_cast<int64_t>(strings_count), stream);
   string_index_pair* d_tokens = tokens.data();
-  thrust::fill(rmm::exec_policy(stream),
-               d_tokens,
-               d_tokens + (columns_count * strings_count),
-               string_index_pair{nullptr, 0});
+  thrust::fill(
+    rmm::exec_policy(stream), tokens.begin(), tokens.end(), string_index_pair{nullptr, 0});
   thrust::for_each_n(rmm::exec_policy(stream),
                      thrust::make_counting_iterator(0),
                      strings_count,
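Note: the split.cu hunks above all follow one pattern: the offsets child column is
read through cudf::detail::offsetalator_factory::make_input_iterator instead of a
raw size_type pointer, so the same device code works whether the column stores
INT32 or INT64 offsets. The offsetalator always yields int64_t, which is why each
lambda now narrows the adjacent-offset difference back to size_type explicitly. A
minimal sketch of the pattern as a hypothetical standalone helper (only the
offsetalator names and the cast are taken from the diff; the header path is an
assumption):

    #include <cudf/detail/offsetalator.cuh>
    #include <cudf/types.hpp>

    // Token count for row `idx`: subtract adjacent offsets through the
    // offsetalator (always int64_t) and narrow once, at the boundary.
    __device__ cudf::size_type token_count_at(
      cudf::detail::input_offsetalator const d_offsets, cudf::size_type idx)
    {
      return static_cast<cudf::size_type>(d_offsets[idx + 1] - d_offsets[idx]);
    }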
diff --git a/cpp/src/strings/split/split.cuh b/cpp/src/strings/split/split.cuh
index c5fb44fc3dd..906c522e898 100644
--- a/cpp/src/strings/split/split.cuh
+++ b/cpp/src/strings/split/split.cuh
@@ -17,9 +17,9 @@
 #include
 #include
 #include
-#include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -66,9 +66,9 @@ struct base_split_tokenizer {
  * @param chars_bytes Total number of characters to process
  * @return true if delimiter is found starting at position `idx`
  */
-  __device__ bool is_delimiter(size_type idx,
-                               size_type const* d_offsets,
-                               size_type chars_bytes) const
+  __device__ bool is_delimiter(int64_t idx,
+                               cudf::detail::input_offsetalator const d_offsets,
+                               int64_t chars_bytes) const
   {
     auto const d_chars = get_base_ptr() + d_offsets[0];
     if (idx + d_delimiter.size_bytes() > chars_bytes) { return false; }
@@ -87,8 +87,8 @@ struct base_split_tokenizer {
  * @param d_delimiter_offsets Offsets per string to delimiters in d_positions
  */
   __device__ size_type count_tokens(size_type idx,
-                                    size_type const* d_positions,
-                                    size_type const* d_delimiter_offsets) const
+                                    int64_t const* d_positions,
+                                    int64_t const* d_delimiter_offsets) const
   {
     if (!is_valid(idx)) { return 0; }
 
@@ -96,12 +96,13 @@ struct base_split_tokenizer {
     auto const d_str     = get_string(idx);
     auto const d_str_end = d_str.data() + d_str.size_bytes();
     auto const base_ptr  = get_base_ptr() + delim_size - 1;
+
     auto const delimiters =
-      cudf::device_span<size_type const>(d_positions + d_delimiter_offsets[idx],
-                                         d_delimiter_offsets[idx + 1] - d_delimiter_offsets[idx]);
+      cudf::device_span<int64_t const>(d_positions + d_delimiter_offsets[idx],
+                                       d_delimiter_offsets[idx + 1] - d_delimiter_offsets[idx]);
 
     size_type token_count = 1;  // all strings will have at least one token
-    size_type last_pos    = delimiters[0] - delim_size;
+    auto last_pos = !delimiters.empty() ? (delimiters[0] - delim_size) : 0L;
     for (auto d_pos : delimiters) {
       // delimiter must fit in string && overlapping delimiters are ignored
       if (((base_ptr + d_pos) < d_str_end) && ((d_pos - last_pos) >= delim_size)) {
@@ -129,9 +130,9 @@ struct base_split_tokenizer {
  * @param d_all_tokens All output tokens for the strings column
  */
   __device__ void get_tokens(size_type idx,
-                             size_type const* d_tokens_offsets,
-                             size_type const* d_positions,
-                             size_type const* d_delimiter_offsets,
+                             cudf::detail::input_offsetalator const d_tokens_offsets,
+                             int64_t const* d_positions,
+                             int64_t const* d_delimiter_offsets,
                              string_index_pair* d_all_tokens) const
   {
     auto const d_tokens =  // this string's tokens output
@@ -149,8 +150,8 @@ struct base_split_tokenizer {
     }
 
     auto const delimiters =
-      cudf::device_span<size_type const>(d_positions + d_delimiter_offsets[idx],
-                                         d_delimiter_offsets[idx + 1] - d_delimiter_offsets[idx]);
+      cudf::device_span<int64_t const>(d_positions + d_delimiter_offsets[idx],
+                                       d_delimiter_offsets[idx + 1] - d_delimiter_offsets[idx]);
 
     auto& derived = static_cast<Derived const&>(*this);
     derived.process_tokens(d_str, delimiters, d_tokens);
@@ -184,7 +185,7 @@ struct split_tokenizer_fn : base_split_tokenizer<split_tokenizer_fn> {
  * @param d_tokens Output vector to store tokens for this string
  */
   __device__ void process_tokens(string_view const d_str,
-                                 device_span<size_type const> d_delimiters,
+                                 device_span<int64_t const> d_delimiters,
                                  device_span<string_index_pair> d_tokens) const
   {
     auto const base_ptr = get_base_ptr();  // d_positions values based on this
@@ -239,7 +240,7 @@ struct rsplit_tokenizer_fn : base_split_tokenizer<rsplit_tokenizer_fn> {
  * @param d_tokens Output vector to store tokens for this string
  */
   __device__ void process_tokens(string_view const d_str,
-                                 device_span<size_type const> d_delimiters,
+                                 device_span<int64_t const> d_delimiters,
                                  device_span<string_index_pair> d_tokens) const
   {
     auto const base_ptr = get_base_ptr();  // d_positions values are based on this ptr
@@ -290,7 +291,8 @@ struct rsplit_tokenizer_fn : base_split_tokenizer<rsplit_tokenizer_fn> {
  * @param input The input column of strings to split
  * @param tokenizer Object used for counting and identifying delimiters and tokens
  * @param stream CUDA stream used for device memory operations and kernel launches
- * @param mr Device memory resource used to allocate the returned objects' device memory.
+ * @param mr Device memory resource used to allocate the returned objects' device memory
+ * @return Token offsets and a vector of string indices
 */
 template <typename Tokenizer>
 std::pair<std::unique_ptr<column>, rmm::device_uvector<string_index_pair>> split_helper(
@@ -301,37 +303,38 @@ std::pair<std::unique_ptr<column>, rmm::device_uvector<string_index_pair>> split_helper(
 {
   auto const strings_count = input.size();
   auto const chars_bytes =
-    cudf::detail::get_value<size_type>(input.offsets(), input.offset() + strings_count, stream) -
-    cudf::detail::get_value<size_type>(input.offsets(), input.offset(), stream);
-
-  auto d_offsets = input.offsets_begin();
+    get_offset_value(input.offsets(), input.offset() + strings_count, stream) -
+    get_offset_value(input.offsets(), input.offset(), stream);
+  auto const d_offsets =
+    cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset());
 
   // count the number of delimiters in the entire column
   auto const delimiter_count =
     thrust::count_if(rmm::exec_policy(stream),
-                     thrust::make_counting_iterator(0),
-                     thrust::make_counting_iterator(chars_bytes),
-                     [tokenizer, d_offsets, chars_bytes] __device__(size_type idx) {
+                     thrust::counting_iterator<int64_t>(0),
+                     thrust::counting_iterator<int64_t>(chars_bytes),
+                     [tokenizer, d_offsets, chars_bytes] __device__(int64_t idx) {
                        return tokenizer.is_delimiter(idx, d_offsets, chars_bytes);
                      });
 
+  // Create a vector of every delimiter position in the chars column.
   // These may include overlapping or otherwise out-of-bounds delimiters which
   // will be resolved during token processing.
-  auto delimiter_positions = rmm::device_uvector<size_type>(delimiter_count, stream);
+  auto delimiter_positions = rmm::device_uvector<int64_t>(delimiter_count, stream);
   auto d_positions         = delimiter_positions.data();
-  auto const copy_end =
-    thrust::copy_if(rmm::exec_policy(stream),
-                    thrust::make_counting_iterator(0),
-                    thrust::make_counting_iterator(chars_bytes),
-                    delimiter_positions.begin(),
-                    [tokenizer, d_offsets, chars_bytes] __device__(size_type idx) {
-                      return tokenizer.is_delimiter(idx, d_offsets, chars_bytes);
-                    });
+  auto const copy_end = cudf::detail::copy_if_safe(
+    thrust::counting_iterator<int64_t>(0),
+    thrust::counting_iterator<int64_t>(chars_bytes),
+    delimiter_positions.begin(),
+    [tokenizer, d_offsets, chars_bytes] __device__(int64_t idx) {
+      return tokenizer.is_delimiter(idx, d_offsets, chars_bytes);
+    },
+    stream);
 
   // create a vector of offsets to each string's delimiter set within delimiter_positions
   auto const delimiter_offsets = [&] {
     // first, create a vector of string indices for each delimiter
-    auto string_indices = rmm::device_uvector(delimiter_count, stream);
+    auto string_indices = rmm::device_uvector(delimiter_count, stream);
     thrust::upper_bound(rmm::exec_policy(stream),
                         d_offsets,
                         d_offsets + strings_count,
@@ -340,24 +343,24 @@ std::pair<std::unique_ptr<column>, rmm::device_uvector<string_index_pair>> split_helper(
                         string_indices.begin());
 
     // compute delimiter offsets per string
-    auto delimiter_offsets   = rmm::device_uvector<size_type>(strings_count + 1, stream);
+    auto delimiter_offsets   = rmm::device_uvector<int64_t>(strings_count + 1, stream);
     auto d_delimiter_offsets = delimiter_offsets.data();
     // memset to zero-out the delimiter counts for any null-entries or strings with no delimiters
     CUDF_CUDA_TRY(cudaMemsetAsync(
-      d_delimiter_offsets, 0, delimiter_offsets.size() * sizeof(size_type), stream.value()));
+      d_delimiter_offsets, 0, delimiter_offsets.size() * sizeof(int64_t), stream.value()));
 
     // next, count the number of delimiters per string
     auto d_string_indices = string_indices.data();  // identifies strings with delimiters only
-    thrust::for_each_n(rmm::exec_policy(stream),
-                       thrust::make_counting_iterator(0),
-                       delimiter_count,
-                       [d_string_indices, d_delimiter_offsets] __device__(size_type idx) {
-                         auto const str_idx = d_string_indices[idx] - 1;
-                         cuda::atomic_ref<size_type, cuda::thread_scope_device> ref{
-                           *(d_delimiter_offsets + str_idx)};
-                         ref.fetch_add(1, cuda::std::memory_order_relaxed);
-                       });
+    thrust::for_each_n(
+      rmm::exec_policy(stream),
+      thrust::counting_iterator<int64_t>(0),
+      delimiter_count,
+      [d_string_indices, d_delimiter_offsets] __device__(int64_t idx) {
+        auto const str_idx = d_string_indices[idx] - 1;
+        cuda::atomic_ref<int64_t, cuda::thread_scope_device> ref{*(d_delimiter_offsets + str_idx)};
+        ref.fetch_add(1L, cuda::std::memory_order_relaxed);
+      });
 
     // finally, convert the delimiter counts into offsets
     thrust::exclusive_scan(rmm::exec_policy(stream),
                            delimiter_offsets.begin(),
@@ -379,11 +382,10 @@ std::pair<std::unique_ptr<column>, rmm::device_uvector<string_index_pair>> split_helper(
     });
 
   // create offsets from the counts for return to the caller
-  auto offsets = std::get<0>(
-    cudf::detail::make_offsets_child_column(token_counts.begin(), token_counts.end(), stream, mr));
-  auto const total_tokens =
-    cudf::detail::get_value<size_type>(offsets->view(), strings_count, stream);
-  auto const d_tokens_offsets = offsets->view().data<size_type>();
+  auto [offsets, total_tokens] = cudf::strings::detail::make_offsets_child_column(
+    token_counts.begin(), token_counts.end(), stream, mr);
+  auto const d_tokens_offsets =
+    cudf::detail::offsetalator_factory::make_input_iterator(offsets->view());
 
   // build a vector of all the token positions for all the strings
   auto tokens = rmm::device_uvector<string_index_pair>(total_tokens, stream);
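Note: two things change in split.cuh above. First, delimiter byte positions move
from size_type to int64_t, since the chars of a large strings column can exceed
INT32_MAX bytes. Second, count_tokens previously read delimiters[0]
unconditionally; the new `!delimiters.empty() ? (delimiters[0] - delim_size) : 0L`
guards that read for strings whose delimiter span is empty. A host-side analogue
of the fixed counting loop (a sketch, under the simplifying assumption that the
per-position string-bounds check is omitted):

    #include <cstdint>
    #include <vector>

    // Count tokens for one string given its sorted delimiter byte positions.
    // An empty list yields exactly one token; delimiters overlapping the
    // previous match are ignored, mirroring the device loop above.
    int count_tokens(std::vector<int64_t> const& delimiters, int64_t delim_size)
    {
      int token_count = 1;  // every valid string has at least one token
      auto last_pos   = !delimiters.empty() ? (delimiters.front() - delim_size) : int64_t{0};
      for (auto d_pos : delimiters) {
        if ((d_pos - last_pos) >= delim_size) {
          ++token_count;
          last_pos = d_pos;
        }
      }
      return token_count;
    }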
diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu
index 045aac279e6..d8385549840 100644
--- a/cpp/src/strings/split/split_re.cu
+++ b/cpp/src/strings/split/split_re.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,7 +21,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -36,7 +35,6 @@
 #include
 #include
 #include
-#include
 
 namespace cudf {
 namespace strings {
@@ -60,7 +58,7 @@ enum class split_direction {
 struct token_reader_fn {
   column_device_view const d_strings;
   split_direction const direction;
-  size_type const* d_token_offsets;
+  cudf::detail::input_offsetalator const d_token_offsets;
   string_index_pair* d_tokens;
 
   __device__ void operator()(size_type const idx, reprog_device const prog, int32_t const prog_idx)
@@ -73,9 +71,9 @@ struct token_reader_fn {
     auto const token_count  = d_token_offsets[idx + 1] - token_offset;
     auto const d_result     = d_tokens + token_offset;  // store tokens here
 
-    size_type token_idx = 0;
-    auto itr            = d_str.begin();
-    auto last_pos       = itr;
+    int64_t token_idx = 0;
+    auto itr          = d_str.begin();
+    auto last_pos     = itr;
     while (itr.position() <= nchars) {
       auto const match = prog.find(prog_idx, d_str, itr);
       if (!match) { break; }
@@ -90,7 +88,7 @@ struct token_reader_fn {
         d_result[token_idx++] = token;
       } else {
         if (direction == split_direction::FORWARD) { break; }  // we are done
-        for (auto l = 0; l < token_idx - 1; ++l) {
+        for (auto l = 0L; l < token_idx - 1; ++l) {
           d_result[l] = d_result[l + 1];  // shift left
         }
         d_result[token_idx - 1] = token;
@@ -120,50 +118,45 @@ struct token_reader_fn {
 /**
  * @brief Call regex to split each input string into tokens.
  *
- * This will also convert the `offsets` values from counts to offsets.
- *
  * @param d_strings Strings to split
  * @param d_prog Regex to evaluate against each string
  * @param direction Whether tokens are generated forwards or backwards.
  * @param max_tokens The maximum number of tokens for each split.
- * @param offsets The number of matches on input.
- *        The offsets for each token in each string on output.
+ * @param counts The number of tokens in each string
  * @param stream CUDA stream used for kernel launches.
  */
-rmm::device_uvector<string_index_pair> generate_tokens(column_device_view const& d_strings,
-                                                       reprog_device& d_prog,
-                                                       split_direction direction,
-                                                       size_type maxsplit,
-                                                       mutable_column_view& offsets,
-                                                       rmm::cuda_stream_view stream)
+std::pair<rmm::device_uvector<string_index_pair>, std::unique_ptr<column>> generate_tokens(
+  column_device_view const& d_strings,
+  reprog_device& d_prog,
+  split_direction direction,
+  size_type maxsplit,
+  column_view const& counts,
+  rmm::cuda_stream_view stream)
 {
   auto const strings_count = d_strings.size();
-
-  auto const max_tokens = maxsplit > 0 ? maxsplit : std::numeric_limits<size_type>::max();
-
-  auto const begin     = thrust::make_counting_iterator(0);
-  auto const end       = thrust::make_counting_iterator(strings_count);
-  auto const d_offsets = offsets.data<size_type>();
+  auto const max_tokens    = maxsplit > 0 ? maxsplit : std::numeric_limits<size_type>::max();
+  auto const d_counts      = counts.data<size_type>();
 
   // convert match counts to token offsets
-  auto map_fn = [d_strings, d_offsets, max_tokens] __device__(auto idx) {
-    return d_strings.is_null(idx) ? 0 : std::min(d_offsets[idx], max_tokens) + 1;
-  };
-  thrust::transform_exclusive_scan(
-    rmm::exec_policy(stream), begin, end + 1, d_offsets, map_fn, 0, thrust::plus{});
+  auto map_fn = cuda::proclaim_return_type<size_type>(
+    [d_strings, d_counts, max_tokens] __device__(auto idx) -> size_type {
+      return d_strings.is_null(idx) ? 0 : std::min(d_counts[idx], max_tokens) + 1;
+    });
 
-  // the last offset entry is the total number of tokens to be generated
-  auto const total_tokens = cudf::detail::get_value<size_type>(offsets, strings_count, stream);
+  auto const begin = cudf::detail::make_counting_transform_iterator(0, map_fn);
+  auto const end   = begin + strings_count;
 
-  rmm::device_uvector<string_index_pair> tokens(total_tokens, stream);
-  if (total_tokens == 0) { return tokens; }
-
-  launch_for_each_kernel(token_reader_fn{d_strings, direction, d_offsets, tokens.data()},
-                         d_prog,
-                         d_strings.size(),
-                         stream);
+  auto [offsets, total_tokens] = cudf::strings::detail::make_offsets_child_column(
+    begin, end, stream, rmm::mr::get_current_device_resource());
+  auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets->view());
 
-  return tokens;
+  // build a vector of tokens
+  rmm::device_uvector<string_index_pair> tokens(total_tokens, stream);
+  if (total_tokens > 0) {
+    auto tr_fn = token_reader_fn{d_strings, direction, d_offsets, tokens.data()};
+    launch_for_each_kernel(tr_fn, d_prog, d_strings.size(), stream);
+  }
+  return std::pair(std::move(tokens), std::move(offsets));
 }
 
 /**
@@ -176,13 +169,13 @@ rmm::device_uvector<string_index_pair> generate_tokens(column_device_view const&
 struct tokens_transform_fn {
   column_device_view const d_strings;
   string_index_pair const* d_tokens;
-  size_type const* d_token_offsets;
+  cudf::detail::input_offsetalator const d_token_offsets;
   size_type const column_index;
 
   __device__ string_index_pair operator()(size_type idx) const
   {
     auto const offset      = d_token_offsets[idx];
-    auto const token_count = d_token_offsets[idx + 1] - offset;
+    auto const token_count = static_cast<size_type>(d_token_offsets[idx + 1] - offset);
     return (column_index >= token_count) || d_strings.is_null(idx)
              ? string_index_pair{nullptr, 0}
              : d_tokens[offset + column_index];
@@ -212,13 +205,13 @@ std::unique_ptr<table> split_re(strings_column_view const& input,
   auto d_strings = column_device_view::create(input.parent(), stream);
 
   // count the number of delimiters matched in each string
-  auto offsets = count_matches(
-    *d_strings, *d_prog, strings_count + 1, stream, rmm::mr::get_current_device_resource());
-  auto offsets_view = offsets->mutable_view();
-  auto d_offsets    = offsets_view.data<size_type>();
+  auto const counts = count_matches(
+    *d_strings, *d_prog, strings_count, stream, rmm::mr::get_current_device_resource());
 
   // get the split tokens from the input column; this also converts the counts into offsets
-  auto tokens = generate_tokens(*d_strings, *d_prog, direction, maxsplit, offsets_view, stream);
+  auto [tokens, offsets] =
+    generate_tokens(*d_strings, *d_prog, direction, maxsplit, counts->view(), stream);
+  auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets->view());
 
   // the output column count is the maximum number of tokens generated for any input string
   auto const columns_count = thrust::transform_reduce(
@@ -226,7 +219,7 @@ std::unique_ptr<table> split_re(strings_column_view const& input,
     thrust::make_counting_iterator(0),
     thrust::make_counting_iterator(strings_count),
     [d_offsets] __device__(auto const idx) -> size_type {
-      return d_offsets[idx + 1] - d_offsets[idx];
+      return static_cast<size_type>(d_offsets[idx + 1] - d_offsets[idx]);
     },
     0,
     thrust::maximum{});
@@ -243,10 +236,11 @@ std::unique_ptr<table> split_re(strings_column_view const& input,
   }
 
   // convert the tokens into multiple strings columns
+  auto d_tokens            = tokens.data();
   auto make_strings_lambda = [&](size_type column_index) {
     // returns appropriate token for each row/column
     auto indices_itr = cudf::detail::make_counting_transform_iterator(
-      0, tokens_transform_fn{*d_strings, tokens.data(), d_offsets, column_index});
+      0, tokens_transform_fn{*d_strings, d_tokens, d_offsets, column_index});
     return make_strings_column(indices_itr, indices_itr + strings_count, stream, mr);
   };
   // build a vector of columns
@@ -276,11 +270,14 @@ std::unique_ptr<column> split_record_re(strings_column_view const& input,
   auto d_strings = column_device_view::create(input.parent(), stream);
 
   // count the number of delimiters matched in each string
-  auto offsets      = count_matches(*d_strings, *d_prog, strings_count + 1, stream, mr);
-  auto offsets_view = offsets->mutable_view();
+  auto counts = count_matches(*d_strings, *d_prog, strings_count, stream, mr);
 
   // get the split tokens from the input column; this also converts the counts into offsets
-  auto tokens = generate_tokens(*d_strings, *d_prog, direction, maxsplit, offsets_view, stream);
+  auto [tokens, offsets] =
+    generate_tokens(*d_strings, *d_prog, direction, maxsplit, counts->view(), stream);
+  CUDF_EXPECTS(tokens.size() < static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
+               "Size of output exceeds the column size limit",
+               std::overflow_error);
 
   // convert the tokens into one big strings column
   auto strings_output = make_strings_column(tokens.begin(), tokens.end(), stream, mr);
diff --git a/cpp/src/strings/split/split_record.cu b/cpp/src/strings/split/split_record.cu
index 64061aba4fd..c9ed7b0ed26 100644
--- a/cpp/src/strings/split/split_record.cu
+++ b/cpp/src/strings/split/split_record.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -66,6 +66,9 @@ std::unique_ptr<column> split_record_fn(strings_column_view const& input,
 
   // builds the offsets and the vector of all tokens
   auto [offsets, tokens] = split_helper(input, tokenizer, stream, mr);
+  CUDF_EXPECTS(tokens.size() < static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
+               "Size of output exceeds the column size limit",
+               std::overflow_error);
 
   // build a strings column from the tokens
   auto strings_child = make_strings_column(tokens.begin(), tokens.end(), stream, mr);
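Note: the CUDF_EXPECTS guards added to split_record_fn and split_record_re close
out the change: token totals are now carried in 64 bits throughout, so both
record paths validate the total against the 32-bit size_type limit before
building the output column and throw std::overflow_error instead of silently
wrapping. The guard in isolation (a sketch; `check_column_size` is a
hypothetical wrapper around the exact expression used in the diff):

    #include <cudf/types.hpp>
    #include <cudf/utilities/error.hpp>

    #include <cstddef>
    #include <limits>
    #include <stdexcept>

    // Refuse to build a strings column whose row count cannot be represented
    // by cudf::size_type (int32_t).
    void check_column_size(std::size_t total_tokens)
    {
      CUDF_EXPECTS(
        total_tokens < static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
        "Size of output exceeds the column size limit",
        std::overflow_error);
    }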