diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 7b223c85c7a..8b6edd77476 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -508,17 +508,20 @@ std::vector segmented_null_count(bitmask_type const* bitmask, * validity of any/all elements of segments of an input null mask. * * @tparam OffsetIterator Random-access input iterator type. - * @param bitmask Null mask residing in device memory whose segments will be - * reduced into a new mask. - * @param first_bit_indices_begin Random-access input iterator to the beginning - * of a sequence of indices of the first bit in each segment (inclusive). - * @param first_bit_indices_end Random-access input iterator to the end of a - * sequence of indices of the first bit in each segment (inclusive). - * @param last_bit_indices_begin Random-access input iterator to the beginning - * of a sequence of indices of the last bit in each segment (exclusive). - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment - * must be valid for the reduced value to be valid. If `null_policy::EXCLUDE`, - * the reduction is valid if any element in the segment is valid. + * @param bitmask Null mask residing in device memory whose segments will be reduced into a new + * mask. + * @param first_bit_indices_begin Random-access input iterator to the beginning of a sequence of + * indices of the first bit in each segment (inclusive). + * @param first_bit_indices_end Random-access input iterator to the end of a sequence of indices of + * the first bit in each segment (inclusive). + * @param last_bit_indices_begin Random-access input iterator to the beginning of a sequence of + * indices of the last bit in each segment (exclusive). + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduction is valid if any element in + * the segment is valid. + * @param valid_initial_value Indicates whether a valid initial value was provided to the reduction. + * True indicates a valid initial value, false indicates a null initial value, and null indicates no + * initial value was provided. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned buffer's device memory. * @return A pair containing the reduced null mask and number of nulls. 
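Note (not part of the patch): the per-segment validity rule that `valid_initial_value` introduces can be restated as a small host-side predicate. This is an illustrative sketch only — the function name and plain `int` lengths are hypothetical — mirroring the `valid_if` lambdas changed in the following hunks.

#include <optional>

// Sketch (not library code): restates the validity logic of the valid_if
// lambdas in segmented_null_mask_reduction.
//   valid_initial_value == std::nullopt -> no initial value was provided
//   valid_initial_value == true         -> a valid initial value was provided
//   valid_initial_value == false        -> a null initial value was provided
bool segment_result_is_valid(int length,
                             int valid_count,
                             bool exclude_nulls,  // null_handling == null_policy::EXCLUDE
                             std::optional<bool> valid_initial_value)
{
  if (exclude_nulls) {
    // EXCLUDE: valid if any element is valid or a valid initial value exists.
    return valid_initial_value.value_or(false) || valid_count > 0;
  }
  // INCLUDE: every element must be valid; an empty segment is valid only when
  // a valid initial value was provided, and a null initial value makes the
  // segment result null.
  return valid_initial_value.value_or(length > 0) && valid_count == length;
}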
@@ -530,6 +533,7 @@ std::pair segmented_null_mask_reduction( OffsetIterator first_bit_indices_end, OffsetIterator last_bit_indices_begin, null_policy null_handling, + std::optional valid_initial_value, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -549,7 +553,9 @@ std::pair segmented_null_mask_reduction( return cudf::detail::valid_if( segment_length_iterator, segment_length_iterator + num_segments, - [] __device__(auto const& length) { return length > 0; }, + [valid_initial_value] __device__(auto const& length) { + return valid_initial_value.value_or(length > 0); + }, stream, mr); } @@ -567,11 +573,12 @@ std::pair segmented_null_mask_reduction( return cudf::detail::valid_if( length_and_valid_count, length_and_valid_count + num_segments, - [null_handling] __device__(auto const& length_and_valid_count) { + [null_handling, valid_initial_value] __device__(auto const& length_and_valid_count) { auto const length = thrust::get<0>(length_and_valid_count); auto const valid_count = thrust::get<1>(length_and_valid_count); - return (length > 0) and - ((null_handling == null_policy::EXCLUDE) ? valid_count > 0 : valid_count == length); + return (null_handling == null_policy::EXCLUDE) + ? (valid_initial_value.value_or(false) || valid_count > 0) + : (valid_initial_value.value_or(length > 0) && valid_count == length); }, stream, mr); diff --git a/cpp/include/cudf/detail/reduction.cuh b/cpp/include/cudf/detail/reduction.cuh index 879f01394cc..f1fe0d2e1a9 100644 --- a/cpp/include/cudf/detail/reduction.cuh +++ b/cpp/include/cudf/detail/reduction.cuh @@ -41,7 +41,8 @@ namespace detail { * @param[in] d_in the begin iterator * @param[in] num_items the number of items * @param[in] op the reduction operator - * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
+ * @param[in] init Optional initial value of the reduction + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned scalar's device * memory * @returns Output scalar in device memory @@ -57,13 +58,14 @@ template ()>* = nullptr> std::unique_ptr reduce(InputIterator d_in, cudf::size_type num_items, - op::simple_op sop, + op::simple_op op, + std::optional init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto binary_op = sop.get_binary_op(); - auto identity = sop.template get_identity(); - auto dev_result = rmm::device_scalar{identity, stream, mr}; + auto const binary_op = op.get_binary_op(); + auto const initial_value = init.value_or(op.template get_identity()); + auto dev_result = rmm::device_scalar{initial_value, stream, mr}; // Allocate temporary storage rmm::device_buffer d_temp_storage; @@ -74,7 +76,7 @@ std::unique_ptr reduce(InputIterator d_in, dev_result.data(), num_items, binary_op, - identity, + initial_value, stream.value()); d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; @@ -85,7 +87,7 @@ std::unique_ptr reduce(InputIterator d_in, dev_result.data(), num_items, binary_op, - identity, + initial_value, stream.value()); // only for string_view, data is copied @@ -99,7 +101,8 @@ template ()>* = nullptr> std::unique_ptr reduce(InputIterator d_in, cudf::size_type num_items, - op::simple_op sop, + op::simple_op op, + std::optional init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -115,13 +118,14 @@ template >* = nullptr> std::unique_ptr reduce(InputIterator d_in, cudf::size_type num_items, - op::simple_op sop, + op::simple_op op, + std::optional init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto binary_op = sop.get_binary_op(); - auto identity = sop.template get_identity(); - auto dev_result = rmm::device_scalar{identity, stream}; + auto const binary_op = op.get_binary_op(); + auto const initial_value = init.value_or(op.template get_identity()); + auto dev_result = rmm::device_scalar{initial_value, stream}; // Allocate temporary storage rmm::device_buffer d_temp_storage; @@ -132,7 +136,7 @@ std::unique_ptr reduce(InputIterator d_in, dev_result.data(), num_items, binary_op, - identity, + initial_value, stream.value()); d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; @@ -143,7 +147,7 @@ std::unique_ptr reduce(InputIterator d_in, dev_result.data(), num_items, binary_op, - identity, + initial_value, stream.value()); using ScalarType = cudf::scalar_type_t; @@ -154,13 +158,14 @@ std::unique_ptr reduce(InputIterator d_in, /** * @brief compute reduction by the compound operator (reduce and transform) * - * @param[in] d_in the begin iterator - * @param[in] num_items the number of items - * @param[in] op the reduction operator - * @param[in] valid_count the intermediate operator argument 1 - * @param[in] ddof the intermediate operator argument 2 - * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
- * @param[in] mr Device memory resource used to allocate the returned scalar's device + * @param[in] d_in the begin iterator + * @param[in] num_items the number of items + * @param[in] op the reduction operator + * @param[in] valid_count Number of valid items + * @param[in] ddof Delta degrees of freedom used for standard deviation and variance + * @param[in] init Optional initial value of the reduction + * @param[in] stream CUDA stream used for device memory operations and kernel launches + * @param[in] mr Device memory resource used to allocate the returned scalar's device * memory * @returns Output scalar in device memory * @@ -178,15 +183,16 @@ template ::type> std::unique_ptr reduce(InputIterator d_in, cudf::size_type num_items, - op::compound_op cop, + op::compound_op op, cudf::size_type valid_count, cudf::size_type ddof, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto binary_op = cop.get_binary_op(); - IntermediateType identity = cop.template get_identity(); - rmm::device_scalar intermediate_result{identity, stream}; + auto const binary_op = op.get_binary_op(); + auto const initial_value = op.template get_identity(); + + rmm::device_scalar intermediate_result{initial_value, stream}; // Allocate temporary storage rmm::device_buffer d_temp_storage; @@ -197,7 +203,7 @@ std::unique_ptr reduce(InputIterator d_in, intermediate_result.data(), num_items, binary_op, - identity, + initial_value, stream.value()); d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; @@ -208,7 +214,7 @@ std::unique_ptr reduce(InputIterator d_in, intermediate_result.data(), num_items, binary_op, - identity, + initial_value, stream.value()); // compute the result value from intermediate value in device @@ -217,8 +223,8 @@ std::unique_ptr reduce(InputIterator d_in, thrust::for_each_n(rmm::exec_policy(stream), intermediate_result.data(), 1, - [dres = result->data(), cop, valid_count, ddof] __device__(auto i) { - *dres = cop.template compute_result(i, valid_count, ddof); + [dres = result->data(), op, valid_count, ddof] __device__(auto i) { + *dres = op.template compute_result(i, valid_count, ddof); }); return std::unique_ptr(result); } @@ -240,7 +246,8 @@ std::unique_ptr reduce(InputIterator d_in, * @param[out] d_out the begin iterator to output * @param[in] binary_op the reduction operator * @param[in] identity the identity element of the reduction operator - * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
+ * @param[in] initial_value Initial value of the reduction + * @param[in] stream CUDA stream used for device memory operations and kernel launches * */ template (std::distance(d_offset_begin, d_offset_end)) - 1; + auto const num_segments = static_cast(std::distance(d_offset_begin, d_offset_end)) - 1; // Allocate temporary storage rmm::device_buffer d_temp_storage; @@ -271,7 +278,7 @@ void segmented_reduce(InputIterator d_in, d_offset_begin, d_offset_begin + 1, binary_op, - identity, + initial_value, stream.value()); d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream}; @@ -284,7 +291,7 @@ void segmented_reduce(InputIterator d_in, d_offset_begin, d_offset_begin + 1, binary_op, - identity, + initial_value, stream.value()); } diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp index 4ec4a02cd10..58cf2dc84e7 100644 --- a/cpp/include/cudf/detail/reduction_functions.hpp +++ b/cpp/include/cudf/detail/reduction_functions.hpp @@ -35,13 +35,15 @@ namespace reduction { * * @param col input column to compute sum * @param output_dtype data type of return type and typecast elements of input column - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param init initial value of the sum + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Sum as scalar of type `output_dtype`. + * @return Sum as scalar of type `output_dtype` */ std::unique_ptr sum( column_view const& col, data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -52,15 +54,17 @@ std::unique_ptr sum( * * @throw cudf::logic_error if input column type is convertible to `output_dtype` * - * @param col input column to compute minimum. + * @param col input column to compute minimum * @param output_dtype data type of return type and typecast elements of input column - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param init initial value of the minimum + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Minimum element as scalar of type `output_dtype`. + * @return Minimum element as scalar of type `output_dtype` */ std::unique_ptr min( column_view const& col, data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -71,15 +75,17 @@ std::unique_ptr min( * * @throw cudf::logic_error if input column type is convertible to `output_dtype` * - * @param col input column to compute maximum. + * @param col input column to compute maximum * @param output_dtype data type of return type and typecast elements of input column - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param init initial value of the maximum + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Maximum element as scalar of type `output_dtype`. 
+ * @return Maximum element as scalar of type `output_dtype` */ std::unique_ptr max( column_view const& col, data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -91,15 +97,17 @@ std::unique_ptr max( * @throw cudf::logic_error if input column type is not convertible to bool * @throw cudf::logic_error if `output_dtype` is not bool * - * @param col input column to compute any_of. + * @param col input column to compute any * @param output_dtype data type of return type and typecast elements of input column - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param init initial value of the any + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @return bool scalar if any of elements is true when typecasted to bool */ std::unique_ptr any( column_view const& col, data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -111,15 +119,17 @@ std::unique_ptr any( * @throw cudf::logic_error if input column type is not convertible to bool * @throw cudf::logic_error if `output_dtype` is not bool * - * @param col input column to compute all_of. + * @param col input column to compute all * @param output_dtype data type of return type and typecast elements of input column - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param init initial value of the all + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @return bool scalar if all of elements is true when typecasted to bool */ std::unique_ptr all( column_view const& col, data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -131,15 +141,17 @@ std::unique_ptr all( * @throw cudf::logic_error if input column type is not convertible to `output_dtype` * @throw cudf::logic_error if `output_dtype` is not an arithmetic type * - * @param col input column to compute product. + * @param col input column to compute product * @param output_dtype data type of return type and typecast elements of input column - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param init initial value of the product + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Product as scalar of type `output_dtype`. + * @return Product as scalar of type `output_dtype` */ std::unique_ptr product( column_view const& col, data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -151,11 +163,11 @@ std::unique_ptr product( * @throw cudf::logic_error if input column type is not convertible to `output_dtype` * @throw cudf::logic_error if `output_dtype` is not an arithmetic type * - * @param col input column to compute sum of squares. 
+ * @param col input column to compute sum of squares * @param output_dtype data type of return type and typecast elements of input column - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory - * @return Sum of squares as scalar of type `output_dtype`. + * @return Sum of squares as scalar of type `output_dtype` */ std::unique_ptr sum_of_squares( column_view const& col, @@ -171,11 +183,11 @@ std::unique_ptr sum_of_squares( * @throw cudf::logic_error if input column type is not arithmetic type * @throw cudf::logic_error if `output_dtype` is not floating point type * - * @param col input column to compute mean. - * @param output_dtype data type of return type and typecast elements of input column. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory. - * @return Mean as scalar of type `output_dtype`. + * @param col input column to compute mean + * @param output_dtype data type of return type and typecast elements of input column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Mean as scalar of type `output_dtype` */ std::unique_ptr mean( column_view const& col, @@ -191,13 +203,13 @@ std::unique_ptr mean( * @throw cudf::logic_error if input column type is not arithmetic type * @throw cudf::logic_error if `output_dtype` is not floating point type * - * @param col input column to compute variance. - * @param output_dtype data type of return type and typecast elements of input column. + * @param col input column to compute variance + * @param output_dtype data type of return type and typecast elements of input column * @param ddof Delta degrees of freedom. The divisor used is N - ddof, where N represents the number * of elements. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory. - * @return Variance as scalar of type `output_dtype`. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Variance as scalar of type `output_dtype` */ std::unique_ptr variance( column_view const& col, @@ -214,13 +226,13 @@ std::unique_ptr variance( * @throw cudf::logic_error if input column type is not arithmetic type * @throw cudf::logic_error if `output_dtype` is not floating point type * - * @param col input column to compute standard deviation. - * @param output_dtype data type of return type and typecast elements of input column. + * @param col input column to compute standard deviation + * @param output_dtype data type of return type and typecast elements of input column * @param ddof Delta degrees of freedom. The divisor used is N - ddof, where N represents the number * of elements. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory. - * @return Standard deviation as scalar of type `output_dtype`. 
+ * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Standard deviation as scalar of type `output_dtype` */ std::unique_ptr standard_deviation( column_view const& col, @@ -246,11 +258,11 @@ std::unique_ptr standard_deviation( * number of valid * elements in the input column if `null_handling` is `null_policy::EXCLUDE`, * else `col.size()`. * - * @param col input column to get nth element from. + * @param col input column to get nth element from * @param n index of element to get - * @param null_handling Indicates if null values will be counted while indexing. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory. + * @param null_handling Indicates if null values will be counted while indexing + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory * @return nth element as scalar */ std::unique_ptr nth_element( @@ -264,7 +276,7 @@ std::unique_ptr nth_element( * @brief Collect input column into a (list) scalar * * @param col input column to collect from - * @param null_handling Indicates if null values will be counted while collecting. + * @param null_handling Indicates if null values will be counted while collecting * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @return collected list as scalar @@ -292,10 +304,10 @@ std::unique_ptr merge_lists( * @brief Collect input column into a (list) scalar without duplicated elements * * @param col input column to collect from - * @param null_handling Indicates if null values will be counted while collecting. - * @param nulls_equal Indicates if null values will be considered as equal values. - * @param nans_equal Indicates if nan values will be considered as equal values. - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param null_handling Indicates if null values will be counted while collecting + * @param nulls_equal Indicates if null values will be considered as equal values + * @param nans_equal Indicates if nan values will be considered as equal values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @return collected list with unique elements as scalar */ @@ -311,8 +323,8 @@ std::unique_ptr collect_set( * @brief Merge a bunch of list scalars into single list scalar then drop duplicated elements * * @param col input list column representing numbers of list scalars to be merged - * @param nulls_equal Indicates if null values will be considered as equal values. - * @param nans_equal Indicates if nan values will be considered as equal values. 
+ * @param nulls_equal Indicates if null values will be considered as equal values + * @param nans_equal Indicates if nan values will be considered as equal values * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @return collected list with unique elements as scalar @@ -332,21 +344,23 @@ std::unique_ptr merge_sets( * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. * - * @param col Input column to compute sum. - * @param offsets Indices to identify segment boundaries. - * @param output_dtype Data type of return type and typecast elements of input column. - * @param null_handling If `INCLUDE`, the reduction is valid if all elements in - * a segment are valid, otherwise null. If `EXCLUDE`, the reduction is valid if - * any element in the segment is valid, otherwise null. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return Sums of segments in type `output_dtype`. + * @param col Input column to compute sum + * @param offsets Indices to identify segment boundaries + * @param output_dtype Data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Initial value of each sum + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Sums of segments in type `output_dtype` */ std::unique_ptr segmented_sum( column_view const& col, device_span offsets, data_type const output_dtype, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -358,21 +372,23 @@ std::unique_ptr segmented_sum( * @throw cudf::logic_error if input column type is not convertible to `output_dtype`. * @throw cudf::logic_error if `output_dtype` is not an arithmetic type. * - * @param col Input column to compute product. - * @param offsets Indices to identify segment boundaries. - * @param output_dtype data type of return type and typecast elements of input column. - * @param null_handling If `INCLUDE`, the reduction is valid if all elements in - * a segment are valid, otherwise null. If `EXCLUDE`, the reduction is valid if - * any element in the segment is valid, otherwise null. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory. - * @return Product as scalar of type `output_dtype`. + * @param col Input column to compute product + * @param offsets Indices to identify segment boundaries + * @param output_dtype data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. 
+ * @param init Initial value of each product + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Product as scalar of type `output_dtype` */ std::unique_ptr segmented_product( column_view const& col, device_span offsets, data_type const output_dtype, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -383,21 +399,23 @@ std::unique_ptr segmented_product( * * @throw cudf::logic_error if input column type is convertible to `output_dtype`. * - * @param col Input column to compute minimum. - * @param offsets Indices to identify segment boundaries. - * @param output_dtype Data type of return type and typecast elements of input column. - * @param null_handling If `INCLUDE`, the reduction is valid if all elements in - * a segment are valid, otherwise null. If `EXCLUDE`, the reduction is valid if - * any element in the segment is valid, otherwise null. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory. - * @return Minimums of segments in type `output_dtype`. + * @param col Input column to compute minimum + * @param offsets Indices to identify segment boundaries + * @param output_dtype Data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Initial value of each minimum + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Minimums of segments in type `output_dtype` */ std::unique_ptr segmented_min( column_view const& col, device_span offsets, data_type const output_dtype, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -408,21 +426,23 @@ std::unique_ptr segmented_min( * * @throw cudf::logic_error if input column type is convertible to `output_dtype`. * - * @param col Input column to compute maximum. - * @param offsets Indices to identify segment boundaries. - * @param output_dtype Data type of return type and typecast elements of input column. - * @param null_handling If `INCLUDE`, the reduction is valid if all elements in - * a segment are valid, otherwise null. If `EXCLUDE`, the reduction is valid if - * any element in the segment is valid, otherwise null. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory. - * @return Maximums of segments in type `output_dtype`. + * @param col Input column to compute maximum + * @param offsets Indices to identify segment boundaries + * @param output_dtype Data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. 
If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Initial value of each maximum + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Maximums of segments in type `output_dtype` */ std::unique_ptr segmented_max( column_view const& col, device_span offsets, data_type const output_dtype, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -434,21 +454,23 @@ std::unique_ptr segmented_max( * @throw cudf::logic_error if input column type is not convertible to bool. * @throw cudf::logic_error if `output_dtype` is not bool8. * - * @param col Input column to compute any_of. - * @param offsets Indices to identify segment boundaries. - * @param output_dtype Data type of return type and typecast elements of input column. - * @param null_handling If `INCLUDE`, the reduction is valid if all elements in - * a segment are valid, otherwise null. If `EXCLUDE`, the reduction is valid if - * any element in the segment is valid, otherwise null. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory. - * @return Column of bool8 for the results of the segments. + * @param col Input column to compute any + * @param offsets Indices to identify segment boundaries + * @param output_dtype Data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Initial value of each any + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Column of bool8 for the results of the segments */ std::unique_ptr segmented_any( column_view const& col, device_span offsets, data_type const output_dtype, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); @@ -460,21 +482,23 @@ std::unique_ptr segmented_any( * @throw cudf::logic_error if input column type is not convertible to bool. * @throw cudf::logic_error if `output_dtype` is not bool8. * - * @param col Input column to compute all_of. - * @param offsets Indices to identify segment boundaries. - * @param output_dtype Data type of return type and typecast elements of input column. - * @param null_handling If `INCLUDE`, the reduction is valid if all elements in - * a segment are valid, otherwise null. If `EXCLUDE`, the reduction is valid if - * any element in the segment is valid, otherwise null. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory. - * @return Column of bool8 for the results of the segments. 
+ * @param col Input column to compute all + * @param offsets Indices to identify segment boundaries + * @param output_dtype Data type of return type and typecast elements of input column + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Initial value of each all + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return Column of bool8 for the results of the segments */ std::unique_ptr segmented_all( column_view const& col, device_span offsets, data_type const output_dtype, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index bf1246aaad8..6c9178bcfbd 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -72,6 +72,24 @@ std::unique_ptr reduce( data_type output_dtype, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Computes the reduction of the values in all rows of a column with an initial value. Only + * SUM, PRODUCT, MIN, MAX, ANY, and ALL aggregations are supported. + * + * @param col Input column view + * @param agg Aggregation operator applied by the reduction + * @param output_dtype The computation and output precision + * @param init The initial value of the reduction + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @returns Output scalar with reduce result + */ +std::unique_ptr reduce( + column_view const& col, + std::unique_ptr const& agg, + data_type output_dtype, + std::optional> init, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Compute reduction of each segment in the input column * @@ -103,14 +121,13 @@ std::unique_ptr reduce( * output type is not bool8. * * @param segmented_values Column view of segmented inputs - * @param offsets Each segment's offset of @p segmented_values. A list of offsets - * with size `num_segments + 1`. The size of `i`th segment is `offsets[i+1] - - * offsets[i]`. + * @param offsets Each segment's offset of @p segmented_values. A list of offsets with size + * `num_segments + 1`. The size of `i`th segment is `offsets[i+1] - offsets[i]`. * @param agg Aggregation operator applied by the reduction * @param output_dtype The output precision - * @param null_handling If `INCLUDE`, the reduction is valid if all elements in - * a segment are valid, otherwise null. If `EXCLUDE`, the reduction is valid if - * any element in the segment is valid, otherwise null. + * @param null_handling If `INCLUDE`, the reduction is valid if all elements in a segment are valid, + * otherwise null. If `EXCLUDE`, the reduction is valid if any element in the segment is valid, + * otherwise null. 
* @param mr Device memory resource used to allocate the returned scalar's device memory * @returns Output column with results of segmented reduction */ @@ -122,6 +139,31 @@ std::unique_ptr segmented_reduce( null_policy null_handling, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Compute reduction of each segment in the input column with an initial value. Only SUM, + * PRODUCT, MIN, MAX, ANY, and ALL aggregations are supported. + * + * @param segmented_values Column view of segmented inputs + * @param offsets Each segment's offset of @p segmented_values. A list of offsets with size + * `num_segments + 1`. The size of `i`th segment is `offsets[i+1] - offsets[i]`. + * @param agg Aggregation operator applied by the reduction + * @param output_dtype The output precision + * @param null_handling If `INCLUDE`, the reduction is valid if all elements in a segment are valid, + * otherwise null. If `EXCLUDE`, the reduction is valid if any element in the segment is valid, + * otherwise null. + * @param init The initial value of the reduction + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @returns Output column with results of segmented reduction. + */ +std::unique_ptr segmented_reduce( + column_view const& segmented_values, + device_span offsets, + segmented_reduce_aggregation const& agg, + data_type output_dtype, + null_policy null_handling, + std::optional> init, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Computes the scan of a column. * @@ -132,11 +174,10 @@ std::unique_ptr segmented_reduce( * * @param[in] input The input column view for the scan * @param[in] agg unique_ptr to aggregation operator applied by the scan - * @param[in] inclusive The flag for applying an inclusive scan if - * scan_type::INCLUSIVE, an exclusive scan if scan_type::EXCLUSIVE. - * @param[in] null_handling Exclude null values when computing the result if - * null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE. - * Any operation with a null results in a null. + * @param[in] inclusive The flag for applying an inclusive scan if scan_type::INCLUSIVE, an + * exclusive scan if scan_type::EXCLUSIVE. + * @param[in] null_handling Exclude null values when computing the result if null_policy::EXCLUDE. + * Include nulls if null_policy::INCLUDE. Any operation with a null results in a null. * @param[in] mr Device memory resource used to allocate the returned scalar's device memory * @returns Scanned output column */ @@ -153,8 +194,8 @@ std::unique_ptr scan( * * @param col column to compute minmax * @param mr Device memory resource used to allocate the returned column's device memory - * @return A std::pair of scalars with the first scalar being the minimum value - * and the second scalar being the maximum value of the input column. + * @return A std::pair of scalars with the first scalar being the minimum value and the second + * scalar being the maximum value of the input column. 
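Usage sketch (not part of the patch) for the two new public overloads declared above. It assumes the template arguments stripped from the declarations are `reduce_aggregation` / `segmented_reduce_aggregation` and that `init` is a `std::optional<std::reference_wrapper<scalar const>>`; the INT32 column type and the initial values 100 and 0 are hypothetical, and the column type must match the init scalar's type per the new checks in detail::reduce / detail::segmented_reduce.

#include <cudf/aggregation.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/reduction.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

// Full-column SUM seeded with an initial value (column assumed to be INT32,
// matching the init scalar as required by the new type check).
std::unique_ptr<cudf::scalar> sum_with_init(cudf::column_view const& col)
{
  auto const init = cudf::make_fixed_width_scalar<int32_t>(100);
  auto const agg  = cudf::make_sum_aggregation<cudf::reduce_aggregation>();
  return cudf::reduce(col, agg, cudf::data_type{cudf::type_id::INT32}, *init);
}

// Per-segment MAX seeded with an initial value; null rows are excluded.
std::unique_ptr<cudf::column> segmented_max_with_init(
  cudf::column_view const& values, cudf::device_span<cudf::size_type const> offsets)
{
  auto const init = cudf::make_fixed_width_scalar<int32_t>(0);
  auto const agg  = cudf::make_max_aggregation<cudf::segmented_reduce_aggregation>();
  return cudf::segmented_reduce(values,
                                offsets,
                                *agg,
                                cudf::data_type{cudf::type_id::INT32},
                                cudf::null_policy::EXCLUDE,
                                *init);
}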
*/ std::pair, std::unique_ptr> minmax( column_view const& col, diff --git a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu index e9975b4e751..207680b9613 100644 --- a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu +++ b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu @@ -67,6 +67,7 @@ std::unique_ptr apply_boolean_mask(lists_column_view const& input, boolean_mask_sliced_offsets, offset_data_type, null_policy::EXCLUDE, + std::nullopt, stream); auto const d_sizes = column_device_view::create(*sizes, stream); auto const sizes_begin = cudf::detail::make_null_replacement_iterator(*d_sizes, offset_type{0}); diff --git a/cpp/src/reductions/all.cu b/cpp/src/reductions/all.cu index 8e9becb96ec..185e14b6e2f 100644 --- a/cpp/src/reductions/all.cu +++ b/cpp/src/reductions/all.cu @@ -82,6 +82,7 @@ struct all_fn { std::unique_ptr all(column_view const& col, cudf::data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -97,6 +98,7 @@ std::unique_ptr all(column_view const& col, col.type(), simple::detail::bool_result_element_dispatcher{}, col, + init, stream, mr); } diff --git a/cpp/src/reductions/any.cu b/cpp/src/reductions/any.cu index 0057fb3d111..871672e5c03 100644 --- a/cpp/src/reductions/any.cu +++ b/cpp/src/reductions/any.cu @@ -82,6 +82,7 @@ struct any_fn { std::unique_ptr any(column_view const& col, cudf::data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -97,6 +98,7 @@ std::unique_ptr any(column_view const& col, col.type(), simple::detail::bool_result_element_dispatcher{}, col, + init, stream, mr); } diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh index 05445e7eb62..f901ebd2c24 100644 --- a/cpp/src/reductions/compound.cuh +++ b/cpp/src/reductions/compound.cuh @@ -36,10 +36,10 @@ namespace detail { * @tparam Op the compound operator derived from `cudf::reduction::op::compound_op` * * @param col input column view - * @param output_dtype data type of return type and typecast elements of input column. + * @param output_dtype data type of return type and typecast elements of input column * @param ddof Delta degrees of freedom used for standard deviation and variance. The divisor used * is N - ddof, where N represents the number of elements. - * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Output scalar in device memory */ @@ -50,7 +50,7 @@ std::unique_ptr compound_reduction(column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - cudf::size_type valid_count = col.size() - col.null_count(); + auto const valid_count = col.size() - col.null_count(); // reduction by iterator auto dcol = cudf::column_device_view::create(col, stream); diff --git a/cpp/src/reductions/max.cu b/cpp/src/reductions/max.cu index 4adf35414dd..b57896e5fc0 100644 --- a/cpp/src/reductions/max.cu +++ b/cpp/src/reductions/max.cu @@ -25,6 +25,7 @@ namespace reduction { std::unique_ptr max(column_view const& col, cudf::data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -38,6 +39,7 @@ std::unique_ptr max(column_view const& col, dispatch_type, simple::detail::same_element_type_dispatcher{}, col, + init, stream, mr); } diff --git a/cpp/src/reductions/min.cu b/cpp/src/reductions/min.cu index ac9bdfe9cdc..ed16cec5ffd 100644 --- a/cpp/src/reductions/min.cu +++ b/cpp/src/reductions/min.cu @@ -23,6 +23,7 @@ namespace reduction { std::unique_ptr min(column_view const& col, data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -36,6 +37,7 @@ std::unique_ptr min(column_view const& col, dispatch_type, simple::detail::same_element_type_dispatcher{}, col, + init, stream, mr); } diff --git a/cpp/src/reductions/product.cu b/cpp/src/reductions/product.cu index 5caf498712a..39e031f69d1 100644 --- a/cpp/src/reductions/product.cu +++ b/cpp/src/reductions/product.cu @@ -25,6 +25,7 @@ namespace reduction { std::unique_ptr product(column_view const& col, cudf::data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -33,6 +34,7 @@ std::unique_ptr product(column_view const& col, simple::detail::element_type_dispatcher{}, col, output_dtype, + init, stream, mr); } diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index b4ccddbd2e2..523865e0df0 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -35,14 +35,16 @@ namespace detail { struct reduce_dispatch_functor { column_view const col; data_type output_dtype; + std::optional> init; rmm::mr::device_memory_resource* mr; rmm::cuda_stream_view stream; reduce_dispatch_functor(column_view const& col, data_type output_dtype, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) - : col(col), output_dtype(output_dtype), mr(mr), stream(stream) + : col(col), output_dtype(output_dtype), init(init), mr(mr), stream(stream) { } @@ -50,24 +52,23 @@ struct reduce_dispatch_functor { std::unique_ptr operator()(std::unique_ptr const& agg) { switch (k) { - case aggregation::SUM: return reduction::sum(col, output_dtype, stream, mr); break; - case aggregation::PRODUCT: return reduction::product(col, output_dtype, stream, mr); break; - case aggregation::MIN: return reduction::min(col, output_dtype, stream, mr); break; - case aggregation::MAX: return reduction::max(col, output_dtype, stream, mr); break; - case aggregation::ANY: return reduction::any(col, output_dtype, stream, mr); break; - case aggregation::ALL: return reduction::all(col, output_dtype, stream, mr); break; + case 
aggregation::SUM: return reduction::sum(col, output_dtype, init, stream, mr); + case aggregation::PRODUCT: return reduction::product(col, output_dtype, init, stream, mr); + case aggregation::MIN: return reduction::min(col, output_dtype, init, stream, mr); + case aggregation::MAX: return reduction::max(col, output_dtype, init, stream, mr); + case aggregation::ANY: return reduction::any(col, output_dtype, init, stream, mr); + case aggregation::ALL: return reduction::all(col, output_dtype, init, stream, mr); case aggregation::SUM_OF_SQUARES: return reduction::sum_of_squares(col, output_dtype, stream, mr); - break; - case aggregation::MEAN: return reduction::mean(col, output_dtype, stream, mr); break; + case aggregation::MEAN: return reduction::mean(col, output_dtype, stream, mr); case aggregation::VARIANCE: { auto var_agg = dynamic_cast(agg.get()); return reduction::variance(col, output_dtype, var_agg->_ddof, stream, mr); - } break; + } case aggregation::STD: { auto var_agg = dynamic_cast(agg.get()); return reduction::standard_deviation(col, output_dtype, var_agg->_ddof, stream, mr); - } break; + } case aggregation::MEDIAN: { auto sorted_indices = sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream); auto valid_sorted_indices = @@ -75,7 +76,7 @@ struct reduce_dispatch_functor { auto col_ptr = quantile(col, {0.5}, interpolation::LINEAR, valid_sorted_indices, true, stream); return get_element(*col_ptr, 0, stream, mr); - } break; + } case aggregation::QUANTILE: { auto quantile_agg = dynamic_cast(agg.get()); CUDF_EXPECTS(quantile_agg->_quantiles.size() == 1, @@ -91,7 +92,7 @@ struct reduce_dispatch_functor { true, stream); return get_element(*col_ptr, 0, stream, mr); - } break; + } case aggregation::NUNIQUE: { auto nunique_agg = dynamic_cast(agg.get()); return make_fixed_width_scalar( @@ -99,39 +100,39 @@ struct reduce_dispatch_functor { col, nunique_agg->_null_handling, nan_policy::NAN_IS_VALID, stream), stream, mr); - } break; + } case aggregation::NTH_ELEMENT: { auto nth_agg = dynamic_cast(agg.get()); return reduction::nth_element(col, nth_agg->_n, nth_agg->_null_handling, stream, mr); - } break; + } case aggregation::COLLECT_LIST: { auto col_agg = dynamic_cast(agg.get()); return reduction::collect_list(col, col_agg->_null_handling, stream, mr); - } break; + } case aggregation::COLLECT_SET: { auto col_agg = dynamic_cast(agg.get()); return reduction::collect_set( col, col_agg->_null_handling, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr); - } break; + } case aggregation::MERGE_LISTS: { return reduction::merge_lists(col, stream, mr); - } break; + } case aggregation::MERGE_SETS: { auto col_agg = dynamic_cast(agg.get()); return reduction::merge_sets(col, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr); - } break; + } case aggregation::TDIGEST: { CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT, "Tdigest aggregations expect output type to be STRUCT"); auto td_agg = dynamic_cast(agg.get()); return detail::tdigest::reduce_tdigest(col, td_agg->max_centroids, stream, mr); - } break; + } case aggregation::MERGE_TDIGEST: { CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT, "Tdigest aggregations expect output type to be STRUCT"); auto td_agg = dynamic_cast(agg.get()); return detail::tdigest::reduce_merge_tdigest(col, td_agg->max_centroids, stream, mr); - } break; + } default: CUDF_FAIL("Unsupported reduction operator"); } } @@ -141,9 +142,18 @@ std::unique_ptr reduce( column_view const& col, std::unique_ptr const& agg, data_type output_dtype, + std::optional> init, 
rmm::cuda_stream_view stream = cudf::default_stream_value, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { + CUDF_EXPECTS(!init.has_value() || col.type() == init.value().get().type(), + "column and initial value must be the same type"); + if (init.has_value() && !(agg->kind == aggregation::SUM || agg->kind == aggregation::PRODUCT || + agg->kind == aggregation::MIN || agg->kind == aggregation::MAX || + agg->kind == aggregation::ANY || agg->kind == aggregation::ALL)) { + CUDF_FAIL( + "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, and ALL aggregation types"); + } // Returns default scalar if input column is non-valid. In terms of nested columns, we need to // handcraft the default scalar with input column. if (col.size() <= col.null_count()) { @@ -166,7 +176,7 @@ std::unique_ptr reduce( } return aggregation_dispatcher( - agg->kind, reduce_dispatch_functor{col, output_dtype, stream, mr}, agg); + agg->kind, reduce_dispatch_functor{col, output_dtype, init, stream, mr}, agg); } } // namespace detail @@ -176,7 +186,16 @@ std::unique_ptr reduce(column_view const& col, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::reduce(col, agg, output_dtype, cudf::default_stream_value, mr); + return detail::reduce(col, agg, output_dtype, std::nullopt, cudf::default_stream_value, mr); } +std::unique_ptr reduce(column_view const& col, + std::unique_ptr const& agg, + data_type output_dtype, + std::optional> init, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::reduce(col, agg, output_dtype, init, cudf::default_stream_value, mr); +} } // namespace cudf diff --git a/cpp/src/reductions/segmented_all.cu b/cpp/src/reductions/segmented_all.cu index a04da1ac2fa..4536785fe82 100644 --- a/cpp/src/reductions/segmented_all.cu +++ b/cpp/src/reductions/segmented_all.cu @@ -21,12 +21,14 @@ namespace cudf { namespace reduction { -std::unique_ptr segmented_all(column_view const& col, - device_span offsets, - cudf::data_type const output_dtype, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr segmented_all( + column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(output_dtype == cudf::data_type(cudf::type_id::BOOL8), "segmented_all() operation requires output type `BOOL8`"); @@ -38,6 +40,7 @@ std::unique_ptr segmented_all(column_view const& col, col, offsets, null_handling, + init, stream, mr); } diff --git a/cpp/src/reductions/segmented_any.cu b/cpp/src/reductions/segmented_any.cu index ad44289175b..cc50eb1e1f4 100644 --- a/cpp/src/reductions/segmented_any.cu +++ b/cpp/src/reductions/segmented_any.cu @@ -21,12 +21,14 @@ namespace cudf { namespace reduction { -std::unique_ptr segmented_any(column_view const& col, - device_span offsets, - cudf::data_type const output_dtype, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr segmented_any( + column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(output_dtype == cudf::data_type(cudf::type_id::BOOL8), "segmented_any() operation requires output type `BOOL8`"); @@ -38,6 +40,7 @@ std::unique_ptr 
segmented_any(column_view const& col, col, offsets, null_handling, + init, stream, mr); } diff --git a/cpp/src/reductions/segmented_max.cu b/cpp/src/reductions/segmented_max.cu index 19896064343..494aff66797 100644 --- a/cpp/src/reductions/segmented_max.cu +++ b/cpp/src/reductions/segmented_max.cu @@ -21,12 +21,14 @@ namespace cudf { namespace reduction { -std::unique_ptr segmented_max(column_view const& col, - device_span offsets, - cudf::data_type const output_dtype, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr segmented_max( + column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(col.type() == output_dtype, "segmented_max() operation requires matching output type"); @@ -36,6 +38,7 @@ std::unique_ptr segmented_max(column_view const& col, col, offsets, null_handling, + init, stream, mr); } diff --git a/cpp/src/reductions/segmented_min.cu b/cpp/src/reductions/segmented_min.cu index 5c880f45bf8..dee6a989ad6 100644 --- a/cpp/src/reductions/segmented_min.cu +++ b/cpp/src/reductions/segmented_min.cu @@ -21,12 +21,14 @@ namespace cudf { namespace reduction { -std::unique_ptr segmented_min(column_view const& col, - device_span offsets, - data_type const output_dtype, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr segmented_min( + column_view const& col, + device_span offsets, + data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(col.type() == output_dtype, "segmented_min() operation requires matching output type"); @@ -36,6 +38,7 @@ std::unique_ptr segmented_min(column_view const& col, col, offsets, null_handling, + init, stream, mr); } diff --git a/cpp/src/reductions/segmented_product.cu b/cpp/src/reductions/segmented_product.cu index 1b852870820..25b31d117b3 100644 --- a/cpp/src/reductions/segmented_product.cu +++ b/cpp/src/reductions/segmented_product.cu @@ -21,12 +21,14 @@ namespace cudf { namespace reduction { -std::unique_ptr segmented_product(column_view const& col, - device_span offsets, - cudf::data_type const output_dtype, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr segmented_product( + column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher( col.type(), @@ -35,6 +37,7 @@ std::unique_ptr segmented_product(column_view const& col, offsets, output_dtype, null_handling, + init, stream, mr); } diff --git a/cpp/src/reductions/segmented_reductions.cpp b/cpp/src/reductions/segmented_reductions.cpp index c662650b144..d87644e7126 100644 --- a/cpp/src/reductions/segmented_reductions.cpp +++ b/cpp/src/reductions/segmented_reductions.cpp @@ -32,21 +32,35 @@ struct segmented_reduce_dispatch_functor { device_span offsets; data_type output_dtype; null_policy null_handling; - rmm::mr::device_memory_resource* mr; + std::optional> init; rmm::cuda_stream_view stream; + rmm::mr::device_memory_resource* mr; segmented_reduce_dispatch_functor(column_view const& segmented_values, device_span offsets, data_type output_dtype, null_policy 
null_handling, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) : col(segmented_values), offsets(offsets), output_dtype(output_dtype), null_handling(null_handling), - mr(mr), - stream(stream) + init(init), + stream(stream), + mr(mr) + { + } + + segmented_reduce_dispatch_functor(column_view const& segmented_values, + device_span offsets, + data_type output_dtype, + null_policy null_handling, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + : segmented_reduce_dispatch_functor( + segmented_values, offsets, output_dtype, null_handling, std::nullopt, stream, mr) { } @@ -55,17 +69,23 @@ struct segmented_reduce_dispatch_functor { { switch (k) { case segmented_reduce_aggregation::SUM: - return reduction::segmented_sum(col, offsets, output_dtype, null_handling, stream, mr); + return reduction::segmented_sum( + col, offsets, output_dtype, null_handling, init, stream, mr); case segmented_reduce_aggregation::PRODUCT: - return reduction::segmented_product(col, offsets, output_dtype, null_handling, stream, mr); + return reduction::segmented_product( + col, offsets, output_dtype, null_handling, init, stream, mr); case segmented_reduce_aggregation::MIN: - return reduction::segmented_min(col, offsets, output_dtype, null_handling, stream, mr); + return reduction::segmented_min( + col, offsets, output_dtype, null_handling, init, stream, mr); case segmented_reduce_aggregation::MAX: - return reduction::segmented_max(col, offsets, output_dtype, null_handling, stream, mr); + return reduction::segmented_max( + col, offsets, output_dtype, null_handling, init, stream, mr); case segmented_reduce_aggregation::ANY: - return reduction::segmented_any(col, offsets, output_dtype, null_handling, stream, mr); + return reduction::segmented_any( + col, offsets, output_dtype, null_handling, init, stream, mr); case segmented_reduce_aggregation::ALL: - return reduction::segmented_all(col, offsets, output_dtype, null_handling, stream, mr); + return reduction::segmented_all( + col, offsets, output_dtype, null_handling, init, stream, mr); default: CUDF_FAIL("Unsupported aggregation type."); // TODO: Add support for compound_ops @@ -78,15 +98,24 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, segmented_reduce_aggregation const& agg, data_type output_dtype, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS(!init.has_value() || segmented_values.type() == init.value().get().type(), + "column and initial value must be the same type"); + if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || + agg.kind == aggregation::MIN || agg.kind == aggregation::MAX || + agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) { + CUDF_FAIL( + "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, and ALL aggregation types"); + } CUDF_EXPECTS(offsets.size() > 0, "`offsets` should have at least 1 element."); return aggregation_dispatcher( agg.kind, segmented_reduce_dispatch_functor{ - segmented_values, offsets, output_dtype, null_handling, stream, mr}); + segmented_values, offsets, output_dtype, null_handling, init, stream, mr}); } } // namespace detail @@ -98,8 +127,33 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::segmented_reduce( - segmented_values, offsets, agg, output_dtype, null_handling, cudf::default_stream_value, 
mr); + return detail::segmented_reduce(segmented_values, + offsets, + agg, + output_dtype, + null_handling, + std::nullopt, + cudf::default_stream_value, + mr); +} + +std::unique_ptr segmented_reduce(column_view const& segmented_values, + device_span offsets, + segmented_reduce_aggregation const& agg, + data_type output_dtype, + null_policy null_handling, + std::optional> init, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::segmented_reduce(segmented_values, + offsets, + agg, + output_dtype, + null_handling, + init, + cudf::default_stream_value, + mr); } } // namespace cudf diff --git a/cpp/src/reductions/segmented_sum.cu b/cpp/src/reductions/segmented_sum.cu index f2deeddbcbb..4caaa727371 100644 --- a/cpp/src/reductions/segmented_sum.cu +++ b/cpp/src/reductions/segmented_sum.cu @@ -21,12 +21,14 @@ namespace cudf { namespace reduction { -std::unique_ptr segmented_sum(column_view const& col, - device_span offsets, - cudf::data_type const output_dtype, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr segmented_sum( + column_view const& col, + device_span offsets, + cudf::data_type const output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return cudf::type_dispatcher(col.type(), simple::detail::column_type_dispatcher{}, @@ -34,6 +36,7 @@ std::unique_ptr segmented_sum(column_view const& col, offsets, output_dtype, null_handling, + init, stream, mr); } diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index 231d814a376..76ed864a92d 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -50,12 +50,14 @@ namespace detail { * @tparam Op the operator of cudf::reduction::op:: * @param col Input column of data to reduce - * @param stream Used for device memory operations and kernel launches. 
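// Illustrative usage of the cudf::segmented_reduce overload added just above, which forwards an
// optional initial value into detail::segmented_reduce. This sketch is not taken from the PR's
// tests; the function name and the offsets/device_span plumbing are caller-side assumptions.
#include <cudf/aggregation.hpp>
#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/reduction.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <memory>

std::unique_ptr<cudf::column> segmented_sum_with_init_example()
{
  // Two segments, [1, 2, 3] and [4, 5], each seeded with an initial value of 10.
  cudf::test::fixed_width_column_wrapper<int32_t> values{1, 2, 3, 4, 5};
  cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets{0, 3, 5};
  cudf::column_view const offsets_view = offsets;
  auto const offsets_span = cudf::device_span<cudf::size_type const>{
    offsets_view.data<cudf::size_type>(), static_cast<std::size_t>(offsets_view.size())};

  auto const agg  = cudf::make_sum_aggregation<cudf::segmented_reduce_aggregation>();
  auto const init = cudf::make_fixed_width_scalar<int32_t>(10);

  // Expected output column: {16, 19} (segment sums 6 and 9, each plus the initial value).
  return cudf::segmented_reduce(values,
                                offsets_span,
                                *agg,
                                cudf::data_type{cudf::type_id::INT32},
                                cudf::null_policy::EXCLUDE,
                                *init);
}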
+ * @param init Optional initial value of the reduction + * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Output scalar in device memory */ template std::unique_ptr simple_reduction(column_view const& col, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -63,20 +65,32 @@ std::unique_ptr simple_reduction(column_view const& col, auto dcol = cudf::column_device_view::create(col, stream); auto simple_op = Op{}; + // Cast initial value + std::optional const initial_value = [&] { + if (init.has_value() && init.value().get().is_valid()) { + using ScalarType = cudf::scalar_type_t; + auto input_value = static_cast(&init.value().get())->value(stream); + return std::optional(static_cast(input_value)); + } else { + return std::optional(std::nullopt); + } + }(); + auto result = [&] { if (col.has_nulls()) { auto f = simple_op.template get_null_replacing_element_transformer(); auto it = thrust::make_transform_iterator(dcol->pair_begin(), f); - return cudf::reduction::detail::reduce(it, col.size(), simple_op, stream, mr); + return cudf::reduction::detail::reduce(it, col.size(), simple_op, initial_value, stream, mr); } else { auto f = simple_op.template get_element_transformer(); auto it = thrust::make_transform_iterator(dcol->begin(), f); - return cudf::reduction::detail::reduce(it, col.size(), simple_op, stream, mr); + return cudf::reduction::detail::reduce(it, col.size(), simple_op, initial_value, stream, mr); } }(); // set scalar is valid - result->set_valid_async(col.null_count() < col.size(), stream); + result->set_valid_async( + col.null_count() < col.size() && (!init.has_value() || init.value().get().is_valid()), stream); return result; } @@ -87,36 +101,51 @@ std::unique_ptr simple_reduction(column_view const& col, * @tparam Op The operator of cudf::reduction::op:: * * @param col Input column of data to reduce - * @param stream Used for device memory operations and kernel launches. 
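// A minimal standalone sketch of the initial-value casting pattern used in simple_reduction
// above, assuming a fixed-width ElementType; the helper name cast_initial_value is hypothetical
// and not part of cudf. A null or absent initial value yields std::nullopt so the caller can
// omit the seed (or fall back to the operator identity on the segmented path).
#include <cudf/scalar/scalar.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <functional>
#include <optional>

template <typename ElementType>
std::optional<ElementType> cast_initial_value(
  std::optional<std::reference_wrapper<cudf::scalar const>> init, rmm::cuda_stream_view stream)
{
  if (init.has_value() && init.value().get().is_valid(stream)) {
    // Reinterpret the type-erased scalar as the column's element type and copy its value to host.
    using ScalarType = cudf::scalar_type_t<ElementType>;
    auto const value = static_cast<ScalarType const*>(&init.value().get())->value(stream);
    return static_cast<ElementType>(value);
  }
  return std::nullopt;
}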
+ * @param init Optional initial value of the reduction + * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Output scalar in device memory */ template -std::unique_ptr fixed_point_reduction(column_view const& col, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr fixed_point_reduction( + column_view const& col, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { using Type = device_storage_type_t; auto dcol = cudf::column_device_view::create(col, stream); auto simple_op = Op{}; + // Cast initial value + std::optional const initial_value = [&] { + if (init.has_value() && init.value().get().is_valid()) { + using ScalarType = cudf::scalar_type_t; + return std::optional( + static_cast(&init.value().get())->value(stream)); + } else { + return std::optional(std::nullopt); + } + }(); + auto result = [&] { if (col.has_nulls()) { auto f = simple_op.template get_null_replacing_element_transformer(); auto it = thrust::make_transform_iterator(dcol->pair_begin(), f); - return cudf::reduction::detail::reduce(it, col.size(), simple_op, stream, mr); + return cudf::reduction::detail::reduce(it, col.size(), simple_op, initial_value, stream, mr); } else { auto f = simple_op.template get_element_transformer(); auto it = thrust::make_transform_iterator(dcol->begin(), f); - return cudf::reduction::detail::reduce(it, col.size(), simple_op, stream, mr); + return cudf::reduction::detail::reduce(it, col.size(), simple_op, initial_value, stream, mr); } }(); auto const scale = [&] { if (std::is_same_v) { auto const valid_count = static_cast(col.size() - col.null_count()); - return numeric::scale_type{col.type().scale() * valid_count}; + return numeric::scale_type{col.type().scale() * + (valid_count + (initial_value.has_value() ? 1 : 0))}; } else if (std::is_same_v) { return numeric::scale_type{col.type().scale() * 2}; } @@ -124,7 +153,11 @@ std::unique_ptr fixed_point_reduction(column_view const& col, }(); auto const val = static_cast*>(result.get()); - return cudf::make_fixed_point_scalar(val->value(stream), scale, stream, mr); + auto result_scalar = + cudf::make_fixed_point_scalar(val->value(stream), scale, stream, mr); + result_scalar->set_valid_async( + col.null_count() < col.size() && (!init.has_value() || init.value().get().is_valid()), stream); + return result_scalar; } /** @@ -135,15 +168,20 @@ std::unique_ptr fixed_point_reduction(column_view const& col, * @tparam Op The operator of cudf::reduction::op:: * * @param col Input dictionary column of data to reduce - * @param stream Used for device memory operations and kernel launches. 
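// Worked note on the PRODUCT scale adjustment above (explanatory, not code from this PR): each
// multiplicand contributes its scale, so a product over `valid_count` valid rows at scale 10^i
// carries scale 10^(i * valid_count); a valid initial value is one more multiplicand, hence
// `col.type().scale() * (valid_count + 1)`. For instance, the FixedPointReductionProduct
// expectation added later in this diff, scaled_integer{72, scale_type{i * 7}}, reflects six
// valid elements plus the initial value: seven factors of scale 10^i give scale_type{i * 7}.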
+ * @param init Optional initial value of the reduction + * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @return Output scalar in device memory */ template -std::unique_ptr dictionary_reduction(column_view const& col, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr dictionary_reduction( + column_view const& col, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { + if (init.has_value()) { CUDF_FAIL("Initial value not supported for dictionary reductions"); } + auto dcol = cudf::column_device_view::create(col, stream); auto simple_op = Op{}; @@ -152,11 +190,12 @@ std::unique_ptr dictionary_reduction(column_view const& col, auto p = cudf::dictionary::detail::make_dictionary_pair_iterator(*dcol, col.has_nulls()); auto it = thrust::make_transform_iterator(p, f); - return cudf::reduction::detail::reduce(it, col.size(), simple_op, stream, mr); + return cudf::reduction::detail::reduce(it, col.size(), simple_op, {}, stream, mr); }(); // set scalar is valid - result->set_valid_async(col.null_count() < col.size(), stream); + result->set_valid_async( + col.null_count() < col.size() && (!init.has_value() || init.value().get().is_valid()), stream); return result; } @@ -231,15 +270,17 @@ template struct bool_result_element_dispatcher { template >* = nullptr> std::unique_ptr operator()(column_view const& col, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return simple_reduction(col, stream, mr); + return simple_reduction(col, init, stream, mr); } template >* = nullptr> std::unique_ptr operator()(column_view const&, + std::optional>, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { @@ -288,9 +329,12 @@ struct same_element_type_dispatcher { (std::is_same_v || std::is_same_v)>* = nullptr> std::unique_ptr operator()(column_view const& input, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + if (init.has_value()) { CUDF_FAIL("Initial value not supported for struct reductions"); } + if (input.is_empty()) { return cudf::make_empty_scalar_like(input, stream, mr); } // We will do reduction to find the ARGMIN/ARGMAX index, then return the element at that index. 
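// Illustrative check (not from this PR's test suite): per the CUDF_FAIL branches added above,
// dictionary inputs (and struct ARGMIN/ARGMAX) reject an initial value, so a call like the one
// below is expected to throw cudf::logic_error. Column contents and types here are arbitrary.
#include <cudf/reduction.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <gtest/gtest.h>

void expect_dictionary_reduce_with_init_throws()
{
  cudf::test::dictionary_column_wrapper<int32_t> dict_col({1, 2, 2, 3, 1});
  auto const init = cudf::make_fixed_width_scalar<int32_t>(5);

  EXPECT_THROW(cudf::reduce(dict_col,
                            cudf::make_sum_aggregation<cudf::reduce_aggregation>(),
                            cudf::data_type{cudf::type_id::INT64},
                            *init),
               cudf::logic_error);
}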
@@ -309,14 +353,16 @@ struct same_element_type_dispatcher { std::enable_if_t() && !cudf::is_fixed_point() && !std::is_same_v>* = nullptr> std::unique_ptr operator()(column_view const& col, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { if (!cudf::is_dictionary(col.type())) { - return simple_reduction(col, stream, mr); + return simple_reduction(col, init, stream, mr); } auto index = simple_reduction( dictionary_column_view(col).get_indices_annotated(), + init, stream, rmm::mr::get_current_device_resource()); return resolve_key(dictionary_column_view(col).keys(), *index, stream, mr); @@ -324,14 +370,16 @@ struct same_element_type_dispatcher { template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return fixed_point_reduction(col, stream, mr); + return fixed_point_reduction(col, init, stream, mr); } template ()>* = nullptr> std::unique_ptr operator()(column_view const&, + std::optional>, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { @@ -357,12 +405,13 @@ struct element_type_dispatcher { std::enable_if_t>* = nullptr> std::unique_ptr reduce_numeric(column_view const& col, data_type const output_type, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { auto result = !cudf::is_dictionary(col.type()) - ? simple_reduction(col, stream, mr) - : dictionary_reduction(col, stream, mr); + ? simple_reduction(col, init, stream, mr) + : dictionary_reduction(col, init, stream, mr); if (output_type == result->type()) return result; // this will cast the result to the output_type @@ -379,12 +428,13 @@ struct element_type_dispatcher { template >* = nullptr> std::unique_ptr reduce_numeric(column_view const& col, data_type const output_type, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { auto result = !cudf::is_dictionary(col.type()) - ? simple_reduction(col, stream, mr) - : dictionary_reduction(col, stream, mr); + ? simple_reduction(col, init, stream, mr) + : dictionary_reduction(col, init, stream, mr); if (output_type == result->type()) return result; // this will cast the result to the output_type @@ -399,24 +449,25 @@ struct element_type_dispatcher { * @brief Called by the type-dispatcher to reduce the input column `col` using * the `Op` operation. * - * @tparam ElementType The input column type or key type. + * @tparam ElementType The input column type or key type * @param col Input column (must be numeric) * @param output_type Requested type of the scalar result - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory */ template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, data_type const output_type, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { if (output_type.id() == cudf::type_to_id()) return !cudf::is_dictionary(col.type()) - ? simple_reduction(col, stream, mr) - : dictionary_reduction(col, stream, mr); + ? 
simple_reduction(col, init, stream, mr) + : dictionary_reduction(col, init, stream, mr); // reduce and map to output type - return reduce_numeric(col, output_type, stream, mr); + return reduce_numeric(col, output_type, init, stream, mr); } /** @@ -425,12 +476,13 @@ struct element_type_dispatcher { template ()>* = nullptr> std::unique_ptr operator()(column_view const& col, data_type const output_type, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(output_type == col.type(), "Output type must be same as input column type."); - return fixed_point_reduction(col, stream, mr); + return fixed_point_reduction(col, init, stream, mr); } template ()>* = nullptr> std::unique_ptr operator()(column_view const&, data_type const, + std::optional> init, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { diff --git a/cpp/src/reductions/simple_segmented.cuh b/cpp/src/reductions/simple_segmented.cuh index d8479db1f09..7dde713e638 100644 --- a/cpp/src/reductions/simple_segmented.cuh +++ b/cpp/src/reductions/simple_segmented.cuh @@ -53,31 +53,44 @@ namespace detail { * @tparam ResultType the output data-type * @tparam Op the operator of cudf::reduction::op:: - * @param col Input column of data to reduce. - * @param offsets Indices to segment boundaries. - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment - * must be valid for the reduced value to be valid. If `null_policy::EXCLUDE`, - * the reduced value is valid if any element in the segment is valid. - * @param stream Used for device memory operations and kernel launches. + * @param col Input column of data to reduce + * @param offsets Indices to segment boundaries + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param init Optional initial value of the reduction + * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column in device memory */ template -std::unique_ptr simple_segmented_reduction(column_view const& col, - device_span offsets, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr simple_segmented_reduction( + column_view const& col, + device_span offsets, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { // TODO: Rewrites this function to accept a pair of iterators for start/end indices // to enable `2N` type offset input. 
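// Usage sketch for the element_type_dispatcher path shown above (before the simple_segmented.cuh
// hunk): the initial value is interpreted as the *input* column's element type, while output_dtype
// only controls the final cast performed in reduce_numeric. The call form matches the tests later
// in this diff; the wrapper column and expected value here are illustrative.
#include <cudf/aggregation.hpp>
#include <cudf/reduction.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/types.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <cstdint>

int64_t sum_int32_into_int64_with_init_example()
{
  cudf::test::fixed_width_column_wrapper<int32_t> col{1, 2, 3, 4};
  auto const init = cudf::make_fixed_width_scalar<int32_t>(10);  // same type as the input column

  auto const result = cudf::reduce(col,
                                   cudf::make_sum_aggregation<cudf::reduce_aggregation>(),
                                   cudf::data_type{cudf::type_id::INT64},
                                   *init);

  // 1 + 2 + 3 + 4 seeded with 10 -> 20, returned as an INT64 scalar.
  return static_cast<cudf::numeric_scalar<int64_t>*>(result.get())->value();
}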
// reduction by iterator - auto dcol = cudf::column_device_view::create(col, stream); - auto simple_op = Op{}; - size_type num_segments = offsets.size() - 1; - - auto binary_op = simple_op.get_binary_op(); - auto identity = simple_op.template get_identity(); + auto dcol = cudf::column_device_view::create(col, stream); + auto simple_op = Op{}; + auto const num_segments = offsets.size() - 1; + + auto const binary_op = simple_op.get_binary_op(); + + // Cast initial value + ResultType initial_value = [&] { + if (init.has_value() && init.value().get().is_valid()) { + using ScalarType = cudf::scalar_type_t; + auto input_value = static_cast(&init.value().get())->value(stream); + return static_cast(input_value); + } else { + return simple_op.template get_identity(); + } + }(); auto const result_type = cudf::is_fixed_point(col.type()) ? col.type() : data_type{type_to_id()}; @@ -90,27 +103,28 @@ std::unique_ptr simple_segmented_reduction(column_view const& col, auto f = simple_op.template get_null_replacing_element_transformer(); auto it = thrust::make_transform_iterator(dcol->pair_begin(), f); cudf::reduction::detail::segmented_reduce( - it, offsets.begin(), offsets.end(), outit, binary_op, identity, stream); + it, offsets.begin(), offsets.end(), outit, binary_op, initial_value, stream); } else { auto f = simple_op.template get_element_transformer(); auto it = thrust::make_transform_iterator(dcol->begin(), f); cudf::reduction::detail::segmented_reduce( - it, offsets.begin(), offsets.end(), outit, binary_op, identity, stream); + it, offsets.begin(), offsets.end(), outit, binary_op, initial_value, stream); } // Compute the output null mask - auto const bitmask = col.null_mask(); - auto const first_bit_indices_begin = offsets.begin(); - auto const first_bit_indices_end = offsets.end() - 1; - auto const last_bit_indices_begin = first_bit_indices_begin + 1; - auto const [output_null_mask, output_null_count] = - cudf::detail::segmented_null_mask_reduction(bitmask, - first_bit_indices_begin, - first_bit_indices_end, - last_bit_indices_begin, - null_handling, - stream, - mr); + auto const bitmask = col.null_mask(); + auto const first_bit_indices_begin = offsets.begin(); + auto const first_bit_indices_end = offsets.end() - 1; + auto const last_bit_indices_begin = first_bit_indices_begin + 1; + auto const [output_null_mask, output_null_count] = cudf::detail::segmented_null_mask_reduction( + bitmask, + first_bit_indices_begin, + first_bit_indices_end, + last_bit_indices_begin, + null_handling, + init.has_value() ? std::optional(init.value().get().is_valid()) : std::nullopt, + stream, + mr); result->set_null_mask(output_null_mask, output_null_count, stream); return result; @@ -125,12 +139,12 @@ std::unique_ptr simple_segmented_reduction(column_view const& col, * @tparam InputType the input column data-type * @tparam Op the operator of cudf::reduction::op:: - * @param col Input column of data to reduce. - * @param offsets Indices to segment boundaries. - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment - * must be valid for the reduced value to be valid. If `null_policy::EXCLUDE`, - * the reduced value is valid if any element in the segment is valid. - * @param stream Used for device memory operations and kernel launches. + * @param col Input column of data to reduce + * @param offsets Indices to segment boundaries + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. 
If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column in device memory */ @@ -178,6 +192,7 @@ std::unique_ptr string_segmented_reduction(column_view const& col, offsets.end() - 1, offsets.begin() + 1, null_handling, + std::nullopt, stream, mr); @@ -227,12 +242,12 @@ std::unique_ptr string_segmented_reduction(column_view const& col, * @tparam InputType the input column data-type * @tparam Op the operator of cudf::reduction::op:: - * @param col Input column of data to reduce. - * @param offsets Indices to segment boundaries. - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment - * must be valid for the reduced value to be valid. If `null_policy::EXCLUDE`, - * the reduced value is valid if any element in the segment is valid. - * @param stream Used for device memory operations and kernel launches. + * @param col Input column of data to reduce + * @param offsets Indices to segment boundaries + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. + * @param stream Used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Output column in device memory */ @@ -241,25 +256,30 @@ template || std::is_same_v)> -std::unique_ptr fixed_point_segmented_reduction(column_view const& col, - device_span offsets, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr fixed_point_segmented_reduction( + column_view const& col, + device_span offsets, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { using RepType = device_storage_type_t; - return simple_segmented_reduction(col, offsets, null_handling, stream, mr); + return simple_segmented_reduction( + col, offsets, null_handling, init, stream, mr); } template () && !std::is_same_v())> -std::unique_ptr fixed_point_segmented_reduction(column_view const& col, - device_span offsets, - null_policy null_handling, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr fixed_point_segmented_reduction( + column_view const& col, + device_span offsets, + null_policy null_handling, + std::optional>, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_FAIL("Segmented reduction on fixed point column only supports min and max reduction."); } @@ -277,17 +297,19 @@ struct bool_result_column_dispatcher { std::unique_ptr operator()(column_view const& col, device_span offsets, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return simple_segmented_reduction( - col, offsets, null_handling, stream, mr); + col, offsets, null_handling, init, stream, mr); } template ()>* = nullptr> std::unique_ptr operator()(column_view const&, device_span, null_policy, + std::optional>, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { @@ -320,11 +342,12 @@ struct same_column_type_dispatcher { std::unique_ptr operator()(column_view const& col, device_span offsets, null_policy null_handling, + std::optional> init, 
rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return simple_segmented_reduction( - col, offsets, null_handling, stream, mr); + col, offsets, null_handling, init, stream, mr); } template operator()(column_view const& col, device_span offsets, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + if (init.has_value()) { CUDF_FAIL("Initial value not supported for strings"); } + return string_segmented_reduction(col, offsets, null_handling, stream, mr); } @@ -343,17 +369,19 @@ struct same_column_type_dispatcher { std::unique_ptr operator()(column_view const& col, device_span offsets, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { return fixed_point_segmented_reduction( - col, offsets, null_handling, stream, mr); + col, offsets, null_handling, init, stream, mr); } template ())> std::unique_ptr operator()(column_view const&, device_span, null_policy, + std::optional>, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { @@ -381,12 +409,13 @@ struct column_type_dispatcher { device_span offsets, data_type const output_type, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { // TODO: per gh-9988, we should change the compute precision to `output_type`. - auto result = - simple_segmented_reduction(col, offsets, null_handling, stream, mr); + auto result = simple_segmented_reduction( + col, offsets, null_handling, init, stream, mr); if (output_type == result->type()) { return result; } return cudf::detail::cast(*result, output_type, stream, mr); } @@ -400,12 +429,13 @@ struct column_type_dispatcher { device_span offsets, data_type const output_type, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { // TODO: per gh-9988, we should change the compute precision to `output_type`. - auto result = - simple_segmented_reduction(col, offsets, null_handling, stream, mr); + auto result = simple_segmented_reduction( + col, offsets, null_handling, init, stream, mr); if (output_type == result->type()) { return result; } return cudf::detail::cast(*result, output_type, stream, mr); } @@ -414,14 +444,14 @@ struct column_type_dispatcher { * @brief Called by the type-dispatcher to reduce the input column `col` using * the `Op` operation. * - * @tparam ElementType The input column type or key type. + * @tparam ElementType The input column type or key type * @param col Input column (must be numeric) * @param offsets Indices to segment boundaries * @param output_type Requested type of the scalar result - * @param null_handling If `null_policy::INCLUDE`, all elements in a segment - * must be valid for the reduced value to be valid. If `null_policy::EXCLUDE`, - * the reduced value is valid if any element in the segment is valid. - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param null_handling If `null_policy::INCLUDE`, all elements in a segment must be valid for the + * reduced value to be valid. If `null_policy::EXCLUDE`, the reduced value is valid if any element + * in the segment is valid. 
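// Sketch of the user-visible restriction added above (not from this PR's tests): the segmented
// string path rejects an initial value, so this call should throw cudf::logic_error. The
// offsets/device_span plumbing mirrors the earlier segmented example and is assumed caller code.
#include <cudf/column/column_view.hpp>
#include <cudf/reduction.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <gtest/gtest.h>

void expect_string_segmented_reduce_with_init_throws()
{
  cudf::test::strings_column_wrapper strings({"ab", "cd", "ef", "gh"});
  cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets{0, 2, 4};
  cudf::column_view const offsets_view = offsets;
  auto const offsets_span = cudf::device_span<cudf::size_type const>{
    offsets_view.data<cudf::size_type>(), static_cast<std::size_t>(offsets_view.size())};

  auto const agg  = cudf::make_max_aggregation<cudf::segmented_reduce_aggregation>();
  auto const init = cudf::make_string_scalar("zz");

  EXPECT_THROW(cudf::segmented_reduce(strings,
                                      offsets_span,
                                      *agg,
                                      cudf::data_type{cudf::type_id::STRING},
                                      cudf::null_policy::EXCLUDE,
                                      *init),
               cudf::logic_error);
}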
+ * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory */ template offsets, data_type const output_type, null_policy null_handling, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { if (output_type.id() == cudf::type_to_id()) { return simple_segmented_reduction( - col, offsets, null_handling, stream, mr); + col, offsets, null_handling, init, stream, mr); } // reduce and map to output type - return reduce_numeric(col, offsets, output_type, null_handling, stream, mr); + return reduce_numeric(col, offsets, output_type, null_handling, init, stream, mr); } template , data_type const, null_policy, + std::optional>, rmm::cuda_stream_view, rmm::mr::device_memory_resource*) { diff --git a/cpp/src/reductions/sum.cu b/cpp/src/reductions/sum.cu index 2db19939bd5..b919d871cc2 100644 --- a/cpp/src/reductions/sum.cu +++ b/cpp/src/reductions/sum.cu @@ -25,6 +25,7 @@ namespace reduction { std::unique_ptr sum(column_view const& col, cudf::data_type const output_dtype, + std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -33,6 +34,7 @@ std::unique_ptr sum(column_view const& col, simple::detail::element_type_dispatcher{}, col, output_dtype, + init, stream, mr); } diff --git a/cpp/src/reductions/sum_of_squares.cu b/cpp/src/reductions/sum_of_squares.cu index a3e9368bb02..af28ba19c9a 100644 --- a/cpp/src/reductions/sum_of_squares.cu +++ b/cpp/src/reductions/sum_of_squares.cu @@ -33,6 +33,7 @@ std::unique_ptr sum_of_squares(column_view const& col, simple::detail::element_type_dispatcher{}, col, output_dtype, + std::nullopt, stream, mr); } diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 0b90c241f31..7f04d3edb14 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -39,26 +40,22 @@ using aggregation = cudf::aggregation; using reduce_aggregation = cudf::reduce_aggregation; template -std::enable_if_t::value, std::vector> convert_values( - std::vector const& int_values) +auto convert_int(int value) { - std::vector v(int_values.size()); - std::transform(int_values.begin(), int_values.end(), v.begin(), [](int x) { - if (std::is_unsigned_v) x = std::abs(x); - return static_cast(x); - }); - return v; + if (std::is_unsigned_v) value = std::abs(value); + if constexpr (cudf::is_timestamp_t::value) { + return T{typename T::duration(value)}; + } else { + return static_cast(value); + } } template -std::enable_if_t::value, std::vector> convert_values( - std::vector const& int_values) +auto convert_values(std::vector const& int_values) { std::vector v(int_values.size()); - std::transform(int_values.begin(), int_values.end(), v.begin(), [](int x) { - if (std::is_unsigned_v) x = std::abs(x); - return T{typename T::duration(x)}; - }); + std::transform( + int_values.begin(), int_values.end(), v.begin(), [](int x) { return convert_int(x); }); return v; } @@ -96,28 +93,30 @@ struct ReductionTest : public cudf::test::BaseFixture { ~ReductionTest() {} template - void reduction_test(const cudf::column_view underlying_column, - T_out expected_value, - bool succeeded_condition, - std::unique_ptr const& agg, - cudf::data_type output_dtype = cudf::data_type{}, - bool expected_null = false) + std::pair reduction_test(cudf::column_view const& underlying_column, + 
std::unique_ptr const& agg, + std::optional _output_dtype = {}) { - if (cudf::data_type{} == output_dtype) output_dtype = underlying_column.type(); - - auto statement = [&]() { - std::unique_ptr result = cudf::reduce(underlying_column, agg, output_dtype); - using ScalarType = cudf::scalar_type_t; - auto result1 = static_cast(result.get()); - EXPECT_EQ(expected_null, !result1->is_valid()); - if (result1->is_valid()) { EXPECT_EQ(expected_value, T_out{result1->value()}); } - }; + auto const output_dtype = _output_dtype.value_or(underlying_column.type()); + std::unique_ptr reduction = cudf::reduce(underlying_column, agg, output_dtype); + using ScalarType = cudf::scalar_type_t; + auto result = static_cast(reduction.get()); + return {result->value(), result->is_valid()}; + } - if (succeeded_condition) { - CUDF_EXPECT_NO_THROW(statement()); - } else { - EXPECT_ANY_THROW(statement()); - } + // Test with initial value + template + std::pair reduction_test(cudf::column_view const& underlying_column, + cudf::scalar const& initial_value, + std::unique_ptr const& agg, + std::optional _output_dtype = {}) + { + auto const output_dtype = _output_dtype.value_or(underlying_column.type()); + std::unique_ptr reduction = + cudf::reduce(underlying_column, agg, output_dtype, initial_value); + using ScalarType = cudf::scalar_type_t; + auto result = static_cast(reduction.get()); + return {result->value(), result->is_valid()}; } }; @@ -125,31 +124,49 @@ template struct MinMaxReductionTest : public ReductionTest { }; -using MinMaxTypes = cudf::test::AllTypes; +using MinMaxTypes = cudf::test::Types; TYPED_TEST_SUITE(MinMaxReductionTest, MinMaxTypes); // ------------------------------------------------------------------------ -TYPED_TEST(MinMaxReductionTest, MinMax) +TYPED_TEST(MinMaxReductionTest, MinMaxTypes) { using T = TypeParam; std::vector int_values({5, 0, -120, -111, 0, 64, 63, 99, 123, -16}); std::vector host_bools({1, 1, 0, 1, 1, 1, 0, 1, 0, 1}); std::vector all_null({0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); - std::vector v = convert_values(int_values); + std::vector v = convert_values(int_values); + T init_value = convert_int(100); + auto const init_scalar = cudf::make_fixed_width_scalar(init_value); // Min/Max succeeds for any gdf types including // non-arithmetic types (date32, date64, timestamp, category) - bool result_error(true); // test without nulls cudf::test::fixed_width_column_wrapper col(v.begin(), v.end()); - T expected_min_result = *(std::min_element(v.begin(), v.end())); - T expected_max_result = *(std::max_element(v.begin(), v.end())); - this->reduction_test( - col, expected_min_result, result_error, cudf::make_min_aggregation()); - this->reduction_test( - col, expected_max_result, result_error, cudf::make_max_aggregation()); + T expected_min_result = *(std::min_element(v.begin(), v.end())); + T expected_max_result = *(std::max_element(v.begin(), v.end())); + T expected_min_init_result = std::accumulate( + v.begin(), v.end(), init_value, [](const T& a, const T& b) { return std::min(a, b); }); + T expected_max_init_result = std::accumulate( + v.begin(), v.end(), init_value, [](const T& a, const T& b) { return std::max(a, b); }); + + EXPECT_EQ( + this->template reduction_test(col, cudf::make_min_aggregation()).first, + expected_min_result); + EXPECT_EQ( + this->template reduction_test(col, cudf::make_max_aggregation()).first, + expected_max_result); + EXPECT_EQ(this + ->template reduction_test( + col, *init_scalar, cudf::make_min_aggregation()) + .first, + expected_min_init_result); + EXPECT_EQ(this + 
->template reduction_test( + col, *init_scalar, cudf::make_max_aggregation()) + .first, + expected_max_init_result); auto res = cudf::minmax(col); @@ -167,15 +184,33 @@ TYPED_TEST(MinMaxReductionTest, MinMax) T expected_min_null_result = *(std::min_element(r_min.begin(), r_min.end())); T expected_max_null_result = *(std::max_element(r_max.begin(), r_max.end())); - - this->reduction_test(col_nulls, - expected_min_null_result, - result_error, - cudf::make_min_aggregation()); - this->reduction_test(col_nulls, - expected_max_null_result, - result_error, - cudf::make_max_aggregation()); + T expected_min_init_null_result = + std::accumulate(r_min.begin(), r_min.end(), init_value, [](const T& a, const T& b) { + return std::min(a, b); + }); + T expected_max_init_null_result = + std::accumulate(r_max.begin(), r_max.end(), init_value, [](const T& a, const T& b) { + return std::max(a, b); + }); + + EXPECT_EQ( + this->template reduction_test(col_nulls, cudf::make_min_aggregation()) + .first, + expected_min_null_result); + EXPECT_EQ( + this->template reduction_test(col_nulls, cudf::make_max_aggregation()) + .first, + expected_max_null_result); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, *init_scalar, cudf::make_min_aggregation()) + .first, + expected_min_init_null_result); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, *init_scalar, cudf::make_max_aggregation()) + .first, + expected_max_init_null_result); auto null_res = cudf::minmax(col_nulls); @@ -187,27 +222,24 @@ TYPED_TEST(MinMaxReductionTest, MinMax) // test with all null cudf::test::fixed_width_column_wrapper col_all_nulls = construct_null_column(v, all_null); - - auto all_null_r_min = replace_nulls(v, all_null, std::numeric_limits::max()); - auto all_null_r_max = replace_nulls(v, all_null, std::numeric_limits::lowest()); - - T expected_min_all_null_result = - *(std::min_element(all_null_r_min.begin(), all_null_r_min.end())); - T expected_max_all_null_result = - *(std::max_element(all_null_r_max.begin(), all_null_r_max.end())); - - this->reduction_test(col_all_nulls, - expected_min_all_null_result, - result_error, - cudf::make_min_aggregation(), - cudf::data_type{}, - true); - this->reduction_test(col_all_nulls, - expected_max_all_null_result, - result_error, - cudf::make_max_aggregation(), - cudf::data_type{}, - true); + init_scalar->set_valid_async(false); + + EXPECT_FALSE( + this + ->template reduction_test(col_all_nulls, cudf::make_min_aggregation()) + .second); + EXPECT_FALSE( + this + ->template reduction_test(col_all_nulls, cudf::make_max_aggregation()) + .second); + EXPECT_FALSE(this + ->template reduction_test( + col_all_nulls, *init_scalar, cudf::make_min_aggregation()) + .second); + EXPECT_FALSE(this + ->template reduction_test( + col_all_nulls, *init_scalar, cudf::make_max_aggregation()) + .second); auto all_null_res = cudf::minmax(col_all_nulls); @@ -221,7 +253,7 @@ TYPED_TEST(MinMaxReductionTest, MinMax) template struct SumReductionTest : public ReductionTest { }; -using SumTypes = cudf::test::Concat; +using SumTypes = cudf::test::Types; TYPED_TEST_SUITE(SumReductionTest, SumTypes); TYPED_TEST(SumReductionTest, Sum) @@ -229,27 +261,41 @@ TYPED_TEST(SumReductionTest, Sum) using T = TypeParam; std::vector int_values({6, -14, 13, 64, 0, -13, -20, 45}); std::vector host_bools({1, 1, 0, 0, 1, 1, 1, 1}); - std::vector v = convert_values(int_values); + std::vector v = convert_values(int_values); + T init_value = convert_int(100); + auto const init_scalar = cudf::make_fixed_width_scalar(init_value); // test 
without nulls cudf::test::fixed_width_column_wrapper col(v.begin(), v.end()); - T expected_value = std::accumulate(v.begin(), v.end(), T{0}); - this->reduction_test(col, - expected_value, - this->ret_non_arithmetic, - cudf::make_sum_aggregation()); + T expected_value = std::accumulate(v.begin(), v.end(), T{0}); + T expected_value_init = std::accumulate(v.begin(), v.end(), init_value); + + EXPECT_EQ( + this->template reduction_test(col, cudf::make_sum_aggregation()).first, + expected_value); + EXPECT_EQ(this + ->template reduction_test( + col, *init_scalar, cudf::make_sum_aggregation()) + .first, + expected_value_init); // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(v, host_bools); auto r = replace_nulls(v, host_bools, T{0}); T expected_null_value = std::accumulate(r.begin(), r.end(), T{0}); + init_scalar->set_valid_async(false); - this->reduction_test(col_nulls, - expected_null_value, - this->ret_non_arithmetic, - cudf::make_sum_aggregation()); + EXPECT_EQ( + this->template reduction_test(col_nulls, cudf::make_sum_aggregation()) + .first, + expected_null_value); + EXPECT_FALSE(this + ->template reduction_test( + col_nulls, *init_scalar, cudf::make_sum_aggregation()) + .second); } +using ReductionTypes = cudf::test::Types; TYPED_TEST_SUITE(ReductionTest, cudf::test::NumericTypes); TYPED_TEST(ReductionTest, Product) @@ -258,31 +304,49 @@ TYPED_TEST(ReductionTest, Product) std::vector int_values({5, -1, 1, 0, 3, 2, 4}); std::vector host_bools({1, 1, 0, 0, 1, 1, 1}); std::vector v = convert_values(int_values); + T init_value = convert_int(4); + auto const init_scalar = cudf::make_fixed_width_scalar(init_value); auto calc_prod = [](std::vector& v) { - T expected_value = - std::accumulate(v.begin(), v.end(), T{1}, [](T acc, T i) { return acc * i; }); + T expected_value = std::accumulate(v.begin(), v.end(), T{1}, std::multiplies()); + return expected_value; + }; + + auto calc_prod_init = [](std::vector& v, T init) { + T expected_value = std::accumulate(v.begin(), v.end(), init, std::multiplies()); return expected_value; }; // test without nulls cudf::test::fixed_width_column_wrapper col(v.begin(), v.end()); - TypeParam expected_value = calc_prod(v); - - this->reduction_test(col, - expected_value, - this->ret_non_arithmetic, - cudf::make_product_aggregation()); + TypeParam expected_value = calc_prod(v); + TypeParam expected_value_init = calc_prod_init(v, init_value); + + EXPECT_EQ( + this->template reduction_test(col, cudf::make_product_aggregation()) + .first, + expected_value); + EXPECT_EQ(this + ->template reduction_test( + col, *init_scalar, cudf::make_product_aggregation()) + .first, + expected_value_init); // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(v, host_bools); auto r = replace_nulls(v, host_bools, T{1}); TypeParam expected_null_value = calc_prod(r); + init_scalar->set_valid_async(false); - this->reduction_test(col_nulls, - expected_null_value, - this->ret_non_arithmetic, - cudf::make_product_aggregation()); + EXPECT_EQ( + this + ->template reduction_test(col_nulls, cudf::make_product_aggregation()) + .first, + expected_null_value); + EXPECT_FALSE(this + ->template reduction_test( + col_nulls, *init_scalar, cudf::make_product_aggregation()) + .second); } TYPED_TEST(ReductionTest, SumOfSquare) @@ -301,62 +365,91 @@ TYPED_TEST(ReductionTest, SumOfSquare) cudf::test::fixed_width_column_wrapper col(v.begin(), v.end()); T expected_value = calc_reduction(v); - this->reduction_test(col, - 
expected_value, - this->ret_non_arithmetic, - cudf::make_sum_of_squares_aggregation()); + EXPECT_EQ( + this + ->template reduction_test(col, cudf::make_sum_of_squares_aggregation()) + .first, + expected_value); // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(v, host_bools); auto r = replace_nulls(v, host_bools, T{0}); T expected_null_value = calc_reduction(r); - this->reduction_test(col_nulls, - expected_null_value, - this->ret_non_arithmetic, - cudf::make_sum_of_squares_aggregation()); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_sum_of_squares_aggregation()) + .first, + expected_null_value); } template struct ReductionAnyAllTest : public ReductionTest { }; - -TYPED_TEST_SUITE(ReductionAnyAllTest, cudf::test::NumericTypes); +using AnyAllTypes = cudf::test::Types; +TYPED_TEST_SUITE(ReductionAnyAllTest, AnyAllTypes); TYPED_TEST(ReductionAnyAllTest, AnyAllTrueTrue) { using T = TypeParam; std::vector int_values({true, true, true, true}); std::vector host_bools({1, 1, 0, 1}); - std::vector v = convert_values(int_values); + std::vector v = convert_values(int_values); + auto const init_scalar = cudf::make_fixed_width_scalar(convert_int(true)); // Min/Max succeeds for any gdf types including // non-arithmetic types (date32, date64, timestamp, category) - bool result_error = true; - bool expected = true; + bool expected = true; cudf::data_type output_dtype(cudf::type_id::BOOL8); // test without nulls cudf::test::fixed_width_column_wrapper col(v.begin(), v.end()); - this->reduction_test( - col, expected, result_error, cudf::make_any_aggregation(), output_dtype); - this->reduction_test( - col, expected, result_error, cudf::make_all_aggregation(), output_dtype); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_any_aggregation(), output_dtype) + .first, + expected); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_all_aggregation(), output_dtype) + .first, + expected); + EXPECT_EQ(this + ->template reduction_test( + col, *init_scalar, cudf::make_any_aggregation(), output_dtype) + .first, + expected); + EXPECT_EQ(this + ->template reduction_test( + col, *init_scalar, cudf::make_all_aggregation(), output_dtype) + .first, + expected); // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(v, host_bools); - - this->reduction_test(col_nulls, - expected, - result_error, - cudf::make_any_aggregation(), - output_dtype); - this->reduction_test(col_nulls, - expected, - result_error, - cudf::make_all_aggregation(), - output_dtype); + init_scalar->set_valid_async(false); + + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_any_aggregation(), output_dtype) + .first, + expected); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_all_aggregation(), output_dtype) + .first, + expected); + EXPECT_FALSE( + this + ->template reduction_test( + col_nulls, *init_scalar, cudf::make_any_aggregation(), output_dtype) + .second); + EXPECT_FALSE( + this + ->template reduction_test( + col_nulls, *init_scalar, cudf::make_all_aggregation(), output_dtype) + .second); } TYPED_TEST(ReductionAnyAllTest, AnyAllFalseFalse) @@ -364,35 +457,62 @@ TYPED_TEST(ReductionAnyAllTest, AnyAllFalseFalse) using T = TypeParam; std::vector int_values({false, false, false, false}); std::vector host_bools({1, 1, 0, 1}); - std::vector v = convert_values(int_values); + std::vector v = convert_values(int_values); + auto const init_scalar = 
cudf::make_fixed_width_scalar(convert_int(false)); // Min/Max succeeds for any gdf types including // non-arithmetic types (date32, date64, timestamp, category) - bool result_error = true; - bool expected = false; + bool expected = false; cudf::data_type output_dtype(cudf::type_id::BOOL8); // test without nulls cudf::test::fixed_width_column_wrapper col(v.begin(), v.end()); - this->reduction_test( - col, expected, result_error, cudf::make_any_aggregation(), output_dtype); - this->reduction_test( - col, expected, result_error, cudf::make_all_aggregation(), output_dtype); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_any_aggregation(), output_dtype) + .first, + expected); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_all_aggregation(), output_dtype) + .first, + expected); + EXPECT_EQ(this + ->template reduction_test( + col, *init_scalar, cudf::make_any_aggregation(), output_dtype) + .first, + expected); + EXPECT_EQ(this + ->template reduction_test( + col, *init_scalar, cudf::make_all_aggregation(), output_dtype) + .first, + expected); // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(v, host_bools); - - this->reduction_test(col_nulls, - expected, - result_error, - cudf::make_any_aggregation(), - output_dtype); - this->reduction_test(col_nulls, - expected, - result_error, - cudf::make_all_aggregation(), - output_dtype); + init_scalar->set_valid_async(false); + + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_any_aggregation(), output_dtype) + .first, + expected); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_all_aggregation(), output_dtype) + .first, + expected); + EXPECT_FALSE( + this + ->template reduction_test( + col_nulls, *init_scalar, cudf::make_any_aggregation(), output_dtype) + .second); + EXPECT_FALSE( + this + ->template reduction_test( + col_nulls, *init_scalar, cudf::make_all_aggregation(), output_dtype) + .second); } // ---------------------------------------------------------------------------- @@ -400,8 +520,7 @@ TYPED_TEST(ReductionAnyAllTest, AnyAllFalseFalse) template struct MultiStepReductionTest : public ReductionTest { }; - -using MultiStepReductionTypes = cudf::test::NumericTypes; +using MultiStepReductionTypes = cudf::test::Types; TYPED_TEST_SUITE(MultiStepReductionTest, MultiStepReductionTypes); TYPED_TEST(MultiStepReductionTest, Mean) @@ -419,11 +538,13 @@ TYPED_TEST(MultiStepReductionTest, Mean) std::vector v = convert_values(int_values); cudf::test::fixed_width_column_wrapper col(v.begin(), v.end()); double expected_value = calc_mean(v, v.size()); - this->reduction_test(col, - expected_value, - true, - cudf::make_mean_aggregation(), - cudf::data_type(cudf::type_id::FLOAT64)); + + EXPECT_EQ(this + ->template reduction_test(col, + cudf::make_mean_aggregation(), + cudf::data_type(cudf::type_id::FLOAT64)) + .first, + expected_value); // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(v, host_bools); @@ -432,11 +553,13 @@ TYPED_TEST(MultiStepReductionTest, Mean) auto replaced_array = replace_nulls(v, host_bools, T{0}); double expected_value_nulls = calc_mean(replaced_array, valid_count); - this->reduction_test(col_nulls, - expected_value_nulls, - true, - cudf::make_mean_aggregation(), - cudf::data_type(cudf::type_id::FLOAT64)); + + EXPECT_EQ(this + ->template reduction_test(col_nulls, + cudf::make_mean_aggregation(), + cudf::data_type(cudf::type_id::FLOAT64)) + .first, + expected_value_nulls); } // This test 
is disabled for only a Debug build because a compiler error @@ -474,8 +597,14 @@ TYPED_TEST(MultiStepReductionTest, DISABLED_var_std) auto var_agg = cudf::make_variance_aggregation(ddof); auto std_agg = cudf::make_std_aggregation(ddof); - this->reduction_test(col, var, true, var_agg, cudf::data_type(cudf::type_id::FLOAT64)); - this->reduction_test(col, std, true, std_agg, cudf::data_type(cudf::type_id::FLOAT64)); + EXPECT_EQ( + this->template reduction_test(col, var_agg, cudf::data_type(cudf::type_id::FLOAT64)) + .first, + var); + EXPECT_EQ( + this->template reduction_test(col, std_agg, cudf::data_type(cudf::type_id::FLOAT64)) + .first, + std); // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(v, host_bools); @@ -486,10 +615,16 @@ TYPED_TEST(MultiStepReductionTest, DISABLED_var_std) double var_nulls = calc_var(replaced_array, valid_count, ddof); double std_nulls = std::sqrt(var_nulls); - this->reduction_test( - col_nulls, var_nulls, true, var_agg, cudf::data_type(cudf::type_id::FLOAT64)); - this->reduction_test( - col_nulls, std_nulls, true, std_agg, cudf::data_type(cudf::type_id::FLOAT64)); + EXPECT_EQ( + this + ->template reduction_test(col_nulls, var_agg, cudf::data_type(cudf::type_id::FLOAT64)) + .first, + var_nulls); + EXPECT_EQ( + this + ->template reduction_test(col_nulls, std_agg, cudf::data_type(cudf::type_id::FLOAT64)) + .first, + std_nulls); } // ---------------------------------------------------------------------------- @@ -732,7 +867,7 @@ struct ReductionErrorTest : public cudf::test::BaseFixture { TEST_F(ReductionErrorTest, empty_column) { using T = int32_t; - auto statement = [](const cudf::column_view col) { + auto statement = [](cudf::column_view const& col) { std::unique_ptr result = cudf::reduce( col, cudf::make_sum_aggregation(), cudf::data_type(cudf::type_id::INT64)); EXPECT_EQ(result->is_valid(), false); @@ -799,8 +934,14 @@ TEST_P(ReductionParamTest, DISABLED_std_var) auto var_agg = cudf::make_variance_aggregation(ddof); auto std_agg = cudf::make_std_aggregation(ddof); - this->reduction_test(col, var, true, var_agg, cudf::data_type(cudf::type_id::FLOAT64)); - this->reduction_test(col, std, true, std_agg, cudf::data_type(cudf::type_id::FLOAT64)); + EXPECT_EQ( + this->template reduction_test(col, var_agg, cudf::data_type(cudf::type_id::FLOAT64)) + .first, + var); + EXPECT_EQ( + this->template reduction_test(col, std_agg, cudf::data_type(cudf::type_id::FLOAT64)) + .first, + std); // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = @@ -812,10 +953,16 @@ TEST_P(ReductionParamTest, DISABLED_std_var) double var_nulls = calc_var(replaced_array, valid_count); double std_nulls = std::sqrt(var_nulls); - this->reduction_test( - col_nulls, var_nulls, true, var_agg, cudf::data_type(cudf::type_id::FLOAT64)); - this->reduction_test( - col_nulls, std_nulls, true, std_agg, cudf::data_type(cudf::type_id::FLOAT64)); + EXPECT_EQ( + this + ->template reduction_test(col_nulls, var_agg, cudf::data_type(cudf::type_id::FLOAT64)) + .first, + var_nulls); + EXPECT_EQ( + this + ->template reduction_test(col_nulls, std_agg, cudf::data_type(cudf::type_id::FLOAT64)) + .first, + std_nulls); } //------------------------------------------------------------------- @@ -823,7 +970,7 @@ struct StringReductionTest : public cudf::test::BaseFixture, public testing::WithParamInterface> { // Min/Max - void reduction_test(const cudf::column_view underlying_column, + void reduction_test(cudf::column_view const& underlying_column, std::string 
expected_value, bool succeeded_condition, std::unique_ptr const& agg, @@ -848,6 +995,35 @@ struct StringReductionTest : public cudf::test::BaseFixture, EXPECT_ANY_THROW(statement()); } } + + void reduction_test(cudf::column_view const& underlying_column, + std::string initial_value, + std::string expected_value, + bool succeeded_condition, + std::unique_ptr const& agg, + cudf::data_type output_dtype = cudf::data_type{}) + { + if (cudf::data_type{} == output_dtype) output_dtype = underlying_column.type(); + auto string_scalar = cudf::make_string_scalar(initial_value); + + auto statement = [&]() { + std::unique_ptr result = + cudf::reduce(underlying_column, agg, output_dtype, *string_scalar); + using ScalarType = cudf::scalar_type_t; + auto result1 = static_cast(result.get()); + EXPECT_TRUE(result1->is_valid()); + if (!result1->is_valid()) + std::cout << "expected=" << expected_value << ",got=" << result1->to_string() << std::endl; + EXPECT_EQ(expected_value, result1->to_string()) + << (agg->kind == aggregation::MIN ? "MIN" : "MAX"); + }; + + if (succeeded_condition) { + CUDF_EXPECT_NO_THROW(statement()); + } else { + EXPECT_ANY_THROW(statement()); + } + } }; // ------------------------------------------------------------------------ @@ -869,12 +1045,15 @@ TEST_P(StringReductionTest, MinMax) std::vector host_strings(GetParam()); std::vector host_bools({1, 0, 1, 1, 1, 1, 0, 0, 1}); bool succeed(true); + std::string initial_value = "init"; // all valid string column cudf::test::strings_column_wrapper col(host_strings.begin(), host_strings.end()); std::string expected_min_result = *(std::min_element(host_strings.begin(), host_strings.end())); std::string expected_max_result = *(std::max_element(host_strings.begin(), host_strings.end())); + std::string expected_min_init_result = std::min(expected_min_result, initial_value); + std::string expected_max_init_result = std::max(expected_max_result, initial_value); // string column with nulls cudf::test::strings_column_wrapper col_nulls( @@ -888,17 +1067,39 @@ TEST_P(StringReductionTest, MinMax) std::string expected_min_null_result = *(std::min_element(r_strings.begin(), r_strings.end())); std::string expected_max_null_result = *(std::max_element(r_strings.begin(), r_strings.end())); + std::string expected_min_init_null_result = std::min(expected_min_null_result, initial_value); + std::string expected_max_init_null_result = std::max(expected_max_null_result, initial_value); // MIN this->reduction_test( col, expected_min_result, succeed, cudf::make_min_aggregation()); this->reduction_test( col_nulls, expected_min_null_result, succeed, cudf::make_min_aggregation()); + this->reduction_test(col, + initial_value, + expected_min_init_result, + succeed, + cudf::make_min_aggregation()); + this->reduction_test(col_nulls, + initial_value, + expected_min_init_null_result, + succeed, + cudf::make_min_aggregation()); // MAX this->reduction_test( col, expected_max_result, succeed, cudf::make_max_aggregation()); this->reduction_test( col_nulls, expected_max_null_result, succeed, cudf::make_max_aggregation()); + this->reduction_test(col, + initial_value, + expected_max_init_result, + succeed, + cudf::make_max_aggregation()); + this->reduction_test(col_nulls, + initial_value, + expected_max_init_null_result, + succeed, + cudf::make_max_aggregation()); // MINMAX auto result = cudf::minmax(col); @@ -965,6 +1166,8 @@ TEST_F(StringReductionTest, AllNull) std::vector host_strings( {"one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}); std::vector 
host_bools(host_strings.size(), false); + auto initial_value = cudf::make_string_scalar("init"); + initial_value->set_valid_async(false); // string column with nulls cudf::test::strings_column_wrapper col_nulls( @@ -975,9 +1178,15 @@ TEST_F(StringReductionTest, AllNull) auto result = cudf::reduce(col_nulls, cudf::make_min_aggregation(), output_dtype); EXPECT_FALSE(result->is_valid()); + result = cudf::reduce( + col_nulls, cudf::make_min_aggregation(), output_dtype, *initial_value); + EXPECT_FALSE(result->is_valid()); // MAX result = cudf::reduce(col_nulls, cudf::make_max_aggregation(), output_dtype); EXPECT_FALSE(result->is_valid()); + result = cudf::reduce( + col_nulls, cudf::make_max_aggregation(), output_dtype, *initial_value); + EXPECT_FALSE(result->is_valid()); // MINMAX auto mm_result = cudf::minmax(col_nulls); EXPECT_FALSE(mm_result.first->is_valid()); @@ -999,10 +1208,10 @@ TYPED_TEST(ReductionTest, Median) if (std::is_signed_v) return 3.0; return 13.5; }(); - this->reduction_test(col, - expected_value, - this->ret_non_arithmetic, - cudf::make_median_aggregation()); + EXPECT_EQ( + this->template reduction_test(col, cudf::make_median_aggregation()) + .first, + expected_value); auto col_odd = cudf::split(col, {1})[1]; double expected_value_odd = [] { @@ -1010,10 +1219,12 @@ TYPED_TEST(ReductionTest, Median) if (std::is_signed_v) return 0.0; return 14.0; }(); - this->reduction_test(col_odd, - expected_value_odd, - this->ret_non_arithmetic, - cudf::make_median_aggregation()); + EXPECT_EQ(this + ->template reduction_test(col_odd, + cudf::make_median_aggregation()) + .first, + expected_value_odd); + // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(v, host_bools); double expected_null_value = [] { @@ -1022,10 +1233,11 @@ TYPED_TEST(ReductionTest, Median) return 13.0; }(); - this->reduction_test(col_nulls, - expected_null_value, - this->ret_non_arithmetic, - cudf::make_median_aggregation()); + EXPECT_EQ(this + ->template reduction_test(col_nulls, + cudf::make_median_aggregation()) + .first, + expected_null_value); auto col_nulls_odd = cudf::split(col_nulls, {1})[1]; double expected_null_value_odd = [] { @@ -1033,10 +1245,11 @@ TYPED_TEST(ReductionTest, Median) if (std::is_signed_v) return -6.5; return 13.5; }(); - this->reduction_test(col_nulls_odd, - expected_null_value_odd, - this->ret_non_arithmetic, - cudf::make_median_aggregation()); + EXPECT_EQ(this + ->template reduction_test(col_nulls_odd, + cudf::make_median_aggregation()) + .first, + expected_null_value_odd); } TYPED_TEST(ReductionTest, Quantile) @@ -1051,28 +1264,33 @@ TYPED_TEST(ReductionTest, Quantile) // test without nulls cudf::test::fixed_width_column_wrapper col(v.begin(), v.end()); double expected_value0 = std::is_same_v || std::is_unsigned_v ? 
v[4] : v[6]; - this->reduction_test(col, - expected_value0, - this->ret_non_arithmetic, - cudf::make_quantile_aggregation({0.0}, interp)); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_quantile_aggregation({0.0}, interp)) + .first, + expected_value0); + double expected_value1 = v[3]; - this->reduction_test(col, - expected_value1, - this->ret_non_arithmetic, - cudf::make_quantile_aggregation({1.0}, interp)); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_quantile_aggregation({1.0}, interp)) + .first, + expected_value1); // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(v, host_bools); double expected_null_value1 = v[7]; - this->reduction_test(col_nulls, - expected_value0, - this->ret_non_arithmetic, - cudf::make_quantile_aggregation({0}, interp)); - this->reduction_test(col_nulls, - expected_null_value1, - this->ret_non_arithmetic, - cudf::make_quantile_aggregation({1}, interp)); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_quantile_aggregation({0}, interp)) + .first, + expected_value0); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_quantile_aggregation({1}, interp)) + .first, + expected_null_value1); } TYPED_TEST(ReductionTest, UniqueCount) @@ -1085,32 +1303,34 @@ TYPED_TEST(ReductionTest, UniqueCount) // test without nulls cudf::test::fixed_width_column_wrapper col(v.begin(), v.end()); cudf::size_type expected_value = std::is_same_v ? 2 : 6; - this->reduction_test( - col, - expected_value, - this->ret_non_arithmetic, - cudf::make_nunique_aggregation(cudf::null_policy::INCLUDE)); - this->reduction_test( - col, - expected_value, - this->ret_non_arithmetic, - cudf::make_nunique_aggregation(cudf::null_policy::EXCLUDE)); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_nunique_aggregation(cudf::null_policy::INCLUDE)) + .first, + expected_value); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_nunique_aggregation(cudf::null_policy::EXCLUDE)) + .first, + expected_value); // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(v, host_bools); cudf::size_type expected_null_value0 = std::is_same_v ? 3 : 7; cudf::size_type expected_null_value1 = std::is_same_v ? 
2 : 6; - this->reduction_test( - col_nulls, - expected_null_value0, - this->ret_non_arithmetic, - cudf::make_nunique_aggregation(cudf::null_policy::INCLUDE)); - this->reduction_test( - col_nulls, - expected_null_value1, - this->ret_non_arithmetic, - cudf::make_nunique_aggregation(cudf::null_policy::EXCLUDE)); + EXPECT_EQ( + this + ->template reduction_test( + col_nulls, cudf::make_nunique_aggregation(cudf::null_policy::INCLUDE)) + .first, + expected_null_value0); + EXPECT_EQ( + this + ->template reduction_test( + col_nulls, cudf::make_nunique_aggregation(cudf::null_policy::EXCLUDE)) + .first, + expected_null_value1); } template @@ -1129,6 +1349,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProductZeroScale) auto const THREE = decimalXX{3, scale_type{0}}; auto const FOUR = decimalXX{4, scale_type{0}}; auto const _24 = decimalXX{24, scale_type{0}}; + auto const _48 = decimalXX{48, scale_type{0}}; auto const in = std::vector{ONE, TWO, THREE, FOUR}; auto const column = cudf::test::fixed_width_column_wrapper(in.cbegin(), in.cend()); @@ -1142,6 +1363,19 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProductZeroScale) EXPECT_EQ(result_fp, expected); EXPECT_EQ(result_fp, _24); + + // Test with initial value + auto const init_expected = + std::accumulate(in.cbegin(), in.cend(), TWO, std::multiplies()); + auto const init_scalar = cudf::make_fixed_point_scalar(2, scale_type{0}); + + auto const init_result = cudf::reduce( + column, cudf::make_product_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = static_cast*>(init_result.get()); + auto const init_result_fp = decimalXX{init_result_scalar->value()}; + + EXPECT_EQ(init_result_fp, init_expected); + EXPECT_EQ(init_result_fp, _48); } TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProduct) @@ -1162,6 +1396,16 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProduct) auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); + + // Test with initial value + auto const init_expected = decimalXX{scaled_integer{72, scale_type{i * 7}}}; + auto const init_scalar = cudf::make_fixed_point_scalar(2, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_product_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); } } @@ -1183,6 +1427,16 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProductWithNulls) auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); + + // Test with initial value + auto const init_expected = decimalXX{scaled_integer{12, scale_type{i * 4}}}; + auto const init_scalar = cudf::make_fixed_point_scalar(2, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_product_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); } } @@ -1205,6 +1459,16 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSum) auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); + + // Test with initial value + auto const init_expected = decimalXX{scaled_integer{12, scale}}; + auto const init_scalar = cudf::make_fixed_point_scalar(2, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_sum_aggregation(), out_type, *init_scalar); + auto 
const init_result_scalar = static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); } } @@ -1213,12 +1477,13 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumAlternate) using namespace numeric; using decimalXX = TypeParam; - auto const ZERO = decimalXX{0, scale_type{0}}; - auto const ONE = decimalXX{1, scale_type{0}}; - auto const TWO = decimalXX{2, scale_type{0}}; - auto const THREE = decimalXX{3, scale_type{0}}; - auto const FOUR = decimalXX{4, scale_type{0}}; - auto const TEN = decimalXX{10, scale_type{0}}; + auto const ZERO = decimalXX{0, scale_type{0}}; + auto const ONE = decimalXX{1, scale_type{0}}; + auto const TWO = decimalXX{2, scale_type{0}}; + auto const THREE = decimalXX{3, scale_type{0}}; + auto const FOUR = decimalXX{4, scale_type{0}}; + auto const TEN = decimalXX{10, scale_type{0}}; + auto const TWELVE = decimalXX{12, scale_type{0}}; auto const in = std::vector{ONE, TWO, THREE, FOUR}; auto const column = cudf::test::fixed_width_column_wrapper(in.cbegin(), in.cend()); @@ -1231,6 +1496,17 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumAlternate) EXPECT_EQ(result_scalar->fixed_point_value(), expected); EXPECT_EQ(result_scalar->fixed_point_value(), TEN); + + // Test with initial value + auto const init_expected = std::accumulate(in.cbegin(), in.cend(), TWO, std::plus()); + auto const init_scalar = cudf::make_fixed_point_scalar(2, scale_type{0}); + + auto const init_result = + cudf::reduce(column, cudf::make_sum_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); + EXPECT_EQ(init_result_scalar->fixed_point_value(), TWELVE); } TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumFractional) @@ -1251,6 +1527,16 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumFractional) auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); + + // Test with initial value + auto const init_expected = decimalXX{scaled_integer{668, scale}}; + auto const init_scalar = cudf::make_fixed_point_scalar(2, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_sum_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); } } @@ -1275,6 +1561,19 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumLarge) auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); + + // Test with initial value + int const init_value = 2; + auto const init_expected_value = + std::accumulate(values.cbegin(), values.cend(), RepType{init_value}); + auto const init_expected = decimalXX{scaled_integer{init_expected_value, scale}}; + auto const init_scalar = cudf::make_fixed_point_scalar(init_value, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_sum_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); } } @@ -1296,6 +1595,16 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMin) auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), ONE); + + // Test with initial value + auto const init_expected = decimalXX{scaled_integer{0, scale}}; + auto const init_scalar = 
cudf::make_fixed_point_scalar(0, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_min_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); } } @@ -1318,6 +1627,16 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMinLarge) auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); + + // Test with initial value + auto const init_expected = decimalXX{scaled_integer{0, scale}}; + auto const init_scalar = cudf::make_fixed_point_scalar(0, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_min_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); } } @@ -1339,6 +1658,16 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMax) auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), FOUR); + + // Test with initial value + auto const init_expected = decimalXX{scaled_integer{5, scale}}; + auto const init_scalar = cudf::make_fixed_point_scalar(5, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_max_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); } } @@ -1361,6 +1690,16 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMaxLarge) auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); + + // Test with initial value + auto const init_expected = decimalXX{scaled_integer{43, scale}}; + auto const init_scalar = cudf::make_fixed_point_scalar(43, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_max_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); } } @@ -1516,6 +1855,17 @@ TEST_F(Decimal128Only, Decimal128ProductReduction) auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); + + // Test with initial value + auto const init_expected = decimal128{scaled_integer{1024, scale_type{i * 10}}}; + auto const init_scalar = cudf::make_fixed_point_scalar(2, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_product_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = + static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); } } @@ -1536,6 +1886,17 @@ TEST_F(Decimal128Only, Decimal128ProductReduction2) auto const result_scalar = static_cast*>(result.get()); EXPECT_EQ(result_scalar->fixed_point_value(), expected); + + // Test with initial value + auto const init_expected = decimal128{scaled_integer{2160, scale_type{i * 7}}}; + auto const init_scalar = cudf::make_fixed_point_scalar(3, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_product_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = + static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected); } } @@ -1557,6 +1918,15 @@ TEST_F(Decimal128Only, Decimal128ProductReduction3) auto const result_scalar = static_cast*>(result.get()); 
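
Editor's note: the fixed-point hunks above all follow one pattern, namely that the initial value behaves like one extra element folded into the reduction. That is where expected values such as 48 for the product of {1, 2, 3, 4} seeded with 2, 12 for the corresponding sum, an initial value of 0 winning MIN, and an initial value of 5 winning MAX come from. The sketch below is a minimal host-side model of that arithmetic only; it uses plain integers and standard-library folds, so the names are illustrative and none of it is libcudf API.

```cpp
// Minimal host-side model of "reduction seeded with an initial value" as exercised by the
// fixed-point tests above. Plain int arithmetic only; fixed_point scale propagation is left
// to the library and is deliberately not modelled here.
#include <algorithm>
#include <cassert>
#include <functional>
#include <numeric>
#include <vector>

int main()
{
  std::vector<int> const in{1, 2, 3, 4};

  // PRODUCT / SUM: the initial value is folded in like one extra element.
  assert(std::accumulate(in.begin(), in.end(), 1, std::multiplies<int>()) == 24);
  assert(std::accumulate(in.begin(), in.end(), 2, std::multiplies<int>()) == 48);
  assert(std::accumulate(in.begin(), in.end(), 0) == 10);
  assert(std::accumulate(in.begin(), in.end(), 2) == 12);

  // MIN / MAX: an initial value below/above every element becomes the result.
  auto const min_fn = [](int a, int b) { return std::min(a, b); };
  auto const max_fn = [](int a, int b) { return std::max(a, b); };
  assert(std::accumulate(in.begin(), in.end(), 0, min_fn) == 0);
  assert(std::accumulate(in.begin(), in.end(), 5, max_fn) == 5);

  return 0;
}
```

Note that the fixed-point product expectations above also appear to fold the initial value's scale into the result scale (for instance `1024 = 2^10` paired with `scale_type{i * 10}` in the decimal128 case); the integer sketch ignores scale entirely.
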
EXPECT_EQ(result_scalar->fixed_point_value(), expected); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_point_scalar(5, scale); + + auto const init_result = cudf::reduce( + column, cudf::make_product_aggregation(), out_type, *init_scalar); + auto const init_result_scalar = static_cast*>(init_result.get()); + + EXPECT_EQ(init_result_scalar->fixed_point_value(), expected); } TYPED_TEST(ReductionTest, NthElement) @@ -1584,57 +1954,48 @@ TYPED_TEST(ReductionTest, NthElement) {-input_size, -input_size / 2, -2, -1, 0, 1, 2, input_size / 2, input_size - 1}) { auto const index = mod(n, v.size()); T expected_value_nonull = v[index]; - bool const expected_null = !host_bools[index]; - this->reduction_test( - col, - expected_value_nonull, - true, - cudf::make_nth_element_aggregation(n, cudf::null_policy::INCLUDE)); - this->reduction_test( - col, - expected_value_nonull, - true, - cudf::make_nth_element_aggregation(n, cudf::null_policy::EXCLUDE)); - this->reduction_test( + bool const expected_null = host_bools[index]; + EXPECT_EQ(this + ->template reduction_test(col, + cudf::make_nth_element_aggregation( + n, cudf::null_policy::INCLUDE)) + .first, + expected_value_nonull); + EXPECT_EQ(this + ->template reduction_test(col, + cudf::make_nth_element_aggregation( + n, cudf::null_policy::EXCLUDE)) + .first, + expected_value_nonull); + auto res = this->template reduction_test( col_nulls, - expected_value_nonull, - true, - cudf::make_nth_element_aggregation(n, cudf::null_policy::INCLUDE), - cudf::data_type{}, - expected_null); + cudf::make_nth_element_aggregation(n, cudf::null_policy::INCLUDE)); + EXPECT_EQ(res.first, expected_value_nonull); + EXPECT_EQ(res.second, expected_null); } // valid only for (cudf::size_type n : {-valid_count, -valid_count / 2, -2, -1, 0, 1, 2, valid_count / 2, valid_count - 1}) { T expected_value_null = v_valid[mod(n, v_valid.size())]; - this->reduction_test( - col_nulls, - expected_value_null, - true, - cudf::make_nth_element_aggregation(n, cudf::null_policy::EXCLUDE)); + EXPECT_EQ(this + ->template reduction_test(col_nulls, + cudf::make_nth_element_aggregation( + n, cudf::null_policy::EXCLUDE)) + .first, + expected_value_null); } // error cases for (cudf::size_type n : {-input_size - 1, input_size}) { - this->reduction_test( - col, - T{}, - false, - cudf::make_nth_element_aggregation(n, cudf::null_policy::INCLUDE)); - this->reduction_test( + EXPECT_ANY_THROW(this->template reduction_test( + col, cudf::make_nth_element_aggregation(n, cudf::null_policy::INCLUDE))); + EXPECT_ANY_THROW(this->template reduction_test( col_nulls, - T{}, - false, - cudf::make_nth_element_aggregation(n, cudf::null_policy::INCLUDE)); - this->reduction_test( - col, - T{}, - false, - cudf::make_nth_element_aggregation(n, cudf::null_policy::EXCLUDE)); - this->reduction_test( + cudf::make_nth_element_aggregation(n, cudf::null_policy::INCLUDE))); + EXPECT_ANY_THROW(this->template reduction_test( + col, cudf::make_nth_element_aggregation(n, cudf::null_policy::EXCLUDE))); + EXPECT_ANY_THROW(this->template reduction_test( col_nulls, - T{}, - false, - cudf::make_nth_element_aggregation(n, cudf::null_policy::EXCLUDE)); + cudf::make_nth_element_aggregation(n, cudf::null_policy::EXCLUDE))); } } @@ -1683,7 +2044,7 @@ TEST_P(DictionaryStringReductionTest, MinMax) template struct DictionaryAnyAllTest : public ReductionTest { }; - +using DictionaryAnyAllTypes = cudf::test::Types; TYPED_TEST_SUITE(DictionaryAnyAllTest, cudf::test::NumericTypes); TYPED_TEST(DictionaryAnyAllTest, AnyAll) { @@ 
-1699,61 +2060,85 @@ TYPED_TEST(DictionaryAnyAllTest, AnyAll) // without nulls { cudf::test::dictionary_column_wrapper all_col(v_all.begin(), v_all.end()); - this->reduction_test( - all_col, true, true, cudf::make_any_aggregation(), output_dtype); - this->reduction_test( - all_col, true, true, cudf::make_all_aggregation(), output_dtype); + EXPECT_TRUE(this + ->template reduction_test( + all_col, cudf::make_any_aggregation(), output_dtype) + .first); + EXPECT_TRUE(this + ->template reduction_test( + all_col, cudf::make_all_aggregation(), output_dtype) + .first); cudf::test::dictionary_column_wrapper none_col(v_none.begin(), v_none.end()); - this->reduction_test( - none_col, false, true, cudf::make_any_aggregation(), output_dtype); - this->reduction_test( - none_col, false, true, cudf::make_all_aggregation(), output_dtype); + EXPECT_FALSE(this + ->template reduction_test( + none_col, cudf::make_any_aggregation(), output_dtype) + .first); + EXPECT_FALSE(this + ->template reduction_test( + none_col, cudf::make_all_aggregation(), output_dtype) + .first); cudf::test::dictionary_column_wrapper some_col(v_some.begin(), v_some.end()); - this->reduction_test( - some_col, true, true, cudf::make_any_aggregation(), output_dtype); - this->reduction_test( - some_col, false, true, cudf::make_all_aggregation(), output_dtype); + EXPECT_TRUE(this + ->template reduction_test( + some_col, cudf::make_any_aggregation(), output_dtype) + .first); + EXPECT_FALSE(this + ->template reduction_test( + some_col, cudf::make_all_aggregation(), output_dtype) + .first); // sliced test - this->reduction_test(cudf::slice(some_col, {1, 3}).front(), - true, - true, - cudf::make_any_aggregation(), - output_dtype); - this->reduction_test(cudf::slice(some_col, {1, 2}).front(), - true, - true, - cudf::make_all_aggregation(), - output_dtype); + EXPECT_TRUE(this + ->template reduction_test(cudf::slice(some_col, {1, 3}).front(), + cudf::make_any_aggregation(), + output_dtype) + .first); + EXPECT_TRUE(this + ->template reduction_test(cudf::slice(some_col, {1, 2}).front(), + cudf::make_all_aggregation(), + output_dtype) + .first); } // with nulls { std::vector valid({1, 1, 0, 1}); cudf::test::dictionary_column_wrapper all_col(v_all.begin(), v_all.end(), valid.begin()); - this->reduction_test( - all_col, true, true, cudf::make_any_aggregation(), output_dtype); - this->reduction_test( - all_col, true, true, cudf::make_all_aggregation(), output_dtype); + EXPECT_TRUE(this + ->template reduction_test( + all_col, cudf::make_any_aggregation(), output_dtype) + .first); + EXPECT_TRUE(this + ->template reduction_test( + all_col, cudf::make_all_aggregation(), output_dtype) + .first); cudf::test::dictionary_column_wrapper none_col(v_none.begin(), v_none.end(), valid.begin()); - this->reduction_test( - none_col, false, true, cudf::make_any_aggregation(), output_dtype); - this->reduction_test( - none_col, false, true, cudf::make_all_aggregation(), output_dtype); + EXPECT_FALSE(this + ->template reduction_test( + none_col, cudf::make_any_aggregation(), output_dtype) + .first); + EXPECT_FALSE(this + ->template reduction_test( + none_col, cudf::make_all_aggregation(), output_dtype) + .first); cudf::test::dictionary_column_wrapper some_col(v_some.begin(), v_some.end(), valid.begin()); - this->reduction_test( - some_col, true, true, cudf::make_any_aggregation(), output_dtype); - this->reduction_test( - some_col, false, true, cudf::make_all_aggregation(), output_dtype); + EXPECT_TRUE(this + ->template reduction_test( + some_col, 
cudf::make_any_aggregation(), output_dtype) + .first); + EXPECT_FALSE(this + ->template reduction_test( + some_col, cudf::make_all_aggregation(), output_dtype) + .first); // sliced test - this->reduction_test(cudf::slice(some_col, {0, 3}).front(), - true, - true, - cudf::make_any_aggregation(), - output_dtype); - this->reduction_test(cudf::slice(some_col, {1, 4}).front(), - true, - true, - cudf::make_all_aggregation(), - output_dtype); + EXPECT_TRUE(this + ->template reduction_test(cudf::slice(some_col, {0, 3}).front(), + cudf::make_any_aggregation(), + output_dtype) + .first); + EXPECT_TRUE(this + ->template reduction_test(cudf::slice(some_col, {1, 4}).front(), + cudf::make_all_aggregation(), + output_dtype) + .first); } } @@ -1761,7 +2146,7 @@ template struct DictionaryReductionTest : public ReductionTest { }; -using DictionaryTypes = cudf::test::Types; +using DictionaryTypes = cudf::test::Types; TYPED_TEST_SUITE(DictionaryReductionTest, DictionaryTypes); TYPED_TEST(DictionaryReductionTest, Sum) { @@ -1773,11 +2158,11 @@ TYPED_TEST(DictionaryReductionTest, Sum) cudf::test::dictionary_column_wrapper col(v.begin(), v.end()); T expected_value = std::accumulate(v.begin(), v.end(), T{0}); - this->reduction_test(col, - expected_value, - this->ret_non_arithmetic, - cudf::make_sum_aggregation(), - output_type); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_sum_aggregation(), output_type) + .first, + expected_value); // test with nulls std::vector validity({1, 1, 0, 0, 1, 1, 1, 1}); @@ -1786,11 +2171,11 @@ TYPED_TEST(DictionaryReductionTest, Sum) auto const r = replace_nulls(v, validity, T{0}); return std::accumulate(r.begin(), r.end(), T{0}); }(); - this->reduction_test(col_nulls, - expected_value, - this->ret_non_arithmetic, - cudf::make_sum_aggregation(), - output_type); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_sum_aggregation(), output_type) + .first, + expected_value); } TYPED_TEST(DictionaryReductionTest, Product) @@ -1801,27 +2186,27 @@ TYPED_TEST(DictionaryReductionTest, Product) cudf::data_type output_type{cudf::type_to_id()}; auto calc_prod = [](std::vector const& v) { - return std::accumulate(v.cbegin(), v.cend(), T{1}, [](T acc, T i) { return acc * i; }); + return std::accumulate(v.cbegin(), v.cend(), T{1}, std::multiplies()); }; // test without nulls cudf::test::dictionary_column_wrapper col(v.begin(), v.end()); - this->reduction_test(col, - calc_prod(v), // expected result - this->ret_non_arithmetic, - cudf::make_product_aggregation(), - output_type); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_product_aggregation(), output_type) + .first, + calc_prod(v)); // test with nulls std::vector validity({1, 1, 0, 0, 1, 1, 1}); cudf::test::dictionary_column_wrapper col_nulls(v.begin(), v.end(), validity.begin()); - this->reduction_test(col_nulls, - calc_prod(replace_nulls(v, validity, T{1})), // expected - this->ret_non_arithmetic, - cudf::make_product_aggregation(), - output_type); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_product_aggregation(), output_type) + .first, + calc_prod(replace_nulls(v, validity, T{1}))); } TYPED_TEST(DictionaryReductionTest, SumOfSquare) @@ -1838,21 +2223,21 @@ TYPED_TEST(DictionaryReductionTest, SumOfSquare) // test without nulls cudf::test::dictionary_column_wrapper col(v.begin(), v.end()); - this->reduction_test(col, - calc_reduction(v), - this->ret_non_arithmetic, - cudf::make_sum_of_squares_aggregation(), - output_type); + EXPECT_EQ(this + ->template 
reduction_test( + col, cudf::make_sum_of_squares_aggregation(), output_type) + .first, + calc_reduction(v)); // test with nulls std::vector validity({1, 1, 0, 0, 1, 1, 1, 1}); cudf::test::dictionary_column_wrapper col_nulls(v.begin(), v.end(), validity.begin()); - this->reduction_test(col_nulls, - calc_reduction(replace_nulls(v, validity, T{0})), // expected - this->ret_non_arithmetic, - cudf::make_sum_of_squares_aggregation(), - output_type); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_sum_of_squares_aggregation(), output_type) + .first, + calc_reduction(replace_nulls(v, validity, T{0}))); } TYPED_TEST(DictionaryReductionTest, Mean) @@ -1870,22 +2255,23 @@ TYPED_TEST(DictionaryReductionTest, Mean) // test without nulls cudf::test::dictionary_column_wrapper col(v.begin(), v.end()); - this->reduction_test(col, - calc_mean(v, v.size()), // expected_value, - true, - cudf::make_mean_aggregation(), - output_type); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_mean_aggregation(), output_type) + .first, + calc_mean(v, v.size())); // test with nulls std::vector validity({1, 1, 0, 1, 1, 1, 0, 1}); cudf::test::dictionary_column_wrapper col_nulls(v.begin(), v.end(), validity.begin()); cudf::size_type valid_count = std::count(validity.begin(), validity.end(), true); - this->reduction_test(col_nulls, - calc_mean(replace_nulls(v, validity, T{0}), valid_count), - true, - cudf::make_mean_aggregation(), - output_type); + + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_mean_aggregation(), output_type) + .first, + calc_mean(replace_nulls(v, validity, T{0}), valid_count)); } #ifdef NDEBUG @@ -1918,8 +2304,8 @@ TYPED_TEST(DictionaryReductionTest, DISABLED_VarStd) auto var_agg = cudf::make_variance_aggregation(ddof); auto std_agg = cudf::make_std_aggregation(ddof); - this->reduction_test(col, var, true, var_agg, output_type); - this->reduction_test(col, std, true, std_agg, output_type); + EXPECT_EQ(this->template reduction_test(col, var_agg, output_type).first, var); + EXPECT_EQ(this->template reduction_test(col, std_agg, output_type).first, std); // test with nulls std::vector validity({1, 1, 0, 1, 1, 1, 0, 1}); @@ -1930,8 +2316,10 @@ TYPED_TEST(DictionaryReductionTest, DISABLED_VarStd) double var_nulls = calc_var(replace_nulls(v, validity, T{0}), valid_count, ddof); double std_nulls = std::sqrt(var_nulls); - this->reduction_test(col_nulls, var_nulls, true, var_agg, output_type); - this->reduction_test(col_nulls, std_nulls, true, std_agg, output_type); + EXPECT_EQ(this->template reduction_test(col_nulls, var_agg, output_type).first, + var_nulls); + EXPECT_EQ(this->template reduction_test(col_nulls, std_agg, output_type).first, + std_nulls); } TYPED_TEST(DictionaryReductionTest, NthElement) @@ -1944,30 +2332,32 @@ TYPED_TEST(DictionaryReductionTest, NthElement) // test without nulls cudf::test::dictionary_column_wrapper col(v.begin(), v.end()); cudf::size_type n = 5; - this->reduction_test( - col, - v[n], // expected_value, - true, - cudf::make_nth_element_aggregation(n, cudf::null_policy::INCLUDE), - output_type); + EXPECT_EQ(this + ->template reduction_test(col, + cudf::make_nth_element_aggregation( + n, cudf::null_policy::INCLUDE), + output_type) + .first, + v[n]); // test with nulls std::vector validity({1, 1, 0, 1, 1, 1, 0, 1}); cudf::test::dictionary_column_wrapper col_nulls(v.begin(), v.end(), validity.begin()); - this->reduction_test( - col_nulls, - v[n], // expected_value, - true, - cudf::make_nth_element_aggregation(n, 
cudf::null_policy::INCLUDE), - output_type); - this->reduction_test( - col_nulls, - v[2], // null element - true, - cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE), - output_type, - true); + EXPECT_EQ(this + ->template reduction_test(col_nulls, + cudf::make_nth_element_aggregation( + n, cudf::null_policy::INCLUDE), + output_type) + .first, + v[n]); + EXPECT_FALSE( + this + ->template reduction_test( + col_nulls, + cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE), + output_type) + .second); } TYPED_TEST(DictionaryReductionTest, UniqueCount) @@ -1979,29 +2369,32 @@ TYPED_TEST(DictionaryReductionTest, UniqueCount) // test without nulls cudf::test::dictionary_column_wrapper col(v.begin(), v.end()); - this->reduction_test( - col, - 6, - this->ret_non_arithmetic, - cudf::make_nunique_aggregation(cudf::null_policy::INCLUDE), - output_type); + EXPECT_EQ(this + ->template reduction_test( + col, + cudf::make_nunique_aggregation(cudf::null_policy::INCLUDE), + output_type) + .first, + 6); // test with nulls std::vector validity({1, 1, 1, 0, 1, 1, 1, 1}); cudf::test::dictionary_column_wrapper col_nulls(v.begin(), v.end(), validity.begin()); - this->reduction_test( - col_nulls, - 7, - this->ret_non_arithmetic, - cudf::make_nunique_aggregation(cudf::null_policy::INCLUDE), - output_type); - this->reduction_test( - col_nulls, - 6, - this->ret_non_arithmetic, - cudf::make_nunique_aggregation(cudf::null_policy::EXCLUDE), - output_type); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, + cudf::make_nunique_aggregation(cudf::null_policy::INCLUDE), + output_type) + .first, + 7); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, + cudf::make_nunique_aggregation(cudf::null_policy::EXCLUDE), + output_type) + .first, + 6); } TYPED_TEST(DictionaryReductionTest, Median) @@ -2013,20 +2406,19 @@ TYPED_TEST(DictionaryReductionTest, Median) // test without nulls cudf::test::dictionary_column_wrapper col(v.begin(), v.end()); - this->reduction_test(col, - (std::is_signed_v) ? 3.0 : 13.5, - this->ret_non_arithmetic, - cudf::make_median_aggregation(), - output_type); + EXPECT_EQ( + this->template reduction_test(col, cudf::make_median_aggregation()) + .first, + (std::is_signed_v) ? 3.0 : 13.5); // test with nulls std::vector validity({1, 1, 1, 0, 1, 1, 1, 1}); cudf::test::dictionary_column_wrapper col_nulls(v.begin(), v.end(), validity.begin()); - this->reduction_test(col_nulls, - (std::is_signed_v) ? 0.0 : 13.0, - this->ret_non_arithmetic, - cudf::make_median_aggregation(), - output_type); + EXPECT_EQ(this + ->template reduction_test(col_nulls, + cudf::make_median_aggregation()) + .first, + (std::is_signed_v) ? 0.0 : 13.0); } TYPED_TEST(DictionaryReductionTest, Quantile) @@ -2040,31 +2432,31 @@ TYPED_TEST(DictionaryReductionTest, Quantile) // test without nulls cudf::test::dictionary_column_wrapper col(v.begin(), v.end()); double expected_value = std::is_same_v || std::is_unsigned_v ? 
0.0 : -20.0; - this->reduction_test(col, - expected_value, - this->ret_non_arithmetic, - cudf::make_quantile_aggregation({0.0}, interp), - output_type); - this->reduction_test(col, - 64.0, - this->ret_non_arithmetic, - cudf::make_quantile_aggregation({1.0}, interp), - output_type); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_quantile_aggregation({0.0}, interp)) + .first, + expected_value); + EXPECT_EQ(this + ->template reduction_test( + col, cudf::make_quantile_aggregation({1.0}, interp)) + .first, + 64.0); // test with nulls std::vector validity({1, 1, 1, 0, 1, 1, 1, 1}); cudf::test::dictionary_column_wrapper col_nulls(v.begin(), v.end(), validity.begin()); - this->reduction_test(col_nulls, - expected_value, - this->ret_non_arithmetic, - cudf::make_quantile_aggregation({0}, interp), - output_type); - this->reduction_test(col_nulls, - 45.0, - this->ret_non_arithmetic, - cudf::make_quantile_aggregation({1}, interp), - output_type); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_quantile_aggregation({0}, interp)) + .first, + expected_value); + EXPECT_EQ(this + ->template reduction_test( + col_nulls, cudf::make_quantile_aggregation({1}, interp)) + .first, + 45.0); } struct ListReductionTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 3fa369d6a53..4fd62f9b938 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -48,11 +49,12 @@ TYPED_TEST(SegmentedReductionTest, SumExcludeNulls) // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0, 0} // outputs: {6, 4, 1, XXX, XXX, XXX} // output nullmask: {1, 1, 1, 0, 0, 0} - auto input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, - {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{{6, 4, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; + auto const input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = + fixed_width_column_wrapper{{6, 4, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; auto res = segmented_reduce(input, d_offsets, @@ -60,6 +62,29 @@ TYPED_TEST(SegmentedReductionTest, SumExcludeNulls) data_type{type_to_id()}, null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(3); + auto const init_expect = + fixed_width_column_wrapper{{9, 7, 4, 3, 3, 3}, {1, 1, 1, 1, 1, 1}}; + + res = segmented_reduce(input, + d_offsets, + *make_sum_aggregation(), + data_type{type_to_id()}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + res = segmented_reduce(input, + d_offsets, + *make_sum_aggregation(), + data_type{type_to_id()}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls) @@ -70,11 +95,11 @@ TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls) // nullmask: {1, 1, 1, 0, 1, 1, 1, 0, 0, 0} // outputs: {15, 15, 1, XXX, XXX, XXX} // output nullmask: 
{1, 1, 1, 0, 0, 0} - auto input = fixed_width_column_wrapper{{1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, - {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = + auto const input = fixed_width_column_wrapper{{1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, + {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = fixed_width_column_wrapper{{15, 15, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; auto res = segmented_reduce(input, @@ -83,6 +108,29 @@ TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls) data_type{type_to_id()}, null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(3); + auto const init_expect = + fixed_width_column_wrapper{{45, 45, 3, 3, 3, 3}, {1, 1, 1, 1, 1, 1}}; + + res = segmented_reduce(input, + d_offsets, + *make_product_aggregation(), + data_type{type_to_id()}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + res = segmented_reduce(input, + d_offsets, + *make_product_aggregation(), + data_type{type_to_id()}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls) @@ -93,11 +141,12 @@ TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls) // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0, 0} // outputs: {3, 3, 1, XXX, XXX, XXX} // output nullmask: {1, 1, 1, 0, 0, 0} - auto input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, - {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{{3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; + auto const input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = + fixed_width_column_wrapper{{3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; auto res = segmented_reduce(input, d_offsets, @@ -105,6 +154,29 @@ TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls) data_type{type_to_id()}, null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(2); + auto const init_expect = + fixed_width_column_wrapper{{3, 3, 2, 2, 2, 2}, {1, 1, 1, 1, 1, 1}}; + + res = segmented_reduce(input, + d_offsets, + *make_max_aggregation(), + data_type{type_to_id()}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + res = segmented_reduce(input, + d_offsets, + *make_max_aggregation(), + data_type{type_to_id()}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } TYPED_TEST(SegmentedReductionTest, MinExcludeNulls) @@ -115,11 +187,12 @@ TYPED_TEST(SegmentedReductionTest, MinExcludeNulls) // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0, 0} // outputs: {1, 1, 1, XXX, XXX, XXX} // output nullmask: {1, 1, 1, 0, 0, 0} - auto input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, - {1, 1, 1, 1, 
0, 1, 1, 0, 0, 0}}; - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{{1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; + auto const input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = + fixed_width_column_wrapper{{1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; auto res = segmented_reduce(input, d_offsets, @@ -127,6 +200,29 @@ TYPED_TEST(SegmentedReductionTest, MinExcludeNulls) data_type{type_to_id()}, null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(2); + auto const init_expect = + fixed_width_column_wrapper{{1, 1, 1, 2, 2, 2}, {1, 1, 1, 1, 1, 1}}; + + res = segmented_reduce(input, + d_offsets, + *make_min_aggregation(), + data_type{type_to_id()}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + res = segmented_reduce(input, + d_offsets, + *make_min_aggregation(), + data_type{type_to_id()}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } TYPED_TEST(SegmentedReductionTest, AnyExcludeNulls) @@ -137,12 +233,12 @@ TYPED_TEST(SegmentedReductionTest, AnyExcludeNulls) // nullmask:{1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0} // outputs: {0, 0, 1, 1, XXX, 0, 1, XXX, XXX} // output nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0} - auto input = fixed_width_column_wrapper{ + auto const input = fixed_width_column_wrapper{ {0, 0, 0, 0, XXX, 0, 0, 1, 0, 1, XXX, 0, 0, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{ + auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = fixed_width_column_wrapper{ {false, false, true, true, bool{XXX}, false, true, bool{XXX}, bool{XXX}}, {true, true, true, true, false, true, true, false, false}}; @@ -152,6 +248,30 @@ TYPED_TEST(SegmentedReductionTest, AnyExcludeNulls) data_type{type_id::BOOL8}, null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(1); + auto const init_expect = + fixed_width_column_wrapper{{true, true, true, true, true, true, true, true, true}, + {true, true, true, true, true, true, true, true, true}}; + + res = segmented_reduce(input, + d_offsets, + *make_any_aggregation(), + data_type{type_id::BOOL8}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + res = segmented_reduce(input, + d_offsets, + *make_any_aggregation(), + data_type{type_id::BOOL8}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } TYPED_TEST(SegmentedReductionTest, AllExcludeNulls) @@ -162,12 +282,12 @@ TYPED_TEST(SegmentedReductionTest, AllExcludeNulls) // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1} // outputs: {true, true, XXX, true, XXX, XXX, false, false, false} // output nullmask: 
{1, 1, 0, 1, 0, 0, 1, 1, 1} - auto input = fixed_width_column_wrapper{ + auto const input = fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX, 1, 0, 3, 1, XXX, 0, 0}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}}; - auto offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{ + auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = fixed_width_column_wrapper{ {true, true, bool{XXX}, true, bool{XXX}, bool{XXX}, false, false, false}, {true, true, false, true, false, false, true, true, true}}; @@ -178,6 +298,30 @@ TYPED_TEST(SegmentedReductionTest, AllExcludeNulls) null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(1); + auto const init_expect = + fixed_width_column_wrapper{{true, true, true, true, true, true, false, false, false}, + {true, true, true, true, true, true, true, true, true}}; + + res = segmented_reduce(input, + d_offsets, + *make_all_aggregation(), + data_type{type_id::BOOL8}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + res = segmented_reduce(input, + d_offsets, + *make_all_aggregation(), + data_type{type_id::BOOL8}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } TYPED_TEST(SegmentedReductionTest, SumIncludeNulls) @@ -188,11 +332,11 @@ TYPED_TEST(SegmentedReductionTest, SumIncludeNulls) // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0, 0} // outputs: {6, XXX, 1, XXX, XXX, XXX} // output nullmask: {1, 0, 1, 0, 0, 0} - auto input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, - {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = + auto const input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = fixed_width_column_wrapper{{6, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; auto res = segmented_reduce(input, @@ -201,6 +345,32 @@ TYPED_TEST(SegmentedReductionTest, SumIncludeNulls) data_type{type_to_id()}, null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(3); + auto const init_expect = + fixed_width_column_wrapper{{9, XXX, 4, XXX, XXX, 3}, {1, 0, 1, 0, 0, 1}}; + + res = segmented_reduce(input, + d_offsets, + *make_sum_aggregation(), + data_type{type_to_id()}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + auto null_init_expect = + fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0, 0}}; + + res = segmented_reduce(input, + d_offsets, + *make_sum_aggregation(), + data_type{type_to_id()}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, null_init_expect); } TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls) @@ -211,11 +381,11 @@ TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls) // nullmask: {1, 1, 1, 0, 1, 1, 1, 0, 0, 
0} // outputs: {15, XXX, 1, XXX, XXX, XXX} // output nullmask: {1, 0, 1, 0, 0, 0} - auto input = fixed_width_column_wrapper{{1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, - {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = + auto const input = fixed_width_column_wrapper{{1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, + {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = fixed_width_column_wrapper{{15, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; auto res = segmented_reduce(input, @@ -224,6 +394,32 @@ TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls) data_type{type_to_id()}, null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(3); + auto const init_expect = + fixed_width_column_wrapper{{45, XXX, 3, XXX, XXX, 3}, {1, 0, 1, 0, 0, 1}}; + + res = segmented_reduce(input, + d_offsets, + *make_product_aggregation(), + data_type{type_to_id()}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + auto null_init_expect = + fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0, 0}}; + + res = segmented_reduce(input, + d_offsets, + *make_product_aggregation(), + data_type{type_to_id()}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, null_init_expect); } TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls) @@ -234,11 +430,11 @@ TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls) // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0, 0} // outputs: {3, XXX, 1, XXX, XXX, XXX} // output nullmask: {1, 0, 1, 0, 0, 0} - auto input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, - {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = + auto const input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = fixed_width_column_wrapper{{3, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; auto res = segmented_reduce(input, @@ -247,6 +443,32 @@ TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls) data_type{type_to_id()}, null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(2); + auto const init_expect = + fixed_width_column_wrapper{{3, XXX, 2, XXX, XXX, 2}, {1, 0, 1, 0, 0, 1}}; + + res = segmented_reduce(input, + d_offsets, + *make_max_aggregation(), + data_type{type_to_id()}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + auto null_init_expect = + fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0, 0}}; + + res = segmented_reduce(input, + d_offsets, + *make_max_aggregation(), + data_type{type_to_id()}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, null_init_expect); } TYPED_TEST(SegmentedReductionTest, MinIncludeNulls) @@ -257,11 +479,11 @@ TYPED_TEST(SegmentedReductionTest, 
MinIncludeNulls) // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0} // outputs: {1, XXX, 1, XXX, XXX, XXX} // output nullmask: {1, 0, 1, 0, 0, 0} - auto input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, - {1, 1, 1, 1, 0, 1, 1, 0, 0}}; - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = + auto const input = fixed_width_column_wrapper{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0}}; + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = fixed_width_column_wrapper{{1, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; auto res = segmented_reduce(input, @@ -270,6 +492,32 @@ TYPED_TEST(SegmentedReductionTest, MinIncludeNulls) data_type{type_to_id()}, null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(2); + auto const init_expect = + fixed_width_column_wrapper{{1, XXX, 1, XXX, XXX, 2}, {1, 0, 1, 0, 0, 1}}; + + res = segmented_reduce(input, + d_offsets, + *make_min_aggregation(), + data_type{type_to_id()}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + auto null_init_expect = + fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0, 0}}; + + res = segmented_reduce(input, + d_offsets, + *make_min_aggregation(), + data_type{type_to_id()}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, null_init_expect); } TYPED_TEST(SegmentedReductionTest, AnyIncludeNulls) @@ -280,12 +528,12 @@ TYPED_TEST(SegmentedReductionTest, AnyIncludeNulls) // nullmask:{1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0} // outputs: {0, XXX, 1, XXX, XXX, 0, 1, XXX, XXX} // output nullmask: {1, 0, 1, 0, 0, 1, 1, 0, 0} - auto input = fixed_width_column_wrapper{ + auto const input = fixed_width_column_wrapper{ {0, 0, 0, 0, XXX, 0, 0, 1, 0, 1, XXX, 0, 0, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; - auto offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{ + auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = fixed_width_column_wrapper{ {false, bool{XXX}, true, bool{XXX}, bool{XXX}, false, true, bool{XXX}, bool{XXX}}, {true, false, true, false, false, true, true, false, false}}; @@ -295,6 +543,42 @@ TYPED_TEST(SegmentedReductionTest, AnyIncludeNulls) data_type{type_id::BOOL8}, null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(1); + auto const init_expect = fixed_width_column_wrapper{ + {true, bool{XXX}, true, bool{XXX}, true, true, true, bool{XXX}, bool{XXX}}, + {true, false, true, false, true, true, true, false, false}}; + + res = segmented_reduce(input, + d_offsets, + *make_any_aggregation(), + data_type{type_id::BOOL8}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + auto null_init_expect = fixed_width_column_wrapper{ + {bool{XXX}, + bool{XXX}, + bool{XXX}, + bool{XXX}, + bool{XXX}, + bool{XXX}, + bool{XXX}, + bool{XXX}, + 
bool{XXX}}, + {false, false, false, false, false, false, false, false, false}}; + + res = segmented_reduce(input, + d_offsets, + *make_any_aggregation(), + data_type{type_id::BOOL8}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, null_init_expect); } TYPED_TEST(SegmentedReductionTest, AllIncludeNulls) @@ -305,12 +589,12 @@ TYPED_TEST(SegmentedReductionTest, AllIncludeNulls) // nullmask: {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1} // outputs: {true, XXX, XXX, true, XXX, XXX, false, XXX, false} // output nullmask: {1, 0, 0, 1, 0, 0, 1, 0, 1} - auto input = fixed_width_column_wrapper{ + auto const input = fixed_width_column_wrapper{ {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX, 1, 0, 3, 1, XXX, 0, 0}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}}; - auto offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{ + auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = fixed_width_column_wrapper{ {true, bool{XXX}, bool{XXX}, true, bool{XXX}, bool{XXX}, false, bool{XXX}, false}, {true, false, false, true, false, false, true, false, true}}; @@ -321,6 +605,42 @@ TYPED_TEST(SegmentedReductionTest, AllIncludeNulls) null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(1); + auto const init_expect = fixed_width_column_wrapper{ + {true, bool{XXX}, true, true, bool{XXX}, bool{XXX}, false, bool{XXX}, false}, + {true, false, true, true, false, false, true, false, true}}; + + res = segmented_reduce(input, + d_offsets, + *make_all_aggregation(), + data_type{type_id::BOOL8}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + auto null_init_expect = fixed_width_column_wrapper{ + {bool{XXX}, + bool{XXX}, + bool{XXX}, + bool{XXX}, + bool{XXX}, + bool{XXX}, + bool{XXX}, + bool{XXX}, + bool{XXX}}, + {false, false, false, false, false, false, false, false, false}}; + + res = segmented_reduce(input, + d_offsets, + *make_all_aggregation(), + data_type{type_id::BOOL8}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, null_init_expect); } TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction) @@ -333,11 +653,11 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction) // outputs: {1, 5, 4} // output nullmask: {1, 1, 1} - auto input = fixed_width_column_wrapper{{1, 2, 3, 4, 5, 6, 7}, - {true, true, true, true, true, true, true}}; - auto offsets = std::vector{0, 1, 3, 4}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{{1, 5, 4}, {true, true, true}}; + auto const input = fixed_width_column_wrapper{ + {1, 2, 3, 4, 5, 6, 7}, {true, true, true, true, true, true, true}}; + auto const offsets = std::vector{0, 1, 3, 4}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = fixed_width_column_wrapper{{1, 5, 4}, {true, true, true}}; auto res = segmented_reduce(input, d_offsets, @@ -346,6 +666,31 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction) null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(3); + auto const init_expect = 
fixed_width_column_wrapper{{4, 8, 7}, {true, true, true}}; + + res = segmented_reduce(input, + d_offsets, + *make_sum_aggregation(), + data_type{type_id::INT32}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + auto null_init_expect = + fixed_width_column_wrapper{{XXX, XXX, XXX}, {false, false, false}}; + + res = segmented_reduce(input, + d_offsets, + *make_sum_aggregation(), + data_type{type_id::INT32}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, null_init_expect); } TEST_F(SegmentedReductionTestUntyped, NonNullableInput) @@ -358,10 +703,11 @@ TEST_F(SegmentedReductionTestUntyped, NonNullableInput) // outputs: {1, 5, 4} // output nullmask: {1, 1, 1} - auto input = fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7}; - auto offsets = std::vector{0, 1, 1, 3, 7}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{{1, XXX, 5, 22}, {true, false, true, true}}; + auto const input = fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7}; + auto const offsets = std::vector{0, 1, 1, 3, 7}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = + fixed_width_column_wrapper{{1, XXX, 5, 22}, {true, false, true, true}}; auto res = segmented_reduce(input, d_offsets, @@ -370,14 +716,40 @@ TEST_F(SegmentedReductionTestUntyped, NonNullableInput) null_policy::INCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(3); + auto const init_expect = + fixed_width_column_wrapper{{4, 3, 8, 25}, {true, true, true, true}}; + + res = segmented_reduce(input, + d_offsets, + *make_sum_aggregation(), + data_type{type_id::INT32}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, init_expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + auto null_init_expect = + fixed_width_column_wrapper{{XXX, XXX, XXX, XXX}, {false, false, false, false}}; + + res = segmented_reduce(input, + d_offsets, + *make_sum_aggregation(), + data_type{type_id::INT32}, + null_policy::INCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, null_init_expect); } TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) { - auto input = fixed_width_column_wrapper{}; - auto offsets = std::vector{0}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{}; + auto const input = fixed_width_column_wrapper{}; + auto const offsets = std::vector{0}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = fixed_width_column_wrapper{}; auto res = segmented_reduce(input, d_offsets, @@ -385,14 +757,35 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) data_type{type_to_id()}, null_policy::EXCLUDE); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with initial value + auto const init_scalar = cudf::make_fixed_width_scalar(3); + res = segmented_reduce(input, + d_offsets, + *make_sum_aggregation(), + data_type{type_to_id()}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); + + // Test with null initial value + init_scalar->set_valid_async(false); + res = segmented_reduce(input, + d_offsets, + *make_sum_aggregation(), + data_type{type_to_id()}, + null_policy::EXCLUDE, + *init_scalar); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect); } TEST_F(SegmentedReductionTestUntyped, 
EmptyInputWithOffsets) { - auto input = fixed_width_column_wrapper{}; - auto offsets = std::vector{0, 0, 0, 0, 0, 0}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}}; + auto const input = fixed_width_column_wrapper{}; + auto const offsets = std::vector{0, 0, 0, 0, 0, 0}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = + fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}}; auto aggregates = std::vector{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}}; + auto const expect_bool = + fixed_width_column_wrapper{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}}; auto result = segmented_reduce(input, d_offsets, @@ -443,13 +837,13 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxIncludeNulls) using RepType = device_storage_type_t; for (auto scale : {-2, 0, 5}) { - auto input = fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, - {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, - numeric::scale_type{scale}); - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); - auto expect = fixed_point_column_wrapper( + auto const input = fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, + numeric::scale_type{scale}); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto out_type = column_view(input).type(); + auto const expect = fixed_point_column_wrapper( {3, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}, numeric::scale_type{scale}); auto res = segmented_reduce(input, @@ -475,13 +869,13 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxExcludeNulls) using RepType = device_storage_type_t; for (auto scale : {-2, 0, 5}) { - auto input = fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, - {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, - numeric::scale_type{scale}); - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); - auto expect = fixed_point_column_wrapper( + auto const input = fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, + numeric::scale_type{scale}); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto out_type = column_view(input).type(); + auto const expect = fixed_point_column_wrapper( {3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); auto res = segmented_reduce(input, @@ -507,13 +901,13 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinIncludeNulls) using RepType = device_storage_type_t; for (auto scale : {-2, 0, 5}) { - auto input = fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, - {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, - numeric::scale_type{scale}); - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); - auto expect = fixed_point_column_wrapper( + auto const input = fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, + numeric::scale_type{scale}); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto out_type = column_view(input).type(); + auto const expect = fixed_point_column_wrapper( {1, XXX, 1, 
XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}, numeric::scale_type{scale}); auto res = segmented_reduce(input, @@ -539,13 +933,13 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinExcludeNulls) using RepType = device_storage_type_t; for (auto scale : {-2, 0, 5}) { - auto input = fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, - {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, - numeric::scale_type{scale}); - auto offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; - auto d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); - auto expect = fixed_point_column_wrapper( + auto const input = fixed_point_column_wrapper({1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, + {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}, + numeric::scale_type{scale}); + auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; + auto const d_offsets = thrust::device_vector(offsets); + auto out_type = column_view(input).type(); + auto const expect = fixed_point_column_wrapper( {1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale}); auto res = segmented_reduce(input, @@ -570,11 +964,12 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxNonNullableInput) using RepType = device_storage_type_t; for (auto scale : {-2, 0, 5}) { - auto input = fixed_point_column_wrapper({1, 2, 3, 1}, numeric::scale_type{scale}); - auto offsets = std::vector{0, 3, 4, 4}; - auto d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); - auto expect = + auto const input = + fixed_point_column_wrapper({1, 2, 3, 1}, numeric::scale_type{scale}); + auto const offsets = std::vector{0, 3, 4, 4}; + auto const d_offsets = thrust::device_vector(offsets); + auto out_type = column_view(input).type(); + auto const expect = fixed_point_column_wrapper({3, 1, XXX}, {1, 1, 0}, numeric::scale_type{scale}); auto include_null_res = segmented_reduce(input, @@ -606,11 +1001,12 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinNonNullableInput) using RepType = device_storage_type_t; for (auto scale : {-2, 0, 5}) { - auto input = fixed_point_column_wrapper({1, 2, 3, 1}, numeric::scale_type{scale}); - auto offsets = std::vector{0, 3, 4, 4}; - auto d_offsets = thrust::device_vector(offsets); - auto out_type = column_view(input).type(); - auto expect = + auto const input = + fixed_point_column_wrapper({1, 2, 3, 1}, numeric::scale_type{scale}); + auto const offsets = std::vector{0, 3, 4, 4}; + auto const d_offsets = thrust::device_vector(offsets); + auto out_type = column_view(input).type(); + auto const expect = fixed_point_column_wrapper({1, 1, XXX}, {1, 1, 0}, numeric::scale_type{scale}); auto include_null_res = segmented_reduce(input, @@ -753,10 +1149,10 @@ TEST_F(SegmentedReductionStringTest, MinExcludeNulls) TEST_F(SegmentedReductionStringTest, EmptyInputWithOffsets) { - auto input = strings_column_wrapper{}; - auto offsets = std::vector{0, 0, 0, 0}; - auto d_offsets = thrust::device_vector(offsets); - auto expect = strings_column_wrapper({XXX, XXX, XXX}, {0, 0, 0}); + auto const input = strings_column_wrapper{}; + auto const offsets = std::vector{0, 0, 0, 0}; + auto const d_offsets = thrust::device_vector(offsets); + auto const expect = strings_column_wrapper({XXX, XXX, XXX}, {0, 0, 0}); auto result = segmented_reduce(input, d_offsets,
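
Editor's note for the segmented-reduction hunks above: the expected columns encode how a (possibly null) initial value interacts with `null_policy::EXCLUDE` and `null_policy::INCLUDE`. For input {1, 2, 3, 1, N, 3, 1, N, N, N} with offsets {0, 3, 6, 7, 8, 10, 10}, EXCLUDE without an initial value yields {6, 4, 1, null, null, null}; a valid initial value of 3 yields {9, 7, 4, 3, 3, 3} with every row valid; under INCLUDE the same initial value yields {9, null, 4, null, null, 3}; and a null initial value under INCLUDE makes every output row null, while under EXCLUDE it is ignored. The following host-side sketch models exactly those expected values for sum; the `segmented_sum` helper, its `null_policy` enum, and the nested-optional encoding of "no initial value" versus "null initial value" are illustrative assumptions, not libcudf's types.

```cpp
// Host-side model (not the libcudf kernel) of segmented sum with an optional, possibly null,
// initial value. Outer nullopt = no initial value supplied; inner nullopt = a null initial value.
#include <cassert>
#include <cstddef>
#include <optional>
#include <vector>

enum class null_policy { EXCLUDE, INCLUDE };

std::vector<std::optional<int>> segmented_sum(std::vector<std::optional<int>> const& in,
                                              std::vector<std::size_t> const& offsets,
                                              null_policy policy,
                                              std::optional<std::optional<int>> init = {})
{
  bool const has_valid_init = init.has_value() && init->has_value();
  bool const has_null_init  = init.has_value() && !init->has_value();

  std::vector<std::optional<int>> out;
  for (std::size_t s = 0; s + 1 < offsets.size(); ++s) {
    int acc         = has_valid_init ? **init : 0;
    int valid_count = has_valid_init ? 1 : 0;
    bool saw_null   = has_null_init;  // a null initial value behaves like a null element
    for (std::size_t i = offsets[s]; i < offsets[s + 1]; ++i) {
      if (!in[i]) {
        saw_null = true;
        continue;
      }
      acc += *in[i];
      ++valid_count;
    }
    // EXCLUDE: valid if anything valid (initial value included) contributed.
    // INCLUDE: valid only if something contributed and no null was encountered.
    bool const valid = (policy == null_policy::EXCLUDE) ? valid_count > 0
                                                        : (valid_count > 0 && !saw_null);
    out.push_back(valid ? std::optional<int>{acc} : std::optional<int>{});
  }
  return out;
}

int main()
{
  std::vector<std::optional<int>> const in{
    1, 2, 3, 1, std::nullopt, 3, 1, std::nullopt, std::nullopt, std::nullopt};
  std::vector<std::size_t> const offsets{0, 3, 6, 7, 8, 10, 10};

  // EXCLUDE, no initial value: {6, 4, 1, null, null, null}
  auto r = segmented_sum(in, offsets, null_policy::EXCLUDE);
  assert(r[0] == 6 && r[1] == 4 && r[2] == 1 && !r[3] && !r[4] && !r[5]);

  // EXCLUDE, initial value 3: {9, 7, 4, 3, 3, 3}, all valid
  r = segmented_sum(in, offsets, null_policy::EXCLUDE, std::optional<int>{3});
  assert(r[0] == 9 && r[1] == 7 && r[2] == 4 && r[3] == 3 && r[4] == 3 && r[5] == 3);

  // INCLUDE, initial value 3: {9, null, 4, null, null, 3}
  r = segmented_sum(in, offsets, null_policy::INCLUDE, std::optional<int>{3});
  assert(r[0] == 9 && !r[1] && r[2] == 4 && !r[3] && !r[4] && r[5] == 3);

  // INCLUDE, null initial value: every output row is null
  r = segmented_sum(in, offsets, null_policy::INCLUDE, std::optional<int>{std::nullopt});
  for (auto const& v : r) { assert(!v); }

  return 0;
}
```

The same validity rules explain the any/all and min/max segmented cases above; only the binary operator changes.
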