diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
index adae9b76c5b..b8edf5ab4cb 100644
--- a/cpp/include/cudf/detail/gather.cuh
+++ b/cpp/include/cudf/detail/gather.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -176,11 +176,9 @@ struct column_gatherer_impl {
     auto destination_column =
       cudf::detail::allocate_like(source_column, num_rows, policy, stream, mr);
 
-    using Type = device_storage_type_t<Element>;
-
-    gather_helper(source_column.data<Type>(),
+    gather_helper(source_column.data<Element>(),
                   source_column.size(),
-                  destination_column->mutable_view().template begin<Type>(),
+                  destination_column->mutable_view().template begin<Element>(),
                   gather_map_begin,
                   gather_map_end,
                   nullify_out_of_bounds,
@@ -633,14 +631,14 @@ std::unique_ptr<table> gather(
   for (auto const& source_column : source_table) {
     // The data gather for n columns will be put on the first n streams
     destination_columns.push_back(
-      cudf::type_dispatcher(source_column.type(),
-                            column_gatherer{},
-                            source_column,
-                            gather_map_begin,
-                            gather_map_end,
-                            bounds_policy == out_of_bounds_policy::NULLIFY,
-                            stream,
-                            mr));
+      cudf::type_dispatcher<dispatch_storage_type>(source_column.type(),
+                                                   column_gatherer{},
+                                                   source_column,
+                                                   gather_map_begin,
+                                                   gather_map_end,
+                                                   bounds_policy == out_of_bounds_policy::NULLIFY,
+                                                   stream,
+                                                   mr));
   }
 
   gather_bitmask_op const op = bounds_policy == out_of_bounds_policy::NULLIFY
diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh
index 7d5c3f4d2ee..2cb1cbffc68 100644
--- a/cpp/include/cudf/detail/scatter.cuh
+++ b/cpp/include/cudf/detail/scatter.cuh
@@ -91,15 +91,13 @@ struct column_scatterer_impl {
     auto result      = std::make_unique<column>(target, stream, mr);
     auto result_view = result->mutable_view();
 
-    using Type = device_storage_type_t<Element>;
-
     // NOTE use source.begin + scatter rows rather than source.end in case the
     // scatter map is smaller than the number of source rows
     thrust::scatter(rmm::exec_policy(stream),
-                    source.begin<Type>(),
-                    source.begin<Type>() + cudf::distance(scatter_map_begin, scatter_map_end),
+                    source.begin<Element>(),
+                    source.begin<Element>() + cudf::distance(scatter_map_begin, scatter_map_end),
                     scatter_map_begin,
-                    result_view.begin<Type>());
+                    result_view.begin<Element>());
 
     return result;
   }
@@ -285,14 +283,14 @@ std::unique_ptr<table> scatter(
                  target.begin(),
                  result.begin(),
                  [=](auto const& source_col, auto const& target_col) {
-                   return type_dispatcher(source_col.type(),
-                                          scatter_functor,
-                                          source_col,
-                                          updated_scatter_map_begin,
-                                          updated_scatter_map_end,
-                                          target_col,
-                                          stream,
-                                          mr);
+                   return type_dispatcher<dispatch_storage_type>(source_col.type(),
+                                                                 scatter_functor,
+                                                                 source_col,
+                                                                 updated_scatter_map_begin,
+                                                                 updated_scatter_map_end,
+                                                                 target_col,
+                                                                 stream,
+                                                                 mr);
                  });
 
   auto gather_map = scatter_to_gather(
diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp
index 271d722396e..26c51d0435a 100644
--- a/cpp/include/cudf/utilities/type_dispatcher.hpp
+++ b/cpp/include/cudf/utilities/type_dispatcher.hpp
@@ -190,6 +190,19 @@ CUDF_TYPE_MAPPING(numeric::decimal32, type_id::DECIMAL32);
 CUDF_TYPE_MAPPING(numeric::decimal64, type_id::DECIMAL64);
 CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT);
 
+/**
+ * @brief Use this specialization on `type_dispatcher` whenever you only need to operate on the
+ * underlying stored type.
+ *
+ * For example, `cudf::sort` in sort.cu uses `cudf::type_dispatcher<dispatch_storage_type>(...)`.
+ * `cudf::gather` in gather.cuh also uses `cudf::type_dispatcher<dispatch_storage_type>(...)`.
+ * However, reductions needs both `data_type` and underlying type, so cannot use this.
+ */
+template <cudf::type_id Id>
+struct dispatch_storage_type {
+  using type = device_storage_type_t<typename id_to_type_impl<Id>::type>;
+};
+
 template <typename T>
 struct type_to_scalar_type_impl {
   using ScalarType = cudf::scalar;
diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu
index 57932fb63e1..8cf9db465f3 100644
--- a/cpp/src/copying/concatenate.cu
+++ b/cpp/src/copying/concatenate.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -300,13 +300,11 @@ struct concatenate_dispatch {
     bool const has_nulls =
       std::any_of(views.cbegin(), views.cend(), [](auto const& col) { return col.has_nulls(); });
 
-    using Type = device_storage_type_t<T>;
-
     // Use a heuristic to guess when the fused kernel will be faster
     if (use_fused_kernel_heuristic(has_nulls, views.size())) {
-      return fused_concatenate<Type>(views, has_nulls, stream, mr);
+      return fused_concatenate<T>(views, has_nulls, stream, mr);
     } else {
-      return for_each_concatenate<Type>(views, has_nulls, stream, mr);
+      return for_each_concatenate<T>(views, has_nulls, stream, mr);
     }
   }
 };
@@ -409,8 +407,8 @@ std::unique_ptr<column> concatenate(std::vector<column_view> const& columns_to_c
     return empty_like(columns_to_concat.front());
   }
 
-  return type_dispatcher(columns_to_concat.front().type(),
-                         concatenate_dispatch{columns_to_concat, stream, mr});
+  return type_dispatcher<dispatch_storage_type>(
+    columns_to_concat.front().type(), concatenate_dispatch{columns_to_concat, stream, mr});
 }
 
 std::unique_ptr<table> concatenate(std::vector<table_view> const& tables_to_concat,
diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu
index a87eedf8412..422fc0821a0 100644
--- a/cpp/src/copying/copy.cu
+++ b/cpp/src/copying/copy.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,28 +41,26 @@ struct copy_if_else_functor_impl {
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
   {
-    using Type = device_storage_type_t<T>;
-
     if (left_nullable) {
       if (right_nullable) {
-        auto lhs_iter = cudf::detail::make_pair_iterator<Type, true>(lhs);
-        auto rhs_iter = cudf::detail::make_pair_iterator<Type, true>(rhs);
+        auto lhs_iter = cudf::detail::make_pair_iterator<T, true>(lhs);
+        auto rhs_iter = cudf::detail::make_pair_iterator<T, true>(rhs);
         return detail::copy_if_else(
           true, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr);
       }
-      auto lhs_iter = cudf::detail::make_pair_iterator<Type, true>(lhs);
-      auto rhs_iter = cudf::detail::make_pair_iterator<Type, false>(rhs);
+      auto lhs_iter = cudf::detail::make_pair_iterator<T, true>(lhs);
+      auto rhs_iter = cudf::detail::make_pair_iterator<T, false>(rhs);
       return detail::copy_if_else(
         true, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr);
     }
     if (right_nullable) {
-      auto lhs_iter = cudf::detail::make_pair_iterator<Type, false>(lhs);
-      auto rhs_iter = cudf::detail::make_pair_iterator<Type, true>(rhs);
+      auto lhs_iter = cudf::detail::make_pair_iterator<T, false>(lhs);
+      auto rhs_iter = cudf::detail::make_pair_iterator<T, true>(rhs);
       return detail::copy_if_else(
         true, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr);
     }
-    auto lhs_iter = cudf::detail::make_pair_iterator<Type, false>(lhs);
-    auto rhs_iter = cudf::detail::make_pair_iterator<Type, false>(rhs);
+    auto lhs_iter = cudf::detail::make_pair_iterator<T, false>(lhs);
+    auto rhs_iter = cudf::detail::make_pair_iterator<T, false>(rhs);
     return detail::copy_if_else(
       false, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr);
   }
@@ -182,30 +180,30 @@ std::unique_ptr<column> copy_if_else(Left const& lhs,
     auto filter = [bool_mask_device] __device__(cudf::size_type i) {
       return bool_mask_device.is_valid_nocheck(i) and bool_mask_device.element<bool>(i);
     };
-    return cudf::type_dispatcher(lhs.type(),
-                                 copy_if_else_functor{},
-                                 lhs,
-                                 rhs,
-                                 boolean_mask.size(),
-                                 left_nullable,
-                                 right_nullable,
-                                 filter,
-                                 stream,
-                                 mr);
+    return cudf::type_dispatcher<dispatch_storage_type>(lhs.type(),
+                                                        copy_if_else_functor{},
+                                                        lhs,
+                                                        rhs,
+                                                        boolean_mask.size(),
+                                                        left_nullable,
+                                                        right_nullable,
+                                                        filter,
+                                                        stream,
+                                                        mr);
   } else {
     auto filter = [bool_mask_device] __device__(cudf::size_type i) {
       return bool_mask_device.element<bool>(i);
     };
-    return cudf::type_dispatcher(lhs.type(),
-                                 copy_if_else_functor{},
-                                 lhs,
-                                 rhs,
-                                 boolean_mask.size(),
-                                 left_nullable,
-                                 right_nullable,
-                                 filter,
-                                 stream,
-                                 mr);
+    return cudf::type_dispatcher<dispatch_storage_type>(lhs.type(),
+                                                        copy_if_else_functor{},
+                                                        lhs,
+                                                        rhs,
+                                                        boolean_mask.size(),
+                                                        left_nullable,
+                                                        right_nullable,
+                                                        filter,
+                                                        stream,
+                                                        mr);
   }
 }
 
diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu
index 974d03b0fc0..31a8796f950 100644
--- a/cpp/src/copying/copy_range.cu
+++ b/cpp/src/copying/copy_range.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -108,9 +108,8 @@ struct out_of_place_copy_range_dispatch {
     }
 
     if (source_end != source_begin) {  // otherwise no-op
-      using Type    = cudf::device_storage_type_t<T>;
       auto ret_view = p_ret->mutable_view();
-      in_place_copy_range<Type>(source, ret_view, source_begin, source_end, target_begin, stream);
+      in_place_copy_range<T>(source, ret_view, source_begin, source_end, target_begin, stream);
     }
 
     return p_ret;
@@ -261,13 +260,14 @@ std::unique_ptr<column> copy_range(column_view const& source,
                "Range is out of bounds.");
   CUDF_EXPECTS(target.type() == source.type(), "Data type mismatch.");
 
-  return cudf::type_dispatcher(target.type(),
-                               out_of_place_copy_range_dispatch{source, target},
-                               source_begin,
-                               source_end,
-                               target_begin,
-                               stream,
-                               mr);
+  return cudf::type_dispatcher<dispatch_storage_type>(
+    target.type(),
+    out_of_place_copy_range_dispatch{source, target},
+    source_begin,
+    source_end,
+    target_begin,
+    stream,
+    mr);
 }
 
 }  // namespace detail
diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu
index 552b6c22a29..cedac96cee6 100644
--- a/cpp/src/copying/scatter.cu
+++ b/cpp/src/copying/scatter.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -106,10 +106,8 @@ struct column_scalar_scatterer_impl {
     auto result      = std::make_unique<column>(target, stream, mr);
     auto result_view = result->mutable_view();
 
-    using Type = device_storage_type_t<Element>;
-
     // Use permutation iterator with constant index to dereference scalar data
-    auto scalar_impl = static_cast<const scalar_type_t<Type>*>(&source.get());
+    auto scalar_impl = static_cast<const scalar_type_t<Element>*>(&source.get());
     auto scalar_iter =
       thrust::make_permutation_iterator(scalar_impl->data(), thrust::make_constant_iterator(0));
 
@@ -117,7 +115,7 @@ struct column_scalar_scatterer_impl {
                     scalar_iter,
                     scalar_iter + scatter_rows,
                     scatter_iter,
-                    result_view.begin<Type>());
+                    result_view.begin<Element>());
 
     return result;
   }
@@ -300,14 +298,14 @@ std::unique_ptr<table> scatter(std::vector<std::reference_wrapper<const scalar>>
                  target.begin(),
                  result.begin(),
                  [=](auto const& source_scalar, auto const& target_col) {
-                   return type_dispatcher(target_col.type(),
-                                          scatter_functor,
-                                          source_scalar,
-                                          scatter_iter,
-                                          scatter_rows,
-                                          target_col,
-                                          stream,
-                                          mr);
+                   return type_dispatcher<dispatch_storage_type>(target_col.type(),
+                                                                 scatter_functor,
+                                                                 source_scalar,
+                                                                 scatter_iter,
+                                                                 scatter_rows,
+                                                                 target_col,
+                                                                 stream,
+                                                                 mr);
                  });
 
   scatter_scalar_bitmask(source, scatter_iter, scatter_rows, result, stream, mr);
diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu
index 2b7a426bb73..cf85bf51e80 100644
--- a/cpp/src/copying/shift.cu
+++ b/cpp/src/copying/shift.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -60,8 +60,7 @@ struct shift_functor {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr)
   {
-    using Type       = device_storage_type_t<T>;
-    using ScalarType = cudf::scalar_type_t<Type>;
+    using ScalarType = cudf::scalar_type_t<T>;
     auto& scalar     = static_cast<ScalarType const&>(fill_value);
 
     auto device_input = column_device_view::create(input);
@@ -88,7 +87,7 @@ struct shift_functor {
       output->set_null_count(std::get<1>(mask_pair));
     }
 
-    auto data = device_output->data<Type>();
+    auto data = device_output->data<T>();
 
     // avoid assigning elements we know to be invalid.
     if (not scalar.is_valid()) {
@@ -103,7 +102,7 @@ struct shift_functor {
     auto func_value =
       [size, offset, fill = scalar.data(), input = *device_input] __device__(size_type idx) {
         auto src_idx = idx - offset;
-        return out_of_bounds(size, src_idx) ? *fill : input.element<Type>(src_idx);
+        return out_of_bounds(size, src_idx) ? *fill : input.element<T>(src_idx);
       };
 
     thrust::transform(rmm::exec_policy(stream), index_begin, index_end, data, func_value);
@@ -128,7 +127,8 @@ std::unique_ptr<column> shift(column_view const& input,
 
   if (input.is_empty()) { return empty_like(input); }
 
-  return type_dispatcher(input.type(), shift_functor{}, input, offset, fill_value, stream, mr);
+  return type_dispatcher<dispatch_storage_type>(
+    input.type(), shift_functor{}, input, offset, fill_value, stream, mr);
 }
 
 }  // namespace detail
diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu
index 56c2d32a71f..0aaf10707f4 100644
--- a/cpp/src/dictionary/search.cu
+++ b/cpp/src/dictionary/search.cu
@@ -74,18 +74,17 @@ struct find_index_fn {
     CUDF_EXPECTS(input.keys().type() == key.type(),
                  "search key type must match dictionary keys type");
 
-    using Type       = device_storage_type_t<Element>;
     using ScalarType = cudf::scalar_type_t<Element>;
     auto find_key    = static_cast<ScalarType const&>(key).value(stream);
     auto keys_view   = column_device_view::create(input.keys(), stream);
     auto iter = thrust::equal_range(thrust::device,  // segfaults: rmm::exec_policy(stream) and
                                                      // thrust::cuda::par.on(stream)
-                                    keys_view->begin<Type>(),
-                                    keys_view->end<Type>(),
+                                    keys_view->begin<Element>(),
+                                    keys_view->end<Element>(),
                                     find_key);
     return type_dispatcher(input.indices().type(),
                            dispatch_scalar_index{},
-                           thrust::distance(keys_view->begin<Type>(), iter.first),
+                           thrust::distance(keys_view->begin<Element>(), iter.first),
                            (thrust::distance(iter.first, iter.second) > 0),
                            stream,
                            mr);
@@ -135,15 +134,14 @@ struct find_insert_index_fn {
     CUDF_EXPECTS(input.keys().type() == key.type(),
                  "search key type must match dictionary keys type");
 
-    using Type       = device_storage_type_t<Element>;
     using ScalarType = cudf::scalar_type_t<Element>;
     auto find_key    = static_cast<ScalarType const&>(key).value(stream);
     auto keys_view   = column_device_view::create(input.keys(), stream);
     auto iter        = thrust::lower_bound(
-      rmm::exec_policy(stream), keys_view->begin<Type>(), keys_view->end<Type>(), find_key);
+      rmm::exec_policy(stream), keys_view->begin<Element>(), keys_view->end<Element>(), find_key);
     return type_dispatcher(input.indices().type(),
                            dispatch_scalar_index{},
-                           thrust::distance(keys_view->begin<Type>(), iter),
+                           thrust::distance(keys_view->begin<Element>(), iter),
                            true,
                            stream,
                            mr);
@@ -171,7 +169,8 @@ std::unique_ptr<scalar> get_index(dictionary_column_view const& dictionary,
 {
   if (dictionary.is_empty())
     return std::make_unique<numeric_scalar<uint32_t>>(0, false, stream, mr);
-  return type_dispatcher(dictionary.keys().type(), find_index_fn(), dictionary, key, stream, mr);
+  return type_dispatcher<dispatch_storage_type>(
+    dictionary.keys().type(), find_index_fn(), dictionary, key, stream, mr);
 }
 
 std::unique_ptr<scalar> get_insert_index(dictionary_column_view const& dictionary,
@@ -181,7 +180,7 @@ std::unique_ptr<scalar> get_insert_index(dictionary_column_view const& dictionar
 {
   if (dictionary.is_empty())
     return std::make_unique<numeric_scalar<uint32_t>>(0, false, stream, mr);
-  return type_dispatcher(
+  return type_dispatcher<dispatch_storage_type>(
     dictionary.keys().type(), find_insert_index_fn(), dictionary, key, stream, mr);
 }
 
diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu
index 06f1a5dcab7..be13405b469 100644
--- a/cpp/src/merge/merge.cu
+++ b/cpp/src/merge/merge.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -272,12 +272,10 @@ struct column_merger {
     //
     merged_col->set_null_count(lcol.null_count() + rcol.null_count());
 
-    using Type = device_storage_type_t<Element>;
-
     // to resolve view.data()'s types use: Element
     //
-    auto const d_lcol = lcol.data<Type>();
-    auto const d_rcol = rcol.data<Type>();
+    auto const d_lcol = lcol.data<Element>();
+    auto const d_rcol = rcol.data<Element>();
 
     // capture lcol, rcol
     // and "gather" into merged_view.data()[indx_merged]
@@ -286,7 +284,7 @@ struct column_merger {
     thrust::transform(rmm::exec_policy(stream),
                       row_order_.begin(),
                       row_order_.end(),
-                      merged_view.begin<Type>(),
+                      merged_view.begin<Element>(),
                       [d_lcol, d_rcol] __device__(index_type const& index_pair) {
                         // When C++17, use structure bindings
                         auto side  = thrust::get<0>(index_pair);
@@ -383,7 +381,7 @@ table_ptr_type merge(cudf::table_view const& left_table,
             right_table.begin(),
             std::back_inserter(merged_cols),
             [&](auto const& left_col, auto const& right_col) {
-              return cudf::type_dispatcher(
+              return cudf::type_dispatcher<dispatch_storage_type>(
                 left_col.type(), merger, left_col, right_col, stream, mr);
             });
 
diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu
index 4d409edac65..46f00ecb75d 100644
--- a/cpp/src/partitioning/partitioning.cu
+++ b/cpp/src/partitioning/partitioning.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -393,12 +393,10 @@ struct copy_block_partitions_dispatcher {
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
   {
-    using Type = device_storage_type_t<DataType>;
+    rmm::device_buffer output(input.size() * sizeof(DataType), stream, mr);
 
-    rmm::device_buffer output(input.size() * sizeof(Type), stream, mr);
-
-    copy_block_partitions_impl(input.data<Type>(),
-                               static_cast<Type*>(output.data()),
+    copy_block_partitions_impl(input.data<DataType>(),
+                               static_cast<DataType*>(output.data()),
                                input.size(),
                                num_partitions,
                                row_partition_numbers,
@@ -569,17 +567,17 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition_table(
 
     // Copy input to output by partition per column
     std::transform(input.begin(), input.end(), output_cols.begin(), [=](auto const& col) {
-      return cudf::type_dispatcher(col.type(),
-                                   copy_block_partitions_dispatcher{},
-                                   col,
-                                   num_partitions,
-                                   row_partition_numbers_ptr,
-                                   row_partition_offset_ptr,
-                                   block_partition_sizes_ptr,
-                                   scanned_block_partition_sizes_ptr,
-                                   grid_size,
-                                   stream,
-                                   mr);
+      return cudf::type_dispatcher<dispatch_storage_type>(col.type(),
+                                                          copy_block_partitions_dispatcher{},
+                                                          col,
+                                                          num_partitions,
+                                                          row_partition_numbers_ptr,
+                                                          row_partition_offset_ptr,
+                                                          block_partition_sizes_ptr,
+                                                          scanned_block_partition_sizes_ptr,
+                                                          grid_size,
+                                                          stream,
+                                                          mr);
     });
 
     if (has_nulls(input)) {
diff --git a/cpp/src/reductions/scan.cu b/cpp/src/reductions/scan.cu
index ca9ab82c27c..011b34031fe 100644
--- a/cpp/src/reductions/scan.cu
+++ b/cpp/src/reductions/scan.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@ namespace detail {
  * @tparam Op device binary operator
  */
 template <typename Op>
-struct ScanDispatcher {
+struct scan_dispatcher {
  private:
   template <typename T>
   static constexpr bool is_string_supported()
@@ -213,12 +213,9 @@ struct ScanDispatcher {
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
   {
-    auto output = [&] {
-      using Type = device_storage_type_t<T>;
-      return inclusive == scan_type::INCLUSIVE
-               ? inclusive_scan<Type>(input, null_handling, stream, mr)
-               : exclusive_scan<Type>(input, null_handling, stream, mr);
-    }();
+    auto output = inclusive == scan_type::INCLUSIVE
+                    ? inclusive_scan<T>(input, null_handling, stream, mr)
+                    : exclusive_scan<T>(input, null_handling, stream, mr);
 
     if (null_handling == null_policy::EXCLUDE) {
       CUDF_EXPECTS(input.null_count() == output->null_count(),
@@ -253,40 +250,40 @@ std::unique_ptr<column> scan(
 
   switch (agg->kind) {
     case aggregation::SUM:
-      return cudf::type_dispatcher(input.type(),
-                                   ScanDispatcher<cudf::DeviceSum>(),
-                                   input,
-                                   inclusive,
-                                   null_handling,
-                                   stream,
-                                   mr);
+      return cudf::type_dispatcher<dispatch_storage_type>(input.type(),
+                                                          scan_dispatcher<cudf::DeviceSum>(),
+                                                          input,
+                                                          inclusive,
+                                                          null_handling,
+                                                          stream,
+                                                          mr);
     case aggregation::MIN:
-      return cudf::type_dispatcher(input.type(),
-                                   ScanDispatcher<cudf::DeviceMin>(),
-                                   input,
-                                   inclusive,
-                                   null_handling,
-                                   stream,
-                                   mr);
+      return cudf::type_dispatcher<dispatch_storage_type>(input.type(),
+                                                          scan_dispatcher<cudf::DeviceMin>(),
+                                                          input,
+                                                          inclusive,
+                                                          null_handling,
+                                                          stream,
+                                                          mr);
     case aggregation::MAX:
-      return cudf::type_dispatcher(input.type(),
-                                   ScanDispatcher<cudf::DeviceMax>(),
-                                   input,
-                                   inclusive,
-                                   null_handling,
-                                   stream,
-                                   mr);
+      return cudf::type_dispatcher<dispatch_storage_type>(input.type(),
+                                                          scan_dispatcher<cudf::DeviceMax>(),
+                                                          input,
+                                                          inclusive,
+                                                          null_handling,
+                                                          stream,
+                                                          mr);
     case aggregation::PRODUCT:
       // a product scan on a decimal type with non-zero scale would result in each element having
       // a different scale, and because scale is stored once per column, this is not possible
       if (is_fixed_point(input.type())) CUDF_FAIL("decimal32/64 cannot support product scan");
-      return cudf::type_dispatcher(input.type(),
-                                   ScanDispatcher<cudf::DeviceProduct>(),
-                                   input,
-                                   inclusive,
-                                   null_handling,
-                                   stream,
-                                   mr);
+      return cudf::type_dispatcher<dispatch_storage_type>(input.type(),
+                                                          scan_dispatcher<cudf::DeviceProduct>(),
+                                                          input,
+                                                          inclusive,
+                                                          null_handling,
+                                                          stream,
+                                                          mr);
     default: CUDF_FAIL("Unsupported aggregation operator for scan");
   }
 }
diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu
index f63fe793a04..1f7e0672404 100644
--- a/cpp/src/replace/clamp.cu
+++ b/cpp/src/replace/clamp.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -249,14 +249,12 @@ struct dispatch_clamp {
   {
     CUDF_EXPECTS(lo.type() == input.type(), "mismatching types of scalar and input");
 
-    using Type = device_storage_type_t<T>;
+    auto lo_itr         = make_pair_iterator<T>(lo);
+    auto hi_itr         = make_pair_iterator<T>(hi);
+    auto lo_replace_itr = make_pair_iterator<T>(lo_replace);
+    auto hi_replace_itr = make_pair_iterator<T>(hi_replace);
 
-    auto lo_itr         = make_pair_iterator<Type>(lo);
-    auto hi_itr         = make_pair_iterator<Type>(hi);
-    auto lo_replace_itr = make_pair_iterator<Type>(lo_replace);
-    auto hi_replace_itr = make_pair_iterator<Type>(hi_replace);
-
-    return clamp<Type>(input, lo_itr, lo_replace_itr, hi_itr, hi_replace_itr, stream, mr);
+    return clamp<T>(input, lo_itr, lo_replace_itr, hi_itr, hi_replace_itr, stream, mr);
   }
 };
 
@@ -322,15 +320,15 @@ std::unique_ptr<column> dispatch_clamp::operator()<cudf::dictionary32>(
 
   // call clamp with the scalar indexes and the matched indices
   auto matched_indices = matched_view.get_indices_annotated();
-  auto new_indices     = cudf::type_dispatcher(matched_indices.type(),
-                                           dispatch_clamp{},
-                                           matched_indices,
-                                           *lo_index,
-                                           *lo_replace_index,
-                                           *hi_index,
-                                           *hi_replace_index,
-                                           stream,
-                                           mr);
+  auto new_indices     = cudf::type_dispatcher<dispatch_storage_type>(matched_indices.type(),
+                                                                  dispatch_clamp{},
+                                                                  matched_indices,
+                                                                  *lo_index,
+                                                                  *lo_replace_index,
+                                                                  *hi_index,
+                                                                  *hi_replace_index,
+                                                                  stream,
+                                                                  mr);
 
   auto const indices_type = new_indices->type();
   auto const output_size  = new_indices->size();
@@ -387,7 +385,7 @@ std::unique_ptr<column> clamp(
     CUDF_EXPECTS(hi_replace.is_valid(stream), "hi_replace can't be null if hi is not null");
   }
 
-  return cudf::type_dispatcher(
+  return cudf::type_dispatcher<dispatch_storage_type>(
     input.type(), dispatch_clamp{}, input, lo, lo_replace, hi, hi_replace, stream, mr);
 }
 
diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu
index c78b1c67c04..50b1b7062d9 100644
--- a/cpp/src/replace/nulls.cu
+++ b/cpp/src/replace/nulls.cu
@@ -308,17 +308,16 @@ struct replace_nulls_scalar_kernel_forwarder {
       cudf::allocate_like(input, cudf::mask_allocation_policy::NEVER, mr);
     auto output_view = output->mutable_view();
 
-    using Type       = cudf::device_storage_type_t<col_type>;
     using ScalarType = cudf::scalar_type_t<col_type>;
     auto s1          = static_cast<ScalarType const&>(replacement);
     auto device_in   = cudf::column_device_view::create(input);
 
-    auto func = replace_nulls_functor<Type>{s1.data()};
+    auto func = replace_nulls_functor<col_type>{s1.data()};
     thrust::transform(rmm::exec_policy(stream),
-                      input.data<Type>(),
-                      input.data<Type>() + input.size(),
+                      input.data<col_type>(),
+                      input.data<col_type>() + input.size(),
                       cudf::detail::make_validity_iterator(*device_in),
-                      output_view.data<Type>(),
+                      output_view.data<col_type>(),
                       func);
     return output;
   }
@@ -424,7 +423,6 @@ std::unique_ptr<cudf::column> replace_nulls(cudf::column_view const& input,
   CUDF_EXPECTS(replacement.size() == input.size(), "Column size mismatch");
 
   if (input.is_empty()) { return cudf::empty_like(input); }
-
   if (!input.has_nulls()) { return std::make_unique<cudf::column>(input); }
 
   return cudf::type_dispatcher(
@@ -437,12 +435,11 @@ std::unique_ptr<cudf::column> replace_nulls(cudf::column_view const& input,
                                             rmm::mr::device_memory_resource* mr)
 {
   if (input.is_empty()) { return cudf::empty_like(input); }
-
   if (!input.has_nulls() || !replacement.is_valid()) {
     return std::make_unique<cudf::column>(input, stream, mr);
   }
 
-  return cudf::type_dispatcher(
+  return cudf::type_dispatcher<dispatch_storage_type>(
     input.type(), replace_nulls_scalar_kernel_forwarder{}, input, replacement, stream, mr);
 }
 
@@ -452,7 +449,6 @@ std::unique_ptr<cudf::column> replace_nulls(cudf::column_view const& input,
                                             rmm::mr::device_memory_resource* mr)
 {
   if (input.is_empty()) { return cudf::empty_like(input); }
-
   if (!input.has_nulls()) { return std::make_unique<cudf::column>(input, stream, mr); }
 
   return replace_nulls_policy_impl(input, replace_policy, stream, mr);
diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu
index 6126b4824e5..783e0b4b1de 100644
--- a/cpp/src/replace/replace.cu
+++ b/cpp/src/replace/replace.cu
@@ -17,7 +17,7 @@
  * limitations under the License.
  */
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -241,9 +241,7 @@ __global__ void replace_kernel(cudf::column_device_view input,
                                cudf::column_device_view values_to_replace,
                                cudf::column_device_view replacement)
 {
-  using Type = cudf::device_storage_type_t<T>;
-
-  Type* __restrict__ output_data = output.data<Type>();
+  T* __restrict__ output_data = output.data<T>();
 
   cudf::size_type i = blockIdx.x * blockDim.x + threadIdx.x;
 
@@ -260,12 +258,12 @@ __global__ void replace_kernel(cudf::column_device_view input,
       output_is_valid = input_is_valid;
     }
     if (input_is_valid)
-      thrust::tie(output_data[i], output_is_valid) = get_new_value<Type, replacement_has_nulls>(
+      thrust::tie(output_data[i], output_is_valid) = get_new_value<T, replacement_has_nulls>(
         i,
-        input.data<Type>(),
-        values_to_replace.data<Type>(),
-        values_to_replace.data<Type>() + values_to_replace.size(),
-        replacement.data<Type>(),
+        input.data<T>(),
+        values_to_replace.data<T>(),
+        values_to_replace.data<T>() + values_to_replace.size(),
+        replacement.data<T>(),
         replacement.null_mask());
 
     /* output valid counts calculations*/
@@ -461,7 +459,7 @@ std::unique_ptr<cudf::column> replace_kernel_forwarder::operator()<cudf::diction
     cudf::dictionary::detail::set_keys(replacements, matched_view.keys(), stream);
 
   auto indices_type = matched_view.indices().type();
-  auto new_indices  = cudf::type_dispatcher(
+  auto new_indices  = cudf::type_dispatcher<cudf::dispatch_storage_type>(
     indices_type,
     replace_kernel_forwarder{},
     matched_view.get_indices_annotated(),
@@ -502,13 +500,13 @@ std::unique_ptr<cudf::column> find_and_replace_all(cudf::column_view const& inpu
     return std::make_unique<cudf::column>(input_col);
   }
 
-  return cudf::type_dispatcher(input_col.type(),
-                               replace_kernel_forwarder{},
-                               input_col,
-                               values_to_replace,
-                               replacement_values,
-                               stream,
-                               mr);
+  return cudf::type_dispatcher<dispatch_storage_type>(input_col.type(),
+                                                      replace_kernel_forwarder{},
+                                                      input_col,
+                                                      values_to_replace,
+                                                      replacement_values,
+                                                      stream,
+                                                      mr);
 }
 
 }  // namespace detail
diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu
index 25b245bee6b..3e2cb4ac02f 100644
--- a/cpp/src/reshape/interleave_columns.cu
+++ b/cpp/src/reshape/interleave_columns.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -137,16 +137,14 @@ struct interleave_columns_functor {
     auto index_begin   = thrust::make_counting_iterator<size_type>(0);
     auto index_end     = thrust::make_counting_iterator<size_type>(output_size);
 
-    using Type = device_storage_type_t<T>;
-
     auto func_value = [input   = *device_input,
                        divisor = input.num_columns()] __device__(size_type idx) {
-      return input.column(idx % divisor).element<Type>(idx / divisor);
+      return input.column(idx % divisor).element<T>(idx / divisor);
     };
 
     if (not create_mask) {
       thrust::transform(
-        rmm::exec_policy(stream), index_begin, index_end, device_output->begin<Type>(), func_value);
+        rmm::exec_policy(stream), index_begin, index_end, device_output->begin<T>(), func_value);
 
       return output;
     }
@@ -159,7 +157,7 @@ struct interleave_columns_functor {
     thrust::transform_if(rmm::exec_policy(stream),
                          index_begin,
                          index_end,
-                         device_output->begin<Type>(),
+                         device_output->begin<T>(),
                          func_value,
                          func_validity);
 
@@ -193,12 +191,12 @@ std::unique_ptr<column> interleave_columns(table_view const& input,
   auto const output_needs_mask = std::any_of(
     std::cbegin(input), std::cend(input), [](auto const& col) { return col.nullable(); });
 
-  return type_dispatcher(dtype,
-                         detail::interleave_columns_functor{},
-                         input,
-                         output_needs_mask,
-                         rmm::cuda_stream_default,
-                         mr);
+  return type_dispatcher<dispatch_storage_type>(dtype,
+                                                detail::interleave_columns_functor{},
+                                                input,
+                                                output_needs_mask,
+                                                rmm::cuda_stream_default,
+                                                mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/sort/sort.cu b/cpp/src/sort/sort.cu
index c500b8ae49f..b6262a1848d 100644
--- a/cpp/src/sort/sort.cu
+++ b/cpp/src/sort/sort.cu
@@ -60,17 +60,10 @@ struct inplace_column_sort_fn {
   void operator()(mutable_column_view& col, bool ascending, rmm::cuda_stream_view stream) const
   {
     CUDF_EXPECTS(!col.has_nulls(), "Nulls not supported for in-place sort");
-    using DeviceT = device_storage_type_t<T>;
     if (ascending) {
-      thrust::sort(rmm::exec_policy(stream),
-                   col.begin<DeviceT>(),
-                   col.end<DeviceT>(),
-                   thrust::less<DeviceT>());
+      thrust::sort(rmm::exec_policy(stream), col.begin<T>(), col.end<T>(), thrust::less<T>());
     } else {
-      thrust::sort(rmm::exec_policy(stream),
-                   col.begin<DeviceT>(),
-                   col.end<DeviceT>(),
-                   thrust::greater<DeviceT>());
+      thrust::sort(rmm::exec_policy(stream), col.begin<T>(), col.end<T>(), thrust::greater<T>());
     }
   }
 
@@ -95,7 +88,8 @@ std::unique_ptr<table> sort(table_view input,
     auto output    = std::make_unique<column>(input.column(0), stream, mr);
     auto view      = output->mutable_view();
     bool ascending = (column_order.empty() ? true : column_order.front() == order::ASCENDING);
-    cudf::type_dispatcher(output->type(), inplace_column_sort_fn{}, view, ascending, stream);
+    cudf::type_dispatcher<dispatch_storage_type>(
+      output->type(), inplace_column_sort_fn{}, view, ascending, stream);
     std::vector<std::unique_ptr<column>> columns;
     columns.emplace_back(std::move(output));
     return std::make_unique<table>(std::move(columns));
diff --git a/cpp/src/sort/sort_column.cu b/cpp/src/sort/sort_column.cu
index 070aa6eae03..74c796e7962 100644
--- a/cpp/src/sort/sort_column.cu
+++ b/cpp/src/sort/sort_column.cu
@@ -54,19 +54,18 @@ struct column_sorted_order_fn {
     // But this also requires making a copy of the input data.
     auto temp_col = column(input, stream);
     auto d_col    = temp_col.mutable_view();
-    using DeviceT = device_storage_type_t<T>;
     if (ascending) {
       thrust::sort_by_key(rmm::exec_policy(stream),
-                          d_col.begin<DeviceT>(),
-                          d_col.end<DeviceT>(),
+                          d_col.begin<T>(),
+                          d_col.end<T>(),
                           indices.begin<size_type>(),
-                          thrust::less<DeviceT>());
+                          thrust::less<T>());
     } else {
       thrust::sort_by_key(rmm::exec_policy(stream),
-                          d_col.begin<DeviceT>(),
-                          d_col.end<DeviceT>(),
+                          d_col.begin<T>(),
+                          d_col.end<T>(),
                           indices.begin<size_type>(),
-                          thrust::greater<DeviceT>());
+                          thrust::greater<T>());
     }
   }
   template <typename T, typename std::enable_if_t<!is_radix_sort_supported<T>()>* = nullptr>
@@ -132,13 +131,13 @@ std::unique_ptr<column> sorted_order<false>(column_view const& input,
   mutable_column_view indices_view = sorted_indices->mutable_view();
   thrust::sequence(
     rmm::exec_policy(stream), indices_view.begin<size_type>(), indices_view.end<size_type>(), 0);
-  cudf::type_dispatcher(input.type(),
-                        column_sorted_order_fn{},
-                        input,
-                        indices_view,
-                        column_order == order::ASCENDING,
-                        null_precedence,
-                        stream);
+  cudf::type_dispatcher<dispatch_storage_type>(input.type(),
+                                               column_sorted_order_fn{},
+                                               input,
+                                               indices_view,
+                                               column_order == order::ASCENDING,
+                                               null_precedence,
+                                               stream);
   return sorted_indices;
 }
 
diff --git a/cpp/src/sort/stable_sort_column.cu b/cpp/src/sort/stable_sort_column.cu
index abeaa7bef76..49aecf52625 100644
--- a/cpp/src/sort/stable_sort_column.cu
+++ b/cpp/src/sort/stable_sort_column.cu
@@ -35,11 +35,8 @@ struct column_stable_sorted_order_fn {
   {
     auto temp_col = column(input, stream);
     auto d_col    = temp_col.mutable_view();
-    using DeviceT = device_storage_type_t<T>;
-    thrust::stable_sort_by_key(rmm::exec_policy(stream),
-                               d_col.begin<DeviceT>(),
-                               d_col.end<DeviceT>(),
-                               indices.begin<size_type>());
+    thrust::stable_sort_by_key(
+      rmm::exec_policy(stream), d_col.begin<T>(), d_col.end<T>(), indices.begin<size_type>());
   }
   template <typename T, typename std::enable_if_t<!cudf::is_fixed_width<T>()>* = nullptr>
   void faster_stable_sort(column_view const&, mutable_column_view&, rmm::cuda_stream_view)
@@ -103,13 +100,13 @@ std::unique_ptr<column> sorted_order<true>(column_view const& input,
   mutable_column_view indices_view = sorted_indices->mutable_view();
   thrust::sequence(
     rmm::exec_policy(stream), indices_view.begin<size_type>(), indices_view.end<size_type>(), 0);
-  cudf::type_dispatcher(input.type(),
-                        column_stable_sorted_order_fn{},
-                        input,
-                        indices_view,
-                        column_order == order::ASCENDING,
-                        null_precedence,
-                        stream);
+  cudf::type_dispatcher<dispatch_storage_type>(input.type(),
+                                               column_stable_sorted_order_fn{},
+                                               input,
+                                               indices_view,
+                                               column_order == order::ASCENDING,
+                                               null_precedence,
+                                               stream);
   return sorted_indices;
 }