diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 4386a62e32a..a46b281a7dd 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -55,7 +55,7 @@ test:
     - test -f $PREFIX/include/cudf/ast/transform.hpp
     - test -f $PREFIX/include/cudf/ast/detail/linearizer.hpp
     - test -f $PREFIX/include/cudf/ast/detail/operators.hpp
-    - test -f $PREFIX/include/cudf/ast/linearizer.hpp
+    - test -f $PREFIX/include/cudf/ast/nodes.hpp
     - test -f $PREFIX/include/cudf/ast/operators.hpp
     - test -f $PREFIX/include/cudf/binaryop.hpp
     - test -f $PREFIX/include/cudf/labeling/label_bins.hpp
diff --git a/cpp/include/cudf/ast/detail/linearizer.hpp b/cpp/include/cudf/ast/detail/linearizer.hpp
index 68319a24e5d..166a0408703 100644
--- a/cpp/include/cudf/ast/detail/linearizer.hpp
+++ b/cpp/include/cudf/ast/detail/linearizer.hpp
@@ -83,10 +83,7 @@ class linearizer;
  * This class is a part of a "visitor" pattern with the `linearizer` class.
  * Nodes inheriting from this class can accept visitors.
  */
-class node {
-  friend class detail::linearizer;
-
- private:
+struct node {
   virtual cudf::size_type accept(detail::linearizer& visitor) const = 0;
 };
 
@@ -102,10 +99,6 @@ class node {
  * resolved into intermediate data storage in shared memory.
  */
 class linearizer {
-  friend class literal;
-  friend class column_reference;
-  friend class expression;
-
  public:
   /**
    * @brief Construct a new linearizer object
diff --git a/cpp/include/cudf/ast/detail/operators.hpp b/cpp/include/cudf/ast/detail/operators.hpp
index 27bcb0d320b..8ae60f96997 100644
--- a/cpp/include/cudf/ast/detail/operators.hpp
+++ b/cpp/include/cudf/ast/detail/operators.hpp
@@ -753,43 +753,6 @@ struct operator_functor<ast_operator::NOT> {
   }
 };
 
-#if 0
-/**
- * @brief Functor used to double-type-dispatch binary operators.
- *
- * This functor's `operator()` is templated to validate calls to its operators based on the input
- * type, as determined by the `is_valid_binary_op` trait.
- *
- * @tparam OperatorFunctor Binary operator functor.
- */
-template <typename OperatorFunctor>
-struct double_dispatch_binary_operator_types {
-  template <typename LHS,
-            typename RHS,
-            typename F,
-            typename... Ts,
-            std::enable_if_t<is_valid_binary_op<OperatorFunctor, LHS, RHS>>* = nullptr>
-  CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... args)
-  {
-    f.template operator()<OperatorFunctor, LHS, RHS>(std::forward<Ts>(args)...);
-  }
-
-  template <typename LHS,
-            typename RHS,
-            typename F,
-            typename... Ts,
-            std::enable_if_t<!is_valid_binary_op<OperatorFunctor, LHS, RHS>>* = nullptr>
-  CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... args)
-  {
-#ifndef __CUDA_ARCH__
-    CUDF_FAIL("Invalid binary operation.");
-#else
-    cudf_assert(false && "Invalid binary operation.");
-#endif
-  }
-};
-#endif
-
 /**
  * @brief Functor used to single-type-dispatch binary operators.
  *
@@ -856,16 +819,6 @@ struct type_dispatch_binary_op {
                                             F&& f,
                                             Ts&&... args)
   {
-#if 0
-    // Double dispatch
-    /*
-    double_type_dispatcher(lhs_type,
-                           rhs_type,
-                           detail::double_dispatch_binary_operator_types<operator_functor<op>>{},
-                           std::forward<F>(f),
-                           std::forward<Ts>(args)...);
-    */
-#endif
     // Single dispatch (assume lhs_type == rhs_type)
     type_dispatcher(lhs_type,
                     detail::single_dispatch_binary_operator_types<operator_functor<op>>{},
diff --git a/cpp/include/cudf/ast/detail/transform.cuh b/cpp/include/cudf/ast/detail/transform.cuh
index f9d7426e2e4..f69927a3601 100644
--- a/cpp/include/cudf/ast/detail/transform.cuh
+++ b/cpp/include/cudf/ast/detail/transform.cuh
@@ -15,8 +15,9 @@
  */
 #pragma once
 
+#include <cudf/ast/detail/linearizer.hpp>
 #include <cudf/ast/detail/operators.hpp>
-#include <cudf/ast/linearizer.hpp>
+#include <cudf/ast/nodes.hpp>
 #include <cudf/ast/operators.hpp>
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
@@ -25,6 +26,7 @@
 #include <cudf/table/table_device_view.cuh>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
+#include <cudf/utilities/span.hpp>
 #include <cudf/utilities/traits.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
@@ -155,10 +157,11 @@ struct row_evaluator {
    * storing intermediates.
    * @param output_column The output column where results are stored.
    */
-  __device__ row_evaluator(table_device_view const& table,
-                           const cudf::detail::fixed_width_scalar_device_view_base* literals,
-                           std::int64_t* thread_intermediate_storage,
-                           mutable_column_device_view* output_column)
+  __device__ row_evaluator(
+    table_device_view const& table,
+    device_span<const cudf::detail::fixed_width_scalar_device_view_base> literals,
+    std::int64_t* thread_intermediate_storage,
+    mutable_column_device_view* output_column)
     : table(table),
       literals(literals),
       thread_intermediate_storage(thread_intermediate_storage),
@@ -264,7 +267,7 @@ struct row_evaluator {
 
  private:
   table_device_view const& table;
-  const cudf::detail::fixed_width_scalar_device_view_base* literals;
+  device_span<const cudf::detail::fixed_width_scalar_device_view_base> literals;
   std::int64_t* thread_intermediate_storage;
   mutable_column_device_view* output_column;
 };
@@ -298,15 +301,15 @@ __device__ void row_output::resolve_output(detail::device_data_reference device_
  * @param num_operators Number of operators.
  * @param row_index Row index of data column(s).
  */
-__device__ void evaluate_row_expression(detail::row_evaluator const& evaluator,
-                                        const detail::device_data_reference* data_references,
-                                        const ast_operator* operators,
-                                        const cudf::size_type* operator_source_indices,
-                                        cudf::size_type num_operators,
-                                        cudf::size_type row_index)
+__device__ void evaluate_row_expression(
+  detail::row_evaluator const& evaluator,
+  device_span<const detail::device_data_reference> data_references,
+  device_span<const ast_operator> operators,
+  device_span<const cudf::size_type> operator_source_indices,
+  cudf::size_type row_index)
 {
-  auto operator_source_index = cudf::size_type(0);
-  for (cudf::size_type operator_index(0); operator_index < num_operators; operator_index++) {
+  auto operator_source_index = static_cast<cudf::size_type>(0);
+  for (cudf::size_type operator_index = 0; operator_index < operators.size(); operator_index++) {
     // Execute operator
     auto const op    = operators[operator_index];
     auto const arity = ast_operator_arity(op);
@@ -336,41 +339,79 @@ __device__ void evaluate_row_expression(detail::row_evaluator const& evaluator,
   }
 }
 
+/**
+ * @brief The AST plan creates a device buffer of data needed to execute an AST.
+ *
+ * On construction, an AST plan creates a single "packed" host buffer of all necessary data arrays,
+ * and copies that to the device with a single host-device memory copy. Because the plan tends to be
+ * small, this is the most efficient approach for low latency.
+ *
+ */
 struct ast_plan {
- public:
-  ast_plan() : sizes(), data_pointers() {}
+  ast_plan(linearizer const& expr_linearizer,
+           rmm::cuda_stream_view stream,
+           rmm::mr::device_memory_resource* mr)
+    : _sizes{}, _data_pointers{}
+  {
+    add_to_plan(expr_linearizer.data_references());
+    add_to_plan(expr_linearizer.literals());
+    add_to_plan(expr_linearizer.operators());
+    add_to_plan(expr_linearizer.operator_source_indices());
+
+    // Create device buffer
+    auto const buffer_size = std::accumulate(_sizes.cbegin(), _sizes.cend(), 0);
+    auto buffer_offsets    = std::vector<int>(_sizes.size());
+    thrust::exclusive_scan(_sizes.cbegin(), _sizes.cend(), buffer_offsets.begin(), 0);
+
+    auto h_data_buffer = std::make_unique<char[]>(buffer_size);
+    for (unsigned int i = 0; i < _data_pointers.size(); ++i) {
+      std::memcpy(h_data_buffer.get() + buffer_offsets[i], _data_pointers[i], _sizes[i]);
+    }
 
-  using buffer_type = std::pair<std::unique_ptr<char[]>, int>;
+    _device_data_buffer = rmm::device_buffer(h_data_buffer.get(), buffer_size, stream, mr);
+
+    stream.synchronize();
+
+    // Create device pointers to components of plan
+    auto device_data_buffer_ptr = static_cast<const char*>(_device_data_buffer.data());
+    _device_data_references     = device_span<const detail::device_data_reference>(
+      reinterpret_cast<const detail::device_data_reference*>(device_data_buffer_ptr +
+                                                             buffer_offsets[0]),
+      expr_linearizer.data_references().size());
+    _device_literals = device_span<const cudf::detail::fixed_width_scalar_device_view_base>(
+      reinterpret_cast<const cudf::detail::fixed_width_scalar_device_view_base*>(
+        device_data_buffer_ptr + buffer_offsets[1]),
+      expr_linearizer.literals().size());
+    _device_operators = device_span<const ast_operator>(
+      reinterpret_cast<const ast_operator*>(device_data_buffer_ptr + buffer_offsets[2]),
+      expr_linearizer.operators().size());
+    _device_operator_source_indices = device_span<const cudf::size_type>(
+      reinterpret_cast<const cudf::size_type*>(device_data_buffer_ptr + buffer_offsets[3]),
+      expr_linearizer.operator_source_indices().size());
+  }
 
+  /**
+   * @brief Helper function for adding components (operators, literals, etc) to AST plan
+   *
+   * @tparam T  The underlying type of the input `std::vector`
+   * @param  v  The `std::vector` containing components (operators, literals, etc)
+   */
   template <typename T>
   void add_to_plan(std::vector<T> const& v)
   {
     auto const data_size = sizeof(T) * v.size();
-    sizes.push_back(data_size);
-    data_pointers.push_back(v.data());
+    _sizes.push_back(data_size);
+    _data_pointers.push_back(v.data());
   }
 
-  buffer_type get_host_data_buffer() const
-  {
-    auto const total_size = std::accumulate(sizes.cbegin(), sizes.cend(), 0);
-    auto host_data_buffer = std::make_unique<char[]>(total_size);
-    auto const offsets    = get_offsets();
-    for (unsigned int i = 0; i < data_pointers.size(); ++i) {
-      std::memcpy(host_data_buffer.get() + offsets[i], data_pointers[i], sizes[i]);
-    }
-    return std::make_pair(std::move(host_data_buffer), total_size);
-  }
+  std::vector<cudf::size_type> _sizes;
+  std::vector<const void*> _data_pointers;
 
-  std::vector<cudf::size_type> get_offsets() const
-  {
-    auto offsets = std::vector<int>(sizes.size());
-    thrust::exclusive_scan(sizes.cbegin(), sizes.cend(), offsets.begin(), 0);
-    return offsets;
-  }
-
- private:
-  std::vector<cudf::size_type> sizes;
-  std::vector<const void*> data_pointers;
+  rmm::device_buffer _device_data_buffer;
+  device_span<const detail::device_data_reference> _device_data_references;
+  device_span<const cudf::detail::fixed_width_scalar_device_view_base> _device_literals;
+  device_span<const ast_operator> _device_operators;
+  device_span<const cudf::size_type> _device_operator_source_indices;
 };
 
 /**
diff --git a/cpp/include/cudf/ast/linearizer.hpp b/cpp/include/cudf/ast/nodes.hpp
similarity index 90%
rename from cpp/include/cudf/ast/linearizer.hpp
rename to cpp/include/cudf/ast/nodes.hpp
index e5ccb2e8069..70dda58816e 100644
--- a/cpp/include/cudf/ast/linearizer.hpp
+++ b/cpp/include/cudf/ast/nodes.hpp
@@ -38,17 +38,10 @@ enum class table_reference {
   OUTPUT  // Column index in the output table
 };
 
-// Forward declaration
-class literal;
-class column_reference;
-class expression;
-
 /**
  * @brief A literal value used in an abstract syntax tree.
  */
 class literal : public detail::node {
-  friend class detail::linearizer;
-
  public:
   /**
    * @brief Construct a new literal object.
@@ -90,7 +83,6 @@ class literal : public detail::node {
    */
   cudf::data_type get_data_type() const { return get_value().type(); }
 
- private:
   /**
    * @brief Get the value object.
    *
@@ -106,6 +98,7 @@ class literal : public detail::node {
    */
   cudf::size_type accept(detail::linearizer& visitor) const override;
 
+ private:
   const cudf::detail::fixed_width_scalar_device_view_base value;
 };
 
@@ -113,8 +106,6 @@ class literal : public detail::node {
  * @brief A node referring to data from a column in a table.
  */
 class column_reference : public detail::node {
-  friend class detail::linearizer;
-
  public:
   /**
    * @brief Construct a new column reference object
@@ -175,7 +166,6 @@ class column_reference : public detail::node {
     return table.column(get_column_index()).type();
   }
 
- private:
   /**
    * @brief Accepts a visitor class.
    *
@@ -184,6 +174,7 @@ class column_reference : public detail::node {
    */
   cudf::size_type accept(detail::linearizer& visitor) const override;
 
+ private:
   cudf::size_type column_index;
   table_reference table_source;
 };
@@ -192,8 +183,6 @@ class column_reference : public detail::node {
  * @brief An expression node holds an operator and zero or more operands.
  */
 class expression : public detail::node {
-  friend class detail::linearizer;
-
  public:
   /**
    * @brief Construct a new unary expression object.
@@ -208,11 +197,6 @@ class expression : public detail::node {
     }
   }
 
-  /**
-   * @brief `expression` doesn't accept r-value references for expression nodes
-   */
-  expression(ast_operator op, node&& input) = delete;
-
   /**
    * @brief Construct a new binary expression object.
    *
@@ -227,19 +211,11 @@ class expression : public detail::node {
     }
   }
 
-  /**
-   * @brief `expression` doesn't accept r-value references for expression nodes
-   */
-  expression(ast_operator op, node&& left, node&& right) = delete;
-
-  /**
-   * @brief `expression` doesn't accept r-value references for expression nodes
-   */
+  // expression only stores references to nodes, so it does not accept r-value
+  // references: the calling code must own the nodes.
+  expression(ast_operator op, node&& input)                   = delete;
+  expression(ast_operator op, node&& left, node&& right)      = delete;
   expression(ast_operator op, node&& left, node const& right) = delete;
-
-  /**
-   * @brief `expression` doesn't accept r-value references for expression nodes
-   */
   expression(ast_operator op, node const& left, node&& right) = delete;
 
   /**
@@ -256,7 +232,6 @@ class expression : public detail::node {
    */
   std::vector<std::reference_wrapper<const node>> get_operands() const { return operands; }
 
- private:
   /**
    * @brief Accepts a visitor class.
    *
@@ -265,6 +240,7 @@ class expression : public detail::node {
    */
   cudf::size_type accept(detail::linearizer& visitor) const override;
 
+ private:
   const ast_operator op;
   const std::vector<std::reference_wrapper<const node>> operands;
 };
diff --git a/cpp/include/cudf/ast/transform.hpp b/cpp/include/cudf/ast/transform.hpp
index 513f92ea251..59697e5f75c 100644
--- a/cpp/include/cudf/ast/transform.hpp
+++ b/cpp/include/cudf/ast/transform.hpp
@@ -15,7 +15,7 @@
  */
 #pragma once
 
-#include <cudf/ast/linearizer.hpp>
+#include <cudf/ast/nodes.hpp>
 #include <cudf/table/table_view.hpp>
 
 namespace cudf {
diff --git a/cpp/src/ast/linearizer.cpp b/cpp/src/ast/linearizer.cpp
index cc70845e1ff..66a32ead35e 100644
--- a/cpp/src/ast/linearizer.cpp
+++ b/cpp/src/ast/linearizer.cpp
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include <cudf/ast/detail/linearizer.hpp>
-#include <cudf/ast/linearizer.hpp>
+#include <cudf/ast/nodes.hpp>
 #include <cudf/ast/operators.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/scalar/scalar_device_view.cuh>
diff --git a/cpp/src/ast/transform.cu b/cpp/src/ast/transform.cu
index bc055d46869..43d3bde97c2 100644
--- a/cpp/src/ast/transform.cu
+++ b/cpp/src/ast/transform.cu
@@ -15,7 +15,7 @@
  */
 
 #include <cudf/ast/detail/transform.cuh>
-#include <cudf/ast/linearizer.hpp>
+#include <cudf/ast/nodes.hpp>
 #include <cudf/ast/operators.hpp>
 #include <cudf/ast/transform.hpp>
 #include <cudf/column/column_device_view.cuh>
@@ -61,27 +61,25 @@ namespace detail {
  * each thread.
  */
 template <cudf::size_type max_block_size>
-__launch_bounds__(max_block_size) __global__
-  void compute_column_kernel(table_device_view const table,
-                             const cudf::detail::fixed_width_scalar_device_view_base* literals,
-                             mutable_column_device_view output_column,
-                             const detail::device_data_reference* data_references,
-                             const ast_operator* operators,
-                             const cudf::size_type* operator_source_indices,
-                             cudf::size_type num_operators,
-                             cudf::size_type num_intermediates)
+__launch_bounds__(max_block_size) __global__ void compute_column_kernel(
+  table_device_view const table,
+  device_span<const cudf::detail::fixed_width_scalar_device_view_base> literals,
+  mutable_column_device_view output_column,
+  device_span<const detail::device_data_reference> data_references,
+  device_span<const ast_operator> operators,
+  device_span<const cudf::size_type> operator_source_indices,
+  cudf::size_type num_intermediates)
 {
   extern __shared__ std::int64_t intermediate_storage[];
   auto thread_intermediate_storage = &intermediate_storage[threadIdx.x * num_intermediates];
-  auto const start_idx             = cudf::size_type(threadIdx.x + blockIdx.x * blockDim.x);
-  auto const stride                = cudf::size_type(blockDim.x * gridDim.x);
-  auto const num_rows              = table.num_rows();
+  auto const start_idx = static_cast<cudf::size_type>(threadIdx.x + blockIdx.x * blockDim.x);
+  auto const stride    = static_cast<cudf::size_type>(blockDim.x * gridDim.x);
   auto const evaluator =
     cudf::ast::detail::row_evaluator(table, literals, thread_intermediate_storage, &output_column);
 
-  for (cudf::size_type row_index = start_idx; row_index < num_rows; row_index += stride) {
+  for (cudf::size_type row_index = start_idx; row_index < table.num_rows(); row_index += stride) {
     evaluate_row_expression(
-      evaluator, data_references, operators, operator_source_indices, num_operators, row_index);
+      evaluator, data_references, operators, operator_source_indices, row_index);
   }
 }
 
@@ -90,40 +88,8 @@ std::unique_ptr<column> compute_column(table_view const table,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr)
 {
-  // Linearize the AST
-  auto const expr_linearizer         = linearizer(expr, table);
-  auto const data_references         = expr_linearizer.data_references();
-  auto const literals                = expr_linearizer.literals();
-  auto const operators               = expr_linearizer.operators();
-  auto const num_operators           = cudf::size_type(operators.size());
-  auto const operator_source_indices = expr_linearizer.operator_source_indices();
-  auto const expr_data_type          = expr_linearizer.root_data_type();
-
-  // Create ast_plan and device buffer
-  auto plan = ast_plan();
-  plan.add_to_plan(data_references);
-  plan.add_to_plan(literals);
-  plan.add_to_plan(operators);
-  plan.add_to_plan(operator_source_indices);
-  auto const host_data_buffer = plan.get_host_data_buffer();
-  auto const buffer_offsets   = plan.get_offsets();
-  auto const buffer_size      = host_data_buffer.second;
-  auto device_data_buffer =
-    rmm::device_buffer(host_data_buffer.first.get(), buffer_size, stream, mr);
-  // To reduce overhead, we don't call a stream sync here.
-  // The stream is synced later when the table_device_view is created.
-
-  // Create device pointers to components of plan
-  auto const device_data_buffer_ptr = static_cast<const char*>(device_data_buffer.data());
-  auto const device_data_references = reinterpret_cast<const detail::device_data_reference*>(
-    device_data_buffer_ptr + buffer_offsets[0]);
-  auto const device_literals =
-    reinterpret_cast<const cudf::detail::fixed_width_scalar_device_view_base*>(
-      device_data_buffer_ptr + buffer_offsets[1]);
-  auto const device_operators =
-    reinterpret_cast<const ast_operator*>(device_data_buffer_ptr + buffer_offsets[2]);
-  auto const device_operator_source_indices =
-    reinterpret_cast<const cudf::size_type*>(device_data_buffer_ptr + buffer_offsets[3]);
+  auto const expr_linearizer = linearizer(expr, table);                // Linearize the AST
+  auto const plan            = ast_plan{expr_linearizer, stream, mr};  // Create ast_plan
 
   // Create table device view
   auto table_device         = table_device_view::create(table, stream);
@@ -131,7 +97,7 @@ std::unique_ptr<column> compute_column(table_view const table,
 
   // Prepare output column
   auto output_column = cudf::make_fixed_width_column(
-    expr_data_type, table_num_rows, mask_state::UNALLOCATED, stream, mr);
+    expr_linearizer.root_data_type(), table_num_rows, mask_state::UNALLOCATED, stream, mr);
   auto mutable_output_device =
     cudf::mutable_column_device_view::create(output_column->mutable_view(), stream);
 
@@ -155,12 +121,11 @@ std::unique_ptr<column> compute_column(table_view const table,
   cudf::ast::detail::compute_column_kernel<MAX_BLOCK_SIZE>
     <<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
       *table_device,
-      device_literals,
+      plan._device_literals,
       *mutable_output_device,
-      device_data_references,
-      device_operators,
-      device_operator_source_indices,
-      num_operators,
+      plan._device_data_references,
+      plan._device_operators,
+      plan._device_operator_source_indices,
       num_intermediates);
   CHECK_CUDA(stream.value());
   return output_column;
diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp
index 8f4a46e2a54..74937d4deea 100644
--- a/cpp/tests/ast/transform_tests.cpp
+++ b/cpp/tests/ast/transform_tests.cpp
@@ -18,6 +18,7 @@
 #include <cudf/ast/transform.hpp>
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_view.hpp>
+#include <cudf/detail/iterator.cuh>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/scalar/scalar_device_view.cuh>
 #include <cudf/scalar/scalar_factories.hpp>
@@ -30,6 +31,8 @@
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/table_utilities.hpp>
 
+#include <thrust/iterator/counting_iterator.h>
+
 #include <limits>
 #include <type_traits>
 
@@ -55,6 +58,22 @@ TEST_F(TransformTest, BasicAddition)
   cudf::test::expect_columns_equal(expected, result->view(), true);
 }
 
+TEST_F(TransformTest, BasicAdditionLarge)
+{
+  auto a     = thrust::make_counting_iterator(0);
+  auto col   = column_wrapper<int32_t>(a, a + 2000);
+  auto table = cudf::table_view{{col, col}};
+
+  auto col_ref    = cudf::ast::column_reference(0);
+  auto expression = cudf::ast::expression(cudf::ast::ast_operator::ADD, col_ref, col_ref);
+
+  auto b        = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2; });
+  auto expected = column_wrapper<int32_t>(b, b + 2000);
+  auto result   = cudf::ast::compute_column(table, expression);
+
+  cudf::test::expect_columns_equal(expected, result->view(), true);
+}
+
 TEST_F(TransformTest, LessComparator)
 {
   auto c_0   = column_wrapper<int32_t>{3, 20, 1, 50};
@@ -71,6 +90,25 @@ TEST_F(TransformTest, LessComparator)
   cudf::test::expect_columns_equal(expected, result->view(), true);
 }
 
+TEST_F(TransformTest, LessComparatorLarge)
+{
+  auto a     = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2; });
+  auto b     = thrust::make_counting_iterator(500);
+  auto c_0   = column_wrapper<int32_t>(a, a + 2000);
+  auto c_1   = column_wrapper<int32_t>(b, b + 2000);
+  auto table = cudf::table_view{{c_0, c_1}};
+
+  auto col_ref_0  = cudf::ast::column_reference(0);
+  auto col_ref_1  = cudf::ast::column_reference(1);
+  auto expression = cudf::ast::expression(cudf::ast::ast_operator::LESS, col_ref_0, col_ref_1);
+
+  auto c        = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i < 500; });
+  auto expected = column_wrapper<bool>(c, c + 2000);
+  auto result   = cudf::ast::compute_column(table, expression);
+
+  cudf::test::expect_columns_equal(expected, result->view(), true);
+}
+
 TEST_F(TransformTest, MultiLevelTreeArithmetic)
 {
   auto c_0   = column_wrapper<int32_t>{3, 20, 1, 50};
@@ -97,6 +135,34 @@ TEST_F(TransformTest, MultiLevelTreeArithmetic)
   cudf::test::expect_columns_equal(expected, result->view(), true);
 }
 
+TEST_F(TransformTest, MultiLevelTreeArithmeticLarge)
+{
+  using namespace cudf::ast;
+
+  auto a     = thrust::make_counting_iterator(0);
+  auto b     = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i + 1; });
+  auto c     = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2; });
+  auto c_0   = column_wrapper<int32_t>(a, a + 2000);
+  auto c_1   = column_wrapper<int32_t>(b, b + 2000);
+  auto c_2   = column_wrapper<int32_t>(c, c + 2000);
+  auto table = cudf::table_view{{c_0, c_1, c_2}};
+
+  auto col_ref_0 = column_reference(0);
+  auto col_ref_1 = column_reference(1);
+  auto col_ref_2 = column_reference(2);
+
+  auto expr_left_subtree  = expression(cudf::ast::ast_operator::MUL, col_ref_0, col_ref_1);
+  auto expr_right_subtree = expression(cudf::ast::ast_operator::ADD, col_ref_2, col_ref_0);
+  auto expr_tree          = expression(ast_operator::SUB, expr_left_subtree, expr_right_subtree);
+
+  auto result = cudf::ast::compute_column(table, expr_tree);
+  auto calc   = [](auto i) { return (i * (i + 1)) - (i + (i * 2)); };
+  auto d      = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return calc(i); });
+  auto expected = column_wrapper<int32_t>(d, d + 2000);
+
+  cudf::test::expect_columns_equal(expected, result->view(), true);
+}
+
 TEST_F(TransformTest, ImbalancedTreeArithmetic)
 {
   auto c_0   = column_wrapper<double>{0.15, 0.37, 4.2, 21.3};