diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9722e1f0f80..ff12b5cd30a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -56,6 +56,7 @@
 - PR #3193 Add cuPy as a formal dependency
 - PR #3195 Support for zero columned `table_view`
 - PR #3165 Java device memory size for string category
+- PR #3205 Move transform files to legacy
 - PR #3202 Rename and move error.hpp to public headers
 
 ## Bug Fixes
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 11bc8f714b7..5505e22c534 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -77,7 +77,7 @@ test:
     - test -f $PREFIX/include/cudf/rolling.hpp
     - test -f $PREFIX/include/cudf/legacy/search.hpp
     - test -f $PREFIX/include/cudf/stream_compaction.hpp
-    - test -f $PREFIX/include/cudf/transform.hpp
+    - test -f $PREFIX/include/cudf/legacy/transform.hpp
     - test -f $PREFIX/include/cudf/types.h
     - test -f $PREFIX/include/cudf/types.hpp
     - test -f $PREFIX/include/cudf/legacy/unary.hpp
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 26bc3c5f1b2..aed93db7a4c 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -388,9 +388,9 @@ add_library(cudf
             src/jit/parser.cpp
             src/jit/cache.cpp
             src/jit/launcher.cpp
-            src/transform/transform.cpp
+            src/transform/legacy/transform.cpp
             src/transform/jit/code/kernel.cpp
-            src/transform/nans_to_nulls.cu
+            src/transform/legacy/nans_to_nulls.cu
             src/bitmask/legacy/bitmask_ops.cu
             src/stream_compaction/apply_boolean_mask.cu
             src/stream_compaction/drop_nulls.cu
diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/legacy/transform.hpp
similarity index 97%
rename from cpp/include/cudf/transform.hpp
rename to cpp/include/cudf/legacy/transform.hpp
index 7baa53bce8a..5e42e1f803d 100644
--- a/cpp/include/cudf/transform.hpp
+++ b/cpp/include/cudf/legacy/transform.hpp
@@ -17,8 +17,8 @@
 #ifndef CUDF_TRANSFORM_HPP
 #define CUDF_TRANSFORM_HPP
 
-#include "cudf.h"
-#include "types.h"
+#include "cudf/cudf.h"
+#include "cudf/types.h"
 
 namespace cudf {
 
diff --git a/cpp/src/bitmask/legacy/valid_if.cuh b/cpp/src/bitmask/legacy/valid_if.cuh
index 515dc8a7088..adaad97fd2a 100644
--- a/cpp/src/bitmask/legacy/valid_if.cuh
+++ b/cpp/src/bitmask/legacy/valid_if.cuh
@@ -18,7 +18,7 @@
 #define __BITMASK_VALID_IF_CUH__
 
 #include <cudf/types.h>
-#include <utilities/error_utils.hpp>
+#include <cudf/utilities/error.hpp>
 #include <utilities/bit_util.cuh>
 #include <utilities/cuda_utils.hpp>
 #include <bitmask/legacy/bit_mask.cuh>
@@ -42,9 +42,9 @@ constexpr int block_size = 256;
  * @return[out] result of each block is returned in thread 0.
  */
 template <class bit_container, int lane = 0>
-__device__ __inline__ gdf_size_type single_lane_popc_block_reduce(bit_container bit_mask) {
+__device__ __inline__ cudf::size_type single_lane_popc_block_reduce(bit_container bit_mask) {
   
-  static __shared__ gdf_size_type warp_count[warp_size];
+  static __shared__ cudf::size_type warp_count[warp_size];
   
   int lane_id = (threadIdx.x % warp_size);
   int warp_id = (threadIdx.x / warp_size);
@@ -56,7 +56,7 @@ __device__ __inline__ gdf_size_type single_lane_popc_block_reduce(bit_container
   }
   __syncthreads();
 
-  gdf_size_type block_count = 0;
+  cudf::size_type block_count = 0;
 
   if (warp_id == 0) {
     
@@ -65,12 +65,12 @@ __device__ __inline__ gdf_size_type single_lane_popc_block_reduce(bit_container
 
     // Maximum block size is 1024 and 1024 / 32 = 32
     // so one single warp is enough to do the reduction over different warps
-    gdf_size_type count = 
+    cudf::size_type count = 
       (lane_id < (blockDim.x / warp_size)) ? warp_count[lane_id] : 0;
     
     __shared__
-        typename cub::WarpReduce<gdf_size_type>::TempStorage temp_storage;
-    block_count = cub::WarpReduce<gdf_size_type>(temp_storage).Sum(count);
+        typename cub::WarpReduce<cudf::size_type>::TempStorage temp_storage;
+    block_count = cub::WarpReduce<cudf::size_type>(temp_storage).Sum(count);
 
   }
 
diff --git a/cpp/src/bitmask/valid_if.cuh b/cpp/src/bitmask/valid_if.cuh
deleted file mode 100644
index adaad97fd2a..00000000000
--- a/cpp/src/bitmask/valid_if.cuh
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2019, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ 
-
-#ifndef __BITMASK_VALID_IF_CUH__
-#define __BITMASK_VALID_IF_CUH__
-
-#include <cudf/types.h>
-#include <cudf/utilities/error.hpp>
-#include <utilities/bit_util.cuh>
-#include <utilities/cuda_utils.hpp>
-#include <bitmask/legacy/bit_mask.cuh>
-
-#include <cub/cub.cuh>
-
-using bit_mask::bit_mask_t;
-
-namespace cudf {
-
-namespace detail {
-
-constexpr int warp_size = 32;
-
-constexpr int block_size = 256;
-
-/**
- * @brief for each warp in the block do a reduction (summation) of the
- * `__popc(bit_mask)` on a certain lane (default is lane 0).
- * @param[in] bit_mask The bit_mask to be reduced.
- * @return[out] result of each block is returned in thread 0.
- */
-template <class bit_container, int lane = 0>
-__device__ __inline__ cudf::size_type single_lane_popc_block_reduce(bit_container bit_mask) {
-  
-  static __shared__ cudf::size_type warp_count[warp_size];
-  
-  int lane_id = (threadIdx.x % warp_size);
-  int warp_id = (threadIdx.x / warp_size);
-
-  // Assuming one lane of each warp holds the value that we want to perform
-  // reduction
-  if (lane_id == lane) {
-    warp_count[warp_id] = __popc(bit_mask);
-  }
-  __syncthreads();
-
-  cudf::size_type block_count = 0;
-
-  if (warp_id == 0) {
-    
-    static_assert(block_size <= 1024,
-      "Reduction code only works with a block size less or equal to 1024.");
-
-    // Maximum block size is 1024 and 1024 / 32 = 32
-    // so one single warp is enough to do the reduction over different warps
-    cudf::size_type count = 
-      (lane_id < (blockDim.x / warp_size)) ? warp_count[lane_id] : 0;
-    
-    __shared__
-        typename cub::WarpReduce<cudf::size_type>::TempStorage temp_storage;
-    block_count = cub::WarpReduce<cudf::size_type>(temp_storage).Sum(count);
-
-  }
-
-  return block_count;
-
-}
-
-template <bool source_mask_valid, typename bit_container, typename predicate, typename size_type>
-__global__ void valid_if_kernel(
-    const bit_container* source_mask, 
-    bit_container* destination_mask, 
-    predicate p,
-    size_type num_bits,
-    size_type* p_valid_count
-  ){
- 
-  static_assert(warp_size == util::size_in_bits<bit_container>(), 
-      "warp size is different from bit_container size.");
-
-  size_type bit_index_base = blockIdx.x * blockDim.x;
-
-  while (bit_index_base < num_bits) {
-  
-    size_type bit_index = bit_index_base + threadIdx.x;
-   
-    bool thread_active = bit_index < num_bits;
-    bit_container active_threads =
-        __ballot_sync(0xffffffff, thread_active);
-
-    bit_container result_mask = 0;
-
-    if(thread_active){
-      
-      bool const predicate_is_true = p(bit_index);
-      const bit_container ballot_result =
-          __ballot_sync(active_threads, predicate_is_true);
-
-      // Only one thread writes output
-      if (0 == threadIdx.x % warp_size) {
-        const size_type container_index = 
-          util::detail::bit_container_index<bit_container>(bit_index);
-
-        result_mask = source_mask_valid ?
-          source_mask[container_index] & ballot_result : ballot_result;
-        destination_mask[container_index] = result_mask;
-      }
-    
-    }
-    
-    result_mask = single_lane_popc_block_reduce(result_mask);
-    if(0 == threadIdx.x){
-      atomicAdd(p_valid_count, result_mask);
-    }
-    
-    bit_index_base += blockDim.x * gridDim.x;
-  
-  }
-
-}
-
-} // namespace detail
-
- /**
-  * @brief Generate a bitmask where every bit is marked with valid 
-  * if and only if predicate(bit) and source_mask(bit) are both true.
-  * 
-  * @param source_mask The source mask
-  * @param p The predicate that has an operator() member function
-  * @param num_bits Number of bits
-  * @param stream An optional cudaStream_t object
-  * @return The generated bitmask as well as its null_count
-  */
-template <typename bit_container, typename predicate, typename size_type>
-std::pair<bit_container*, size_type> valid_if(
-    const bit_container* source_mask, 
-    const predicate& p,
-    size_type num_bits,
-    cudaStream_t stream = 0
-  ){
-  
-  bit_container* destination_mask = nullptr;
-  CUDF_EXPECTS(GDF_SUCCESS == bit_mask::create_bit_mask(&destination_mask, num_bits), 
-      "Failed to allocate bit_mask buffer.");
-
-  auto kernel = source_mask ? 
-    detail::valid_if_kernel<true,  bit_container, predicate, size_type> :
-    detail::valid_if_kernel<false, bit_container, predicate, size_type> ;
-
-  rmm::device_vector<size_type> valid_count(1);
-  
-  const int grid_size = util::cuda::grid_config_1d(num_bits, detail::block_size).num_blocks;
-  
-  // launch the kernel
-  kernel<<<grid_size, detail::block_size, 0, stream>>>(
-      source_mask, destination_mask, p, num_bits, valid_count.data().get());
-
-  size_type valid_count_host;
-  CUDA_TRY(cudaMemcpyAsync(&valid_count_host, valid_count.data().get(),
-        sizeof(size_type), cudaMemcpyDeviceToHost, stream));
-  
-  // Synchronize the stream before null_count is updated on the host.
-  cudaStreamSynchronize(stream);
-  size_type null_count = num_bits - valid_count_host;
-
-  CHECK_STREAM(stream);
-  return std::pair<bit_container*, size_type>(destination_mask, null_count);
-
-}
-
-} // namespace cudf
-#endif
diff --git a/cpp/src/stream_compaction/drop_duplicates.cu b/cpp/src/stream_compaction/drop_duplicates.cu
index a4985a551fe..cf03e41fc56 100644
--- a/cpp/src/stream_compaction/drop_duplicates.cu
+++ b/cpp/src/stream_compaction/drop_duplicates.cu
@@ -27,7 +27,7 @@
 #include <cudf/legacy/table.hpp>
 #include <table/legacy/device_table.cuh>
 #include <table/legacy/device_table_row_operators.cuh>
-#include <cudf/transform.hpp>
+#include <cudf/legacy/transform.hpp>
 
 #include <rmm/thrust_rmm_allocator.h>
 #include <cudf/utilities/legacy/nvcategory_util.hpp>
diff --git a/cpp/src/strings/copying/copying.cu b/cpp/src/strings/copying/copying.cu
index 28e9f041e1a..33add200624 100644
--- a/cpp/src/strings/copying/copying.cu
+++ b/cpp/src/strings/copying/copying.cu
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <bitmask/valid_if.cuh>
+#include <bitmask/legacy/valid_if.cuh>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/strings/copying.hpp>
diff --git a/cpp/src/strings/utilities.cuh b/cpp/src/strings/utilities.cuh
index cb6b5b4fa9e..e7961a4fbf4 100644
--- a/cpp/src/strings/utilities.cuh
+++ b/cpp/src/strings/utilities.cuh
@@ -16,7 +16,7 @@
 #pragma once
 
 #include <cuda_runtime.h>
-#include <bitmask/valid_if.cuh>
+#include <bitmask/legacy/valid_if.cuh>
 #include <cudf/strings/string_view.cuh>
 #include <cudf/column/column_view.hpp>
 
diff --git a/cpp/src/transform/nans_to_nulls.cu b/cpp/src/transform/legacy/nans_to_nulls.cu
similarity index 100%
rename from cpp/src/transform/nans_to_nulls.cu
rename to cpp/src/transform/legacy/nans_to_nulls.cu
diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/legacy/transform.cpp
similarity index 99%
rename from cpp/src/transform/transform.cpp
rename to cpp/src/transform/legacy/transform.cpp
index 9bd69b750ea..38e38af792e 100644
--- a/cpp/src/transform/transform.cpp
+++ b/cpp/src/transform/legacy/transform.cpp
@@ -30,7 +30,7 @@
 #include <jit/launcher.h>
 #include <jit/legacy/type.h>
 #include <jit/parser.h>
-#include "jit/code/code.h"
+#include "../jit/code/code.h"
 
 #include <types.h.jit>
 #include <types.hpp.jit>
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 4f4d854f8c4..bbcb710558b 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -189,12 +189,12 @@ set(LEGACY_BITMASK_TEST_SRC
 ConfigureTest(LEGACY_BITMASK_TEST "${LEGACY_BITMASK_TEST_SRC}")
 
 ###################################################################################################
-# - nans_to_nulls tests ---------------------------------------------------------------------------
+# - legacy nans_to_nulls tests --------------------------------------------------------------------
 
-set(NANS_TO_NULLS_TEST_SRC
-  "${CMAKE_CURRENT_SOURCE_DIR}/bitmask/nans_to_nulls_tests.cu")
+set(LEGACY_NANS_TO_NULLS_TEST_SRC
+  "${CMAKE_CURRENT_SOURCE_DIR}/bitmask/legacy/nans_to_nulls_tests.cu")
 
-ConfigureTest(NANS_TO_NULLS_TEST "${NANS_TO_NULLS_TEST_SRC}")
+ConfigureTest(LEGACY_NANS_TO_NULLS_TEST "${LEGACY_NANS_TO_NULLS_TEST_SRC}")
 
 
 ###################################################################################################
@@ -282,12 +282,12 @@ set(BINARY_TEST_SRC
 ConfigureTest(BINARY_TEST "${BINARY_TEST_SRC}")
 
 ###################################################################################################
-# - unary transform tests -------------------------------------------------------------------------
+# - legacy unary transform tests ------------------------------------------------------------------
 
-set(TRANSFORM_TEST_SRC 
-    "${CMAKE_CURRENT_SOURCE_DIR}/transform/integration/unary-operation-integration-test.cu")
+set(LEGACY_TRANSFORM_TEST_SRC 
+    "${CMAKE_CURRENT_SOURCE_DIR}/transform/integration/legacy/unary-operation-integration-test.cu")
 
-ConfigureTest(TRANSFORM_TEST "${TRANSFORM_TEST_SRC}")
+ConfigureTest(LEGACY_TRANSFORM_TEST "${LEGACY_TRANSFORM_TEST_SRC}")
 
 ###################################################################################################
 # - jit cache tests -------------------------------------------------------------------------------
diff --git a/cpp/tests/bitmask/nans_to_nulls_tests.cu b/cpp/tests/bitmask/legacy/nans_to_nulls_tests.cu
similarity index 99%
rename from cpp/tests/bitmask/nans_to_nulls_tests.cu
rename to cpp/tests/bitmask/legacy/nans_to_nulls_tests.cu
index 71f1edcc155..cadfb614812 100644
--- a/cpp/tests/bitmask/nans_to_nulls_tests.cu
+++ b/cpp/tests/bitmask/legacy/nans_to_nulls_tests.cu
@@ -28,7 +28,7 @@
 #include <cudf/utilities/legacy/wrapper_types.hpp>
 
 #include <random>
-#include <cudf/transform.hpp>
+#include <cudf/legacy/transform.hpp>
 
 using bit_mask::bit_mask_t;
 
diff --git a/cpp/tests/transform/integration/unary-operation-integration-test.cu b/cpp/tests/transform/integration/legacy/unary-operation-integration-test.cu
similarity index 98%
rename from cpp/tests/transform/integration/unary-operation-integration-test.cu
rename to cpp/tests/transform/integration/legacy/unary-operation-integration-test.cu
index 4fac697d34c..030aecbb521 100644
--- a/cpp/tests/transform/integration/unary-operation-integration-test.cu
+++ b/cpp/tests/transform/integration/legacy/unary-operation-integration-test.cu
@@ -18,8 +18,8 @@
  */
 
 #include <tests/utilities/legacy/cudf_test_fixtures.h>
-#include <cudf/transform.hpp>
-#include "assert-unary.h"
+#include <cudf/legacy/transform.hpp>
+#include "../assert-unary.h"
 #include <cudf/types.h>
 
 #include <cctype>
diff --git a/python/cudf/cudf/_lib/includes/unaryops.pxd b/python/cudf/cudf/_lib/includes/unaryops.pxd
index 4eba682f746..aeec3125d72 100644
--- a/python/cudf/cudf/_lib/includes/unaryops.pxd
+++ b/python/cudf/cudf/_lib/includes/unaryops.pxd
@@ -49,7 +49,7 @@ cdef extern from "cudf/cudf.h" nogil:
         gdf_column *output
     ) except +
 
-cdef extern from "cudf/transform.hpp" namespace "cudf" nogil:
+cdef extern from "cudf/legacy/transform.hpp" namespace "cudf" nogil:
     cdef gdf_column transform(
         const gdf_column& input,
         const string& ptx,