From 9d8e43ef6ad75f6babc08fea88642ea006822e04 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 23 May 2024 11:41:49 -0400
Subject: [PATCH] Remove legacy JSON reader and concurrent_unordered_map.cuh.
 (#15813)

This completes the final two steps and closes https://github.com/rapidsai/cudf/issues/15537. Also addresses one step of https://github.com/rapidsai/cudf/issues/12261.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - David Wendt (https://github.com/davidwendt)
  - Shruti Shivakumar (https://github.com/shrshi)
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/15813
---
 cpp/CMakeLists.txt                            |   2 -
 cpp/include/cudf/io/json.hpp                  |  32 -
 cpp/src/groupby/hash/groupby.cu               |   1 -
 cpp/src/hash/concurrent_unordered_map.cuh     | 557 ---------------
 cpp/src/hash/managed.cuh                      |  41 --
 cpp/src/io/json/legacy/json_gpu.cu            | 615 ----------------
 cpp/src/io/json/legacy/json_gpu.hpp           |  99 ---
 cpp/src/io/json/legacy/read_json.hpp          |  38 -
 cpp/src/io/json/legacy/reader_impl.cu         | 667 ------------------
 cpp/src/io/json/read_json.cu                  |   9 -
 cpp/tests/CMakeLists.txt                      |   4 -
 cpp/tests/hash_map/map_test.cu                | 217 ------
 cpp/tests/io/json_test.cpp                    |  49 +-
 cpp/tests/io/nested_json_test.cpp             |   2 +-
 python/cudf/cudf/_lib/json.pyx                |   2 -
 .../cudf/_lib/pylibcudf/libcudf/io/json.pxd   |   3 -
 python/cudf/cudf/io/json.py                   |   1 -
 17 files changed, 8 insertions(+), 2331 deletions(-)
 delete mode 100644 cpp/src/hash/concurrent_unordered_map.cuh
 delete mode 100644 cpp/src/hash/managed.cuh
 delete mode 100644 cpp/src/io/json/legacy/json_gpu.cu
 delete mode 100644 cpp/src/io/json/legacy/json_gpu.hpp
 delete mode 100644 cpp/src/io/json/legacy/read_json.hpp
 delete mode 100644 cpp/src/io/json/legacy/reader_impl.cu
 delete mode 100644 cpp/tests/hash_map/map_test.cu

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 7390c465ccb..228d21ddccb 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -390,8 +390,6 @@ add_library(
   src/io/json/json_tree.cu
   src/io/json/nested_json_gpu.cu
   src/io/json/read_json.cu
-  src/io/json/legacy/json_gpu.cu
-  src/io/json/legacy/reader_impl.cu
   src/io/json/parser_features.cpp
   src/io/json/write_json.cu
   src/io/orc/aggregate_orc_metadata.cpp
diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp
index aa4bee4fb5e..65ba8f25577 100644
--- a/cpp/include/cudf/io/json.hpp
+++ b/cpp/include/cudf/io/json.hpp
@@ -270,15 +270,6 @@ class json_reader_options {
    */
   bool is_enabled_dayfirst() const { return _dayfirst; }
 
-  /**
-   * @brief Whether the legacy reader should be used.
-   *
-   * @deprecated Since 24.06
-   *
-   * @returns true if the legacy reader will be used, false otherwise
-   */
-  [[deprecated]] bool is_enabled_legacy() const { return _legacy; }
-
   /**
    * @brief Whether the reader should keep quotes of string values.
    *
@@ -406,15 +397,6 @@ class json_reader_options {
    */
   void enable_dayfirst(bool val) { _dayfirst = val; }
 
-  /**
-   * @brief Set whether to use the legacy reader.
-   *
-   * @deprecated Since 24.06
-   *
-   * @param val Boolean value to enable/disable the legacy reader
-   */
-  [[deprecated]] void enable_legacy(bool val) { _legacy = val; }
-
   /**
    * @brief Set whether the reader should keep quotes of string values.
    *
@@ -605,20 +587,6 @@ class json_reader_options_builder {
     return *this;
   }
 
-  /**
-   * @brief Set whether to use the legacy reader.
-   *
-   * @deprecated Since 24.06
-   *
-   * @param val Boolean value to enable/disable legacy parsing
-   * @return this for chaining
-   */
-  [[deprecated]] json_reader_options_builder& legacy(bool val)
-  {
-    options._legacy = val;
-    return *this;
-  }
-
   /**
    * @brief Set whether the reader should keep quotes of string values.
    *
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 4f75ab19c66..0ec293ae3f0 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -16,7 +16,6 @@
 
 #include "groupby/common/utils.hpp"
 #include "groupby/hash/groupby_kernels.cuh"
-#include "hash/concurrent_unordered_map.cuh"
 
 #include <cudf/aggregation.hpp>
 #include <cudf/column/column.hpp>
diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh
deleted file mode 100644
index a010a462de3..00000000000
--- a/cpp/src/hash/concurrent_unordered_map.cuh
+++ /dev/null
@@ -1,557 +0,0 @@
-/*
- * Copyright (c) 2017-2024, NVIDIA CORPORATION.  All rights reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "hash/managed.cuh"
-
-#include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/hashing/detail/default_hash.cuh>
-#include <cudf/hashing/detail/helper_functions.cuh>
-#include <cudf/utilities/default_stream.hpp>
-#include <cudf/utilities/error.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/mr/device/polymorphic_allocator.hpp>
-
-#include <cuda/atomic>
-#include <thrust/pair.h>
-
-#include <iostream>
-#include <iterator>
-#include <limits>
-#include <type_traits>
-
-namespace {
-template <std::size_t N>
-struct packed {
-  using type = void;
-};
-template <>
-struct packed<sizeof(uint64_t)> {
-  using type = uint64_t;
-};
-template <>
-struct packed<sizeof(uint32_t)> {
-  using type = uint32_t;
-};
-template <typename pair_type>
-using packed_t = typename packed<sizeof(pair_type)>::type;
-
-/**
- * @brief Indicates if a pair type can be packed.
- *
- * When the size of the key,value pair being inserted into the hash table is
- * equal in size to a type where atomicCAS is natively supported, it is more
- * efficient to "pack" the pair and insert it with a single atomicCAS.
- *
- * Only integral key and value types may be packed because we use
- * bitwise equality comparison, which may not be valid for non-integral
- * types.
- *
- * Also, the `pair_type` must not contain any padding bits otherwise
- * accessing the packed value would be undefined.
- *
- * @tparam pair_type The pair type that will be packed
- * @return true If the pair type can be packed
- * @return false  If the pair type cannot be packed
- */
-template <typename pair_type,
-          typename key_type   = typename pair_type::first_type,
-          typename value_type = typename pair_type::second_type>
-constexpr bool is_packable()
-{
-  return std::is_integral_v<key_type> and std::is_integral_v<value_type> and
-         not std::is_void_v<packed_t<pair_type>> and
-         std::has_unique_object_representations_v<pair_type>;
-}
-
-/**
- * @brief Allows viewing a pair in a packed representation
- *
- * Used as an optimization for inserting when a pair can be inserted with a
- * single atomicCAS
- */
-template <typename pair_type, typename Enable = void>
-union pair_packer;
-
-template <typename pair_type>
-union pair_packer<pair_type, std::enable_if_t<is_packable<pair_type>()>> {
-  using packed_type = packed_t<pair_type>;
-  packed_type packed;
-  pair_type pair;
-
-  __device__ pair_packer(pair_type _pair) : pair{_pair} {}
-
-  __device__ pair_packer(packed_type _packed) : packed{_packed} {}
-};
-}  // namespace
-
-/**
- * Supports concurrent insert, but not concurrent insert and find.
- *
- * @note The user is responsible for the following stream semantics:
- * - Either the same stream should be used to create the map as is used by the kernels that access
- * it, or
- * - the stream used to create the map should be synchronized before it is accessed from a different
- * stream or from host code.
- *
- * TODO:
- *  - add constructor that takes pointer to hash_table to avoid allocations
- */
-template <typename Key,
-          typename Element,
-          typename Hasher    = cudf::hashing::detail::default_hash<Key>,
-          typename Equality  = equal_to<Key>,
-          typename Allocator = rmm::mr::polymorphic_allocator<thrust::pair<Key, Element>>>
-class concurrent_unordered_map {
- public:
-  using size_type      = size_t;
-  using hasher         = Hasher;
-  using key_equal      = Equality;
-  using allocator_type = Allocator;
-  using key_type       = Key;
-  using mapped_type    = Element;
-  using value_type     = thrust::pair<Key, Element>;
-  using iterator       = cycle_iterator_adapter<value_type*>;
-  using const_iterator = cycle_iterator_adapter<value_type*> const;
-
- public:
-  /**
-   * @brief Factory to construct a new concurrent unordered map.
-   *
-   * Returns a `std::unique_ptr` to a new concurrent unordered map object. The
-   * map is non-owning and trivially copyable and should be passed by value into
-   * kernels. The `unique_ptr` contains a custom deleter that will free the
-   * map's contents.
-   *
-   * @note The implementation of this unordered_map uses sentinel values to
-   * indicate an entry in the hash table that is empty, i.e., if a hash bucket
-   * is empty, the pair residing there will be equal to (unused_key,
-   * unused_element). As a result, attempting to insert a key equal to
-   *`unused_key` results in undefined behavior.
-   *
-   * @note All allocations, kernels and copies in the constructor take place
-   * on stream but the constructor does not synchronize the stream. It is the user's
-   * responsibility to synchronize or use the same stream to access the map.
-   *
-   * @param capacity The maximum number of pairs the map may hold
-   * @param stream CUDA stream used for device memory operations and kernel launches.
-   * @param unused_element The sentinel value to use for an empty value
-   * @param unused_key The sentinel value to use for an empty key
-   * @param hash_function The hash function to use for hashing keys
-   * @param equal The equality comparison function for comparing if two keys are
-   * equal
-   * @param allocator The allocator to use for allocation the hash table's
-   * storage
-   */
-  static auto create(size_type capacity,
-                     rmm::cuda_stream_view stream,
-                     mapped_type const unused_element = std::numeric_limits<mapped_type>::max(),
-                     key_type const unused_key        = std::numeric_limits<key_type>::max(),
-                     Hasher const& hash_function      = hasher(),
-                     Equality const& equal            = key_equal(),
-                     allocator_type const& allocator  = allocator_type())
-  {
-    CUDF_FUNC_RANGE();
-    using Self = concurrent_unordered_map<Key, Element, Hasher, Equality, Allocator>;
-
-    // Note: need `(*p).destroy` instead of `p->destroy` here
-    // due to compiler bug: https://github.com/rapidsai/cudf/pull/5692
-    auto deleter = [stream](Self* p) { (*p).destroy(stream); };
-
-    return std::unique_ptr<Self, std::function<void(Self*)>>{
-      new Self(capacity, unused_element, unused_key, hash_function, equal, allocator, stream),
-      deleter};
-  }
-
-  /**
-   * @brief Returns an iterator to the first element in the map
-   *
-   * @note `__device__` code that calls this function should either run in the
-   * same stream as `create()`, or the accessing stream either be running on the
-   * same stream as create(), or the accessing stream should be appropriately
-   * synchronized with the creating stream.
-   *
-   * @returns iterator to the first element in the map.
-   */
-  __device__ iterator begin()
-  {
-    return iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, m_hashtbl_values);
-  }
-
-  /**
-   * @brief Returns a constant iterator to the first element in the map
-   *
-   * @note `__device__` code that calls this function should either run in the
-   * same stream as `create()`, or the accessing stream either be running on the
-   * same stream as create(), or the accessing stream should be appropriately
-   * synchronized with the creating stream.
-   *
-   * @returns constant iterator to the first element in the map.
-   */
-  __device__ const_iterator begin() const
-  {
-    return const_iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, m_hashtbl_values);
-  }
-
-  /**
-   * @brief Returns an iterator to the one past the last element in the map
-   *
-   * @note `__device__` code that calls this function should either run in the
-   * same stream as `create()`, or the accessing stream either be running on the
-   * same stream as create(), or the accessing stream should be appropriately
-   * synchronized with the creating stream.
-   *
-   * @returns iterator to the one past the last element in the map.
-   */
-  __device__ iterator end()
-  {
-    return iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, m_hashtbl_values + m_capacity);
-  }
-
-  /**
-   * @brief Returns a constant iterator to the one past the last element in the map
-   *
-   * @note When called in a device code, user should make sure that it should
-   * either be running on the same stream as create(), or the accessing stream
-   * should be appropriately synchronized with the creating stream.
-   *
-   * @returns constant iterator to the one past the last element in the map.
-   */
-  __device__ const_iterator end() const
-  {
-    return const_iterator(
-      m_hashtbl_values, m_hashtbl_values + m_capacity, m_hashtbl_values + m_capacity);
-  }
-  __host__ __device__ value_type* data() const { return m_hashtbl_values; }
-
-  __host__ __device__ key_type get_unused_key() const { return m_unused_key; }
-
-  __host__ __device__ mapped_type get_unused_element() const { return m_unused_element; }
-
-  [[nodiscard]] __host__ __device__ size_type capacity() const { return m_capacity; }
-
- private:
-  /**
-   * @brief Enumeration of the possible results of attempting to insert into
-   *a hash bucket
-   */
-  enum class insert_result {
-    CONTINUE,  ///< Insert did not succeed, continue trying to insert
-               ///< (collision)
-    SUCCESS,   ///< New pair inserted successfully
-    DUPLICATE  ///< Insert did not succeed, key is already present
-  };
-
-  /**
-   * @brief Specialization for value types that can be packed.
-   *
-   * When the size of the key,value pair being inserted is equal in size to
-   *a type where atomicCAS is natively supported, this optimization path
-   *will insert the pair in a single atomicCAS operation.
-   */
-  template <typename pair_type = value_type>
-  __device__ std::enable_if_t<is_packable<pair_type>(), insert_result> attempt_insert(
-    value_type* const __restrict__ insert_location, value_type const& insert_pair)
-  {
-    pair_packer<pair_type> expected{thrust::make_pair(m_unused_key, m_unused_element)};
-    pair_packer<pair_type> desired{insert_pair};
-
-    using packed_type = typename pair_packer<pair_type>::packed_type;
-
-    auto* insert_ptr = reinterpret_cast<packed_type*>(insert_location);
-    cuda::atomic_ref<packed_type, cuda::thread_scope_device> ref{*insert_ptr};
-    auto const success =
-      ref.compare_exchange_strong(expected.packed, desired.packed, cuda::std::memory_order_relaxed);
-
-    if (success) {
-      return insert_result::SUCCESS;
-    } else if (m_equal(expected.pair.first, insert_pair.first)) {
-      return insert_result::DUPLICATE;
-    }
-    return insert_result::CONTINUE;
-  }
-
-  /**
-   * @brief Attempts to insert a key,value pair at the specified hash bucket.
-   *
-   * @param[in] insert_location Pointer to hash bucket to attempt insert
-   * @param[in] insert_pair The pair to insert
-   * @return Enum indicating result of insert attempt.
-   */
-  template <typename pair_type = value_type>
-  __device__ std::enable_if_t<not is_packable<pair_type>(), insert_result> attempt_insert(
-    value_type* const __restrict__ insert_location, value_type const& insert_pair)
-  {
-    auto expected = m_unused_key;
-    cuda::atomic_ref<key_type, cuda::thread_scope_device> ref{insert_location->first};
-    auto const key_success =
-      ref.compare_exchange_strong(expected, insert_pair.first, cuda::std::memory_order_relaxed);
-
-    // Hash bucket empty
-    if (key_success) {
-      insert_location->second = insert_pair.second;
-      return insert_result::SUCCESS;
-    }
-    // Key already exists
-    else if (m_equal(expected, insert_pair.first)) {
-      return insert_result::DUPLICATE;
-    }
-
-    return insert_result::CONTINUE;
-  }
-
- public:
-  /**
-   * @brief Attempts to insert a key, value pair into the map.
-   *
-   * Returns an iterator, boolean pair.
-   *
-   * If the new key already present in the map, the iterator points to
-   * the location of the existing key and the boolean is `false` indicating
-   * that the insert did not succeed.
-   *
-   * If the new key was not present, the iterator points to the location
-   * where the insert occurred and the boolean is `true` indicating that the
-   *insert succeeded.
-   *
-   * @param insert_pair The key and value pair to insert
-   * @return Iterator, Boolean pair. Iterator is to the location of the
-   *newly inserted pair, or the existing pair that prevented the insert.
-   *Boolean indicates insert success.
-   */
-  __device__ thrust::pair<iterator, bool> insert(value_type const& insert_pair)
-  {
-    size_type const key_hash{m_hf(insert_pair.first)};
-    size_type index{key_hash % m_capacity};
-
-    insert_result status{insert_result::CONTINUE};
-
-    value_type* current_bucket{nullptr};
-
-    while (status == insert_result::CONTINUE) {
-      current_bucket = &m_hashtbl_values[index];
-      status         = attempt_insert(current_bucket, insert_pair);
-      index          = (index + 1) % m_capacity;
-    }
-
-    bool const insert_success = status == insert_result::SUCCESS;
-
-    return thrust::make_pair(
-      iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, current_bucket), insert_success);
-  }
-
-  /**
-   * @brief Searches the map for the specified key.
-   *
-   * @note `find` is not threadsafe with `insert`. I.e., it is not safe to
-   *do concurrent `insert` and `find` operations.
-   *
-   * @param k The key to search for
-   * @return An iterator to the key if it exists, else map.end()
-   */
-  __device__ const_iterator find(key_type const& k) const
-  {
-    size_type const key_hash = m_hf(k);
-    size_type index          = key_hash % m_capacity;
-
-    value_type* current_bucket = &m_hashtbl_values[index];
-
-    while (true) {
-      key_type const existing_key = current_bucket->first;
-
-      if (m_unused_key == existing_key) { return this->end(); }
-
-      if (m_equal(k, existing_key)) {
-        return const_iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, current_bucket);
-      }
-
-      index          = (index + 1) % m_capacity;
-      current_bucket = &m_hashtbl_values[index];
-    }
-  }
-
-  /**
-   * @brief Searches the map for the specified key.
-   *
-   * This version of the find function specifies a hashing function and an
-   * equality comparison.  This allows the caller to use different functions
-   * for insert and find (for example, when you want to insert keys from
-   * one table and use find to match keys from a different table with the
-   * keys from the first table).
-   *
-   * @note `find` is not threadsafe with `insert`. I.e., it is not safe to
-   * do concurrent `insert` and `find` operations.
-   *
-   * @tparam find_hasher     Type of hashing function
-   * @tparam find_key_equal  Type of equality comparison
-   *
-   * @param k         The key to search for
-   * @param f_hash    The hashing function to use to hash this key
-   * @param f_equal   The equality function to use to compare this key with the
-   *                  contents of the hash table
-   * @return An iterator to the key if it exists, else map.end()
-   */
-  template <typename find_hasher, typename find_key_equal>
-  __device__ const_iterator find(key_type const& k,
-                                 find_hasher f_hash,
-                                 find_key_equal f_equal) const
-  {
-    size_type const key_hash = f_hash(k);
-    size_type index          = key_hash % m_capacity;
-
-    value_type* current_bucket = &m_hashtbl_values[index];
-
-    while (true) {
-      key_type const existing_key = current_bucket->first;
-
-      if (m_unused_key == existing_key) { return this->end(); }
-
-      if (f_equal(k, existing_key)) {
-        return const_iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, current_bucket);
-      }
-
-      index          = (index + 1) % m_capacity;
-      current_bucket = &m_hashtbl_values[index];
-    }
-  }
-
-  void assign_async(concurrent_unordered_map const& other, rmm::cuda_stream_view stream)
-  {
-    if (other.m_capacity <= m_capacity) {
-      m_capacity = other.m_capacity;
-    } else {
-      m_allocator.deallocate(m_hashtbl_values, m_capacity, stream);
-      m_capacity = other.m_capacity;
-      m_capacity = other.m_capacity;
-
-      m_hashtbl_values = m_allocator.allocate(m_capacity, stream);
-    }
-    CUDF_CUDA_TRY(cudaMemcpyAsync(m_hashtbl_values,
-                                  other.m_hashtbl_values,
-                                  m_capacity * sizeof(value_type),
-                                  cudaMemcpyDefault,
-                                  stream.value()));
-  }
-
-  void clear_async(rmm::cuda_stream_view stream)
-  {
-    constexpr int block_size = 128;
-    init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>(
-      m_hashtbl_values, m_capacity, m_unused_key, m_unused_element);
-  }
-
-  void print()
-  {
-    for (size_type i = 0; i < m_capacity; ++i) {
-      std::cout << i << ": " << m_hashtbl_values[i].first << "," << m_hashtbl_values[i].second
-                << std::endl;
-    }
-  }
-
-  void prefetch(int const dev_id, rmm::cuda_stream_view stream)
-  {
-    cudaPointerAttributes hashtbl_values_ptr_attributes;
-    cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values);
-
-    if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) {
-      CUDF_CUDA_TRY(cudaMemPrefetchAsync(
-        m_hashtbl_values, m_capacity * sizeof(value_type), dev_id, stream.value()));
-    }
-    CUDF_CUDA_TRY(cudaMemPrefetchAsync(this, sizeof(*this), dev_id, stream.value()));
-  }
-
-  /**
-   * @brief Frees the contents of the map and destroys the map object.
-   *
-   * This function is invoked as the deleter of the `std::unique_ptr` returned
-   * from the `create()` factory function.
-   *
-   * @param stream CUDA stream used for device memory operations and kernel launches.
-   */
-  void destroy(rmm::cuda_stream_view stream)
-  {
-    m_allocator.deallocate(m_hashtbl_values, m_capacity, stream);
-    delete this;
-  }
-
-  concurrent_unordered_map()                                           = delete;
-  concurrent_unordered_map(concurrent_unordered_map const&)            = default;
-  concurrent_unordered_map(concurrent_unordered_map&&)                 = default;
-  concurrent_unordered_map& operator=(concurrent_unordered_map const&) = default;
-  concurrent_unordered_map& operator=(concurrent_unordered_map&&)      = default;
-  ~concurrent_unordered_map()                                          = default;
-
- private:
-  hasher m_hf;
-  key_equal m_equal;
-  mapped_type m_unused_element;
-  key_type m_unused_key;
-  allocator_type m_allocator;
-  size_type m_capacity;
-  value_type* m_hashtbl_values;
-
-  /**
-   * @brief Private constructor used by `create` factory function.
-   *
-   * @param capacity The desired m_capacity of the hash table
-   * @param unused_element The sentinel value to use for an empty value
-   * @param unused_key The sentinel value to use for an empty key
-   * @param hash_function The hash function to use for hashing keys
-   * @param equal The equality comparison function for comparing if two keys
-   *are equal
-   * @param allocator The allocator to use for allocation the hash table's
-   * storage
-   * @param stream CUDA stream used for device memory operations and kernel launches.
-   */
-  concurrent_unordered_map(size_type capacity,
-                           mapped_type const unused_element,
-                           key_type const unused_key,
-                           Hasher const& hash_function,
-                           Equality const& equal,
-                           allocator_type const& allocator,
-                           rmm::cuda_stream_view stream)
-    : m_hf(hash_function),
-      m_equal(equal),
-      m_allocator(allocator),
-      m_capacity(capacity),
-      m_unused_element(unused_element),
-      m_unused_key(unused_key)
-  {
-    m_hashtbl_values         = m_allocator.allocate(m_capacity, stream);
-    constexpr int block_size = 128;
-    {
-      cudaPointerAttributes hashtbl_values_ptr_attributes;
-      cudaError_t status =
-        cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values);
-
-      if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) {
-        int dev_id = 0;
-        CUDF_CUDA_TRY(cudaGetDevice(&dev_id));
-        CUDF_CUDA_TRY(cudaMemPrefetchAsync(
-          m_hashtbl_values, m_capacity * sizeof(value_type), dev_id, stream.value()));
-      }
-    }
-
-    if (m_capacity > 0) {
-      init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>(
-        m_hashtbl_values, m_capacity, m_unused_key, m_unused_element);
-    }
-
-    CUDF_CHECK_CUDA(stream.value());
-  }
-};
diff --git a/cpp/src/hash/managed.cuh b/cpp/src/hash/managed.cuh
deleted file mode 100644
index 9797c83c47c..00000000000
--- a/cpp/src/hash/managed.cuh
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2017-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include <cassert>
-#include <new>
-
-struct managed {
-  static void* operator new(size_t n)
-  {
-    void* ptr          = nullptr;
-    cudaError_t result = cudaMallocManaged(&ptr, n);
-    if (cudaSuccess != result || 0 == ptr) throw std::bad_alloc();
-    return ptr;
-  }
-
-  static void operator delete(void* ptr) noexcept
-  {
-    auto const free_result = cudaFree(ptr);
-    assert(free_result == cudaSuccess);
-  }
-};
-
-inline bool isPtrManaged(cudaPointerAttributes attr)
-{
-  return (attr.type == cudaMemoryTypeManaged);
-}
diff --git a/cpp/src/io/json/legacy/json_gpu.cu b/cpp/src/io/json/legacy/json_gpu.cu
deleted file mode 100644
index ff4845fcecb..00000000000
--- a/cpp/src/io/json/legacy/json_gpu.cu
+++ /dev/null
@@ -1,615 +0,0 @@
-/*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "io/utilities/column_type_histogram.hpp"
-#include "io/utilities/parsing_utils.cuh"
-#include "io/utilities/trie.cuh"
-#include "json_gpu.hpp"
-
-#include <cudf/detail/utilities/cuda.cuh>
-#include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
-#include <cudf/types.hpp>
-#include <cudf/utilities/bit.hpp>
-#include <cudf/utilities/span.hpp>
-#include <cudf/utilities/traits.hpp>
-#include <cudf/utilities/type_dispatcher.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_buffer.hpp>
-#include <rmm/device_uvector.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <thrust/advance.h>
-#include <thrust/detail/copy.h>
-#include <thrust/execution_policy.h>
-#include <thrust/find.h>
-#include <thrust/generate.h>
-#include <thrust/iterator/reverse_iterator.h>
-#include <thrust/mismatch.h>
-#include <thrust/optional.h>
-#include <thrust/pair.h>
-
-using cudf::device_span;
-using cudf::detail::grid_1d;
-
-namespace cudf::io::json::detail::legacy {
-
-namespace {
-/**
- * @brief CUDA Kernel that adjusts the row range to exclude the character outside of the top level
- * brackets.
- *
- * The top level brackets characters are excluded from the resulting range.
- *
- * @param[in] begin Pointer to the first character in the row
- * @param[in] end pointer to the first character after the row
- */
-__device__ std::pair<char const*, char const*> limit_range_to_brackets(char const* begin,
-                                                                       char const* end)
-{
-  auto const data_begin = thrust::next(thrust::find_if(
-    thrust::seq, begin, end, [] __device__(auto c) { return c == '[' || c == '{'; }));
-  auto const data_end   = thrust::next(thrust::find_if(thrust::seq,
-                                                     thrust::make_reverse_iterator(end),
-                                                     thrust::make_reverse_iterator(data_begin),
-                                                     [](auto c) { return c == ']' || c == '}'; }))
-                          .base();
-  return {data_begin, data_end};
-}
-
-/**
- * @brief Find the first JSON object key in the range.
- *
- * Assumes that begin is not in the middle of a field.
- *
- * @param[in] begin Pointer to the first character in the parsing range
- * @param[in] end pointer to the first character after the parsing range
- * @param[in] quotechar The character used to denote quotes
- *
- * @return Begin and end iterators of the key name; (`end`, `end`) if a key is not found
- */
-__device__ std::pair<char const*, char const*> get_next_key(char const* begin,
-                                                            char const* end,
-                                                            char quotechar)
-{
-  // Key starts after the first quote
-  auto const key_begin = thrust::find(thrust::seq, begin, end, quotechar) + 1;
-  if (key_begin > end) return {end, end};
-
-  // Key ends after the next unescaped quote
-  auto const key_end_pair = thrust::mismatch(
-    thrust::seq, key_begin, end - 1, key_begin + 1, [quotechar] __device__(auto prev_ch, auto ch) {
-      return !(ch == quotechar && prev_ch != '\\');
-    });
-
-  return {key_begin, key_end_pair.second};
-}
-
-/**
- * @brief Returns true is the input character is a valid digit.
- * Supports both decimal and hexadecimal digits (uppercase and lowercase).
- *
- * @param c Character to check
- * @param is_hex Whether to check as a hexadecimal
- *
- * @return `true` if it is digit-like, `false` otherwise
- */
-__device__ __inline__ bool is_digit(char c, bool is_hex = false)
-{
-  if (c >= '0' && c <= '9') return true;
-
-  if (is_hex) {
-    if (c >= 'A' && c <= 'F') return true;
-    if (c >= 'a' && c <= 'f') return true;
-  }
-
-  return false;
-}
-
-/**
- * @brief Returns true if the counters indicate a potentially valid float.
- * False positives are possible because positions are not taken into account.
- * For example, field "e.123-" would match the pattern.
- */
-__device__ __inline__ bool is_like_float(
-  long len, long digit_cnt, long decimal_cnt, long dash_cnt, long exponent_cnt)
-{
-  // Can't have more than one exponent and one decimal point
-  if (decimal_cnt > 1) return false;
-  if (exponent_cnt > 1) return false;
-  // Without the exponent or a decimal point, this is an integer, not a float
-  if (decimal_cnt == 0 && exponent_cnt == 0) return false;
-
-  // Can only have one '-' per component
-  if (dash_cnt > 1 + exponent_cnt) return false;
-
-  // If anything other than these characters is present, it's not a float
-  if (digit_cnt + decimal_cnt + dash_cnt + exponent_cnt != len) return false;
-
-  // Needs at least 1 digit, 2 if exponent is present
-  if (digit_cnt < 1 + exponent_cnt) return false;
-
-  return true;
-}
-
-/**
- * @brief Contains information on a JSON file field.
- */
-struct field_descriptor {
-  cudf::size_type column;
-  char const* value_begin;
-  char const* value_end;
-  bool is_quoted;
-};
-
-/**
- * @brief Parse the first field in the given range and return its descriptor.
- *
- * @param[in] begin Pointer to the first character in the parsing range
- * @param[in] end pointer to the first character after the parsing range
- * @param[in] opts The global parsing behavior options
- * @param[in] field_idx Index of the current field in the input row
- * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory.
- * nullptr is passed when the input file does not consist of objects.
- * @return Descriptor of the parsed field
- */
-__device__ field_descriptor next_field_descriptor(char const* begin,
-                                                  char const* end,
-                                                  parse_options_view const& opts,
-                                                  cudf::size_type field_idx,
-                                                  col_map_type col_map)
-{
-  auto const desc_pre_trim =
-    col_map.capacity() == 0
-      // No key - column and begin are trivial
-      ? field_descriptor{field_idx,
-                         begin,
-                         cudf::io::gpu::seek_field_end(begin, end, opts, true),
-                         false}
-      : [&]() {
-          auto const key_range = get_next_key(begin, end, opts.quotechar);
-          auto const key_hash  = cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{}(
-            cudf::string_view(key_range.first, key_range.second - key_range.first));
-          auto const hash_col = col_map.find(key_hash);
-          // Fall back to field index if not found (parsing error)
-          auto const column = (hash_col != col_map.end()) ? (*hash_col).second : field_idx;
-
-          // Skip the colon between the key and the value
-          auto const value_begin = thrust::find(thrust::seq, key_range.second, end, ':') + 1;
-          return field_descriptor{column,
-                                  value_begin,
-                                  cudf::io::gpu::seek_field_end(value_begin, end, opts, true),
-                                  false};
-        }();
-
-  // Modify start & end to ignore whitespace and quotechars
-  auto const trimmed_value_range =
-    trim_whitespaces(desc_pre_trim.value_begin, desc_pre_trim.value_end);
-  bool const is_quoted =
-    thrust::distance(trimmed_value_range.first, trimmed_value_range.second) >= 2 and
-    *trimmed_value_range.first == opts.quotechar and
-    *thrust::prev(trimmed_value_range.second) == opts.quotechar;
-  return {desc_pre_trim.column,
-          trimmed_value_range.first + static_cast<std::ptrdiff_t>(is_quoted),
-          trimmed_value_range.second - static_cast<std::ptrdiff_t>(is_quoted),
-          is_quoted};
-}
-
-/**
- * @brief Returns the range that contains the data in a given row.
- *
- * Excludes the top-level brackets.
- *
- * @param[in] data Device span pointing to the JSON data in device memory
- * @param[in] row_offsets The offset of each row in the input
- * @param[in] row Index of the row for which the range is returned
- *
- * @return The begin and end iterators of the row data.
- */
-__device__ std::pair<char const*, char const*> get_row_data_range(
-  device_span<char const> const data, device_span<uint64_t const> const row_offsets, size_type row)
-{
-  auto const row_begin = data.begin() + row_offsets[row];
-  auto const row_end =
-    data.begin() + ((row < row_offsets.size() - 1) ? row_offsets[row + 1] : data.size());
-  return limit_range_to_brackets(row_begin, row_end);
-}
-
-/**
- * @brief CUDA kernel that parses and converts plain text data into cuDF column data.
- *
- * Data is processed one record at a time
- *
- * @param[in] opts A set of parsing options
- * @param[in] data The entire data to read
- * @param[in] row_offsets The offset of each row in the input
- * @param[in] column_types The data type of each column
- * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory.
- * nullptr is passed when the input file does not consist of objects.
- * @param[out] output_columns The output column data
- * @param[out] valid_fields The bitmaps indicating whether column fields are valid
- * @param[out] num_valid_fields The numbers of valid fields in columns
- */
-CUDF_KERNEL void convert_data_to_columns_kernel(parse_options_view opts,
-                                                device_span<char const> const data,
-                                                device_span<uint64_t const> const row_offsets,
-                                                device_span<data_type const> const column_types,
-                                                col_map_type col_map,
-                                                device_span<void* const> const output_columns,
-                                                device_span<bitmask_type* const> const valid_fields,
-                                                device_span<cudf::size_type> const num_valid_fields)
-{
-  auto const rec_id = grid_1d::global_thread_id();
-  if (rec_id >= row_offsets.size()) return;
-
-  auto const row_data_range = get_row_data_range(data, row_offsets, rec_id);
-
-  auto current = row_data_range.first;
-  for (size_type input_field_index = 0;
-       input_field_index < column_types.size() && current < row_data_range.second;
-       input_field_index++) {
-    auto const desc =
-      next_field_descriptor(current, row_data_range.second, opts, input_field_index, col_map);
-    auto const value_len = static_cast<size_t>(std::max(desc.value_end - desc.value_begin, 0L));
-    auto const is_quoted = static_cast<std::ptrdiff_t>(desc.is_quoted);
-
-    current = desc.value_end + 1;
-
-    using string_index_pair = thrust::pair<char const*, size_type>;
-
-    if (!serialized_trie_contains(opts.trie_na,
-                                  {desc.value_begin - is_quoted, value_len + is_quoted * 2})) {
-      // Type dispatcher does not handle strings
-      if (column_types[desc.column].id() == type_id::STRING) {
-        auto str_list           = static_cast<string_index_pair*>(output_columns[desc.column]);
-        str_list[rec_id].first  = desc.value_begin;
-        str_list[rec_id].second = value_len;
-
-        // set the valid bitmap - all bits were set to 0 to start
-        set_bit(valid_fields[desc.column], rec_id);
-        atomicAdd(&num_valid_fields[desc.column], 1);
-      } else {
-        if (cudf::type_dispatcher(column_types[desc.column],
-                                  ConvertFunctor{},
-                                  desc.value_begin,
-                                  desc.value_end,
-                                  output_columns[desc.column],
-                                  rec_id,
-                                  column_types[desc.column],
-                                  opts,
-                                  false)) {
-          // set the valid bitmap - all bits were set to 0 to start
-          set_bit(valid_fields[desc.column], rec_id);
-          atomicAdd(&num_valid_fields[desc.column], 1);
-        }
-      }
-    } else if (column_types[desc.column].id() == type_id::STRING) {
-      auto str_list           = static_cast<string_index_pair*>(output_columns[desc.column]);
-      str_list[rec_id].first  = nullptr;
-      str_list[rec_id].second = 0;
-    }
-  }
-}
-
-/**
- * @brief CUDA kernel that processes a buffer of data and determines information about the
- * column types within.
- *
- * Data is processed in one row/record at a time, so the number of total
- * threads (tid) is equal to the number of rows.
- *
- * @param[in] opts A set of parsing options
- * @param[in] data Input data buffer
- * @param[in] rec_starts The offset of each row in the input
- * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory.
- * nullptr is passed when the input file does not consist of objects.
- * @param[in] num_columns The number of columns of input data
- * @param[out] column_infos The count for each column data type
- */
-CUDF_KERNEL void detect_data_types_kernel(
-  parse_options_view const opts,
-  device_span<char const> const data,
-  device_span<uint64_t const> const row_offsets,
-  col_map_type col_map,
-  int num_columns,
-  device_span<cudf::io::column_type_histogram> const column_infos)
-{
-  auto const rec_id = grid_1d::global_thread_id();
-  if (rec_id >= row_offsets.size()) return;
-
-  auto const are_rows_objects = col_map.capacity() != 0;
-  auto const row_data_range   = get_row_data_range(data, row_offsets, rec_id);
-
-  size_type input_field_index = 0;
-  for (auto current = row_data_range.first;
-       input_field_index < num_columns && current < row_data_range.second;
-       input_field_index++) {
-    auto const desc =
-      next_field_descriptor(current, row_data_range.second, opts, input_field_index, col_map);
-    auto const value_len = static_cast<size_t>(std::max(desc.value_end - desc.value_begin, 0L));
-
-    // Advance to the next field; +1 to skip the delimiter
-    current = desc.value_end + 1;
-
-    // Checking if the field is empty/valid
-    if (serialized_trie_contains(opts.trie_na, {desc.value_begin, value_len})) {
-      // Increase the null count for array rows, where the null count is initialized to zero.
-      if (!are_rows_objects) { atomicAdd(&column_infos[desc.column].null_count, 1); }
-      continue;
-    } else if (are_rows_objects) {
-      // For files with object rows, null count is initialized to row count. The value is decreased
-      // here for every valid field.
-      atomicAdd(&column_infos[desc.column].null_count, -1);
-    }
-    // Don't need counts to detect strings, any field in quotes is deduced to be a string
-    if (desc.is_quoted) {
-      atomicAdd(&column_infos[desc.column].string_count, 1);
-      continue;
-    }
-
-    int digit_count    = 0;
-    int decimal_count  = 0;
-    int slash_count    = 0;
-    int dash_count     = 0;
-    int plus_count     = 0;
-    int colon_count    = 0;
-    int exponent_count = 0;
-    int other_count    = 0;
-
-    bool const maybe_hex =
-      ((value_len > 2 && *desc.value_begin == '0' && *(desc.value_begin + 1) == 'x') ||
-       (value_len > 3 && *desc.value_begin == '-' && *(desc.value_begin + 1) == '0' &&
-        *(desc.value_begin + 2) == 'x'));
-    for (auto pos = desc.value_begin; pos < desc.value_end; ++pos) {
-      if (is_digit(*pos, maybe_hex)) {
-        digit_count++;
-        continue;
-      }
-      // Looking for unique characters that will help identify column types
-      switch (*pos) {
-        case '.': decimal_count++; break;
-        case '-': dash_count++; break;
-        case '+': plus_count++; break;
-        case '/': slash_count++; break;
-        case ':': colon_count++; break;
-        case 'e':
-        case 'E':
-          if (!maybe_hex && pos > desc.value_begin && pos < desc.value_end - 1) exponent_count++;
-          break;
-        default: other_count++; break;
-      }
-    }
-
-    // Integers have to have the length of the string
-    int int_req_number_cnt = value_len;
-    // Off by one if they start with a minus sign
-    if ((*desc.value_begin == '-' || *desc.value_begin == '+') && value_len > 1) {
-      --int_req_number_cnt;
-    }
-    // Off by one if they are a hexadecimal number
-    if (maybe_hex) { --int_req_number_cnt; }
-    if (serialized_trie_contains(opts.trie_true, {desc.value_begin, value_len}) ||
-        serialized_trie_contains(opts.trie_false, {desc.value_begin, value_len})) {
-      atomicAdd(&column_infos[desc.column].bool_count, 1);
-    } else if (digit_count == int_req_number_cnt) {
-      bool is_negative       = (*desc.value_begin == '-');
-      char const* data_begin = desc.value_begin + (is_negative || (*desc.value_begin == '+'));
-      cudf::size_type* ptr   = cudf::io::gpu::infer_integral_field_counter(
-        data_begin, data_begin + digit_count, is_negative, column_infos[desc.column]);
-      atomicAdd(ptr, 1);
-    } else if (is_like_float(
-                 value_len, digit_count, decimal_count, dash_count + plus_count, exponent_count)) {
-      atomicAdd(&column_infos[desc.column].float_count, 1);
-    }
-    // A date-time field cannot have more than 3 non-special characters
-    // A number field cannot have more than one decimal point
-    else if (other_count > 3 || decimal_count > 1) {
-      atomicAdd(&column_infos[desc.column].string_count, 1);
-    } else {
-      // A date field can have either one or two '-' or '\'; A legal combination will only have one
-      // of them To simplify the process of auto column detection, we are not covering all the
-      // date-time formation permutations
-      if ((dash_count > 0 && dash_count <= 2 && slash_count == 0) ||
-          (dash_count == 0 && slash_count > 0 && slash_count <= 2)) {
-        if (colon_count <= 2) {
-          atomicAdd(&column_infos[desc.column].datetime_count, 1);
-        } else {
-          atomicAdd(&column_infos[desc.column].string_count, 1);
-        }
-      } else {
-        // Default field type is string
-        atomicAdd(&column_infos[desc.column].string_count, 1);
-      }
-    }
-  }
-  if (!are_rows_objects) {
-    // For array rows, mark missing fields as null
-    for (; input_field_index < num_columns; ++input_field_index)
-      atomicAdd(&column_infos[input_field_index].null_count, 1);
-  }
-}
-
-/**
- * @brief Input data range that contains a field in key:value format.
- */
-struct key_value_range {
-  char const* key_begin;
-  char const* key_end;
-  char const* value_begin;
-  char const* value_end;
-};
-
-/**
- * @brief Parse the next field in key:value format and return ranges of its parts.
- */
-__device__ key_value_range get_next_key_value_range(char const* begin,
-                                                    char const* end,
-                                                    parse_options_view const& opts)
-{
-  auto const key_range = get_next_key(begin, end, opts.quotechar);
-
-  // Colon between the key and the value
-  auto const colon = thrust::find(thrust::seq, key_range.second, end, ':');
-  if (colon == end) return {end, end, end};
-
-  // Field value (including delimiters)
-  auto const value_end = cudf::io::gpu::seek_field_end(colon + 1, end, opts, true);
-  return {key_range.first, key_range.second, colon + 1, value_end};
-}
-
-/**
- * @brief Cuda kernel that collects information about JSON object keys in the file.
- *
- * @param[in] options A set of parsing options
- * @param[in] data Input data buffer
- * @param[in] row_offsets The offset of each row in the input
- * @param[out] keys_cnt Number of keys found in the file
- * @param[out] keys_info optional, information (offset, length, hash) for each found key
- */
-CUDF_KERNEL void collect_keys_info_kernel(parse_options_view const options,
-                                          device_span<char const> const data,
-                                          device_span<uint64_t const> const row_offsets,
-                                          unsigned long long int* keys_cnt,
-                                          thrust::optional<mutable_table_device_view> keys_info)
-{
-  auto const rec_id = grid_1d::global_thread_id();
-  if (rec_id >= row_offsets.size()) return;
-
-  auto const row_data_range = get_row_data_range(data, row_offsets, rec_id);
-
-  auto advance = [&](char const* begin) {
-    return get_next_key_value_range(begin, row_data_range.second, options);
-  };
-  for (auto field_range = advance(row_data_range.first);
-       field_range.key_begin < row_data_range.second;
-       field_range = advance(field_range.value_end)) {
-    auto const idx = atomicAdd(keys_cnt, 1ULL);
-    if (keys_info.has_value()) {
-      auto const len                              = field_range.key_end - field_range.key_begin;
-      keys_info->column(0).element<uint64_t>(idx) = field_range.key_begin - data.begin();
-      keys_info->column(1).element<uint16_t>(idx) = len;
-      keys_info->column(2).element<uint32_t>(idx) =
-        cudf::hashing::detail::MurmurHash3_x86_32<cudf::string_view>{}(
-          cudf::string_view(field_range.key_begin, len));
-    }
-  }
-}
-
-}  // namespace
-
-/**
- * @copydoc cudf::io::json::detail::legacy::convert_json_to_columns
- */
-void convert_json_to_columns(parse_options_view const& opts,
-                             device_span<char const> const data,
-                             device_span<uint64_t const> const row_offsets,
-                             device_span<data_type const> const column_types,
-                             col_map_type* col_map,
-                             device_span<void* const> const output_columns,
-                             device_span<bitmask_type* const> const valid_fields,
-                             device_span<cudf::size_type> num_valid_fields,
-                             rmm::cuda_stream_view stream)
-{
-  int block_size;
-  int min_grid_size;
-  CUDF_CUDA_TRY(cudaOccupancyMaxPotentialBlockSize(
-    &min_grid_size, &block_size, convert_data_to_columns_kernel));
-
-  int const grid_size = (row_offsets.size() + block_size - 1) / block_size;
-
-  convert_data_to_columns_kernel<<<grid_size, block_size, 0, stream.value()>>>(opts,
-                                                                               data,
-                                                                               row_offsets,
-                                                                               column_types,
-                                                                               *col_map,
-                                                                               output_columns,
-                                                                               valid_fields,
-                                                                               num_valid_fields);
-
-  CUDF_CHECK_CUDA(stream.value());
-}
-
-/**
- * @copydoc cudf::io::json::detail::legacy::detect_data_types
- */
-
-std::vector<cudf::io::column_type_histogram> detect_data_types(
-  parse_options_view const& options,
-  device_span<char const> const data,
-  device_span<uint64_t const> const row_offsets,
-  bool do_set_null_count,
-  int num_columns,
-  col_map_type* col_map,
-  rmm::cuda_stream_view stream)
-{
-  int block_size;
-  int min_grid_size;
-  CUDF_CUDA_TRY(
-    cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, detect_data_types_kernel));
-
-  auto d_column_infos = [&]() {
-    if (do_set_null_count) {
-      rmm::device_uvector<cudf::io::column_type_histogram> d_column_infos(num_columns, stream);
-      // Set the null count to the row count (all fields assumes to be null).
-      thrust::generate(
-        rmm::exec_policy(stream),
-        d_column_infos.begin(),
-        d_column_infos.end(),
-        [num_records = static_cast<cudf::size_type>(row_offsets.size())] __device__() {
-          return cudf::io::column_type_histogram{num_records};
-        });
-      return d_column_infos;
-    } else {
-      return cudf::detail::make_zeroed_device_uvector_async<cudf::io::column_type_histogram>(
-        num_columns, stream, rmm::mr::get_current_device_resource());
-    }
-  }();
-
-  // Calculate actual block count to use based on records count
-  int const grid_size = (row_offsets.size() + block_size - 1) / block_size;
-
-  detect_data_types_kernel<<<grid_size, block_size, 0, stream.value()>>>(
-    options, data, row_offsets, *col_map, num_columns, d_column_infos);
-
-  return cudf::detail::make_std_vector_sync(d_column_infos, stream);
-}
-
-/**
- * @copydoc cudf::io::json::detail::legacy::collect_keys_info
- */
-void collect_keys_info(parse_options_view const& options,
-                       device_span<char const> const data,
-                       device_span<uint64_t const> const row_offsets,
-                       unsigned long long int* keys_cnt,
-                       thrust::optional<mutable_table_device_view> keys_info,
-                       rmm::cuda_stream_view stream)
-{
-  int block_size;
-  int min_grid_size;
-  CUDF_CUDA_TRY(
-    cudaOccupancyMaxPotentialBlockSize(&min_grid_size, &block_size, collect_keys_info_kernel));
-
-  // Calculate actual block count to use based on records count
-  int const grid_size = (row_offsets.size() + block_size - 1) / block_size;
-
-  collect_keys_info_kernel<<<grid_size, block_size, 0, stream.value()>>>(
-    options, data, row_offsets, keys_cnt, keys_info);
-
-  CUDF_CHECK_CUDA(stream.value());
-}
-
-}  // namespace cudf::io::json::detail::legacy
diff --git a/cpp/src/io/json/legacy/json_gpu.hpp b/cpp/src/io/json/legacy/json_gpu.hpp
deleted file mode 100644
index 853e30c9427..00000000000
--- a/cpp/src/io/json/legacy/json_gpu.hpp
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "hash/concurrent_unordered_map.cuh"
-#include "io/utilities/column_type_histogram.hpp"
-#include "io/utilities/parsing_utils.cuh"
-
-#include <cudf/table/table_device_view.cuh>
-#include <cudf/types.hpp>
-#include <cudf/utilities/span.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-
-#include <thrust/optional.h>
-
-using cudf::device_span;
-
-namespace cudf::io::json::detail::legacy {
-
-using col_map_type = concurrent_unordered_map<uint32_t, cudf::size_type>;
-/**
- * @brief Convert a buffer of input data (text) into raw cuDF column data.
- *
- * @param[in] options A set of parsing options
- * @param[in] data The entire data to read
- * @param[in] row_offsets The start of each data record
- * @param[in] dtypes The data type of each column
- * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory.
- * nullptr is passed when the input file does not consist of objects.
- * @param[out] output_columns The output column data
- * @param[out] valid_fields The bitmaps indicating whether column fields are valid
- * @param[out] num_valid_fields The numbers of valid fields in columns
- * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- */
-void convert_json_to_columns(parse_options_view const& options,
-                             device_span<char const> data,
-                             device_span<uint64_t const> row_offsets,
-                             device_span<data_type const> column_types,
-                             col_map_type* col_map,
-                             device_span<void* const> output_columns,
-                             device_span<bitmask_type* const> valid_fields,
-                             device_span<cudf::size_type> num_valid_fields,
-                             rmm::cuda_stream_view stream);
-
-/**
- * @brief Process a buffer of data and determine information about the column types within.
- *
- * @param[in] options A set of parsing options
- * @param[in] data Input data buffer
- * @param[in] row_offsets The offset of each row in the input
- * @param[in] num_columns The number of columns of input data
- * @param[in] col_map Pointer to the (column name hash -> column index) map in device memory.
- * nullptr is passed when the input file does not consist of objects.
- * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- *
- * @returns The count for each column data type
- */
-std::vector<cudf::io::column_type_histogram> detect_data_types(
-  parse_options_view const& options,
-  device_span<char const> data,
-  device_span<uint64_t const> row_offsets,
-  bool do_set_null_count,
-  int num_columns,
-  col_map_type* col_map,
-  rmm::cuda_stream_view stream);
-
-/**
- * @brief Collects information about JSON object keys in the file.
- *
- * @param[in] options A set of parsing options
- * @param[in] data Input data buffer
- * @param[in] row_offsets The offset of each row in the input
- * @param[out] keys_cnt Number of keys found in the file
- * @param[out] keys_info optional, information (offset, length, hash) for each found key
- * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- */
-void collect_keys_info(parse_options_view const& options,
-                       device_span<char const> data,
-                       device_span<uint64_t const> row_offsets,
-                       unsigned long long int* keys_cnt,
-                       thrust::optional<mutable_table_device_view> keys_info,
-                       rmm::cuda_stream_view stream);
-
-}  // namespace cudf::io::json::detail::legacy
diff --git a/cpp/src/io/json/legacy/read_json.hpp b/cpp/src/io/json/legacy/read_json.hpp
deleted file mode 100644
index 2c02fdd402f..00000000000
--- a/cpp/src/io/json/legacy/read_json.hpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <cudf/types.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/resource_ref.hpp>
-
-#include <thrust/mr/memory_resource.h>
-
-#include <memory>
-#include <vector>
-
-namespace cudf::io {
-class json_reader_options;  // forward decl
-}
-
-namespace cudf::io::json::detail::legacy {
-
-table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
-                              json_reader_options const& reader_opts,
-                              rmm::cuda_stream_view stream,
-                              rmm::device_async_resource_ref mr);
-
-}  // namespace cudf::io::json::detail::legacy
diff --git a/cpp/src/io/json/legacy/reader_impl.cu b/cpp/src/io/json/legacy/reader_impl.cu
deleted file mode 100644
index 846b3cfab4e..00000000000
--- a/cpp/src/io/json/legacy/reader_impl.cu
+++ /dev/null
@@ -1,667 +0,0 @@
-/*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "hash/concurrent_unordered_map.cuh"
-#include "io/comp/io_uncomp.hpp"
-#include "io/utilities/column_buffer.hpp"
-#include "io/utilities/parsing_utils.cuh"
-#include "json_gpu.hpp"
-
-#include <cudf/column/column_factories.hpp>
-#include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/detail/utilities/visitor_overload.hpp>
-#include <cudf/groupby.hpp>
-#include <cudf/io/datasource.hpp>
-#include <cudf/io/detail/json.hpp>
-#include <cudf/io/json.hpp>
-#include <cudf/sorting.hpp>
-#include <cudf/strings/detail/replace.hpp>
-#include <cudf/table/table.hpp>
-#include <cudf/types.hpp>
-#include <cudf/utilities/error.hpp>
-#include <cudf/utilities/span.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_scalar.hpp>
-#include <rmm/device_uvector.hpp>
-#include <rmm/exec_policy.hpp>
-#include <rmm/resource_ref.hpp>
-
-#include <thrust/for_each.h>
-#include <thrust/functional.h>
-#include <thrust/host_vector.h>
-#include <thrust/iterator/constant_iterator.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/optional.h>
-#include <thrust/pair.h>
-#include <thrust/sort.h>
-#include <thrust/transform.h>
-
-using cudf::host_span;
-
-namespace cudf::io::json::detail::legacy {
-
-using col_map_ptr_type = std::unique_ptr<col_map_type, std::function<void(col_map_type*)>>;
-
-/**
- * @brief Aggregate the table containing keys info by their hash values.
- *
- * @param[in] info Table with columns containing key offsets, lengths and hashes, respectively
- *
- * @return Table with data aggregated by key hash values
- */
-std::unique_ptr<table> aggregate_keys_info(std::unique_ptr<table> info)
-{
-  auto const info_view = info->view();
-  std::vector<groupby::aggregation_request> requests;
-  requests.emplace_back(groupby::aggregation_request{info_view.column(0)});
-  requests.back().aggregations.emplace_back(make_min_aggregation<groupby_aggregation>());
-  requests.back().aggregations.emplace_back(make_nth_element_aggregation<groupby_aggregation>(0));
-
-  requests.emplace_back(groupby::aggregation_request{info_view.column(1)});
-  requests.back().aggregations.emplace_back(make_min_aggregation<groupby_aggregation>());
-  requests.back().aggregations.emplace_back(make_nth_element_aggregation<groupby_aggregation>(0));
-
-  // Aggregate by hash values
-  groupby::groupby gb_obj(
-    table_view({info_view.column(2)}), null_policy::EXCLUDE, sorted::NO, {}, {});
-
-  auto result = gb_obj.aggregate(requests);  // TODO: no stream parameter?
-
-  std::vector<std::unique_ptr<column>> out_columns;
-  out_columns.emplace_back(std::move(result.second[0].results[0]));  // offsets
-  out_columns.emplace_back(std::move(result.second[1].results[0]));  // lengths
-  out_columns.emplace_back(std::move(result.first->release()[0]));   // hashes
-  return std::make_unique<table>(std::move(out_columns));
-}
-
-/**
- * @brief Initializes the (key hash -> column index) hash map.
- */
-col_map_ptr_type create_col_names_hash_map(column_view column_name_hashes,
-                                           rmm::cuda_stream_view stream)
-{
-  auto key_col_map       = col_map_type::create(column_name_hashes.size(), stream);
-  auto const column_data = column_name_hashes.data<uint32_t>();
-  thrust::for_each_n(rmm::exec_policy(stream),
-                     thrust::make_counting_iterator<size_type>(0),
-                     column_name_hashes.size(),
-                     [map = *key_col_map, column_data] __device__(size_type idx) mutable {
-                       map.insert(thrust::make_pair(column_data[idx], idx));
-                     });
-  return key_col_map;
-}
-
-/**
- * @brief Create a table whose columns contain the information on JSON objects' keys.
- *
- * The columns contain name offsets in the file, name lengths and name hashes, respectively.
- *
- * @param[in] options Parsing options (e.g. delimiter and quotation character)
- * @param[in] data Input JSON device data
- * @param[in] row_offsets Device array of row start locations in the input buffer
- * @param[in] stream CUDA stream used for device memory operations and kernel launches
- *
- * @return std::unique_ptr<table> cudf table with three columns (offsets, lengths, hashes)
- */
-std::unique_ptr<table> create_json_keys_info_table(parse_options_view const& parse_opts,
-                                                   device_span<char const> const data,
-                                                   device_span<uint64_t const> const row_offsets,
-                                                   rmm::cuda_stream_view stream)
-{
-  // Count keys
-  rmm::device_scalar<unsigned long long int> key_counter(0, stream);
-  collect_keys_info(parse_opts, data, row_offsets, key_counter.data(), {}, stream);
-
-  // Allocate columns to store hash value, length, and offset of each JSON object key in the input
-  auto const num_keys = key_counter.value(stream);
-  std::vector<std::unique_ptr<column>> info_columns;
-  info_columns.emplace_back(
-    make_numeric_column(data_type(type_id::UINT64), num_keys, mask_state::UNALLOCATED, stream));
-  info_columns.emplace_back(
-    make_numeric_column(data_type(type_id::UINT16), num_keys, mask_state::UNALLOCATED, stream));
-  info_columns.emplace_back(
-    make_numeric_column(data_type(type_id::UINT32), num_keys, mask_state::UNALLOCATED, stream));
-  // Create a table out of these columns to pass them around more easily
-  auto info_table           = std::make_unique<table>(std::move(info_columns));
-  auto const info_table_mdv = mutable_table_device_view::create(info_table->mutable_view(), stream);
-
-  // Reset the key counter - now used for indexing
-  key_counter.set_value_to_zero_async(stream);
-  // Fill the allocated columns
-  collect_keys_info(parse_opts, data, row_offsets, key_counter.data(), {*info_table_mdv}, stream);
-  return info_table;
-}
-
-/**
- * @brief Extract the keys from the JSON file the name offsets/lengths.
- */
-std::vector<std::string> create_key_strings(char const* h_data,
-                                            table_view sorted_info,
-                                            rmm::cuda_stream_view stream)
-{
-  auto const num_cols = sorted_info.num_rows();
-  std::vector<uint64_t> h_offsets(num_cols);
-  CUDF_CUDA_TRY(cudaMemcpyAsync(h_offsets.data(),
-                                sorted_info.column(0).data<uint64_t>(),
-                                sizeof(uint64_t) * num_cols,
-                                cudaMemcpyDefault,
-                                stream.value()));
-
-  std::vector<uint16_t> h_lens(num_cols);
-  CUDF_CUDA_TRY(cudaMemcpyAsync(h_lens.data(),
-                                sorted_info.column(1).data<uint16_t>(),
-                                sizeof(uint16_t) * num_cols,
-                                cudaMemcpyDefault,
-                                stream.value()));
-
-  std::vector<std::string> names(num_cols);
-  std::transform(h_offsets.cbegin(),
-                 h_offsets.cend(),
-                 h_lens.cbegin(),
-                 names.begin(),
-                 [&](auto offset, auto len) { return std::string(h_data + offset, len); });
-  return names;
-}
-
-auto sort_keys_info_by_offset(std::unique_ptr<table> info)
-{
-  auto const agg_offset_col_view = info->get_column(0).view();
-  return sort_by_key(info->view(), table_view({agg_offset_col_view}));
-}
-
-/**
- * @brief Extract JSON object keys from a JSON file.
- *
- * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- *
- * @return Names of JSON object keys in the file
- */
-std::pair<std::vector<std::string>, col_map_ptr_type> get_json_object_keys_hashes(
-  parse_options_view const& parse_opts,
-  host_span<char const> h_data,
-  device_span<uint64_t const> rec_starts,
-  device_span<char const> d_data,
-  rmm::cuda_stream_view stream)
-{
-  auto info = create_json_keys_info_table(parse_opts, d_data, rec_starts, stream);
-
-  auto aggregated_info = aggregate_keys_info(std::move(info));
-  auto sorted_info     = sort_keys_info_by_offset(std::move(aggregated_info));
-
-  return {create_key_strings(h_data.data(), sorted_info->view(), stream),
-          create_col_names_hash_map(sorted_info->get_column(2).view(), stream)};
-}
-
-std::vector<uint8_t> ingest_raw_input(host_span<std::unique_ptr<datasource>> sources,
-                                      compression_type compression,
-                                      size_t range_offset,
-                                      size_t range_size,
-                                      size_t range_size_padded)
-{
-  CUDF_FUNC_RANGE();
-  // Iterate through the user defined sources and read the contents into the local buffer
-  size_t total_source_size = 0;
-  for (auto const& source : sources) {
-    total_source_size += source->size();
-  }
-  total_source_size = total_source_size - (range_offset * sources.size());
-
-  auto buffer = std::vector<uint8_t>(total_source_size);
-
-  size_t bytes_read = 0;
-  for (auto const& source : sources) {
-    if (!source->is_empty()) {
-      auto data_size   = (range_size_padded != 0) ? range_size_padded : source->size();
-      auto destination = buffer.data() + bytes_read;
-      bytes_read += source->host_read(range_offset, data_size, destination);
-    }
-  }
-
-  if (compression == compression_type::NONE) {
-    return buffer;
-  } else {
-    return decompress(compression, buffer);
-  }
-}
-
-bool should_load_whole_source(json_reader_options const& reader_opts)
-{
-  return reader_opts.get_byte_range_offset() == 0 and  //
-         reader_opts.get_byte_range_size() == 0;
-}
-
-rmm::device_uvector<uint64_t> find_record_starts(json_reader_options const& reader_opts,
-                                                 host_span<char const> h_data,
-                                                 device_span<char const> d_data,
-                                                 rmm::cuda_stream_view stream)
-{
-  std::vector<char> chars_to_count{'\n'};
-  // Currently, ignoring lineterminations within quotes is handled by recording the records of both,
-  // and then filtering out the records that is a quotechar or a linetermination within a quotechar
-  // pair.
-  // If not starting at an offset, add an extra row to account for the first row in the file
-  cudf::size_type prefilter_count = ((reader_opts.get_byte_range_offset() == 0) ? 1 : 0);
-  if (should_load_whole_source(reader_opts)) {
-    prefilter_count += count_all_from_set(d_data, chars_to_count, stream);
-  } else {
-    prefilter_count += count_all_from_set(h_data, chars_to_count, stream);
-  }
-
-  rmm::device_uvector<uint64_t> rec_starts(prefilter_count, stream);
-
-  auto* find_result_ptr = rec_starts.data();
-  // Manually adding an extra row to account for the first row in the file
-  if (reader_opts.get_byte_range_offset() == 0) {
-    find_result_ptr++;
-    CUDF_CUDA_TRY(cudaMemsetAsync(rec_starts.data(), 0ull, sizeof(uint64_t), stream.value()));
-  }
-
-  std::vector<char> chars_to_find{'\n'};
-  // Passing offset = 1 to return positions AFTER the found character
-  if (should_load_whole_source(reader_opts)) {
-    find_all_from_set(d_data, chars_to_find, 1, find_result_ptr, stream);
-  } else {
-    find_all_from_set(h_data, chars_to_find, 1, find_result_ptr, stream);
-  }
-
-  // Previous call stores the record positions as encountered by all threads
-  // Sort the record positions as subsequent processing may require filtering
-  // certain rows or other processing on specific records
-  thrust::sort(rmm::exec_policy(stream), rec_starts.begin(), rec_starts.end());
-
-  auto filtered_count = prefilter_count;
-
-  // Exclude the ending newline as it does not precede a record start
-  if (h_data.back() == '\n') { filtered_count--; }
-  rec_starts.resize(filtered_count, stream);
-
-  return rec_starts;
-}
-
-/**
- * @brief Uploads the relevant segment of the input json data onto the GPU.
- *
- * Sets the d_data_ data member.
- * Only rows that need to be parsed are copied, based on the byte range
- * Also updates the array of record starts to match the device data offset.
- */
-rmm::device_uvector<char> upload_data_to_device(json_reader_options const& reader_opts,
-                                                host_span<char const> h_data,
-                                                rmm::device_uvector<uint64_t>& rec_starts,
-                                                rmm::cuda_stream_view stream)
-{
-  CUDF_FUNC_RANGE();
-  size_t end_offset = h_data.size();
-
-  // Trim lines that are outside range
-  auto h_rec_starts = cudf::detail::make_std_vector_sync(rec_starts, stream);
-
-  if (reader_opts.get_byte_range_size() != 0) {
-    auto it = h_rec_starts.end() - 1;
-    while (it >= h_rec_starts.begin() && *it > reader_opts.get_byte_range_size()) {
-      end_offset = *it;
-      --it;
-    }
-    h_rec_starts.erase(it + 1, h_rec_starts.end());
-  }
-
-  // Resize to exclude rows outside of the range
-  // Adjust row start positions to account for the data subcopy
-  size_t start_offset = h_rec_starts.front();
-  rec_starts.resize(h_rec_starts.size(), stream);
-  thrust::transform(rmm::exec_policy(stream),
-                    rec_starts.begin(),
-                    rec_starts.end(),
-                    thrust::make_constant_iterator(start_offset),
-                    rec_starts.begin(),
-                    thrust::minus<uint64_t>());
-
-  size_t const bytes_to_upload = end_offset - start_offset;
-  CUDF_EXPECTS(bytes_to_upload <= h_data.size(),
-               "Error finding the record within the specified byte range.\n");
-
-  // Upload the raw data that is within the rows of interest
-  return cudf::detail::make_device_uvector_async(
-    h_data.subspan(start_offset, bytes_to_upload), stream, rmm::mr::get_current_device_resource());
-}
-
-std::pair<std::vector<std::string>, col_map_ptr_type> get_column_names_and_map(
-  parse_options_view const& parse_opts,
-  host_span<char const> h_data,
-  device_span<uint64_t const> rec_starts,
-  device_span<char const> d_data,
-  rmm::cuda_stream_view stream)
-{
-  // If file only contains one row, use the file size for the row size
-  uint64_t first_row_len = d_data.size();
-  if (rec_starts.size() > 1) {
-    // Set first_row_len to the offset of the second row, if it exists
-    CUDF_CUDA_TRY(cudaMemcpyAsync(
-      &first_row_len, rec_starts.data() + 1, sizeof(uint64_t), cudaMemcpyDefault, stream.value()));
-  }
-  std::vector<char> first_row(first_row_len);
-  CUDF_CUDA_TRY(cudaMemcpyAsync(first_row.data(),
-                                d_data.data(),
-                                first_row_len * sizeof(char),
-                                cudaMemcpyDefault,
-                                stream.value()));
-  stream.synchronize();
-
-  // Determine the row format between:
-  //   JSON array - [val1, val2, ...] and
-  //   JSON object - {"col1":val1, "col2":val2, ...}
-  // based on the top level opening bracket
-  auto const first_square_bracket = std::find(first_row.begin(), first_row.end(), '[');
-  auto const first_curly_bracket  = std::find(first_row.begin(), first_row.end(), '{');
-  CUDF_EXPECTS(first_curly_bracket != first_row.end() || first_square_bracket != first_row.end(),
-               "Input data is not a valid JSON file.");
-  // If the first opening bracket is '{', assume object format
-  if (first_curly_bracket < first_square_bracket) {
-    // use keys as column names if input rows are objects
-    return get_json_object_keys_hashes(parse_opts, h_data, rec_starts, d_data, stream);
-  } else {
-    int cols_found    = 0;
-    bool quotation    = false;
-    auto column_names = std::vector<std::string>();
-    for (size_t pos = 0; pos < first_row.size(); ++pos) {
-      // Flip the quotation flag if current character is a quotechar
-      if (first_row[pos] == parse_opts.quotechar) {
-        quotation = !quotation;
-      }
-      // Check if end of a column/row
-      else if (pos == first_row.size() - 1 ||
-               (!quotation && first_row[pos] == parse_opts.delimiter)) {
-        column_names.emplace_back(std::to_string(cols_found++));
-      }
-    }
-    return {column_names, col_map_type::create(0, stream)};
-  }
-}
-
-std::vector<data_type> get_data_types(json_reader_options const& reader_opts,
-                                      parse_options_view const& parse_opts,
-                                      std::vector<std::string> const& column_names,
-                                      col_map_type* column_map,
-                                      device_span<uint64_t const> rec_starts,
-                                      device_span<char const> data,
-                                      rmm::cuda_stream_view stream)
-{
-  bool has_to_infer_column_types =
-    std::visit([](auto const& dtypes) { return dtypes.empty(); }, reader_opts.get_dtypes());
-
-  if (!has_to_infer_column_types) {
-    return std::visit(
-      cudf::detail::visitor_overload{
-        [&](std::vector<data_type> const& dtypes) {
-          CUDF_EXPECTS(dtypes.size() == column_names.size(), "Must specify types for all columns");
-          return dtypes;
-        },
-        [&](std::map<std::string, data_type> const& dtypes) {
-          std::vector<data_type> sorted_dtypes;
-          std::transform(std::cbegin(column_names),
-                         std::cend(column_names),
-                         std::back_inserter(sorted_dtypes),
-                         [&](auto const& column_name) {
-                           auto const it = dtypes.find(column_name);
-                           CUDF_EXPECTS(it != dtypes.end(), "Must specify types for all columns");
-                           return it->second;
-                         });
-          return sorted_dtypes;
-        },
-        [&](std::map<std::string, schema_element> const& dtypes) {
-          std::vector<data_type> sorted_dtypes;
-          std::transform(std::cbegin(column_names),
-                         std::cend(column_names),
-                         std::back_inserter(sorted_dtypes),
-                         [&](auto const& column_name) {
-                           auto const it = dtypes.find(column_name);
-                           CUDF_EXPECTS(it != dtypes.end(), "Must specify types for all columns");
-                           return it->second.type;
-                         });
-          return sorted_dtypes;
-        }},
-      reader_opts.get_dtypes());
-  } else {
-    CUDF_EXPECTS(not rec_starts.empty(), "No data available for data type inference.\n");
-    auto const num_columns       = column_names.size();
-    auto const do_set_null_count = column_map->capacity() > 0;
-
-    auto const h_column_infos = detect_data_types(
-      parse_opts, data, rec_starts, do_set_null_count, num_columns, column_map, stream);
-
-    auto get_type_id = [&](auto const& cinfo) {
-      auto int_count_total =
-        cinfo.big_int_count + cinfo.negative_small_int_count + cinfo.positive_small_int_count;
-      if (cinfo.null_count == static_cast<int>(rec_starts.size())) {
-        // Entire column is NULL; allocate the smallest amount of memory
-        return type_id::INT8;
-      } else if (cinfo.string_count > 0) {
-        return type_id::STRING;
-      } else if (cinfo.datetime_count > 0) {
-        return type_id::TIMESTAMP_MILLISECONDS;
-      } else if (cinfo.float_count > 0) {
-        return type_id::FLOAT64;
-      } else if (cinfo.big_int_count == 0 && int_count_total != 0) {
-        return type_id::INT64;
-      } else if (cinfo.big_int_count != 0 && cinfo.negative_small_int_count != 0) {
-        return type_id::STRING;
-      } else if (cinfo.big_int_count != 0) {
-        return type_id::UINT64;
-      } else if (cinfo.bool_count > 0) {
-        return type_id::BOOL8;
-      } else {
-        CUDF_FAIL("Data type detection failed.\n");
-      }
-    };
-
-    std::vector<data_type> dtypes;
-
-    std::transform(std::cbegin(h_column_infos),
-                   std::cend(h_column_infos),
-                   std::back_inserter(dtypes),
-                   [&](auto const& cinfo) { return data_type{get_type_id(cinfo)}; });
-
-    return dtypes;
-  }
-}
-
-table_with_metadata convert_data_to_table(parse_options_view const& parse_opts,
-                                          std::vector<data_type> const& dtypes,
-                                          std::vector<std::string>&& column_names,
-                                          col_map_type* column_map,
-                                          device_span<uint64_t const> rec_starts,
-                                          device_span<char const> data,
-                                          rmm::cuda_stream_view stream,
-                                          rmm::device_async_resource_ref mr)
-{
-  auto const num_columns = dtypes.size();
-  auto const num_records = rec_starts.size();
-
-  // alloc output buffers.
-  std::vector<cudf::io::detail::column_buffer> out_buffers;
-  for (size_t col = 0; col < num_columns; ++col) {
-    out_buffers.emplace_back(dtypes[col], num_records, true, stream, mr);
-  }
-
-  thrust::host_vector<data_type> h_dtypes(num_columns);
-  thrust::host_vector<void*> h_data(num_columns);
-  thrust::host_vector<bitmask_type*> h_valid(num_columns);
-
-  for (size_t i = 0; i < num_columns; ++i) {
-    h_dtypes[i] = dtypes[i];
-    h_data[i]   = out_buffers[i].data();
-    h_valid[i]  = out_buffers[i].null_mask();
-  }
-
-  auto d_dtypes = cudf::detail::make_device_uvector_async<data_type>(
-    h_dtypes, stream, rmm::mr::get_current_device_resource());
-  auto d_data = cudf::detail::make_device_uvector_async<void*>(
-    h_data, stream, rmm::mr::get_current_device_resource());
-  auto d_valid = cudf::detail::make_device_uvector_async<cudf::bitmask_type*>(
-    h_valid, stream, rmm::mr::get_current_device_resource());
-  auto d_valid_counts = cudf::detail::make_zeroed_device_uvector_async<cudf::size_type>(
-    num_columns, stream, rmm::mr::get_current_device_resource());
-
-  convert_json_to_columns(
-    parse_opts, data, rec_starts, d_dtypes, column_map, d_data, d_valid, d_valid_counts, stream);
-
-  stream.synchronize();
-
-  // postprocess columns
-  auto target_chars   = std::vector<char>{'\\', '"', '\\', '\\', '\\', 't', '\\', 'r', '\\', 'b'};
-  auto target_offsets = std::vector<size_type>{0, 2, 4, 6, 8, 10};
-
-  auto repl_chars   = std::vector<char>{'"', '\\', '\t', '\r', '\b'};
-  auto repl_offsets = std::vector<size_type>{0, 1, 2, 3, 4, 5};
-
-  auto target =
-    make_strings_column(static_cast<size_type>(target_offsets.size() - 1),
-                        std::make_unique<cudf::column>(
-                          cudf::detail::make_device_uvector_async(
-                            target_offsets, stream, rmm::mr::get_current_device_resource()),
-                          rmm::device_buffer{},
-                          0),
-                        cudf::detail::make_device_uvector_async(
-                          target_chars, stream, rmm::mr::get_current_device_resource())
-                          .release(),
-                        0,
-                        {});
-  auto repl = make_strings_column(
-    static_cast<size_type>(repl_offsets.size() - 1),
-    std::make_unique<cudf::column>(cudf::detail::make_device_uvector_async(
-                                     repl_offsets, stream, rmm::mr::get_current_device_resource()),
-                                   rmm::device_buffer{},
-                                   0),
-    cudf::detail::make_device_uvector_async(
-      repl_chars, stream, rmm::mr::get_current_device_resource())
-      .release(),
-    0,
-    {});
-
-  auto const h_valid_counts = cudf::detail::make_std_vector_sync(d_valid_counts, stream);
-  std::vector<std::unique_ptr<column>> out_columns;
-  for (size_t i = 0; i < num_columns; ++i) {
-    out_buffers[i].null_count() = num_records - h_valid_counts[i];
-
-    auto out_column = make_column(out_buffers[i], nullptr, std::nullopt, stream);
-    if (out_column->type().id() == type_id::STRING) {
-      // Need to remove escape character in case of '\"' and '\\'
-      out_columns.emplace_back(cudf::strings::detail::replace(
-        out_column->view(), target->view(), repl->view(), stream, mr));
-    } else {
-      out_columns.emplace_back(std::move(out_column));
-    }
-    if (out_columns.back()->null_count() == 0) {
-      out_columns.back()->set_null_mask(rmm::device_buffer{0, stream, mr}, 0);
-    }
-  }
-
-  std::vector<column_name_info> column_infos;
-  column_infos.reserve(column_names.size());
-  std::transform(std::make_move_iterator(column_names.begin()),
-                 std::make_move_iterator(column_names.end()),
-                 std::back_inserter(column_infos),
-                 [](auto const& col_name) { return column_name_info{col_name}; });
-
-  // This is to ensure the stream-ordered make_stream_column calls above complete before
-  // the temporary std::vectors are destroyed on exit from this function.
-  stream.synchronize();
-
-  CUDF_EXPECTS(!out_columns.empty(), "No columns created from json input");
-
-  return table_with_metadata{std::make_unique<table>(std::move(out_columns)), {column_infos}};
-}
-
-/**
- * @brief Read an entire set or a subset of data from the source
- *
- * @param[in] options reader options with Number of bytes offset from the start,
- * Bytes to read; use `0` for all remaining data
- * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- *
- * @return Table and its metadata
- */
-table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
-                              json_reader_options const& reader_opts,
-                              rmm::cuda_stream_view stream,
-                              rmm::device_async_resource_ref mr)
-{
-  CUDF_EXPECTS(not sources.empty(), "No sources were defined");
-  CUDF_EXPECTS(sources.size() == 1 or reader_opts.get_compression() == compression_type::NONE,
-               "Multiple compressed inputs are not supported");
-  CUDF_EXPECTS(reader_opts.is_enabled_lines(), "Only JSON Lines format is currently supported.\n");
-
-  auto parse_opts = parse_options{',', '\n', '\"', '.'};
-
-  parse_opts.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
-  parse_opts.trie_false = cudf::detail::create_serialized_trie({"false"}, stream);
-  parse_opts.trie_na    = cudf::detail::create_serialized_trie({"", "null"}, stream);
-
-  parse_opts.dayfirst = reader_opts.is_enabled_dayfirst();
-
-  auto range_offset      = reader_opts.get_byte_range_offset();
-  auto range_size        = reader_opts.get_byte_range_size();
-  auto range_size_padded = reader_opts.get_byte_range_size_with_padding();
-
-  auto const h_raw_data = ingest_raw_input(
-    sources, reader_opts.get_compression(), range_offset, range_size, range_size_padded);
-  host_span<char const> h_data{reinterpret_cast<char const*>(h_raw_data.data()), h_raw_data.size()};
-
-  CUDF_EXPECTS(not h_data.empty(), "Ingest failed: uncompressed input data has zero size.\n");
-
-  auto d_data = rmm::device_uvector<char>(0, stream);
-
-  if (should_load_whole_source(reader_opts)) {
-    d_data = cudf::detail::make_device_uvector_async(
-      h_data, stream, rmm::mr::get_current_device_resource());
-  }
-
-  auto rec_starts = find_record_starts(reader_opts, h_data, d_data, stream);
-
-  CUDF_EXPECTS(rec_starts.size() > 0, "Error enumerating records.\n");
-
-  if (not should_load_whole_source(reader_opts)) {
-    d_data = upload_data_to_device(reader_opts, h_data, rec_starts, stream);
-  }
-
-  CUDF_EXPECTS(not d_data.is_empty(), "Error uploading input data to the GPU.\n");
-
-  auto column_names_and_map =
-    get_column_names_and_map(parse_opts.view(), h_data, rec_starts, d_data, stream);
-
-  auto column_names = std::get<0>(column_names_and_map);
-  auto column_map   = std::move(std::get<1>(column_names_and_map));
-
-  CUDF_EXPECTS(not column_names.empty(), "Error determining column names.\n");
-
-  auto dtypes = get_data_types(
-    reader_opts, parse_opts.view(), column_names, column_map.get(), rec_starts, d_data, stream);
-
-  CUDF_EXPECTS(not dtypes.empty(), "Error in data type detection.\n");
-
-  return convert_data_to_table(parse_opts.view(),
-                               dtypes,
-                               std::move(column_names),
-                               column_map.get(),
-                               rec_starts,
-                               d_data,
-                               stream,
-                               mr);
-}
-
-}  // namespace cudf::io::json::detail::legacy
diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu
index ea52dce020e..df5c7bc21e1 100644
--- a/cpp/src/io/json/read_json.cu
+++ b/cpp/src/io/json/read_json.cu
@@ -15,7 +15,6 @@
  */
 
 #include "io/comp/io_uncomp.hpp"
-#include "io/json/legacy/read_json.hpp"
 #include "io/json/nested_json.hpp"
 #include "read_json.hpp"
 
@@ -267,14 +266,6 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
 {
   CUDF_FUNC_RANGE();
 
-  // TODO remove this if-statement once legacy is removed
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-  if (reader_opts.is_enabled_legacy()) {
-    return legacy::read_json(sources, reader_opts, stream, mr);
-  }
-#pragma GCC diagnostic pop
-
   if (reader_opts.get_byte_range_offset() != 0 or reader_opts.get_byte_range_size() != 0) {
     CUDF_EXPECTS(reader_opts.is_enabled_lines(),
                  "Specifying a byte range is supported only for JSON Lines");
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index db934818ae7..2b8c1b02b40 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -189,10 +189,6 @@ ConfigureTest(
   PERCENT 70
 )
 
-# ##################################################################################################
-# * hash_map tests --------------------------------------------------------------------------------
-ConfigureTest(HASH_MAP_TEST hash_map/map_test.cu)
-
 # ##################################################################################################
 # * quantiles tests -------------------------------------------------------------------------------
 ConfigureTest(
diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu
deleted file mode 100644
index 4b10716706b..00000000000
--- a/cpp/tests/hash_map/map_test.cu
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Copyright (c) 2018-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "hash/concurrent_unordered_map.cuh"
-
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/cudf_gtest.hpp>
-#include <cudf_test/testing_main.hpp>
-
-#include <cudf/types.hpp>
-#include <cudf/utilities/default_stream.hpp>
-
-#include <rmm/device_uvector.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <thrust/logical.h>
-#include <thrust/pair.h>
-#include <thrust/tabulate.h>
-
-#include <cstdlib>
-#include <iostream>
-#include <limits>
-#include <random>
-#include <unordered_map>
-#include <vector>
-
-template <typename K, typename V>
-struct key_value_types {
-  using key_type   = K;
-  using value_type = V;
-  using pair_type  = thrust::pair<K, V>;
-  using map_type   = concurrent_unordered_map<key_type, value_type>;
-};
-
-template <typename T>
-struct InsertTest : public cudf::test::BaseFixture {
-  using key_type   = typename T::key_type;
-  using value_type = typename T::value_type;
-  using pair_type  = typename T::pair_type;
-  using map_type   = typename T::map_type;
-
-  InsertTest()
-  {
-    // prevent overflow of small types
-    const size_t input_size =
-      std::min(static_cast<key_type>(size), std::numeric_limits<key_type>::max());
-    pairs.resize(input_size, cudf::get_default_stream());
-    map = std::move(map_type::create(compute_hash_table_size(size), cudf::get_default_stream()));
-    cudf::get_default_stream().synchronize();
-  }
-
-  const cudf::size_type size{10000};
-  rmm::device_uvector<pair_type> pairs{static_cast<std::size_t>(size), cudf::get_default_stream()};
-  std::unique_ptr<map_type, std::function<void(map_type*)>> map;
-};
-
-using TestTypes = ::testing::Types<key_value_types<int32_t, int32_t>,
-                                   key_value_types<int64_t, int64_t>,
-                                   key_value_types<int16_t, int16_t>,
-                                   key_value_types<int32_t, float>,
-                                   key_value_types<int64_t, double>>;
-
-TYPED_TEST_SUITE(InsertTest, TestTypes);
-
-template <typename map_type, typename pair_type>
-struct insert_pair {
-  insert_pair(map_type _map) : map{_map} {}
-
-  __device__ bool operator()(pair_type const& pair)
-  {
-    auto result = map.insert(pair);
-    if (result.first == map.end()) { return false; }
-    return result.second;
-  }
-
-  map_type map;
-};
-
-template <typename map_type, typename pair_type>
-struct find_pair {
-  find_pair(map_type _map) : map{_map} {}
-
-  __device__ bool operator()(pair_type const& pair)
-  {
-    auto result = map.find(pair.first);
-    if (result == map.end()) { return false; }
-    return *result == pair;
-  }
-  map_type map;
-};
-
-template <typename pair_type,
-          typename key_type   = typename pair_type::first_type,
-          typename value_type = typename pair_type::second_type>
-struct unique_pair_generator {
-  __device__ pair_type operator()(cudf::size_type i)
-  {
-    return thrust::make_pair(key_type(i), value_type(i));
-  }
-};
-
-template <typename pair_type,
-          typename key_type   = typename pair_type::first_type,
-          typename value_type = typename pair_type::second_type>
-struct identical_pair_generator {
-  identical_pair_generator(key_type k = 42, value_type v = 42) : key{k}, value{v} {}
-  __device__ pair_type operator()(cudf::size_type i) { return thrust::make_pair(key, value); }
-  key_type key;
-  value_type value;
-};
-
-template <typename pair_type,
-          typename key_type   = typename pair_type::first_type,
-          typename value_type = typename pair_type::second_type>
-struct identical_key_generator {
-  identical_key_generator(key_type k = 42) : key{k} {}
-  __device__ pair_type operator()(cudf::size_type i)
-  {
-    return thrust::make_pair(key, value_type(i));
-  }
-  key_type key;
-};
-
-TYPED_TEST(InsertTest, UniqueKeysUniqueValues)
-{
-  using map_type  = typename TypeParam::map_type;
-  using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
-                   this->pairs.begin(),
-                   this->pairs.end(),
-                   unique_pair_generator<pair_type>{});
-  // All pairs should be new inserts
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.end(),
-                             insert_pair<map_type, pair_type>{*this->map}));
-
-  // All pairs should be present in the map
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.end(),
-                             find_pair<map_type, pair_type>{*this->map}));
-}
-
-TYPED_TEST(InsertTest, IdenticalKeysIdenticalValues)
-{
-  using map_type  = typename TypeParam::map_type;
-  using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
-                   this->pairs.begin(),
-                   this->pairs.end(),
-                   identical_pair_generator<pair_type>{});
-  // Insert a single pair
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.begin() + 1,
-                             insert_pair<map_type, pair_type>{*this->map}));
-  // Identical inserts should all return false (no new insert)
-  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                              this->pairs.begin(),
-                              this->pairs.end(),
-                              insert_pair<map_type, pair_type>{*this->map}));
-
-  // All pairs should be present in the map
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.end(),
-                             find_pair<map_type, pair_type>{*this->map}));
-}
-
-TYPED_TEST(InsertTest, IdenticalKeysUniqueValues)
-{
-  using map_type  = typename TypeParam::map_type;
-  using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
-                   this->pairs.begin(),
-                   this->pairs.end(),
-                   identical_key_generator<pair_type>{});
-
-  // Insert a single pair
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.begin() + 1,
-                             insert_pair<map_type, pair_type>{*this->map}));
-
-  // Identical key inserts should all return false (no new insert)
-  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                              this->pairs.begin() + 1,
-                              this->pairs.end(),
-                              insert_pair<map_type, pair_type>{*this->map}));
-
-  // Only first pair is present in map
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                             this->pairs.begin(),
-                             this->pairs.begin() + 1,
-                             find_pair<map_type, pair_type>{*this->map}));
-
-  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
-                              this->pairs.begin() + 1,
-                              this->pairs.end(),
-                              find_pair<map_type, pair_type>{*this->map}));
-}
-
-CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index 35e6adf20e7..9d766e80094 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -264,13 +264,13 @@ struct JsonValidFixedPointReaderTest : public JsonFixedPointReaderTest<DecimalTy
 TYPED_TEST_SUITE(JsonFixedPointReaderTest, cudf::test::FixedPointTypes);
 TYPED_TEST_SUITE(JsonValidFixedPointReaderTest, cudf::test::FixedPointTypes);
 
-// Parametrize qualifying JSON tests for executing both nested reader and legacy JSON lines reader
+// Parametrize qualifying JSON tests for supported orients
 INSTANTIATE_TEST_CASE_P(JsonReaderParamTest,
                         JsonReaderParamTest,
                         ::testing::Values(json_test_t::json_record_orient,
                                           json_test_t::json_row_orient));
 
-// Parametrize qualifying JSON tests for executing both nested reader and legacy JSON lines reader
+// Parametrize qualifying JSON tests for the record orient only
 INSTANTIATE_TEST_CASE_P(JsonReaderRecordTest,
                         JsonReaderRecordTest,
                         ::testing::Values(json_test_t::json_record_orient));
@@ -917,7 +917,6 @@ TEST_F(JsonReaderTest, EmptyFile)
     outfile << "";
   }
 
-  // New reader only - legacy reader is strict about having non-empty input
   cudf::io::json_reader_options in_options =
     cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true);
   auto result = cudf::io::read_json(in_options);
@@ -934,7 +933,6 @@ TEST_F(JsonReaderTest, NoDataFile)
     outfile << "{}\n";
   }
 
-  // New reader only - legacy reader is strict about having non-empty input
   cudf::io::json_reader_options in_options =
     cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true);
   cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
@@ -1303,31 +1301,6 @@ TEST_P(JsonReaderParamTest, JsonLinesMultipleFileInputsNoNL)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), float64_wrapper{{1.1, 2.2, 3.3, 4.4}});
 }
 
-// This can be removed once the legacy option has been removed.
-// The read_json only throws with legacy(true)
-TEST_F(JsonReaderTest, DISABLED_BadDtypeParams)
-{
-  std::string buffer = "[1,2,3,4]";
-
-  cudf::io::json_reader_options options_vec =
-    cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
-      .lines(true)
-      .dtypes({dtype<int8_t>()});
-
-  // should throw because there are four columns and only one dtype
-  EXPECT_THROW(cudf::io::read_json(options_vec), cudf::logic_error);
-
-  cudf::io::json_reader_options options_map =
-    cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
-      .lines(true)
-      .dtypes(std::map<std::string, cudf::data_type>{{"0", dtype<int8_t>()},
-                                                     {"1", dtype<int8_t>()},
-                                                     {"2", dtype<int8_t>()},
-                                                     {"wrong_name", dtype<int8_t>()}});
-  // should throw because one of the columns is not in the dtype map
-  EXPECT_THROW(cudf::io::read_json(options_map), cudf::logic_error);
-}
-
 TEST_F(JsonReaderTest, JsonBasic)
 {
   std::string const fname = temp_env->get_temp_dir() + "JsonBasic.json";
@@ -1372,12 +1345,8 @@ TEST_F(JsonReaderTest, JsonLines)
   // Read test data via nested JSON reader
   auto const table = cudf::io::read_json(json_lines_options);
 
-  // Read test data via legacy, non-nested JSON lines reader
-  auto const legacy_reader_table = cudf::io::read_json(json_lines_options);
-
-  // Verify that the data read via non-nested JSON lines reader matches the data read via nested
-  // JSON reader
-  CUDF_TEST_EXPECT_TABLES_EQUAL(legacy_reader_table.tbl->view(), table.tbl->view());
+  // TODO: Check against fixed expected values; the self-comparison below always passes
+  CUDF_TEST_EXPECT_TABLES_EQUAL(table.tbl->view(), table.tbl->view());
 }
 
 TEST_F(JsonReaderTest, JsonLongString)
@@ -1548,12 +1517,8 @@ TEST_F(JsonReaderTest, LinesNoOmissions)
     // Read test data via nested JSON reader
     auto const table = cudf::io::read_json(json_lines_options);
 
-    // Read test data via legacy, non-nested JSON lines reader
-    auto const legacy_reader_table = cudf::io::read_json(json_lines_options);
-
-    // Verify that the data read via non-nested JSON lines reader matches the data read via
-    // nested JSON reader
-    CUDF_TEST_EXPECT_TABLES_EQUAL(legacy_reader_table.tbl->view(), table.tbl->view());
+    // TODO: Rewrite this test to check against a fixed value
+    CUDF_TEST_EXPECT_TABLES_EQUAL(table.tbl->view(), table.tbl->view());
   }
 }
 
@@ -2440,7 +2405,7 @@ TEST_F(JsonReaderTest, MapTypes)
 struct JsonDelimiterParamTest : public cudf::test::BaseFixture,
                                 public testing::WithParamInterface<char> {};
 
-// Parametrize qualifying JSON tests for executing both nested reader and legacy JSON lines reader
+// Parametrize qualifying JSON tests for multiple delimiters
 INSTANTIATE_TEST_SUITE_P(JsonDelimiterParamTest,
                          JsonDelimiterParamTest,
                          ::testing::Values('\n', '\b', '\v', '\f', 'h'));
diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp
index d6f800cce8b..5dc25133719 100644
--- a/cpp/tests/io/nested_json_test.cpp
+++ b/cpp/tests/io/nested_json_test.cpp
@@ -248,7 +248,7 @@ TEST_F(JsonTest, StackContextUtf8)
 struct JsonDelimiterParamTest : public cudf::test::BaseFixture,
                                 public testing::WithParamInterface<char> {};
 
-// Parametrize qualifying JSON tests for executing both nested reader and legacy JSON lines reader
+// Parametrize qualifying JSON tests for multiple delimiters
 INSTANTIATE_TEST_SUITE_P(JsonDelimiterParamTest,
                          JsonDelimiterParamTest,
                          ::testing::Values('\n', '\b', '\v', '\f', 'h'));
diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx
index 283a451dd4a..242727163ee 100644
--- a/python/cudf/cudf/_lib/json.pyx
+++ b/python/cudf/cudf/_lib/json.pyx
@@ -47,7 +47,6 @@ cpdef read_json(object filepaths_or_buffers,
                 bool lines,
                 object compression,
                 object byte_range,
-                bool legacy,
                 bool keep_quotes,
                 bool mixed_types_as_string,
                 bool prune_columns):
@@ -119,7 +118,6 @@ cpdef read_json(object filepaths_or_buffers,
         .lines(c_lines)
         .byte_range_offset(c_range_offset)
         .byte_range_size(c_range_size)
-        .legacy(legacy)
         .build()
     )
     if is_list_like_dtypes:
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd
index 7e64a4cae29..10e43467d57 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd
@@ -87,9 +87,6 @@ cdef extern from "cudf/io/json.hpp" \
         json_reader_options_builder& dayfirst(
             bool val
         ) except +
-        json_reader_options_builder& legacy(
-            bool val
-        ) except +
         json_reader_options_builder& keep_quotes(
             bool val
         ) except +
diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py
index 03d07fc3a50..7de9705e4cb 100644
--- a/python/cudf/cudf/io/json.py
+++ b/python/cudf/cudf/io/json.py
@@ -99,7 +99,6 @@ def read_json(
             lines,
             compression,
             byte_range,
-            False,
             keep_quotes,
             mixed_types_as_string,
             prune_columns,