diff --git a/cpp/benchmarks/io/fst.cu b/cpp/benchmarks/io/fst.cu
index 6b821eb5cae..c0c88517d41 100644
--- a/cpp/benchmarks/io/fst.cu
+++ b/cpp/benchmarks/io/fst.cu
@@ -67,10 +67,9 @@ auto make_test_json_data(nvbench::state& state)
 // Type used to represent the atomic symbol type used within the finite-state machine
 using SymbolT = char;
 // Type sufficiently large to index symbols within the input and output (may be unsigned)
-using SymbolOffsetT = uint32_t;
-// Helper class to set up transition table, symbol group lookup table, and translation table
-using DfaFstT = cudf::io::fst::detail::Dfa<char, NUM_SYMBOL_GROUPS, TT_NUM_STATES>;
-constexpr std::size_t single_item = 1;
+using SymbolOffsetT                       = uint32_t;
+constexpr std::size_t single_item         = 1;
+constexpr auto max_translation_table_size = TT_NUM_STATES * NUM_SYMBOL_GROUPS;
 
 }  // namespace
 
@@ -94,7 +93,11 @@ void BM_FST_JSON(nvbench::state& state)
   cudf::detail::hostdevice_vector<SymbolOffsetT> out_indexes_gpu(d_input.size(), stream_view);
 
   // Run algorithm
-  DfaFstT parser{pda_sgs, pda_state_tt, pda_out_tt, stream.value()};
+  auto parser = cudf::io::fst::detail::make_fst(
+    cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
+    cudf::io::fst::detail::make_transition_table(pda_state_tt),
+    cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
+    stream);
 
   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
@@ -129,7 +132,11 @@ void BM_FST_JSON_no_outidx(nvbench::state& state)
   cudf::detail::hostdevice_vector<SymbolOffsetT> out_indexes_gpu(d_input.size(), stream_view);
 
   // Run algorithm
-  DfaFstT parser{pda_sgs, pda_state_tt, pda_out_tt, stream.value()};
+  auto parser = cudf::io::fst::detail::make_fst(
+    cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
+    cudf::io::fst::detail::make_transition_table(pda_state_tt),
+    cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
+    stream);
 
   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
@@ -162,7 +169,11 @@ void BM_FST_JSON_no_out(nvbench::state& state)
   cudf::detail::hostdevice_vector<SymbolOffsetT> output_gpu_size(single_item, stream_view);
 
   // Run algorithm
-  DfaFstT parser{pda_sgs, pda_state_tt, pda_out_tt, stream.value()};
+  auto parser = cudf::io::fst::detail::make_fst(
+    cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
+    cudf::io::fst::detail::make_transition_table(pda_state_tt),
+    cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
+    stream);
 
   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
@@ -196,7 +207,11 @@ void BM_FST_JSON_no_str(nvbench::state& state)
   cudf::detail::hostdevice_vector<SymbolOffsetT> out_indexes_gpu(d_input.size(), stream_view);
 
   // Run algorithm
-  DfaFstT parser{pda_sgs, pda_state_tt, pda_out_tt, stream.value()};
+  auto parser = cudf::io::fst::detail::make_fst(
+    cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
+    cudf::io::fst::detail::make_transition_table(pda_state_tt),
+    cudf::io::fst::detail::make_translation_table<max_translation_table_size>(pda_out_tt),
+    stream);
 
   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
diff --git a/cpp/include/cudf/io/detail/tokenize_json.hpp b/cpp/include/cudf/io/detail/tokenize_json.hpp
index 4914f434c98..b2ea29a85c3 100644
--- a/cpp/include/cudf/io/detail/tokenize_json.hpp
+++ b/cpp/include/cudf/io/detail/tokenize_json.hpp
@@ -110,6 +110,8 @@ enum token_t : PdaTokenT {
   ValueEnd,
   /// Beginning-of-error token (on first encounter of a parsing error)
   ErrorBegin,
+  /// Delimiting a JSON line for error recovery
+  LineEnd,
   /// Total number of tokens
   NUM_TOKENS
 };
diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp
index 670409a898a..15dc2a614ad 100644
--- a/cpp/include/cudf/io/json.hpp
+++ b/cpp/include/cudf/io/json.hpp
@@ -54,6 +54,14 @@ struct schema_element {
   std::map<std::string, schema_element> child_types;
 };
 
+/**
+ * @brief Control the error recovery behavior of the json parser
+ */
+enum class json_recovery_mode_t {
+  FAIL,              ///< Does not recover from an error when encountering an invalid format
+  RECOVER_WITH_NULL  ///< Recovers from an error, replacing invalid records with null
+};
+
 /**
  * @brief Input arguments to the `read_json` interface.
  *
@@ -105,6 +113,9 @@ class json_reader_options {
   // Whether to keep the quote characters of string values
   bool _keep_quotes = false;
 
+  // Whether to recover after an invalid JSON line
+  json_recovery_mode_t _recovery_mode = json_recovery_mode_t::FAIL;
+
   /**
    * @brief Constructor from source info.
    *
@@ -235,6 +246,13 @@ class json_reader_options {
    */
   bool is_enabled_keep_quotes() const { return _keep_quotes; }
 
+  /**
+   * @brief Queries the JSON reader's behavior on invalid JSON lines.
+   *
+   * @returns An enum that specifies the JSON reader's behavior on invalid JSON lines.
+   */
+  json_recovery_mode_t recovery_mode() const { return _recovery_mode; }
+
   /**
    * @brief Set data types for columns to be read.
    *
@@ -305,6 +323,13 @@ class json_reader_options {
    * of string values
    */
   void enable_keep_quotes(bool val) { _keep_quotes = val; }
+
+  /**
+   * @brief Specifies the JSON reader's behavior on invalid JSON lines.
+   *
+   * @param val An enum value to indicate the JSON reader's behavior on invalid JSON lines.
+   */
+  void set_recovery_mode(json_recovery_mode_t val) { _recovery_mode = val; }
 };
 
 /**
@@ -449,6 +474,18 @@ class json_reader_options_builder {
     return *this;
   }
 
+  /**
+   * @brief Specifies the JSON reader's behavior on invalid JSON lines.
+   *
+   * @param val An enum value to indicate the JSON reader's behavior on invalid JSON lines.
+   * @return this for chaining
+   */
+  json_reader_options_builder& recovery_mode(json_recovery_mode_t val)
+  {
+    options._recovery_mode = val;
+    return *this;
+  }
+
   /**
    * @brief move json_reader_options member once it's built.
    */
diff --git a/cpp/src/io/fst/agent_dfa.cuh b/cpp/src/io/fst/agent_dfa.cuh
index 0c813c7917f..52fd039c097 100644
--- a/cpp/src/io/fst/agent_dfa.cuh
+++ b/cpp/src/io/fst/agent_dfa.cuh
@@ -83,16 +83,18 @@ class DFASimulationCallbackWrapper {
     if (!write) out_count = 0;
   }
 
-  template <typename CharIndexT, typename StateIndexT, typename SymbolIndexT>
+  template <typename CharIndexT, typename StateIndexT, typename SymbolIndexT, typename SymbolT>
   __host__ __device__ __forceinline__ void ReadSymbol(CharIndexT const character_index,
                                                       StateIndexT const old_state,
                                                       StateIndexT const new_state,
-                                                      SymbolIndexT const symbol_id)
+                                                      SymbolIndexT const symbol_id,
+                                                      SymbolT const read_symbol)
   {
-    uint32_t const count = transducer_table(old_state, symbol_id);
+    uint32_t const count = transducer_table(old_state, symbol_id, read_symbol);
     if (write) {
       for (uint32_t out_char = 0; out_char < count; out_char++) {
-        out_it[out_count + out_char]     = transducer_table(old_state, symbol_id, out_char);
+        out_it[out_count + out_char] =
+          transducer_table(old_state, symbol_id, out_char, read_symbol);
         out_idx_it[out_count + out_char] = offset + character_index;
       }
     }
@@ -127,9 +129,10 @@ class StateVectorTransitionOp {
   {
   }
 
-  template <typename CharIndexT, typename SymbolIndexT>
+  template <typename CharIndexT, typename SymbolIndexT, typename SymbolT>
   __host__ __device__ __forceinline__ void ReadSymbol(CharIndexT const& character_index,
-                                                      SymbolIndexT const read_symbol_id) const
+                                                      SymbolIndexT const& read_symbol_id,
+                                                      SymbolT const& read_symbol) const
   {
     for (int32_t i = 0; i < NUM_INSTANCES; ++i) {
       state_vector[i] = transition_table(state_vector[i], read_symbol_id);
@@ -154,15 +157,16 @@ struct StateTransitionOp {
   {
   }
 
-  template <typename CharIndexT, typename SymbolIndexT>
+  template <typename CharIndexT, typename SymbolIndexT, typename SymbolT>
   __host__ __device__ __forceinline__ void ReadSymbol(CharIndexT const& character_index,
-                                                      SymbolIndexT const& read_symbol_id)
+                                                      SymbolIndexT const& read_symbol_id,
+                                                      SymbolT const& read_symbol)
   {
     // Remember what state we were in before we made the transition
     StateIndexT previous_state = state;
 
     state = transition_table(state, read_symbol_id);
-    callback_op.ReadSymbol(character_index, previous_state, state, read_symbol_id);
+    callback_op.ReadSymbol(character_index, previous_state, state, read_symbol_id, read_symbol);
   }
 };
 
@@ -230,7 +234,7 @@ struct AgentDFA {
     for (int32_t i = 0; i < NUM_SYMBOLS; ++i) {
       if (IS_FULL_BLOCK || threadIdx.x * SYMBOLS_PER_THREAD + i < max_num_chars) {
         auto matched_id = symbol_matcher(chars[i]);
-        callback_op.ReadSymbol(i, matched_id);
+        callback_op.ReadSymbol(i, matched_id, chars[i]);
       }
     }
   }
@@ -253,7 +257,8 @@ struct AgentDFA {
   //---------------------------------------------------------------------
   // LOADING FULL BLOCK OF CHARACTERS, NON-ALIASED
   //---------------------------------------------------------------------
-  __device__ __forceinline__ void LoadBlock(CharT const* d_chars,
+  template <typename CharInItT>
+  __device__ __forceinline__ void LoadBlock(CharInItT d_chars,
                                             OffsetT const block_offset,
                                             OffsetT const num_total_symbols,
                                             cub::Int2Type<true> /*IS_FULL_BLOCK*/,
@@ -261,7 +266,7 @@ struct AgentDFA {
   {
     CharT thread_chars[SYMBOLS_PER_THREAD];
 
-    CharT const* d_block_symbols = d_chars + block_offset;
+    CharInItT d_block_symbols = d_chars + block_offset;
     cub::LoadDirectStriped<BLOCK_THREADS>(threadIdx.x, d_block_symbols, thread_chars);
 
 #pragma unroll
@@ -273,7 +278,8 @@ struct AgentDFA {
   //---------------------------------------------------------------------
   // LOADING PARTIAL BLOCK OF CHARACTERS, NON-ALIASED
   //---------------------------------------------------------------------
-  __device__ __forceinline__ void LoadBlock(CharT const* d_chars,
+  template <typename CharInItT>
+  __device__ __forceinline__ void LoadBlock(CharInItT d_chars,
                                             OffsetT const block_offset,
                                             OffsetT const num_total_symbols,
                                             cub::Int2Type<false> /*IS_FULL_BLOCK*/,
@@ -286,7 +292,7 @@ struct AgentDFA {
     // Last unit to be loaded is IDIV_CEIL(#SYM, SYMBOLS_PER_UNIT)
     OffsetT num_total_chars = num_total_symbols - block_offset;
 
-    CharT const* d_block_symbols = d_chars + block_offset;
+    CharInItT d_block_symbols = d_chars + block_offset;
     cub::LoadDirectStriped<BLOCK_THREADS>(
       threadIdx.x, d_block_symbols, thread_chars, num_total_chars);
 
@@ -372,11 +378,26 @@ struct AgentDFA {
     }
   }
 
+  template <typename CharInItT>
+  __device__ __forceinline__ void LoadBlock(CharInItT d_chars,
+                                            OffsetT const block_offset,
+                                            OffsetT const num_total_symbols)
+  {
+    // Check if we are loading a full tile of data
+    if (block_offset + SYMBOLS_PER_UINT_BLOCK < num_total_symbols) {
+      LoadBlock(
+        d_chars, block_offset, num_total_symbols, cub::Int2Type<true>(), cub::Int2Type<1>());
+    } else {
+      LoadBlock(
+        d_chars, block_offset, num_total_symbols, cub::Int2Type<false>(), cub::Int2Type<1>());
+    }
+  }
+
   template <int32_t NUM_STATES, typename SymbolMatcherT, typename TransitionTableT>
   __device__ __forceinline__ void GetThreadStateTransitionVector(
     SymbolMatcherT const& symbol_matcher,
     TransitionTableT const& transition_table,
-    CharT const* d_chars,
+    SymbolItT d_chars,
     OffsetT const block_offset,
     OffsetT const num_total_symbols,
     std::array<StateIndexT, NUM_STATES>& state_vector)
@@ -416,7 +437,7 @@ struct AgentDFA {
   __device__ __forceinline__ void GetThreadStateTransitions(
     SymbolMatcherT const& symbol_matcher,
     TransitionTableT const& transition_table,
-    CharT const* d_chars,
+    SymbolItT d_chars,
     OffsetT const block_offset,
     OffsetT const num_total_symbols,
     StateIndexT& state,
diff --git a/cpp/src/io/fst/logical_stack.cuh b/cpp/src/io/fst/logical_stack.cuh
index 27ce6403ee8..a5d32cba125 100644
--- a/cpp/src/io/fst/logical_stack.cuh
+++ b/cpp/src/io/fst/logical_stack.cuh
@@ -42,9 +42,10 @@ namespace cudf::io::fst {
  * @brief Describes the kind of stack operation.
  */
 enum class stack_op_type : int8_t {
-  READ = 0,  ///< Operation reading what is currently on top of the stack
-  PUSH = 1,  ///< Operation pushing a new item on top of the stack
-  POP  = 2   ///< Operation popping the item currently on top of the stack
+  READ  = 0,  ///< Operation reading what is currently on top of the stack
+  PUSH  = 1,  ///< Operation pushing a new item on top of the stack
+  POP   = 2,  ///< Operation popping the item currently on top of the stack
+  RESET = 3   ///< Operation popping all items currently on the stack
 };
 
 namespace detail {
@@ -119,9 +120,9 @@ struct StackSymbolToStackOp {
   {
     stack_op_type stack_op = symbol_to_stack_op_type(stack_symbol);
     // PUSH => +1, POP => -1, READ => 0
-    int32_t level_delta = stack_op == stack_op_type::PUSH  ? 1
-                          : stack_op == stack_op_type::POP ? -1
-                                                           : 0;
+    int32_t level_delta = (stack_op == stack_op_type::PUSH)  ? 1
+                          : (stack_op == stack_op_type::POP) ? -1
+                                                             : 0;
     return StackOpT{static_cast<decltype(StackOpT::stack_level)>(level_delta), stack_symbol};
   }
 
@@ -133,14 +134,20 @@ struct StackSymbolToStackOp {
  * @brief Binary reduction operator to compute the absolute stack level from relative stack levels
  * (i.e., +1 for a PUSH, -1 for a POP operation).
  */
+template <typename StackSymbolToStackOpTypeT>
 struct AddStackLevelFromStackOp {
   template <typename StackLevelT, typename ValueT>
   constexpr CUDF_HOST_DEVICE StackOp<StackLevelT, ValueT> operator()(
     StackOp<StackLevelT, ValueT> const& lhs, StackOp<StackLevelT, ValueT> const& rhs) const
   {
-    StackLevelT new_level = lhs.stack_level + rhs.stack_level;
+    StackLevelT new_level = (symbol_to_stack_op_type(rhs.value) == stack_op_type::RESET)
+                              ? 0
+                              : (lhs.stack_level + rhs.stack_level);
     return StackOp<StackLevelT, ValueT>{new_level, rhs.value};
   }
+
+  /// Function object returning a stack operation type for a given stack symbol
+  StackSymbolToStackOpTypeT symbol_to_stack_op_type;
 };
 
 /**
@@ -323,13 +330,14 @@ void sparse_stack_op_to_top_of_stack(StackSymbolItT d_symbols,
 
   // Getting temporary storage requirements for the prefix sum of the stack level after each
   // operation
-  CUDF_CUDA_TRY(cub::DeviceScan::InclusiveScan(nullptr,
-                                               stack_level_scan_bytes,
-                                               stack_symbols_in,
-                                               d_kv_operations.Current(),
-                                               detail::AddStackLevelFromStackOp{},
-                                               num_symbols_in,
-                                               stream));
+  CUDF_CUDA_TRY(cub::DeviceScan::InclusiveScan(
+    nullptr,
+    stack_level_scan_bytes,
+    stack_symbols_in,
+    d_kv_operations.Current(),
+    detail::AddStackLevelFromStackOp<StackSymbolToStackOpTypeT>{symbol_to_stack_op},
+    num_symbols_in,
+    stream));
 
   // Getting temporary storage requirements for the stable radix sort (sorting by stack level of the
   // operations)
@@ -393,13 +401,14 @@ void sparse_stack_op_to_top_of_stack(StackSymbolItT d_symbols,
   d_kv_operations = cub::DoubleBuffer<StackOpT>{d_kv_ops_current.data(), d_kv_ops_alt.data()};
 
   // Compute prefix sum of the stack level after each operation
-  CUDF_CUDA_TRY(cub::DeviceScan::InclusiveScan(temp_storage.data(),
-                                               total_temp_storage_bytes,
-                                               stack_symbols_in,
-                                               d_kv_operations.Current(),
-                                               detail::AddStackLevelFromStackOp{},
-                                               num_symbols_in,
-                                               stream));
+  CUDF_CUDA_TRY(cub::DeviceScan::InclusiveScan(
+    temp_storage.data(),
+    total_temp_storage_bytes,
+    stack_symbols_in,
+    d_kv_operations.Current(),
+    detail::AddStackLevelFromStackOp<StackSymbolToStackOpTypeT>{symbol_to_stack_op},
+    num_symbols_in,
+    stream));
 
   // Stable radix sort, sorting by stack level of the operations
   d_kv_operations_unsigned = cub::DoubleBuffer<StackOpUnsignedT>{
diff --git a/cpp/src/io/fst/lookup_tables.cuh b/cpp/src/io/fst/lookup_tables.cuh
index 26f6891d963..c4176d5673f 100644
--- a/cpp/src/io/fst/lookup_tables.cuh
+++ b/cpp/src/io/fst/lookup_tables.cuh
@@ -22,20 +22,40 @@
 
 #include <cub/cub.cuh>
 
+#include <cuda/std/iterator>
+
 #include <algorithm>
 #include <cstdint>
+#include <iterator>
 #include <vector>
 
 namespace cudf::io::fst::detail {
 
+/**
+ * @brief Helper function object that delegates a lookup to a given lookup table without mapping any
+ * of the given arguments.
+ */
+struct IdentityOp {
+  template <typename LookUpTableT, typename... Args>
+  __host__ __device__ __forceinline__ auto operator()(LookUpTableT const& lookup_table,
+                                                      Args&&... args) const
+  {
+    return lookup_table.lookup(std::forward<Args>(args)...);
+  }
+};
+
 /**
  * @brief Class template that can be plugged into the finite-state machine to look up the symbol
  * group index for a given symbol. Class template does not support multi-symbol lookups (i.e., no
  * look-ahead). The class uses shared memory for the lookups.
  *
  * @tparam SymbolT The symbol type being passed in to lookup the corresponding symbol group id
+ * @tparam PreMapOpT A function object that is invoked with `(lut, symbol)` and must return the
+ * symbol group index of `symbol`.  `lut` is an instance of the lookup table and `symbol` is the
+ * symbol for which to get the symbol group index. If no particular mapping is needed, an instance
+ * of `IdentityOp` can be used.
  */
-template <typename SymbolT>
+template <typename SymbolT, typename PreMapOpT>
 class SingleSymbolSmemLUT {
  private:
   // Type used for representing a symbol group id (i.e., what we return for a given symbol)
@@ -50,32 +70,36 @@ class SingleSymbolSmemLUT {
   };
 
  public:
+  using TempStorage = cub::Uninitialized<_TempStorage>;
+
   struct KernelParameter {
+    using LookupTableT = SingleSymbolSmemLUT<SymbolT, PreMapOpT>;
+
     // sym_to_sgid[min(symbol,num_valid_entries)] -> symbol group index
-    SymbolT num_valid_entries;
+    uint32_t num_valid_entries;
 
     // sym_to_sgid[symbol] -> symbol group index
     SymbolGroupIdT sym_to_sgid[NUM_ENTRIES_PER_LUT];
-  };
 
-  using TempStorage = cub::Uninitialized<_TempStorage>;
+    // Function object that transforms a symbol to a symbol group id
+    PreMapOpT pre_map_op;
+  };
 
   /**
    * @brief Initializes the given \p sgid_init with the symbol group lookups defined by \p
    * symbol_strings.
    *
-   * @param[out] sgid_init A hostdevice_vector that will be populated
-   * @param[in] symbol_strings Array of strings, where the i-th string holds all symbols
+   * @param symbol_strings Array of strings, where the i-th string holds all symbols
    * (characters!) that correspond to the i-th symbol group index
-   * @param[in] stream The stream that shall be used to cudaMemcpyAsync the lookup table
+   * @param pre_map_op Function object that is stored in the returned `KernelParameter`
    * @return
    */
   template <typename SymbolGroupItT>
-  static void InitDeviceSymbolGroupIdLut(
-    cudf::detail::hostdevice_vector<KernelParameter>& sgid_init,
-    SymbolGroupItT const& symbol_strings,
-    rmm::cuda_stream_view stream)
+  static KernelParameter InitDeviceSymbolGroupIdLut(SymbolGroupItT const& symbol_strings,
+                                                    PreMapOpT pre_map_op)
   {
+    KernelParameter init_data{};
+
     // The symbol group index to be returned if none of the given symbols match
     SymbolGroupIdT no_match_id = symbol_strings.size();
 
@@ -83,9 +107,7 @@ class SingleSymbolSmemLUT {
     SymbolGroupIdT max_base_match_val = 0;
 
     // Initialize all entries: by default we return the no-match-id
-    std::fill(&sgid_init.host_ptr()->sym_to_sgid[0],
-              &sgid_init.host_ptr()->sym_to_sgid[NUM_ENTRIES_PER_LUT],
-              no_match_id);
+    std::fill(&init_data.sym_to_sgid[0], &init_data.sym_to_sgid[NUM_ENTRIES_PER_LUT], no_match_id);
 
     // Set up lookup table
     uint32_t sg_id = 0;
@@ -94,22 +116,24 @@ class SingleSymbolSmemLUT {
       // Iterate over all symbols that belong to the current symbol group
       for (auto const& sg_symbol : sg_symbols) {
         max_base_match_val = std::max(max_base_match_val, static_cast<SymbolGroupIdT>(sg_symbol));
-        sgid_init.host_ptr()->sym_to_sgid[static_cast<int32_t>(sg_symbol)] = sg_id;
+        init_data.sym_to_sgid[static_cast<int32_t>(sg_symbol)] = sg_id;
       }
       sg_id++;
     }
 
     // Initialize the out-of-bounds lookup: sym_to_sgid[max_base_match_val+1] -> no_match_id
-    sgid_init.host_ptr()->sym_to_sgid[max_base_match_val + 1] = no_match_id;
+    init_data.sym_to_sgid[max_base_match_val + 1] = no_match_id;
 
     // Alias memory / return memory requirements
-    sgid_init.host_ptr()->num_valid_entries = max_base_match_val + 1;
+    init_data.num_valid_entries = max_base_match_val + 1;
+    init_data.pre_map_op        = pre_map_op;
 
-    sgid_init.host_to_device_async(stream);
+    return init_data;
   }
 
   _TempStorage& temp_storage;
   SymbolGroupIdT num_valid_entries;
+  PreMapOpT pre_map_op;
 
   __device__ __forceinline__ _TempStorage& PrivateStorage()
   {
@@ -140,7 +164,14 @@ class SingleSymbolSmemLUT {
 #endif
   }
 
-  constexpr CUDF_HOST_DEVICE int32_t operator()(SymbolT const symbol) const
+  template <typename SymbolT_>
+  constexpr CUDF_HOST_DEVICE int32_t operator()(SymbolT_ const symbol) const
+  {
+    // Look up the symbol group for given symbol
+    return pre_map_op(*this, symbol);
+  }
+
+  constexpr CUDF_HOST_DEVICE int32_t lookup(SymbolT const symbol) const
   {
     // Look up the symbol group for given symbol
     return temp_storage
@@ -148,6 +179,95 @@ class SingleSymbolSmemLUT {
   }
 };
 
+/**
+ * @brief Creates a symbol group lookup table of type `SingleSymbolSmemLUT` that uses a two-staged
+ * lookup approach. @p pre_map_op is a function object invoked with `(lut, symbol)` that must return
+ * the symbol group id for the given `symbol`. `lut` is an instance of the lookup table
+ * and `symbol` is a symbol from the input tape. Usually, @p pre_map_op first maps a symbol from
+ * the input tape to an integral that is convertible to `symbol_t`. In a second stage, @p pre_map_op
+ * uses `lut`'s `lookup(mapped_symbol)` that maps that integral to the symbol group id.
+ *
+ * @tparam symbol_t Must be an integral type
+ * @tparam NUM_SYMBOL_GROUPS The number of symbol groups, excluding the catchall symbol group (aka
+ * "other" symbol group)
+ * @tparam pre_map_op_t Type of the function object that is invoked with `(lut, symbol)`
+ * @param symbol_strings An array of vectors, where all the symbols in the i-th vector are mapped to
+ * the i-th symbol group
+ * @param pre_map_op A function object invoked with `(lut, symbol)`, returning the symbol group id
+ * @return A symbol group lookup table
+ */
+template <typename symbol_t, std::size_t NUM_SYMBOL_GROUPS, typename pre_map_op_t>
+auto make_symbol_group_lut(
+  std::array<std::vector<symbol_t>, NUM_SYMBOL_GROUPS> const& symbol_strings,
+  pre_map_op_t pre_map_op)
+{
+  using lookup_table_t = SingleSymbolSmemLUT<symbol_t, pre_map_op_t>;
+  return lookup_table_t::InitDeviceSymbolGroupIdLut(symbol_strings, pre_map_op);
+}
+
+/**
+ * @brief Creates a symbol group lookup table of type `SingleSymbolSmemLUT` that uses a two-staged
+ * lookup approach. @p pre_map_op is a function object invoked with `(lut, symbol)` that must return
+ * the symbol group id for the given `symbol`. `lut` is an instance of the lookup table
+ * and `symbol` is a symbol from the input tape. Usually, @p pre_map_op first maps a symbol from
+ * the input tape to an integral that is convertible to `symbol_t`. In a second stage, @p pre_map_op
+ * uses `lut`'s `lookup(mapped_symbol)` that maps that integral to the symbol group id.
+ *
+ * @tparam NUM_SYMBOL_GROUPS The number of symbol groups, excluding the catchall symbol group (aka
+ * "other" symbol group)
+ * @tparam pre_map_op_t Type of the function object that is invoked with `(lut, symbol)`
+ * @param symbol_strings An array of strings, where all the characters in the i-th string are mapped
+ * to the i-th symbol group. The lookup table's symbol type therefore is `char`
+ * @param pre_map_op A function object invoked with `(lut, symbol)`, returning the symbol group id
+ * for the given `symbol`
+ * @return A symbol group lookup table
+ */
+template <std::size_t NUM_SYMBOL_GROUPS, typename pre_map_op_t>
+auto make_symbol_group_lut(std::array<std::string, NUM_SYMBOL_GROUPS> const& symbol_strings,
+                           pre_map_op_t pre_map_op)
+{
+  using symbol_t       = char;
+  using lookup_table_t = SingleSymbolSmemLUT<symbol_t, pre_map_op_t>;
+  return lookup_table_t::InitDeviceSymbolGroupIdLut(symbol_strings, pre_map_op);
+}
+
+/**
+ * @brief Creates a symbol group lookup table that maps a symbol to a symbol group id, requiring the
+ * symbol type from the input tape to be assignable to `symbol_t` and `symbol_t` to be of integral
+ * type.
+ *
+ * @tparam symbol_t The input tape's symbol type must be assignable to this type
+ * @tparam NUM_SYMBOL_GROUPS The number of symbol groups, excluding the catchall symbol group (aka
+ * "other" symbol group)
+ * @param symbol_strings An array of vectors, where all the symbols in the i-th vector are mapped to
+ * the i-th symbol group
+ * @return A symbol group lookup table
+ */
+template <typename symbol_t, std::size_t NUM_SYMBOL_GROUPS>
+auto make_symbol_group_lut(
+  std::array<std::vector<symbol_t>, NUM_SYMBOL_GROUPS> const& symbol_strings)
+{
+  return make_symbol_group_lut(symbol_strings, IdentityOp{});
+}
+
+/**
+ * @brief Creates a symbol group lookup table that maps a symbol to a symbol group id, requiring the
+ * symbol type from the input tape to be assignable to `char`.
+ *
+ * Symbols are looked up without any prior mapping (the lookup is delegated via `IdentityOp`).
+ *
+ * @tparam NUM_SYMBOL_GROUPS The number of symbol groups, excluding the catchall symbol group (aka
+ * "other" symbol group)
+ * @param symbol_strings An array of strings, where all the characters in the i-th string are mapped
+ * to the i-th symbol group
+ * @return A symbol group lookup table
+ */
+template <std::size_t NUM_SYMBOL_GROUPS>
+auto make_symbol_group_lut(std::array<std::string, NUM_SYMBOL_GROUPS> const& symbol_strings)
+{
+  return make_symbol_group_lut(symbol_strings, IdentityOp{});
+}
+
 /**
  * @brief Lookup table mapping (old_state, symbol_group_id) transitions to a new target state. The
  * class uses shared memory for the lookups.
@@ -166,18 +286,20 @@ class TransitionTable {
   };
 
  public:
-  using TempStorage = cub::Uninitialized<_TempStorage>;
+  static constexpr int32_t NUM_STATES = MAX_NUM_STATES;
+  using TempStorage                   = cub::Uninitialized<_TempStorage>;
 
   struct KernelParameter {
+    using LookupTableT = TransitionTable<MAX_NUM_SYMBOLS, MAX_NUM_STATES>;
+
     ItemT transitions[MAX_NUM_STATES * MAX_NUM_SYMBOLS];
   };
 
   template <typename StateIdT>
-  static void InitDeviceTransitionTable(
-    cudf::detail::hostdevice_vector<KernelParameter>& transition_table_init,
-    std::array<std::array<StateIdT, MAX_NUM_SYMBOLS>, MAX_NUM_STATES> const& translation_table,
-    rmm::cuda_stream_view stream)
+  static KernelParameter InitDeviceTransitionTable(
+    std::array<std::array<StateIdT, MAX_NUM_SYMBOLS>, MAX_NUM_STATES> const& translation_table)
   {
+    KernelParameter init_data{};
     // translation_table[state][symbol] -> new state
     for (std::size_t state = 0; state < translation_table.size(); ++state) {
       for (std::size_t symbol = 0; symbol < translation_table[state].size(); ++symbol) {
@@ -185,13 +307,12 @@ class TransitionTable {
           static_cast<int64_t>(translation_table[state][symbol]) <=
             std::numeric_limits<ItemT>::max(),
           "Target state index value exceeds value representable by the transition table's type");
-        transition_table_init.host_ptr()->transitions[symbol * MAX_NUM_STATES + state] =
+        init_data.transitions[symbol * MAX_NUM_STATES + state] =
           static_cast<ItemT>(translation_table[state][symbol]);
       }
     }
 
-    // Copy transition table to device
-    transition_table_init.host_to_device_async(stream);
+    return init_data;
   }
 
   constexpr CUDF_HOST_DEVICE TransitionTable(KernelParameter const& kernel_param,
@@ -235,24 +356,83 @@ class TransitionTable {
   }
 };
 
+/**
+ * @brief Creates the initialization data of a `TransitionTable` that maps `(state_id, match_id)`
+ * pairs to the new target state for the given `(state_id, match_id)`-combination.
+ *
+ * @tparam StateIdT An integral type used to represent state indexes
+ * @tparam MAX_NUM_SYMBOLS The number of symbol groups, i.e., the inner dimension of the table
+ * @tparam MAX_NUM_STATES The maximum number of states that this lookup table shall support
+ * @param transition_table The transition table, indexed as `transition_table[state][symbol]`
+ * @return The `KernelParameter` of `TransitionTable<MAX_NUM_SYMBOLS, MAX_NUM_STATES>`
+ */
+template <typename StateIdT, std::size_t MAX_NUM_SYMBOLS, std::size_t MAX_NUM_STATES>
+auto make_transition_table(
+  std::array<std::array<StateIdT, MAX_NUM_SYMBOLS>, MAX_NUM_STATES> const& transition_table)
+{
+  using transition_table_t = TransitionTable<MAX_NUM_SYMBOLS, MAX_NUM_STATES>;
+  return transition_table_t::InitDeviceTransitionTable(transition_table);
+}
+
+/**
+ * @brief Type trait that derives from `std::true_type` if `OpT` declares both a `TempStorage` and
+ * a `KernelParameter` member type, and from `std::false_type` otherwise.
+ */
+template <typename OpT, typename = void>
+struct is_complex_op : std::false_type {};
+
+template <typename OpT>
+struct is_complex_op<OpT, std::void_t<typename OpT::TempStorage, typename OpT::KernelParameter>>
+  : std::true_type {};
+
+/**
+ * @brief The device view that is passed to the finite-state transducer algorithm. Each of the
+ * lookup tables can either be a simple function object that defines the `operator()` required for
+ * respective lookup table or a complex class.
+ *
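+ * @tparam SymbolGroupIdLookupT The symbol group lookup table type or function object type
+ * @tparam TransitionTableT The transition table type or function object type
+ * @tparam TranslationTableT The translation table type or function object type
+ * @tparam NUM_STATES The maximum number of states supported by this DFA instance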
+ */
 template <typename SymbolGroupIdLookupT,
           typename TransitionTableT,
           typename TranslationTableT,
           int32_t NUM_STATES>
 class dfa_device_view {
  private:
-  using sgid_lut_init_t          = typename SymbolGroupIdLookupT::KernelParameter;
-  using transition_table_init_t  = typename TransitionTableT::KernelParameter;
-  using translation_table_init_t = typename TranslationTableT::KernelParameter;
+  // Complex symbol group lookup operators need to declare a `TempStorage` and `KernelParameter`
+  // type member that is passed during device-side initialization.
+  using sgid_lut_init_t = std::conditional_t<is_complex_op<SymbolGroupIdLookupT>::value,
+                                             typename SymbolGroupIdLookupT::KernelParameter,
+                                             SymbolGroupIdLookupT>;
+
+  // Complex transition table lookup operators need to declare a `TempStorage` and
+  // `KernelParameter` type member that is passed during device-side initialization.
+  using transition_table_init_t = std::conditional_t<is_complex_op<TransitionTableT>::value,
+                                                     typename TransitionTableT::KernelParameter,
+                                                     TransitionTableT>;
+
+  // Complex translation table lookup operators need to declare a `TempStorage` and
+  // `KernelParameter` type member that is passed during device-side initialization.
+  using translation_table_init_t = std::conditional_t<is_complex_op<TranslationTableT>::value,
+                                                      typename TranslationTableT::KernelParameter,
+                                                      TranslationTableT>;
 
  public:
   // The maximum number of states supported by this DFA instance
   // This is a value queried by the DFA simulation algorithm
   static constexpr int32_t MAX_NUM_STATES = NUM_STATES;
 
-  using SymbolGroupStorageT      = typename SymbolGroupIdLookupT::TempStorage;
-  using TransitionTableStorageT  = typename TransitionTableT::TempStorage;
-  using TranslationTableStorageT = typename TranslationTableT::TempStorage;
+  using SymbolGroupStorageT      = std::conditional_t<is_complex_op<SymbolGroupIdLookupT>::value,
+                                                 typename SymbolGroupIdLookupT::TempStorage,
+                                                 typename cub::NullType>;
+  using TransitionTableStorageT  = std::conditional_t<is_complex_op<TransitionTableT>::value,
+                                                     typename TransitionTableT::TempStorage,
+                                                     typename cub::NullType>;
+  using TranslationTableStorageT = std::conditional_t<is_complex_op<TranslationTableT>::value,
+                                                      typename TranslationTableT::TempStorage,
+                                                      typename cub::NullType>;
 
   __device__ auto InitSymbolGroupLUT(SymbolGroupStorageT& temp_storage)
   {
@@ -286,14 +466,16 @@ class dfa_device_view {
 
 /**
  * @brief Lookup table mapping (old_state, symbol_group_id) transitions to a sequence of symbols
- * that the finite-state transducer is supposed to output for each transition. The class uses shared
- * memory for the lookups.
+ * that the finite-state transducer is supposed to output for each transition. The class uses
+ * shared memory for the lookups.
  *
  * @tparam OutSymbolT The symbol type being output
- * @tparam OutSymbolOffsetT Type sufficiently large to index into the lookup table of output symbols
+ * @tparam OutSymbolOffsetT Type sufficiently large to index into the lookup table of output
+ * symbols
  * @tparam MAX_NUM_SYMBOLS The maximum number of symbols being output by a single state transition
  * @tparam MAX_NUM_STATES The maximum number of states that this lookup table shall support
  * @tparam MAX_TABLE_SIZE The maximum number of items in the lookup table of output symbols
+ * that may be used.
  */
 template <typename OutSymbolT,
           typename OutSymbolOffsetT,
@@ -311,6 +493,12 @@ class TransducerLookupTable {
   using TempStorage = cub::Uninitialized<_TempStorage>;
 
   struct KernelParameter {
+    using LookupTableT = TransducerLookupTable<OutSymbolT,
+                                               OutSymbolOffsetT,
+                                               MAX_NUM_SYMBOLS,
+                                               MAX_NUM_STATES,
+                                               MAX_TABLE_SIZE>;
+
     OutSymbolOffsetT d_out_offsets[MAX_NUM_STATES * MAX_NUM_SYMBOLS + 1];
     OutSymbolT d_out_symbols[MAX_TABLE_SIZE];
   };
@@ -321,12 +509,11 @@ class TransducerLookupTable {
    * @note Synchronizes the thread block, if called from device, and, hence, requires all threads
    * of the thread block to call the constructor
    */
-  static void InitDeviceTranslationTable(
-    cudf::detail::hostdevice_vector<KernelParameter>& translation_table_init,
+  static KernelParameter InitDeviceTranslationTable(
     std::array<std::array<std::vector<OutSymbolT>, MAX_NUM_SYMBOLS>, MAX_NUM_STATES> const&
-      translation_table,
-    rmm::cuda_stream_view stream)
+      translation_table)
   {
+    KernelParameter init_data;
     std::vector<OutSymbolT> out_symbols;
     out_symbols.reserve(MAX_TABLE_SIZE);
     std::vector<OutSymbolOffsetT> out_symbol_offsets;
@@ -357,15 +544,11 @@ class TransducerLookupTable {
     CUDF_EXPECTS(out_symbols.size() <= MAX_TABLE_SIZE, "Unsupported translation table");
 
     // Prepare host-side data to be copied and passed to the device
-    std::copy(std::cbegin(out_symbol_offsets),
-              std::cend(out_symbol_offsets),
-              translation_table_init.host_ptr()->d_out_offsets);
-    std::copy(std::cbegin(out_symbols),
-              std::cend(out_symbols),
-              translation_table_init.host_ptr()->d_out_symbols);
-
-    // Copy data to device
-    translation_table_init.host_to_device_async(stream);
+    std::copy(
+      std::cbegin(out_symbol_offsets), std::cend(out_symbol_offsets), init_data.d_out_offsets);
+    std::copy(std::cbegin(out_symbols), std::cend(out_symbols), init_data.d_out_symbols);
+
+    return init_data;
   }
 
  private:
@@ -408,24 +591,130 @@ class TransducerLookupTable {
 #endif
   }
 
-  template <typename StateIndexT, typename SymbolIndexT, typename RelativeOffsetT>
-  constexpr CUDF_HOST_DEVICE OutSymbolT operator()(StateIndexT const state_id,
-                                                   SymbolIndexT const match_id,
-                                                   RelativeOffsetT const relative_offset) const
+  template <typename StateIndexT, typename SymbolIndexT, typename RelativeOffsetT, typename SymbolT>
+  constexpr CUDF_HOST_DEVICE auto operator()(StateIndexT const state_id,
+                                             SymbolIndexT const match_id,
+                                             RelativeOffsetT const relative_offset,
+                                             SymbolT const /*read_symbol*/) const
   {
     auto offset = temp_storage.out_offset[state_id * MAX_NUM_SYMBOLS + match_id] + relative_offset;
     return temp_storage.out_symbols[offset];
   }
 
-  template <typename StateIndexT, typename SymbolIndexT>
+  template <typename StateIndexT, typename SymbolIndexT, typename SymbolT>
   constexpr CUDF_HOST_DEVICE OutSymbolOffsetT operator()(StateIndexT const state_id,
-                                                         SymbolIndexT const match_id) const
+                                                         SymbolIndexT const match_id,
+                                                         SymbolT const /*read_symbol*/) const
   {
     return temp_storage.out_offset[state_id * MAX_NUM_SYMBOLS + match_id + 1] -
            temp_storage.out_offset[state_id * MAX_NUM_SYMBOLS + match_id];
   }
 };
 
+/**
+ * @brief Creates a translation table that maps (old_state, symbol_group_id) transitions to a
+ * sequence of symbols that the finite-state transducer is supposed to output for each transition.
+ *
+ * @tparam MAX_TABLE_SIZE The maximum number of items that the lookup table of output symbols
+ * can hold
+ * @tparam OutSymbolT The symbol type being output
+ * @tparam MAX_NUM_SYMBOLS The number of symbol groups, i.e., the inner dimension of the table
+ * @tparam MAX_NUM_STATES The maximum number of states that this lookup table shall support
+ * @param translation_table The translation table, indexed as `translation_table[state][symbol]`
+ * @return The `KernelParameter` used to initialize the `TransducerLookupTable`
+ */
+template <std::size_t MAX_TABLE_SIZE,
+          typename OutSymbolT,
+          std::size_t MAX_NUM_SYMBOLS,
+          std::size_t MAX_NUM_STATES>
+auto make_translation_table(std::array<std::array<std::vector<OutSymbolT>, MAX_NUM_SYMBOLS>,
+                                       MAX_NUM_STATES> const& translation_table)
+{
+  using OutSymbolOffsetT    = int32_t;
+  using translation_table_t = TransducerLookupTable<OutSymbolT,
+                                                    OutSymbolOffsetT,
+                                                    MAX_NUM_SYMBOLS,
+                                                    MAX_NUM_STATES,
+                                                    MAX_TABLE_SIZE>;
+  return translation_table_t::InitDeviceTranslationTable(translation_table);
+}
+
+template <typename TranslationOpT>
+class TranslationOp {
+ private:
+  struct _TempStorage {};
+
+ public:
+  using TempStorage = cub::Uninitialized<_TempStorage>;
+
+  struct KernelParameter {
+    using LookupTableT = TranslationOp<TranslationOpT>;
+    TranslationOpT translation_op;
+  };
+
+  /**
+   * @brief Wraps the given function object in the `KernelParameter` from which a `TranslationOp`
+   * is constructed. This is a host-side helper that only copies the function object.
+   * @param translation_op The function object that the lookup table forwards its lookups to
+   * @return The `KernelParameter` holding a copy of `translation_op`
+   */
+  static KernelParameter InitDeviceTranslationTable(TranslationOpT translation_op)
+  {
+    return KernelParameter{translation_op};
+  }
+
+ private:
+  _TempStorage& temp_storage;
+  TranslationOpT translation_op;
+
+  __device__ __forceinline__ _TempStorage& PrivateStorage()
+  {
+    __shared__ _TempStorage private_storage;
+    return private_storage;
+  }
+
+ public:
+  CUDF_HOST_DEVICE TranslationOp(KernelParameter const& kernel_param, TempStorage& temp_storage)
+    : temp_storage(temp_storage.Alias()), translation_op(kernel_param.translation_op)
+  {
+  }
+
+  template <typename StateIndexT, typename SymbolIndexT, typename RelativeOffsetT, typename SymbolT>
+  constexpr CUDF_HOST_DEVICE auto operator()(StateIndexT const state_id,
+                                             SymbolIndexT const match_id,
+                                             RelativeOffsetT const relative_offset,
+                                             SymbolT const read_symbol) const
+  {
+    return translation_op(*this, state_id, match_id, relative_offset, read_symbol);
+  }
+
+  template <typename StateIndexT, typename SymbolIndexT, typename SymbolT>
+  constexpr CUDF_HOST_DEVICE auto operator()(StateIndexT const state_id,
+                                             SymbolIndexT const match_id,
+                                             SymbolT const read_symbol) const
+  {
+    return translation_op(*this, state_id, match_id, read_symbol);
+  }
+};
+
+/**
+ * @brief Creates a translation table that delegates its lookups to a simple function object.
+ *
+ * @tparam FunctorT A function object type that must implement two signatures: (1) with `(lut,
+ * state_id, match_id, read_symbol)` and (2) with `(lut, state_id, match_id, relative_offset,
+ * read_symbol)`, where `lut` is the `TranslationOp` instance that invokes the function object
+ * @param map_op A function object of type `FunctorT`. Invocations of the first signature, (1),
+ * must return the number of symbols that are emitted for the given transition. Invocations of the
+ * second signature, (2), must return the i-th symbol to be emitted for that transition, where `i`
+ * corresponds to `relative_offset`
+ * @return The `KernelParameter` of a `TranslationOp<FunctorT>` that wraps `map_op`
+ */
+template <typename FunctorT>
+auto make_translation_functor(FunctorT map_op)
+{
+  return TranslationOp<FunctorT>::InitDeviceTranslationTable(map_op);
+}
+
 /**
  * @brief Helper class to facilitate the specification and instantiation of a DFA (i.e., the
  * transition table and its number of states, the mapping of symbols to symbol groups, and the
@@ -437,70 +726,32 @@ class TransducerLookupTable {
  * @tparam NUM_STATES The number of states defined by the DFA (the other dimension of the
  * transition table)
  */
-template <typename OutSymbolT, int32_t NUM_SYMBOLS, int32_t NUM_STATES>
+template <typename SymbolGroupIdInitT,
+          typename TransitionTableInitT,
+          typename TranslationTableInitT>
 class Dfa {
- public:
-  // The maximum number of states supported by this DFA instance
-  // This is a value queried by the DFA simulation algorithm
-  static constexpr int32_t MAX_NUM_STATES = NUM_STATES;
-
- private:
-  // Symbol-group id lookup table
-  using SymbolGroupIdLookupT = detail::SingleSymbolSmemLUT<char>;
-  using SymbolGroupIdInitT   = typename SymbolGroupIdLookupT::KernelParameter;
-
-  // Transition table
-  using TransitionTableT     = detail::TransitionTable<NUM_SYMBOLS, NUM_STATES>;
-  using TransitionTableInitT = typename TransitionTableT::KernelParameter;
-
-  // Translation lookup table
-  using OutSymbolOffsetT      = uint32_t;
-  using TranslationTableT     = detail::TransducerLookupTable<OutSymbolT,
-                                                          OutSymbolOffsetT,
-                                                          NUM_SYMBOLS,
-                                                          NUM_STATES,
-                                                          NUM_SYMBOLS * NUM_STATES>;
-  using TranslationTableInitT = typename TranslationTableT::KernelParameter;
+  static constexpr int32_t single_item = 1;
 
+ public:
   auto get_device_view()
   {
-    return dfa_device_view<SymbolGroupIdLookupT, TransitionTableT, TranslationTableT, NUM_STATES>{
-      sgid_init.d_begin(), transition_table_init.d_begin(), translation_table_init.d_begin()};
+    return dfa_device_view<typename SymbolGroupIdInitT::LookupTableT,
+                           typename TransitionTableInitT::LookupTableT,
+                           typename TranslationTableInitT::LookupTableT,
+                           TransitionTableInitT::LookupTableT::NUM_STATES>{
+      &init_data.d_begin()->sgid_lut_init,
+      &init_data.d_begin()->transition_table_init,
+      &init_data.d_begin()->translation_table_init};
   }
 
- public:
-  /**
-   * @brief Constructs a new DFA.
-   *
-   * @param symbol_vec Sequence container of symbol groups. Each symbol group is a sequence
-   * container to symbols within that group. The index of the symbol group containing a symbol being
-   * read will be used as symbol_gid of the transition and translation tables.
-   * @param tt_vec The transition table
-   * @param out_tt_vec The translation table
-   * @param stream The stream to which memory operations and kernels are getting dispatched to
-   */
-  template <typename StateIdT, typename SymbolGroupIdItT>
-  Dfa(SymbolGroupIdItT const& symbol_vec,
-      std::array<std::array<StateIdT, NUM_SYMBOLS>, NUM_STATES> const& tt_vec,
-      std::array<std::array<std::vector<OutSymbolT>, NUM_SYMBOLS>, NUM_STATES> const& out_tt_vec,
-      cudaStream_t stream)
+  Dfa(SymbolGroupIdInitT const& sgid_lut_init,
+      TransitionTableInitT const& transition_table_init,
+      TranslationTableInitT const& translation_table_init,
+      rmm::cuda_stream_view stream)
+    : init_data{single_item, stream}
   {
-    constexpr std::size_t single_item = 1;
-
-    sgid_init = cudf::detail::hostdevice_vector<SymbolGroupIdInitT>{single_item, stream};
-    transition_table_init =
-      cudf::detail::hostdevice_vector<TransitionTableInitT>{single_item, stream};
-    translation_table_init =
-      cudf::detail::hostdevice_vector<TranslationTableInitT>{single_item, stream};
-
-    // Initialize symbol group id lookup table
-    SymbolGroupIdLookupT::InitDeviceSymbolGroupIdLut(sgid_init, symbol_vec, stream);
-
-    // Initialize state transition table
-    TransitionTableT::InitDeviceTransitionTable(transition_table_init, tt_vec, stream);
-
-    // Initialize finite-state transducer lookup table
-    TranslationTableT::InitDeviceTranslationTable(translation_table_init, out_tt_vec, stream);
+    *init_data.host_ptr() = {sgid_lut_init, transition_table_init, translation_table_init};
+    init_data.host_to_device_async(stream);
   }
 
   /**
@@ -513,8 +764,8 @@ class Dfa {
    * indexes are written.
    * @tparam TransducedCountOutItT A single-item output iterator type to which the total number of
    * output symbols is written
-   * @tparam OffsetT A type large enough to index into either of both: (a) the input symbols and (b)
-   * the output symbols
+   * @tparam OffsetT A type large enough to index into either of both: (a) the input symbols and
+   * (b) the output symbols
    * @param d_chars Pointer to the input string of symbols
    * @param num_chars The total number of input symbols to process
    * @param d_out_it Random-access output iterator to which the transduced output is
@@ -527,12 +778,12 @@ class Dfa {
    * "end-state" of the previous invocation of the algorithm.
    * @param stream CUDA stream to launch kernels within. Default is the null-stream.
    */
-  template <typename SymbolT,
+  template <typename SymbolItT,
             typename TransducedOutItT,
             typename TransducedIndexOutItT,
             typename TransducedCountOutItT,
             typename OffsetT>
-  void Transduce(SymbolT const* d_chars,
+  void Transduce(SymbolItT d_chars_it,
                  OffsetT num_chars,
                  TransducedOutItT d_out_it,
                  TransducedIndexOutItT d_out_idx_it,
@@ -545,7 +796,7 @@ class Dfa {
     DeviceTransduce(nullptr,
                     temp_storage_bytes,
                     this->get_device_view(),
-                    d_chars,
+                    d_chars_it,
                     num_chars,
                     d_out_it,
                     d_out_idx_it,
@@ -560,7 +811,7 @@ class Dfa {
     DeviceTransduce(temp_storage.data(),
                     temp_storage_bytes,
                     this->get_device_view(),
-                    d_chars,
+                    d_chars_it,
                     num_chars,
                     d_out_it,
                     d_out_idx_it,
@@ -570,9 +821,36 @@ class Dfa {
   }
 
  private:
-  cudf::detail::hostdevice_vector<SymbolGroupIdInitT> sgid_init{};
-  cudf::detail::hostdevice_vector<TransitionTableInitT> transition_table_init{};
-  cudf::detail::hostdevice_vector<TranslationTableInitT> translation_table_init{};
+  struct host_device_data {
+    SymbolGroupIdInitT sgid_lut_init;
+    TransitionTableInitT transition_table_init;
+    TranslationTableInitT translation_table_init;
+  };
+  cudf::detail::hostdevice_vector<host_device_data> init_data{};
 };
 
+/**
+ * @brief Creates a deterministic finite automaton (DFA) as specified by the triple of (symbol
+ * group, transition, translation)-lookup tables to be used with the finite-state transducer
+ * algorithm.
+ *
+ * @param sgid_lut_init Object used to initialize the symbol group lookup table
+ * @param transition_table_init Object used to initialize the transition table
+ * @param translation_table_init Object used to initialize the translation table
+ * @param stream The stream used to allocate and initialize device-side memory that is used to
+ * initialize the lookup tables
+ * @return A DFA of type `Dfa`.
+ */
+template <typename SymbolGroupIdInitT,
+          typename TransitionTableInitT,
+          typename TranslationTableInitT>
+auto make_fst(SymbolGroupIdInitT const& sgid_lut_init,
+              TransitionTableInitT const& transition_table_init,
+              TranslationTableInitT const& translation_table_init,
+              rmm::cuda_stream_view stream)
+{
+  return Dfa<SymbolGroupIdInitT, TransitionTableInitT, TranslationTableInitT>(
+    sgid_lut_init, transition_table_init, translation_table_init, stream);
+}
+
 }  // namespace cudf::io::fst::detail
diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp
index 1c7d5b11032..3bbfc4b5f83 100644
--- a/cpp/src/io/json/nested_json.hpp
+++ b/cpp/src/io/json/nested_json.hpp
@@ -44,6 +44,21 @@ struct tree_meta_t {
  */
 enum class json_col_t : char { ListColumn, StructColumn, StringColumn, Unknown };
 
+/**
+ * @brief Enum class to specify whether we just push onto and pop from the stack or whether we also
+ * reset to an empty stack on a newline character.
+ */
+enum class stack_behavior_t : char {
+  /// Opening brackets and braces (`[`, `{`) push onto the stack; closing brackets and braces
+  /// (`]`, `}`) pop from the stack
+  PushPopWithoutReset,
+
+  /// Opening brackets and braces (`[`, `{`) push onto the stack; closing brackets and braces
+  /// (`]`, `}`) pop from the stack. Newline characters are considered delimiters and therefore
+  /// reset to an empty stack.
+  ResetOnDelimiter
+};
+
 // Default name for a list's child column
 constexpr auto list_child_name{"element"};
 
@@ -175,12 +190,28 @@ namespace detail {
  * character of \p d_json_in, where a '{' represents that the corresponding input character is
  * within the context of a struct, a '[' represents that it is within the context of an array, and a
  * '_' symbol that it is at the root of the JSON.
+ * @param[in] stack_behavior Specifies the stack's behavior
  * @param[in] stream The cuda stream to dispatch GPU kernels to
  */
 void get_stack_context(device_span<SymbolT const> json_in,
                        SymbolT* d_top_of_stack,
+                       stack_behavior_t stack_behavior,
                        rmm::cuda_stream_view stream);
 
+/**
+ * @brief Post-processes a token stream that may contain tokens from invalid lines. Expects that the
+ * token stream begins with a LineEnd token.
+ *
+ * @param tokens The tokens to be post-processed
+ * @param token_indices The tokens' corresponding indices that are post-processed
+ * @param stream The cuda stream to dispatch GPU kernels to
+ * @return Returns the post-processed token stream
+ */
+std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> process_token_stream(
+  device_span<PdaTokenT const> tokens,
+  device_span<SymbolOffsetT const> token_indices,
+  rmm::cuda_stream_view stream);
+
 /**
  * @brief Parses the given JSON string and generates a tree representation of the given input.
  *
diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 26dffd3328a..3b6c2b18250 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -27,6 +27,7 @@
 #include <cudf/detail/utilities/visitor_overload.hpp>
 #include <cudf/detail/valid_if.cuh>
 #include <cudf/io/detail/data_casting.cuh>
+#include <cudf/io/detail/tokenize_json.hpp>
 #include <cudf/io/json.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/types.hpp>
@@ -39,8 +40,11 @@
 #include <rmm/exec_policy.hpp>
 
 #include <thrust/device_vector.h>
+#include <thrust/iterator/discard_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
+#include <thrust/iterator/zip_iterator.h>
 #include <thrust/transform.h>
+#include <thrust/tuple.h>
 
 #include <limits>
 #include <stack>
@@ -88,6 +92,115 @@ void check_input_size(std::size_t input_size)
 
 namespace cudf::io::json {
 
+// FST to prune tokens of invalid lines for recovering JSON lines format
+namespace token_filter {
+
+// Type used to represent the target state in the transition table
+using StateT = char;
+
+// Type used to represent a symbol group id
+using SymbolGroupT = uint8_t;
+
+/**
+ * @brief Definition of the DFA's states
+ */
+enum class dfa_states : StateT { VALID, INVALID, NUM_STATES };
+
+// Aliases for readability of the transition table
+constexpr auto TT_INV = dfa_states::INVALID;
+constexpr auto TT_VLD = dfa_states::VALID;
+
+/**
+ * @brief Definition of the symbol groups
+ */
+enum class dfa_symbol_group_id : SymbolGroupT {
+  ERROR,             ///< Error token symbol group
+  DELIMITER,         ///< Record / line delimiter symbol group
+  OTHER_SYMBOLS,     ///< Symbol group that implicitly matches all other tokens
+  NUM_SYMBOL_GROUPS  ///< Total number of symbol groups
+};
+
+constexpr auto TT_NUM_STATES     = static_cast<StateT>(dfa_states::NUM_STATES);
+constexpr auto NUM_SYMBOL_GROUPS = static_cast<uint32_t>(dfa_symbol_group_id::NUM_SYMBOL_GROUPS);
+
+// Lookup table to map an input symbol (i.e., a token) to a symbol group
+std::array<std::vector<PdaTokenT>, NUM_SYMBOL_GROUPS - 1> const symbol_groups{{
+  {static_cast<PdaTokenT>(token_t::ErrorBegin)},  // Symbols mapping to ERROR
+  {static_cast<PdaTokenT>(token_t::LineEnd)}      // Symbols mapping to DELIMITER
+}};
+
+/**
+ * @brief Function object mapping a (token, token_index) tuple to a symbol group, ignoring the index.
+ */
+struct UnwrapTokenFromSymbolOp {
+  template <typename SymbolGroupLookupTableT>
+  CUDF_HOST_DEVICE SymbolGroupT operator()(SymbolGroupLookupTableT const& sgid_lut,
+                                           thrust::tuple<PdaTokenT, SymbolOffsetT> symbol) const
+  {
+    PdaTokenT const token_type = thrust::get<0>(symbol);
+    return sgid_lut.lookup(token_type);
+  }
+};
+
+/**
+ * @brief Translation function object that drops line delimiters and the tokens of invalid lines,
+ * emitting a (`StructEnd`, `StructBegin`) token pair in place of each invalid line's delimiter.
+ */
+struct TransduceToken {
+  template <typename TransducerTableT, typename RelativeOffsetT, typename SymbolT>
+  constexpr CUDF_HOST_DEVICE SymbolT operator()(TransducerTableT const&,
+                                                StateT const state_id,
+                                                SymbolGroupT const match_id,
+                                                RelativeOffsetT const relative_offset,
+                                                SymbolT const read_symbol) const
+  {
+    bool const is_end_of_invalid_line =
+      (state_id == static_cast<StateT>(TT_INV) &&
+       match_id == static_cast<SymbolGroupT>(dfa_symbol_group_id::DELIMITER));
+
+    if (is_end_of_invalid_line) {
+      return relative_offset == 0 ? SymbolT{token_t::StructEnd, 0}
+                                  : SymbolT{token_t::StructBegin, 0};
+    } else {
+      return read_symbol;
+    }
+  }
+
+  template <typename TransducerTableT, typename SymbolT>
+  constexpr CUDF_HOST_DEVICE int32_t operator()(TransducerTableT const&,
+                                                StateT const state_id,
+                                                SymbolGroupT const match_id,
+                                                SymbolT const read_symbol) const
+  {
+    // Number of tokens emitted when an invalid line ends
+    constexpr int32_t num_inv_tokens = 2;
+
+    bool const is_delimiter = match_id == static_cast<SymbolGroupT>(dfa_symbol_group_id::DELIMITER);
+
+    // Only tokens read in the valid state that do not raise an error are passed through
+    bool const is_part_of_valid_line =
+      (match_id != static_cast<SymbolGroupT>(dfa_symbol_group_id::ERROR) &&
+       state_id == static_cast<StateT>(TT_VLD));
+
+    // Indicates whether we transition from an invalid line to a potentially valid line
+    bool const is_end_of_invalid_line = (state_id == static_cast<StateT>(TT_INV) && is_delimiter);
+
+    int32_t const emit_count =
+      is_end_of_invalid_line ? num_inv_tokens : (is_part_of_valid_line && !is_delimiter ? 1 : 0);
+    return emit_count;
+  }
+};
+
+// Transition table: ERROR invalidates the line, DELIMITER starts a new valid line
+std::array<std::array<dfa_states, NUM_SYMBOL_GROUPS>, TT_NUM_STATES> const transition_table{
+  {/* IN_STATE      ERROR   DELIM   OTHER */
+   /* VALID    */ {{TT_INV, TT_VLD, TT_VLD}},
+   /* INVALID  */ {{TT_INV, TT_VLD, TT_INV}}}};
+
+// The DFA's starting state
+constexpr auto start_state = static_cast<StateT>(TT_VLD);
+}  // namespace token_filter
+
 // JSON to stack operator DFA (Deterministic Finite Automata)
 namespace to_stack_op {
 
@@ -129,6 +242,7 @@ enum class dfa_symbol_group_id : uint8_t {
   CLOSING_BRACKET,   ///< Closing bracket SG: ]
   QUOTE_CHAR,        ///< Quote character SG: "
   ESCAPE_CHAR,       ///< Escape character SG: '\'
+  NEWLINE_CHAR,      ///< Newline character SG: '\n'
   OTHER_SYMBOLS,     ///< SG implicitly matching all other characters
   NUM_SYMBOL_GROUPS  ///< Total number of symbol groups
 };
@@ -138,21 +252,29 @@ constexpr auto NUM_SYMBOL_GROUPS = static_cast<uint32_t>(dfa_symbol_group_id::NU
 
 // The i-th string representing all the characters of a symbol group
 std::array<std::string, NUM_SYMBOL_GROUPS - 1> const symbol_groups{
-  {{"{"}, {"["}, {"}"}, {"]"}, {"\""}, {"\\"}}};
+  {{"{"}, {"["}, {"}"}, {"]"}, {"\""}, {"\\"}, {"\n"}}};
 
 // Transition table
 std::array<std::array<dfa_states, NUM_SYMBOL_GROUPS>, TT_NUM_STATES> const transition_table{
-  {/* IN_STATE          {       [       }       ]       "       \    OTHER */
-   /* TT_OOS    */ {{TT_OOS, TT_OOS, TT_OOS, TT_OOS, TT_STR, TT_OOS, TT_OOS}},
-   /* TT_STR    */ {{TT_STR, TT_STR, TT_STR, TT_STR, TT_OOS, TT_ESC, TT_STR}},
-   /* TT_ESC    */ {{TT_STR, TT_STR, TT_STR, TT_STR, TT_STR, TT_STR, TT_STR}}}};
+  {/* IN_STATE          {       [       }       ]       "       \      \n    OTHER */
+   /* TT_OOS    */ {{TT_OOS, TT_OOS, TT_OOS, TT_OOS, TT_STR, TT_OOS, TT_OOS, TT_OOS}},
+   /* TT_STR    */ {{TT_STR, TT_STR, TT_STR, TT_STR, TT_OOS, TT_ESC, TT_STR, TT_STR}},
+   /* TT_ESC    */ {{TT_STR, TT_STR, TT_STR, TT_STR, TT_STR, TT_STR, TT_STR, TT_STR}}}};
 
 // Translation table (i.e., for each transition, what are the symbols that we output)
 std::array<std::array<std::vector<char>, NUM_SYMBOL_GROUPS>, TT_NUM_STATES> const translation_table{
-  {/* IN_STATE         {      [      }      ]      "      \    OTHER */
-   /* TT_OOS    */ {{{'{'}, {'['}, {'}'}, {']'}, {}, {}, {}}},
-   /* TT_STR    */ {{{}, {}, {}, {}, {}, {}, {}}},
-   /* TT_ESC    */ {{{}, {}, {}, {}, {}, {}, {}}}}};
+  {/* IN_STATE         {      [      }      ]      "      \     \n    OTHER */
+   /* TT_OOS    */ {{{'{'}, {'['}, {'}'}, {']'}, {}, {}, {}, {}}},
+   /* TT_STR    */ {{{}, {}, {}, {}, {}, {}, {}, {}}},
+   /* TT_ESC    */ {{{}, {}, {}, {}, {}, {}, {}, {}}}}};
+
+// Translation table that additionally outputs '\n' for a newline read in state TT_OOS
+std::array<std::array<std::vector<char>, NUM_SYMBOL_GROUPS>, TT_NUM_STATES> const
+  resetting_translation_table{
+    {/* IN_STATE         {      [      }      ]      "      \     \n    OTHER */
+     /* TT_OOS    */ {{{'{'}, {'['}, {'}'}, {']'}, {}, {}, {'\n'}, {}}},
+     /* TT_STR    */ {{{}, {}, {}, {}, {}, {}, {}, {}}},
+     /* TT_ESC    */ {{{}, {}, {}, {}, {}, {}, {}, {}}}}};
 
 // The DFA's starting state
 constexpr auto start_state = static_cast<StateT>(TT_OOS);
@@ -409,6 +531,27 @@ enum class pda_state_t : StateT {
   PD_NUM_STATES
 };
 
+enum class json_format_cfg_t {
+  // Format describing regular JSON
+  JSON,
+
+  // Format describing permissive newline-delimited JSON
+  // I.e., newline characters are only treated as delimiters at the root stack level
+  // E.g., this is treated as a single record:
+  // {"a":
+  //  123}
+  JSON_LINES,
+
+  // Format describing strict newline-delimited JSON
+  // I.e., all newlines delimit a record, independent of the context they appear in
+  JSON_LINES_STRICT,
+
+  // Format describing newline-delimited JSON that recovers from invalid JSON lines
+  // This format also follows `JSON_LINES_STRICT` behaviour
+  JSON_LINES_RECOVER
+
+};
+
 // Aliases for readability of the transition table
 constexpr auto PD_BOV = pda_state_t::PD_BOV;
 constexpr auto PD_BOA = pda_state_t::PD_BOA;
@@ -430,68 +573,133 @@ constexpr auto start_state = static_cast<StateT>(pda_state_t::PD_BOV);
 /**
  * @brief Getting the transition table
  */
-auto get_transition_table(bool newline_delimited_json)
+auto get_transition_table(json_format_cfg_t format)
 {
   static_assert(static_cast<PdaStackSymbolGroupIdT>(stack_symbol_group_id::STACK_ROOT) == 0);
   static_assert(static_cast<PdaStackSymbolGroupIdT>(stack_symbol_group_id::STACK_LIST) == 1);
   static_assert(static_cast<PdaStackSymbolGroupIdT>(stack_symbol_group_id::STACK_STRUCT) == 2);
 
-  // In case of newline-delimited JSON, multiple newlines are ignored, similar to whitespace.
-  // Thas is, empty lines are ignored
-  auto const PD_ANL = newline_delimited_json ? PD_BOV : PD_PVL;
   std::array<std::array<pda_state_t, NUM_PDA_SGIDS>, PD_NUM_STATES> pda_tt;
-  //  {       [       }       ]       "       \       ,       :     space   newline other
-  pda_tt[static_cast<StateT>(pda_state_t::PD_BOV)] = {
-    PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON,
-    PD_BOA, PD_BOA, PD_ERR, PD_PVL, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON,
-    PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON};
-  pda_tt[static_cast<StateT>(pda_state_t::PD_BOA)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_BOA, PD_BOA, PD_ERR, PD_PVL, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOA, PD_BOA, PD_LON,
-    PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_FLN, PD_ERR, PD_ERR, PD_ERR, PD_BOA, PD_BOA, PD_ERR};
-  pda_tt[static_cast<StateT>(pda_state_t::PD_LON)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_PVL, PD_LON,
-    PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_PVL, PD_LON,
-    PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_PVL, PD_LON};
-  pda_tt[static_cast<StateT>(pda_state_t::PD_STR)] = {
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR};
-  pda_tt[static_cast<StateT>(pda_state_t::PD_SCE)] = {
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR};
-  pda_tt[static_cast<StateT>(pda_state_t::PD_PVL)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ANL, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_PVL, PD_ERR,
-    PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_PVL, PD_ERR};
-  pda_tt[static_cast<StateT>(pda_state_t::PD_BFN)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_FLN, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_BFN, PD_ERR};
-  pda_tt[static_cast<StateT>(pda_state_t::PD_FLN)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_PFN, PD_FNE, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN};
-  pda_tt[static_cast<StateT>(pda_state_t::PD_FNE)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN};
-  pda_tt[static_cast<StateT>(pda_state_t::PD_PFN)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_PFN, PD_PFN, PD_ERR};
-  pda_tt[static_cast<StateT>(pda_state_t::PD_ERR)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR};
+
+  if (format == json_format_cfg_t::JSON || format == json_format_cfg_t::JSON_LINES) {
+    // In case of newline-delimited JSON, multiple newlines are ignored, similar to whitespace.
+    // That is, empty lines are ignored
+    // PD_ANL describes the target state after a new line on an empty stack (JSON root level)
+    auto const PD_ANL = (format == json_format_cfg_t::JSON) ? PD_PVL : PD_BOV;
+
+    // First row:  empty stack         ("root" level of the JSON)
+    // Second row: '[' on top of stack (we're parsing a list value)
+    // Third row:  '{' on top of stack (we're parsing a struct value)
+    //  {       [       }       ]       "       \       ,       :     space   newline other
+    pda_tt[static_cast<StateT>(pda_state_t::PD_BOV)] = {
+      PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON,
+      PD_BOA, PD_BOA, PD_ERR, PD_PVL, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON,
+      PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_BOA)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_BOA, PD_BOA, PD_ERR, PD_PVL, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOA, PD_BOA, PD_LON,
+      PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_FLN, PD_ERR, PD_ERR, PD_ERR, PD_BOA, PD_BOA, PD_ERR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_LON)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_PVL, PD_LON,
+      PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_PVL, PD_LON,
+      PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_PVL, PD_LON};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_STR)] = {
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_SCE)] = {
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_PVL)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ANL, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_PVL, PD_ERR,
+      PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_PVL, PD_ERR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_BFN)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_FLN, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_BFN, PD_ERR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_FLN)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_PFN, PD_FNE, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_FNE)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_PFN)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_PFN, PD_PFN, PD_ERR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_ERR)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR};
+  }
+  // Transition table for strict JSON lines (including recovery)
+  // Newlines are treated as record delimiters
+  else {
+    // In case of newline-delimited JSON, multiple newlines are ignored, similar to whitespace.
+    // That is, empty lines are ignored
+    // PD_ANL describes the target state after a new line after encountering error state
+    auto const PD_ANL = (format == json_format_cfg_t::JSON_LINES_RECOVER) ? PD_BOV : PD_ERR;
+
+    // First row:  empty stack         ("root" level of the JSON)
+    // Second row: '[' on top of stack (we're parsing a list value)
+    // Third row:  '{' on top of stack (we're parsing a struct value)
+    //  {       [       }       ]       "       \       ,       :     space   newline other
+    pda_tt[static_cast<StateT>(pda_state_t::PD_BOV)] = {
+      PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON,
+      PD_BOA, PD_BOA, PD_ERR, PD_PVL, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON,
+      PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_BOA)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR,
+      PD_BOA, PD_BOA, PD_ERR, PD_PVL, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOA, PD_BOV, PD_LON,
+      PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_FLN, PD_ERR, PD_ERR, PD_ERR, PD_BOA, PD_BOV, PD_ERR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_LON)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_BOV, PD_LON,
+      PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_BOV, PD_LON,
+      PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_BOV, PD_LON};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_STR)] = {
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_SCE)] = {
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+      PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_PVL)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_BOV, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_BOV, PD_ERR,
+      PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_BOV, PD_ERR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_BFN)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR,
+      PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_FLN, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_BOV, PD_ERR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_FLN)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR,
+      PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_PFN, PD_FNE, PD_FLN, PD_FLN, PD_FLN, PD_BOV, PD_FLN};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_FNE)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+      PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_PFN)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_PFN, PD_BOV, PD_ERR};
+    pda_tt[static_cast<StateT>(pda_state_t::PD_ERR)] = {
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ANL, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ANL, PD_ERR,
+      PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ANL, PD_ERR};
+  }
   return pda_tt;
 }
 
 /**
  * @brief Getting the translation table
  */
-auto get_translation_table()
+auto get_translation_table(bool include_line_delimiter)
 {
   constexpr auto StructBegin       = token_t::StructBegin;
   constexpr auto StructEnd         = token_t::StructEnd;
@@ -507,6 +715,15 @@ auto get_translation_table()
   constexpr auto ValueEnd          = token_t::ValueEnd;
   constexpr auto ErrorBegin        = token_t::ErrorBegin;
 
+  /**
+   * @brief Appends token_t::LineEnd token to the given token sequence, if and only if
+   * `include_line_delimiter` is true.
+   */
+  auto nl_tokens = [include_line_delimiter](std::vector<char> tokens) {
+    if (include_line_delimiter) { tokens.push_back(token_t::LineEnd); }
+    return tokens;
+  };
+
   std::array<std::array<std::vector<char>, NUM_PDA_SGIDS>, PD_NUM_STATES> pda_tlt;
   pda_tlt[static_cast<StateT>(pda_state_t::PD_BOV)] = {{                /*ROOT*/
                                                         {StructBegin},  // OPENING_BRACE
@@ -518,7 +735,7 @@ auto get_translation_table()
                                                         {ErrorBegin},   // COMMA
                                                         {ErrorBegin},   // COLON
                                                         {},             // WHITE_SPACE
-                                                        {},             // LINE_BREAK
+                                                        nl_tokens({}),  // LINE_BREAK
                                                         {ValueBegin},   // OTHER
                                                         /*LIST*/
                                                         {StructBegin},  // OPENING_BRACE
@@ -530,7 +747,7 @@ auto get_translation_table()
                                                         {ErrorBegin},   // COMMA
                                                         {ErrorBegin},   // COLON
                                                         {},             // WHITE_SPACE
-                                                        {},             // LINE_BREAK
+                                                        nl_tokens({}),  // LINE_BREAK
                                                         {ValueBegin},   // OTHER
                                                         /*STRUCT*/
                                                         {StructBegin},   // OPENING_BRACE
@@ -542,7 +759,7 @@ auto get_translation_table()
                                                         {ErrorBegin},    // COMMA
                                                         {ErrorBegin},    // COLON
                                                         {},              // WHITE_SPACE
-                                                        {},              // LINE_BREAK
+                                                        nl_tokens({}),   // LINE_BREAK
                                                         {ValueBegin}}};  // OTHER
   pda_tlt[static_cast<StateT>(pda_state_t::PD_BOA)] = {
     {                                                                    /*ROOT*/
@@ -555,7 +772,7 @@ auto get_translation_table()
      {ErrorBegin},                                                       // COMMA
      {ErrorBegin},                                                       // COLON
      {ErrorBegin},                                                       // WHITE_SPACE
-     {ErrorBegin},                                                       // LINE_BREAK
+     nl_tokens({ErrorBegin}),                                            // LINE_BREAK
      {ErrorBegin},                                                       // OTHER
      /*LIST*/
      {StructBegin},  // OPENING_BRACE
@@ -567,7 +784,7 @@ auto get_translation_table()
      {ErrorBegin},   // COMMA
      {ErrorBegin},   // COLON
      {},             // WHITE_SPACE
-     {},             // LINE_BREAK
+     nl_tokens({}),  // LINE_BREAK
      {ValueBegin},   // OTHER
      /*STRUCT*/
      {ErrorBegin},                         // OPENING_BRACE
@@ -579,7 +796,7 @@ auto get_translation_table()
      {ErrorBegin},                         // COMMA
      {ErrorBegin},                         // COLON
      {},                                   // WHITE_SPACE
-     {},                                   // LINE_BREAK
+     nl_tokens({}),                        // LINE_BREAK
      {ErrorBegin}}};                       // OTHER
   pda_tlt[static_cast<StateT>(pda_state_t::PD_LON)] = {
     {                                      /*ROOT*/
@@ -592,132 +809,132 @@ auto get_translation_table()
      {ErrorBegin},                         // COMMA
      {ErrorBegin},                         // COLON
      {ValueEnd},                           // WHITE_SPACE
-     {ValueEnd},                           // LINE_BREAK
+     nl_tokens({ValueEnd}),                // LINE_BREAK
      {},                                   // OTHER
      /*LIST*/
-     {ErrorBegin},         // OPENING_BRACE
-     {ErrorBegin},         // OPENING_BRACKET
-     {ErrorBegin},         // CLOSING_BRACE
-     {ValueEnd, ListEnd},  // CLOSING_BRACKET
-     {ErrorBegin},         // QUOTE
-     {ErrorBegin},         // ESCAPE
-     {ValueEnd},           // COMMA
-     {ErrorBegin},         // COLON
-     {ValueEnd},           // WHITE_SPACE
-     {ValueEnd},           // LINE_BREAK
-     {},                   // OTHER
+     {ErrorBegin},           // OPENING_BRACE
+     {ErrorBegin},           // OPENING_BRACKET
+     {ErrorBegin},           // CLOSING_BRACE
+     {ValueEnd, ListEnd},    // CLOSING_BRACKET
+     {ErrorBegin},           // QUOTE
+     {ErrorBegin},           // ESCAPE
+     {ValueEnd},             // COMMA
+     {ErrorBegin},           // COLON
+     {ValueEnd},             // WHITE_SPACE
+     nl_tokens({ValueEnd}),  // LINE_BREAK
+     {},                     // OTHER
      /*STRUCT*/
-     {ErrorBegin},                                                    // OPENING_BRACE
-     {ErrorBegin},                                                    // OPENING_BRACKET
-     {ValueEnd, StructMemberEnd, StructEnd},                          // CLOSING_BRACE
-     {ErrorBegin},                                                    // CLOSING_BRACKET
-     {ErrorBegin},                                                    // QUOTE
-     {ErrorBegin},                                                    // ESCAPE
-     {ValueEnd, StructMemberEnd},                                     // COMMA
-     {ErrorBegin},                                                    // COLON
-     {ValueEnd},                                                      // WHITE_SPACE
-     {ValueEnd},                                                      // LINE_BREAK
-     {}}};                                                            // OTHER
-
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_STR)] = {{              /*ROOT*/
-                                                        {},           // OPENING_BRACE
-                                                        {},           // OPENING_BRACKET
-                                                        {},           // CLOSING_BRACE
-                                                        {},           // CLOSING_BRACKET
-                                                        {StringEnd},  // QUOTE
-                                                        {},           // ESCAPE
-                                                        {},           // COMMA
-                                                        {},           // COLON
-                                                        {},           // WHITE_SPACE
-                                                        {},           // LINE_BREAK
-                                                        {},           // OTHER
+     {ErrorBegin},                                                      // OPENING_BRACE
+     {ErrorBegin},                                                      // OPENING_BRACKET
+     {ValueEnd, StructMemberEnd, StructEnd},                            // CLOSING_BRACE
+     {ErrorBegin},                                                      // CLOSING_BRACKET
+     {ErrorBegin},                                                      // QUOTE
+     {ErrorBegin},                                                      // ESCAPE
+     {ValueEnd, StructMemberEnd},                                       // COMMA
+     {ErrorBegin},                                                      // COLON
+     {ValueEnd},                                                        // WHITE_SPACE
+     nl_tokens({ValueEnd}),                                             // LINE_BREAK
+     {}}};                                                              // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_STR)] = {{                /*ROOT*/
+                                                        {},             // OPENING_BRACE
+                                                        {},             // OPENING_BRACKET
+                                                        {},             // CLOSING_BRACE
+                                                        {},             // CLOSING_BRACKET
+                                                        {StringEnd},    // QUOTE
+                                                        {},             // ESCAPE
+                                                        {},             // COMMA
+                                                        {},             // COLON
+                                                        {},             // WHITE_SPACE
+                                                        nl_tokens({}),  // LINE_BREAK
+                                                        {},             // OTHER
                                                         /*LIST*/
-                                                        {},           // OPENING_BRACE
-                                                        {},           // OPENING_BRACKET
-                                                        {},           // CLOSING_BRACE
-                                                        {},           // CLOSING_BRACKET
-                                                        {StringEnd},  // QUOTE
-                                                        {},           // ESCAPE
-                                                        {},           // COMMA
-                                                        {},           // COLON
-                                                        {},           // WHITE_SPACE
-                                                        {},           // LINE_BREAK
-                                                        {},           // OTHER
+                                                        {},             // OPENING_BRACE
+                                                        {},             // OPENING_BRACKET
+                                                        {},             // CLOSING_BRACE
+                                                        {},             // CLOSING_BRACKET
+                                                        {StringEnd},    // QUOTE
+                                                        {},             // ESCAPE
+                                                        {},             // COMMA
+                                                        {},             // COLON
+                                                        {},             // WHITE_SPACE
+                                                        nl_tokens({}),  // LINE_BREAK
+                                                        {},             // OTHER
                                                         /*STRUCT*/
-                                                        {},           // OPENING_BRACE
-                                                        {},           // OPENING_BRACKET
-                                                        {},           // CLOSING_BRACE
-                                                        {},           // CLOSING_BRACKET
-                                                        {StringEnd},  // QUOTE
-                                                        {},           // ESCAPE
-                                                        {},           // COMMA
-                                                        {},           // COLON
-                                                        {},           // WHITE_SPACE
-                                                        {},           // LINE_BREAK
-                                                        {}}};         // OTHER
-
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_SCE)] = {{              /*ROOT*/
-                                                        {},           // OPENING_BRACE
-                                                        {},           // OPENING_BRACKET
-                                                        {},           // CLOSING_BRACE
-                                                        {},           // CLOSING_BRACKET
-                                                        {},           // QUOTE
-                                                        {},           // ESCAPE
-                                                        {},           // COMMA
-                                                        {},           // COLON
-                                                        {},           // WHITE_SPACE
-                                                        {},           // LINE_BREAK
-                                                        {},           // OTHER
+                                                        {},             // OPENING_BRACE
+                                                        {},             // OPENING_BRACKET
+                                                        {},             // CLOSING_BRACE
+                                                        {},             // CLOSING_BRACKET
+                                                        {StringEnd},    // QUOTE
+                                                        {},             // ESCAPE
+                                                        {},             // COMMA
+                                                        {},             // COLON
+                                                        {},             // WHITE_SPACE
+                                                        nl_tokens({}),  // LINE_BREAK
+                                                        {}}};           // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_SCE)] = {{                /*ROOT*/
+                                                        {},             // OPENING_BRACE
+                                                        {},             // OPENING_BRACKET
+                                                        {},             // CLOSING_BRACE
+                                                        {},             // CLOSING_BRACKET
+                                                        {},             // QUOTE
+                                                        {},             // ESCAPE
+                                                        {},             // COMMA
+                                                        {},             // COLON
+                                                        {},             // WHITE_SPACE
+                                                        nl_tokens({}),  // LINE_BREAK
+                                                        {},             // OTHER
                                                         /*LIST*/
-                                                        {},  // OPENING_BRACE
-                                                        {},  // OPENING_BRACKET
-                                                        {},  // CLOSING_BRACE
-                                                        {},  // CLOSING_BRACKET
-                                                        {},  // QUOTE
-                                                        {},  // ESCAPE
-                                                        {},  // COMMA
-                                                        {},  // COLON
-                                                        {},  // WHITE_SPACE
-                                                        {},  // LINE_BREAK
-                                                        {},  // OTHER
+                                                        {},             // OPENING_BRACE
+                                                        {},             // OPENING_BRACKET
+                                                        {},             // CLOSING_BRACE
+                                                        {},             // CLOSING_BRACKET
+                                                        {},             // QUOTE
+                                                        {},             // ESCAPE
+                                                        {},             // COMMA
+                                                        {},             // COLON
+                                                        {},             // WHITE_SPACE
+                                                        nl_tokens({}),  // LINE_BREAK
+                                                        {},             // OTHER
                                                         /*STRUCT*/
-                                                        {},    // OPENING_BRACE
-                                                        {},    // OPENING_BRACKET
-                                                        {},    // CLOSING_BRACE
-                                                        {},    // CLOSING_BRACKET
-                                                        {},    // QUOTE
-                                                        {},    // ESCAPE
-                                                        {},    // COMMA
-                                                        {},    // COLON
-                                                        {},    // WHITE_SPACE
-                                                        {},    // LINE_BREAK
-                                                        {}}};  // OTHER
+                                                        {},             // OPENING_BRACE
+                                                        {},             // OPENING_BRACKET
+                                                        {},             // CLOSING_BRACE
+                                                        {},             // CLOSING_BRACKET
+                                                        {},             // QUOTE
+                                                        {},             // ESCAPE
+                                                        {},             // COMMA
+                                                        {},             // COLON
+                                                        {},             // WHITE_SPACE
+                                                        nl_tokens({}),  // LINE_BREAK
+                                                        {}}};           // OTHER
 
   pda_tlt[static_cast<StateT>(pda_state_t::PD_PVL)] = {
-    {               /*ROOT*/
-     {ErrorBegin},  // OPENING_BRACE
-     {ErrorBegin},  // OPENING_BRACKET
-     {ErrorBegin},  // CLOSING_BRACE
-     {ErrorBegin},  // CLOSING_BRACKET
-     {ErrorBegin},  // QUOTE
-     {ErrorBegin},  // ESCAPE
-     {ErrorBegin},  // COMMA
-     {ErrorBegin},  // COLON
-     {},            // WHITE_SPACE
-     {},            // LINE_BREAK
-     {ErrorBegin},  // OTHER
+    {                /*ROOT*/
+     {ErrorBegin},   // OPENING_BRACE
+     {ErrorBegin},   // OPENING_BRACKET
+     {ErrorBegin},   // CLOSING_BRACE
+     {ErrorBegin},   // CLOSING_BRACKET
+     {ErrorBegin},   // QUOTE
+     {ErrorBegin},   // ESCAPE
+     {ErrorBegin},   // COMMA
+     {ErrorBegin},   // COLON
+     {},             // WHITE_SPACE
+     nl_tokens({}),  // LINE_BREAK
+     {ErrorBegin},   // OTHER
      /*LIST*/
-     {ErrorBegin},  // OPENING_BRACE
-     {ErrorBegin},  // OPENING_BRACKET
-     {ErrorBegin},  // CLOSING_BRACE
-     {ListEnd},     // CLOSING_BRACKET
-     {ErrorBegin},  // QUOTE
-     {ErrorBegin},  // ESCAPE
-     {},            // COMMA
-     {ErrorBegin},  // COLON
-     {},            // WHITE_SPACE
-     {},            // LINE_BREAK
-     {ErrorBegin},  // OTHER
+     {ErrorBegin},   // OPENING_BRACE
+     {ErrorBegin},   // OPENING_BRACKET
+     {ErrorBegin},   // CLOSING_BRACE
+     {ListEnd},      // CLOSING_BRACKET
+     {ErrorBegin},   // QUOTE
+     {ErrorBegin},   // ESCAPE
+     {},             // COMMA
+     {ErrorBegin},   // COLON
+     {},             // WHITE_SPACE
+     nl_tokens({}),  // LINE_BREAK
+     {ErrorBegin},   // OTHER
      /*STRUCT*/
      {ErrorBegin},                  // OPENING_BRACE
      {ErrorBegin},                  // OPENING_BRACKET
@@ -728,145 +945,145 @@ auto get_translation_table()
      {StructMemberEnd},             // COMMA
      {ErrorBegin},                  // COLON
      {},                            // WHITE_SPACE
-     {},                            // LINE_BREAK
+     nl_tokens({}),                 // LINE_BREAK
      {ErrorBegin}}};                // OTHER
 
   pda_tlt[static_cast<StateT>(pda_state_t::PD_BFN)] = {
-    {               /*ROOT*/
-     {ErrorBegin},  // OPENING_BRACE
-     {ErrorBegin},  // OPENING_BRACKET
-     {ErrorBegin},  // CLOSING_BRACE
-     {ErrorBegin},  // CLOSING_BRACKET
-     {ErrorBegin},  // QUOTE
-     {ErrorBegin},  // ESCAPE
-     {ErrorBegin},  // COMMA
-     {ErrorBegin},  // COLON
-     {ErrorBegin},  // WHITE_SPACE
-     {ErrorBegin},  // LINE_BREAK
-     {ErrorBegin},  // OTHER
+    {                          /*ROOT*/
+     {ErrorBegin},             // OPENING_BRACE
+     {ErrorBegin},             // OPENING_BRACKET
+     {ErrorBegin},             // CLOSING_BRACE
+     {ErrorBegin},             // CLOSING_BRACKET
+     {ErrorBegin},             // QUOTE
+     {ErrorBegin},             // ESCAPE
+     {ErrorBegin},             // COMMA
+     {ErrorBegin},             // COLON
+     {ErrorBegin},             // WHITE_SPACE
+     nl_tokens({ErrorBegin}),  // LINE_BREAK
+     {ErrorBegin},             // OTHER
      /*LIST*/
-     {ErrorBegin},  // OPENING_BRACE
-     {ErrorBegin},  // OPENING_BRACKET
-     {ErrorBegin},  // CLOSING_BRACE
-     {ErrorBegin},  // CLOSING_BRACKET
-     {ErrorBegin},  // QUOTE
-     {ErrorBegin},  // ESCAPE
-     {ErrorBegin},  // COMMA
-     {ErrorBegin},  // COLON
-     {ErrorBegin},  // WHITE_SPACE
-     {ErrorBegin},  // LINE_BREAK
-     {ErrorBegin},  // OTHER
+     {ErrorBegin},             // OPENING_BRACE
+     {ErrorBegin},             // OPENING_BRACKET
+     {ErrorBegin},             // CLOSING_BRACE
+     {ErrorBegin},             // CLOSING_BRACKET
+     {ErrorBegin},             // QUOTE
+     {ErrorBegin},             // ESCAPE
+     {ErrorBegin},             // COMMA
+     {ErrorBegin},             // COLON
+     {ErrorBegin},             // WHITE_SPACE
+     nl_tokens({ErrorBegin}),  // LINE_BREAK
+     {ErrorBegin},             // OTHER
      /*STRUCT*/
-     {ErrorBegin},                                                     // OPENING_BRACE
-     {ErrorBegin},                                                     // OPENING_BRACKET
-     {StructEnd},                                                      // CLOSING_BRACE
-     {ErrorBegin},                                                     // CLOSING_BRACKET
-     {StructMemberBegin, FieldNameBegin},                              // QUOTE
-     {ErrorBegin},                                                     // ESCAPE
-     {ErrorBegin},                                                     // COMMA
-     {ErrorBegin},                                                     // COLON
-     {},                                                               // WHITE_SPACE
-     {},                                                               // LINE_BREAK
-     {ErrorBegin}}};                                                   // OTHER
-
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_FLN)] = {{               /*ROOT*/
-                                                        {ErrorBegin},  // OPENING_BRACE
-                                                        {ErrorBegin},  // OPENING_BRACKET
-                                                        {ErrorBegin},  // CLOSING_BRACE
-                                                        {ErrorBegin},  // CLOSING_BRACKET
-                                                        {ErrorBegin},  // QUOTE
-                                                        {ErrorBegin},  // ESCAPE
-                                                        {ErrorBegin},  // COMMA
-                                                        {ErrorBegin},  // COLON
-                                                        {ErrorBegin},  // WHITE_SPACE
-                                                        {ErrorBegin},  // LINE_BREAK
-                                                        {ErrorBegin},  // OTHER
+     {ErrorBegin},                                                                // OPENING_BRACE
+     {ErrorBegin},                                                                // OPENING_BRACKET
+     {StructEnd},                                                                 // CLOSING_BRACE
+     {ErrorBegin},                                                                // CLOSING_BRACKET
+     {StructMemberBegin, FieldNameBegin},                                         // QUOTE
+     {ErrorBegin},                                                                // ESCAPE
+     {ErrorBegin},                                                                // COMMA
+     {ErrorBegin},                                                                // COLON
+     {},                                                                          // WHITE_SPACE
+     nl_tokens({}),                                                               // LINE_BREAK
+     {ErrorBegin}}};                                                              // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_FLN)] = {{                          /*ROOT*/
+                                                        {ErrorBegin},             // OPENING_BRACE
+                                                        {ErrorBegin},             // OPENING_BRACKET
+                                                        {ErrorBegin},             // CLOSING_BRACE
+                                                        {ErrorBegin},             // CLOSING_BRACKET
+                                                        {ErrorBegin},             // QUOTE
+                                                        {ErrorBegin},             // ESCAPE
+                                                        {ErrorBegin},             // COMMA
+                                                        {ErrorBegin},             // COLON
+                                                        {ErrorBegin},             // WHITE_SPACE
+                                                        nl_tokens({ErrorBegin}),  // LINE_BREAK
+                                                        {ErrorBegin},             // OTHER
                                                         /*LIST*/
-                                                        {ErrorBegin},  // OPENING_BRACE
-                                                        {ErrorBegin},  // OPENING_BRACKET
-                                                        {ErrorBegin},  // CLOSING_BRACE
-                                                        {ErrorBegin},  // CLOSING_BRACKET
-                                                        {ErrorBegin},  // QUOTE
-                                                        {ErrorBegin},  // ESCAPE
-                                                        {ErrorBegin},  // COMMA
-                                                        {ErrorBegin},  // COLON
-                                                        {ErrorBegin},  // WHITE_SPACE
-                                                        {ErrorBegin},  // LINE_BREAK
-                                                        {ErrorBegin},  // OTHER
+                                                        {ErrorBegin},             // OPENING_BRACE
+                                                        {ErrorBegin},             // OPENING_BRACKET
+                                                        {ErrorBegin},             // CLOSING_BRACE
+                                                        {ErrorBegin},             // CLOSING_BRACKET
+                                                        {ErrorBegin},             // QUOTE
+                                                        {ErrorBegin},             // ESCAPE
+                                                        {ErrorBegin},             // COMMA
+                                                        {ErrorBegin},             // COLON
+                                                        {ErrorBegin},             // WHITE_SPACE
+                                                        nl_tokens({ErrorBegin}),  // LINE_BREAK
+                                                        {ErrorBegin},             // OTHER
                                                         /*STRUCT*/
-                                                        {},              // OPENING_BRACE
-                                                        {},              // OPENING_BRACKET
-                                                        {},              // CLOSING_BRACE
-                                                        {},              // CLOSING_BRACKET
-                                                        {FieldNameEnd},  // QUOTE
-                                                        {},              // ESCAPE
-                                                        {},              // COMMA
-                                                        {},              // COLON
-                                                        {},              // WHITE_SPACE
-                                                        {},              // LINE_BREAK
-                                                        {}}};            // OTHER
-
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_FNE)] = {{                 /*ROOT*/
-                                                        {ErrorBegin},    // OPENING_BRACE
-                                                        {ErrorBegin},    // OPENING_BRACKET
-                                                        {ErrorBegin},    // CLOSING_BRACE
-                                                        {ErrorBegin},    // CLOSING_BRACKET
-                                                        {ErrorBegin},    // QUOTE
-                                                        {ErrorBegin},    // ESCAPE
-                                                        {ErrorBegin},    // COMMA
-                                                        {ErrorBegin},    // COLON
-                                                        {ErrorBegin},    // WHITE_SPACE
-                                                        {ErrorBegin},    // LINE_BREAK
-                                                        {ErrorBegin},    // OTHER
+                                                        {},                       // OPENING_BRACE
+                                                        {},                       // OPENING_BRACKET
+                                                        {},                       // CLOSING_BRACE
+                                                        {},                       // CLOSING_BRACKET
+                                                        {FieldNameEnd},           // QUOTE
+                                                        {},                       // ESCAPE
+                                                        {},                       // COMMA
+                                                        {},                       // COLON
+                                                        {},                       // WHITE_SPACE
+                                                        nl_tokens({}),            // LINE_BREAK
+                                                        {}}};                     // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_FNE)] = {{                          /*ROOT*/
+                                                        {ErrorBegin},             // OPENING_BRACE
+                                                        {ErrorBegin},             // OPENING_BRACKET
+                                                        {ErrorBegin},             // CLOSING_BRACE
+                                                        {ErrorBegin},             // CLOSING_BRACKET
+                                                        {ErrorBegin},             // QUOTE
+                                                        {ErrorBegin},             // ESCAPE
+                                                        {ErrorBegin},             // COMMA
+                                                        {ErrorBegin},             // COLON
+                                                        {ErrorBegin},             // WHITE_SPACE
+                                                        nl_tokens({ErrorBegin}),  // LINE_BREAK
+                                                        {ErrorBegin},             // OTHER
                                                         /*LIST*/
-                                                        {ErrorBegin},  // OPENING_BRACE
-                                                        {ErrorBegin},  // OPENING_BRACKET
-                                                        {ErrorBegin},  // CLOSING_BRACE
-                                                        {ErrorBegin},  // CLOSING_BRACKET
-                                                        {ErrorBegin},  // QUOTE
-                                                        {ErrorBegin},  // ESCAPE
-                                                        {ErrorBegin},  // COMMA
-                                                        {ErrorBegin},  // COLON
-                                                        {ErrorBegin},  // WHITE_SPACE
-                                                        {ErrorBegin},  // LINE_BREAK
-                                                        {ErrorBegin},  // OTHER
+                                                        {ErrorBegin},             // OPENING_BRACE
+                                                        {ErrorBegin},             // OPENING_BRACKET
+                                                        {ErrorBegin},             // CLOSING_BRACE
+                                                        {ErrorBegin},             // CLOSING_BRACKET
+                                                        {ErrorBegin},             // QUOTE
+                                                        {ErrorBegin},             // ESCAPE
+                                                        {ErrorBegin},             // COMMA
+                                                        {ErrorBegin},             // COLON
+                                                        {ErrorBegin},             // WHITE_SPACE
+                                                        nl_tokens({ErrorBegin}),  // LINE_BREAK
+                                                        {ErrorBegin},             // OTHER
                                                         /*STRUCT*/
-                                                        {},            // OPENING_BRACE
-                                                        {},            // OPENING_BRACKET
-                                                        {},            // CLOSING_BRACE
-                                                        {},            // CLOSING_BRACKET
-                                                        {},            // QUOTE
-                                                        {},            // ESCAPE
-                                                        {},            // COMMA
-                                                        {},            // COLON
-                                                        {},            // WHITE_SPACE
-                                                        {},            // LINE_BREAK
-                                                        {}}};          // OTHER
-
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_PFN)] = {{               /*ROOT*/
-                                                        {ErrorBegin},  // OPENING_BRACE
-                                                        {ErrorBegin},  // OPENING_BRACKET
-                                                        {ErrorBegin},  // CLOSING_BRACE
-                                                        {ErrorBegin},  // CLOSING_BRACKET
-                                                        {ErrorBegin},  // QUOTE
-                                                        {ErrorBegin},  // ESCAPE
-                                                        {ErrorBegin},  // COMMA
-                                                        {ErrorBegin},  // COLON
-                                                        {ErrorBegin},  // WHITE_SPACE
-                                                        {ErrorBegin},  // LINE_BREAK
-                                                        {ErrorBegin},  // OTHER
+                                                        {},                       // OPENING_BRACE
+                                                        {},                       // OPENING_BRACKET
+                                                        {},                       // CLOSING_BRACE
+                                                        {},                       // CLOSING_BRACKET
+                                                        {},                       // QUOTE
+                                                        {},                       // ESCAPE
+                                                        {},                       // COMMA
+                                                        {},                       // COLON
+                                                        {},                       // WHITE_SPACE
+                                                        nl_tokens({}),            // LINE_BREAK
+                                                        {}}};                     // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_PFN)] = {{                          /*ROOT*/
+                                                        {ErrorBegin},             // OPENING_BRACE
+                                                        {ErrorBegin},             // OPENING_BRACKET
+                                                        {ErrorBegin},             // CLOSING_BRACE
+                                                        {ErrorBegin},             // CLOSING_BRACKET
+                                                        {ErrorBegin},             // QUOTE
+                                                        {ErrorBegin},             // ESCAPE
+                                                        {ErrorBegin},             // COMMA
+                                                        {ErrorBegin},             // COLON
+                                                        {ErrorBegin},             // WHITE_SPACE
+                                                        nl_tokens({ErrorBegin}),  // LINE_BREAK
+                                                        {ErrorBegin},             // OTHER
                                                         /*LIST*/
-                                                        {ErrorBegin},  // OPENING_BRACE
-                                                        {ErrorBegin},  // OPENING_BRACKET
-                                                        {ErrorBegin},  // CLOSING_BRACE
-                                                        {ErrorBegin},  // CLOSING_BRACKET
-                                                        {ErrorBegin},  // QUOTE
-                                                        {ErrorBegin},  // ESCAPE
-                                                        {ErrorBegin},  // COMMA
-                                                        {ErrorBegin},  // COLON
-                                                        {ErrorBegin},  // WHITE_SPACE
-                                                        {ErrorBegin},  // LINE_BREAK
-                                                        {ErrorBegin},  // OTHER
+                                                        {ErrorBegin},             // OPENING_BRACE
+                                                        {ErrorBegin},             // OPENING_BRACKET
+                                                        {ErrorBegin},             // CLOSING_BRACE
+                                                        {ErrorBegin},             // CLOSING_BRACKET
+                                                        {ErrorBegin},             // QUOTE
+                                                        {ErrorBegin},             // ESCAPE
+                                                        {ErrorBegin},             // COMMA
+                                                        {ErrorBegin},             // COLON
+                                                        {ErrorBegin},             // WHITE_SPACE
+                                                        nl_tokens({ErrorBegin}),  // LINE_BREAK
+                                                        {ErrorBegin},             // OTHER
                                                         /*STRUCT*/
                                                         {ErrorBegin},    // OPENING_BRACE
                                                         {ErrorBegin},    // OPENING_BRACKET
@@ -877,7 +1094,7 @@ auto get_translation_table()
                                                         {ErrorBegin},    // COMMA
                                                         {},              // COLON
                                                         {},              // WHITE_SPACE
-                                                        {},              // LINE_BREAK
+                                                        nl_tokens({}),   // LINE_BREAK
                                                         {ErrorBegin}}};  // OTHER
 
   pda_tlt[static_cast<StateT>(pda_state_t::PD_ERR)] = {{                 /*ROOT*/
@@ -890,32 +1107,32 @@ auto get_translation_table()
                                                         {},              // COMMA
                                                         {},              // COLON
                                                         {},              // WHITE_SPACE
-                                                        {},              // LINE_BREAK
+                                                        nl_tokens({}),   // LINE_BREAK
                                                         {},              // OTHER
                                                         /*LIST*/
-                                                        {},  // OPENING_BRACE
-                                                        {},  // OPENING_BRACKET
-                                                        {},  // CLOSING_BRACE
-                                                        {},  // CLOSING_BRACKET
-                                                        {},  // QUOTE
-                                                        {},  // ESCAPE
-                                                        {},  // COMMA
-                                                        {},  // COLON
-                                                        {},  // WHITE_SPACE
-                                                        {},  // LINE_BREAK
-                                                        {},  // OTHER
+                                                        {},             // OPENING_BRACE
+                                                        {},             // OPENING_BRACKET
+                                                        {},             // CLOSING_BRACE
+                                                        {},             // CLOSING_BRACKET
+                                                        {},             // QUOTE
+                                                        {},             // ESCAPE
+                                                        {},             // COMMA
+                                                        {},             // COLON
+                                                        {},             // WHITE_SPACE
+                                                        nl_tokens({}),  // LINE_BREAK
+                                                        {},             // OTHER
                                                         /*STRUCT*/
-                                                        {},    // OPENING_BRACE
-                                                        {},    // OPENING_BRACKET
-                                                        {},    // CLOSING_BRACE
-                                                        {},    // CLOSING_BRACKET
-                                                        {},    // QUOTE
-                                                        {},    // ESCAPE
-                                                        {},    // COMMA
-                                                        {},    // COLON
-                                                        {},    // WHITE_SPACE
-                                                        {},    // LINE_BREAK
-                                                        {}}};  // OTHER
+                                                        {},             // OPENING_BRACE
+                                                        {},             // OPENING_BRACKET
+                                                        {},             // CLOSING_BRACE
+                                                        {},             // CLOSING_BRACKET
+                                                        {},             // QUOTE
+                                                        {},             // ESCAPE
+                                                        {},             // COMMA
+                                                        {},             // COLON
+                                                        {},             // WHITE_SPACE
+                                                        nl_tokens({}),  // LINE_BREAK
+                                                        {}}};           // OTHER
   return pda_tlt;
 }
 
@@ -929,9 +1146,32 @@ struct JSONToStackOp {
   template <typename StackSymbolT>
   constexpr CUDF_HOST_DEVICE fst::stack_op_type operator()(StackSymbolT const& stack_symbol) const
   {
-    return (stack_symbol == '{' || stack_symbol == '[')   ? fst::stack_op_type::PUSH
-           : (stack_symbol == '}' || stack_symbol == ']') ? fst::stack_op_type::POP
-                                                          : fst::stack_op_type::READ;
+    switch (stack_symbol) {
+      case '{':
+      case '[': return fst::stack_op_type::PUSH;
+      case '}':
+      case ']': return fst::stack_op_type::POP;
+      default: return fst::stack_op_type::READ;
+    }
+  }
+};
+
+/**
+ * @brief Function object that maps brackets and braces to push and pop operations and newlines
+ * to stack-reset operations
+ */
+struct JSONWithRecoveryToStackOp {
+  template <typename StackSymbolT>
+  constexpr CUDF_HOST_DEVICE fst::stack_op_type operator()(StackSymbolT const& stack_symbol) const
+  {
+    switch (stack_symbol) {
+      case '{':
+      case '[': return fst::stack_op_type::PUSH;
+      case '}':
+      case ']': return fst::stack_op_type::POP;
+      case '\n': return fst::stack_op_type::RESET;
+      default: return fst::stack_op_type::READ;
+    }
   }
 };
 
@@ -1030,6 +1270,7 @@ namespace detail {
 
 void get_stack_context(device_span<SymbolT const> json_in,
                        SymbolT* d_top_of_stack,
+                       stack_behavior_t stack_behavior,
                        rmm::cuda_stream_view stream)
 {
   check_input_size(json_in.size());
@@ -1052,15 +1293,19 @@ void get_stack_context(device_span<SymbolT const> json_in,
   rmm::device_uvector<SymbolOffsetT> stack_op_indices{json_in.size(), stream};
 
   // Prepare finite-state transducer that only selects '{', '}', '[', ']' outside of quotes
-  using ToStackOpFstT =
-    cudf::io::fst::detail::Dfa<StackSymbolT,
-                               static_cast<int32_t>(
-                                 to_stack_op::dfa_symbol_group_id::NUM_SYMBOL_GROUPS),
-                               static_cast<int32_t>(to_stack_op::dfa_states::TT_NUM_STATES)>;
-  ToStackOpFstT json_to_stack_ops_fst{to_stack_op::symbol_groups,
-                                      to_stack_op::transition_table,
-                                      to_stack_op::translation_table,
-                                      stream};
+  constexpr auto max_translation_table_size =
+    to_stack_op::NUM_SYMBOL_GROUPS * to_stack_op::TT_NUM_STATES;
+
+  // Translation table specialized on the choice of whether to reset on newlines outside of strings
+  const auto translation_table = (stack_behavior == stack_behavior_t::ResetOnDelimiter)
+                                   ? to_stack_op::resetting_translation_table
+                                   : to_stack_op::translation_table;
+
+  auto json_to_stack_ops_fst = fst::detail::make_fst(
+    fst::detail::make_symbol_group_lut(to_stack_op::symbol_groups),
+    fst::detail::make_transition_table(to_stack_op::transition_table),
+    fst::detail::make_translation_table<max_translation_table_size>(translation_table),
+    stream);
 
   // "Search" for relevant occurrence of brackets and braces that indicate the beginning/end
   // of structs/lists
@@ -1075,16 +1320,80 @@ void get_stack_context(device_span<SymbolT const> json_in,
   // Copy back to actual number of stack operations
   auto const num_stack_ops = d_num_stack_ops.value(stream);
 
-  // stack operations with indices are converted to top of the stack for each character in the input
-  fst::sparse_stack_op_to_top_of_stack<StackLevelT>(
-    stack_ops.data(),
-    device_span<SymbolOffsetT>{stack_op_indices.data(), num_stack_ops},
-    JSONToStackOp{},
-    d_top_of_stack,
-    root_symbol,
-    read_symbol,
-    json_in.size(),
+  // Stack operations with indices are converted to top of the stack for each character in the input
+  if (stack_behavior == stack_behavior_t::ResetOnDelimiter) {
+    fst::sparse_stack_op_to_top_of_stack<StackLevelT>(
+      stack_ops.data(),
+      device_span<SymbolOffsetT>{stack_op_indices.data(), num_stack_ops},
+      JSONWithRecoveryToStackOp{},
+      d_top_of_stack,
+      root_symbol,
+      read_symbol,
+      json_in.size(),
+      stream);
+  } else {
+    fst::sparse_stack_op_to_top_of_stack<StackLevelT>(
+      stack_ops.data(),
+      device_span<SymbolOffsetT>{stack_op_indices.data(), num_stack_ops},
+      JSONToStackOp{},
+      d_top_of_stack,
+      root_symbol,
+      read_symbol,
+      json_in.size(),
+      stream);
+  }
+}
+
+std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> process_token_stream(
+  device_span<PdaTokenT const> tokens,
+  device_span<SymbolOffsetT const> token_indices,
+  rmm::cuda_stream_view stream)
+{
+  // Instantiate FST for post-processing the token stream to remove all tokens that belong to an
+  // invalid JSON line
+  token_filter::UnwrapTokenFromSymbolOp sgid_op{};
+  auto filter_fst =
+    fst::detail::make_fst(fst::detail::make_symbol_group_lut(token_filter::symbol_groups, sgid_op),
+                          fst::detail::make_transition_table(token_filter::transition_table),
+                          fst::detail::make_translation_functor(token_filter::TransduceToken{}),
+                          stream);
+
+  auto const mr = rmm::mr::get_current_device_resource();
+  rmm::device_scalar<SymbolOffsetT> d_num_selected_tokens(stream, mr);
+  rmm::device_uvector<PdaTokenT> filtered_tokens_out{tokens.size(), stream, mr};
+  rmm::device_uvector<SymbolOffsetT> filtered_token_indices_out{tokens.size(), stream, mr};
+
+  // The FST is run on the reverse token stream, discarding all tokens between ErrorBegin and the
+  // next LineEnd (LineEnd, inv_token_0, inv_token_1, ..., inv_token_n, ErrorBegin, LineEnd, ...),
+  // emitting a [StructBegin, StructEnd] pair on the end of such an invalid line. In that example,
+  // inv_token_i for i in [0, n] together with the ErrorBegin are removed and replaced with
+  // StructBegin, StructEnd. All LineEnd tokens are removed as well, since they are no longer
+  // relevant after this stage
+  filter_fst.Transduce(
+    thrust::make_reverse_iterator(thrust::make_zip_iterator(tokens.data(), token_indices.data()) +
+                                  tokens.size()),
+    static_cast<SymbolOffsetT>(tokens.size()),
+    thrust::make_reverse_iterator(
+      thrust::make_zip_iterator(filtered_tokens_out.data(), filtered_token_indices_out.data()) +
+      tokens.size()),
+    thrust::make_discard_iterator(),
+    d_num_selected_tokens.data(),
+    token_filter::start_state,
     stream);
+
+  auto const num_total_tokens = d_num_selected_tokens.value(stream);
+  rmm::device_uvector<PdaTokenT> tokens_out{num_total_tokens, stream, mr};
+  rmm::device_uvector<SymbolOffsetT> token_indices_out{num_total_tokens, stream, mr};
+  thrust::copy(rmm::exec_policy(stream),
+               filtered_tokens_out.end() - num_total_tokens,
+               filtered_tokens_out.end(),
+               tokens_out.data());
+  thrust::copy(rmm::exec_policy(stream),
+               filtered_token_indices_out.end() - num_total_tokens,
+               filtered_token_indices_out.end(),
+               token_indices_out.data());
+
+  return std::make_pair(std::move(tokens_out), std::move(token_indices_out));
 }
 
 std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> get_token_stream(
@@ -1100,13 +1409,25 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
 
   auto const new_line_delimited_json = options.is_enabled_lines();
 
+  // (!new_line_delimited_json)                         => JSON
+  // (new_line_delimited_json and recover_from_error)   => JSON_LINES_RECOVER
+  // (new_line_delimited_json and !recover_from_error)  => JSON_LINES
+  auto format = new_line_delimited_json
+                  ? (options.recovery_mode() == json_recovery_mode_t::RECOVER_WITH_NULL
+                       ? tokenizer_pda::json_format_cfg_t::JSON_LINES_RECOVER
+                       : tokenizer_pda::json_format_cfg_t::JSON_LINES)
+                  : tokenizer_pda::json_format_cfg_t::JSON;
+
   // Prepare for PDA transducer pass, merging input symbols with stack symbols
-  rmm::device_uvector<PdaSymbolGroupIdT> pda_sgids = [json_in, stream]() {
+  auto const recover_from_error = (format == tokenizer_pda::json_format_cfg_t::JSON_LINES_RECOVER);
+  rmm::device_uvector<PdaSymbolGroupIdT> pda_sgids = [json_in, stream, recover_from_error]() {
     // Memory holding the top-of-stack stack context for the input
     rmm::device_uvector<StackSymbolT> stack_op_indices{json_in.size(), stream};
 
     // Identify what is the stack context for each input character (JSON-root, struct, or list)
-    get_stack_context(json_in, stack_op_indices.data(), stream);
+    auto const stack_behavior = recover_from_error ? stack_behavior_t::ResetOnDelimiter
+                                                   : stack_behavior_t::PushPopWithoutReset;
+    get_stack_context(json_in, stack_op_indices.data(), stack_behavior, stream);
 
     rmm::device_uvector<PdaSymbolGroupIdT> pda_sgids{json_in.size(), stream};
     auto zip_in = thrust::make_zip_iterator(json_in.data(), stack_op_indices.data());
@@ -1118,22 +1439,21 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
     return pda_sgids;
   }();
 
-  // PDA transducer alias
-  using ToTokenStreamFstT =
-    cudf::io::fst::detail::Dfa<StackSymbolT,
-                               tokenizer_pda::NUM_PDA_SGIDS,
-                               static_cast<tokenizer_pda::StateT>(
-                                 tokenizer_pda::pda_state_t::PD_NUM_STATES)>;
-
   // Instantiating PDA transducer
-  std::vector<std::vector<char>> pda_sgid_identity{tokenizer_pda::NUM_PDA_SGIDS};
+  std::array<std::vector<char>, tokenizer_pda::NUM_PDA_SGIDS> pda_sgid_identity{};
   std::generate(std::begin(pda_sgid_identity),
                 std::end(pda_sgid_identity),
                 [i = char{0}]() mutable { return std::vector<char>{i++}; });
-  ToTokenStreamFstT json_to_tokens_fst{pda_sgid_identity,
-                                       tokenizer_pda::get_transition_table(new_line_delimited_json),
-                                       tokenizer_pda::get_translation_table(),
-                                       stream};
+
+  constexpr auto max_translation_table_size =
+    tokenizer_pda::NUM_PDA_SGIDS *
+    static_cast<tokenizer_pda::StateT>(tokenizer_pda::pda_state_t::PD_NUM_STATES);
+  auto json_to_tokens_fst = fst::detail::make_fst(
+    fst::detail::make_symbol_group_lut(pda_sgid_identity),
+    fst::detail::make_transition_table(tokenizer_pda::get_transition_table(format)),
+    fst::detail::make_translation_table<max_translation_table_size>(
+      tokenizer_pda::get_translation_table(recover_from_error)),
+    stream);
 
   // Perform a PDA-transducer pass
   // Compute the maximum amount of tokens that can possibly be emitted for a given input size
@@ -1145,21 +1465,34 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
   auto const max_token_out_count =
     cudf::util::div_rounding_up_safe(json_in.size(), min_chars_per_struct) * max_tokens_per_struct;
   rmm::device_scalar<std::size_t> num_written_tokens{stream};
-  rmm::device_uvector<PdaTokenT> tokens{max_token_out_count, stream, mr};
-  rmm::device_uvector<SymbolOffsetT> tokens_indices{max_token_out_count, stream, mr};
+  // In case we're recovering on invalid JSON lines, post-processing the token stream requires to
+  // see a JSON-line delimiter as the very first item
+  SymbolOffsetT const delimiter_offset =
+    (format == tokenizer_pda::json_format_cfg_t::JSON_LINES_RECOVER ? 1 : 0);
+  rmm::device_uvector<PdaTokenT> tokens{max_token_out_count + delimiter_offset, stream, mr};
+  rmm::device_uvector<SymbolOffsetT> tokens_indices{
+    max_token_out_count + delimiter_offset, stream, mr};
 
   json_to_tokens_fst.Transduce(pda_sgids.begin(),
                                static_cast<SymbolOffsetT>(json_in.size()),
-                               tokens.data(),
-                               tokens_indices.data(),
+                               tokens.data() + delimiter_offset,
+                               tokens_indices.data() + delimiter_offset,
                                num_written_tokens.data(),
                                tokenizer_pda::start_state,
                                stream);
 
-  auto const num_total_tokens = num_written_tokens.value(stream);
+  auto const num_total_tokens = num_written_tokens.value(stream) + delimiter_offset;
   tokens.resize(num_total_tokens, stream);
   tokens_indices.resize(num_total_tokens, stream);
 
+  if (delimiter_offset == 1) {
+    tokens.set_element(0, token_t::LineEnd, stream);
+    auto [filtered_tokens, filtered_tokens_indices] =
+      process_token_stream(tokens, tokens_indices, stream);
+    tokens         = std::move(filtered_tokens);
+    tokens_indices = std::move(filtered_tokens_indices);
+  }
+
   CUDF_EXPECTS(num_total_tokens <= max_token_out_count,
                "Generated token count exceeds the expected token count");
 
@@ -1281,6 +1614,7 @@ void make_json_column(json_column& root_column,
       case token_t::ValueBegin: return "ValueBegin";
       case token_t::ValueEnd: return "ValueEnd";
       case token_t::ErrorBegin: return "ErrorBegin";
+      case token_t::LineEnd: return "LineEnd";
       default: return "Unknown";
     }
   };
diff --git a/cpp/tests/io/fst/fst_test.cu b/cpp/tests/io/fst/fst_test.cu
index 1970b29fee9..fd69251e4f5 100644
--- a/cpp/tests/io/fst/fst_test.cu
+++ b/cpp/tests/io/fst/fst_test.cu
@@ -129,9 +129,6 @@ TEST_F(FstTest, GroundTruth)
   // Type sufficiently large to index symbols within the input and output (may be unsigned)
   using SymbolOffsetT = uint32_t;
 
-  // Helper class to set up transition table, symbol group lookup table, and translation table
-  using DfaFstT = cudf::io::fst::detail::Dfa<char, NUM_SYMBOL_GROUPS, TT_NUM_STATES>;
-
   // Prepare cuda stream for data transfers & kernels
   rmm::cuda_stream stream{};
   rmm::cuda_stream_view stream_view(stream);
@@ -167,7 +164,11 @@ TEST_F(FstTest, GroundTruth)
   cudf::detail::hostdevice_vector<SymbolOffsetT> out_indexes_gpu(input.size(), stream_view);
 
   // Run algorithm
-  DfaFstT parser{pda_sgs, pda_state_tt, pda_out_tt, stream.value()};
+  auto parser = cudf::io::fst::detail::make_fst(
+    cudf::io::fst::detail::make_symbol_group_lut(pda_sgs),
+    cudf::io::fst::detail::make_transition_table(pda_state_tt),
+    cudf::io::fst::detail::make_translation_table<TT_NUM_STATES * NUM_SYMBOL_GROUPS>(pda_out_tt),
+    stream);
 
   // Allocate device-side temporary storage & run algorithm
   parser.Transduce(d_input.data(),
diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index d0c16078329..e4d52a2953e 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -1769,4 +1769,50 @@ TEST_F(JsonReaderTest, TrailingCommas)
   }
 }
 
+TEST_F(JsonReaderTest, JSONLinesRecovering)
+{
+  std::string data =
+    // 0 -> a: -2 (valid)
+    R"({"a":-2})"
+    "\n"
+    // 1 -> all-null row (invalid)
+    R"({"a":])"
+    "\n"
+    // 2 -> all-null row (invalid)
+    R"({"b":{"a":[321})"
+    "\n"
+    // 3 -> c: 1.2 (valid)
+    R"({"c":1.2})"
+    "\n"
+    "\n"  // empty line; the expectations below count 5 rows for the 5 non-empty lines
+    // 4 -> a: 123 (valid)
+    R"({"a":123})";
+
+  auto filepath = temp_env->get_temp_dir() + "RecoveringLines.json";
+  {
+    std::ofstream outfile(filepath, std::ofstream::out);
+    outfile << data;
+  }
+
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath})
+      .lines(true)
+      .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL);
+
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
+
+  EXPECT_EQ(result.tbl->num_columns(), 2);
+  EXPECT_EQ(result.tbl->num_rows(), 5);
+  EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT64);
+  EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64);
+
+  std::vector<bool> a_validity{true, false, false, false, true};
+  std::vector<bool> c_validity{false, false, false, true, false};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0),
+                                 int64_wrapper{{-2, 0, 0, 0, 123}, a_validity.cbegin()});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1),
+                                 float64_wrapper{{0.0, 0.0, 0.0, 1.2, 0.0}, c_validity.cbegin()});
+}
+
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp
index a81348872cf..ad6678dbe5b 100644
--- a/cpp/tests/io/json_tree.cpp
+++ b/cpp/tests/io/json_tree.cpp
@@ -258,6 +258,7 @@ tree_meta_t2 get_tree_representation_cpu(
       case cuio_json::token_t::ValueEnd: return "VE";
       case cuio_json::token_t::StructMemberBegin: return " <";
       case cuio_json::token_t::StructMemberEnd: return " >";
+      case cuio_json::token_t::LineEnd: return ";";
       default: return ".";
     }
   };
diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp
index d82abdd1287..00d657108b8 100644
--- a/cpp/tests/io/nested_json_test.cpp
+++ b/cpp/tests/io/nested_json_test.cpp
@@ -32,6 +32,11 @@
 #include <cudf_test/io_metadata_utilities.hpp>
 #include <cudf_test/table_utilities.hpp>
 
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/copy.h>
+#include <thrust/iterator/zip_iterator.h>
+
 #include <string>
 
 namespace cuio_json = cudf::io::json;
@@ -163,7 +168,8 @@ TEST_F(JsonTest, StackContext)
   cudf::detail::hostdevice_vector<StackSymbolT> stack_context(input.size(), stream);
 
   // Run algorithm
-  cuio_json::detail::get_stack_context(d_input, stack_context.device_ptr(), stream);
+  constexpr auto stack_behavior = cuio_json::stack_behavior_t::PushPopWithoutReset;
+  cuio_json::detail::get_stack_context(d_input, stack_context.device_ptr(), stack_behavior, stream);
 
   // Copy back the results
   stack_context.device_to_host_async(stream);
@@ -211,7 +217,8 @@ TEST_F(JsonTest, StackContextUtf8)
   cudf::detail::hostdevice_vector<StackSymbolT> stack_context(input.size(), stream);
 
   // Run algorithm
-  cuio_json::detail::get_stack_context(d_input, stack_context.device_ptr(), stream);
+  constexpr auto stack_behavior = cuio_json::stack_behavior_t::PushPopWithoutReset;
+  cuio_json::detail::get_stack_context(d_input, stack_context.device_ptr(), stack_behavior, stream);
 
   // Copy back the results
   stack_context.device_to_host_async(stream);
@@ -229,6 +236,55 @@ TEST_F(JsonTest, StackContextUtf8)
   CUDF_TEST_EXPECT_VECTOR_EQUAL(golden_stack_context, stack_context, stack_context.size());
 }
 
+TEST_F(JsonTest, StackContextRecovering)
+{
+  // Type used to represent the atomic symbol type used within the finite-state machine
+  using SymbolT      = char;
+  using StackSymbolT = char;
+
+  // Prepare cuda stream for data transfers & kernels
+  auto const stream = cudf::get_default_stream();
+
+  // JSON lines input with invalid lines (golden context returns to root after each newline)
+  std::string const input = R"({"a":-2},
+  {"a":
+  {"a":{"a":[321
+  {"a":[1]}
+
+  {"b":123}
+  )";
+
+  // Expected stack context, one string per input line (newline characters included)
+  std::string const golden_stack_context =
+    "_{{{{{{{__"
+    "___{{{{{"
+    "___{{{{{{{{{{[[[["
+    "___{{{{{[[{_"
+    "_"
+    "___{{{{{{{{_"
+    "__";
+
+  // Prepare input & output buffers
+  cudf::string_scalar const d_scalar(input, true, stream);
+  auto const d_input =
+    cudf::device_span<SymbolT const>{d_scalar.data(), static_cast<size_t>(d_scalar.size())};
+  cudf::detail::hostdevice_vector<StackSymbolT> stack_context(input.size(), stream);
+
+  // Run algorithm with the ResetOnDelimiter stack behavior
+  constexpr auto stack_behavior = cuio_json::stack_behavior_t::ResetOnDelimiter;
+  cuio_json::detail::get_stack_context(d_input, stack_context.device_ptr(), stack_behavior, stream);
+
+  // Copy back the results
+  stack_context.device_to_host_async(stream);
+
+  // Make sure we copied back the stack context
+  stream.synchronize();
+
+  // Verify results
+  ASSERT_EQ(golden_stack_context.size(), stack_context.size());
+  CUDF_TEST_EXPECT_VECTOR_EQUAL(golden_stack_context, stack_context, stack_context.size());
+}
+
 TEST_F(JsonTest, TokenStream)
 {
   using cuio_json::PdaTokenT;
@@ -264,10 +320,8 @@ TEST_F(JsonTest, TokenStream)
   auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream(
     d_input, default_options, stream, rmm::mr::get_current_device_resource());
   // Copy back the number of tokens that were written
-  thrust::host_vector<PdaTokenT> const tokens_gpu =
-    cudf::detail::make_host_vector_async(d_tokens_gpu, stream);
-  thrust::host_vector<SymbolOffsetT> const token_indices_gpu =
-    cudf::detail::make_host_vector_async(d_token_indices_gpu, stream);
+  auto const tokens_gpu        = cudf::detail::make_std_vector_async(d_tokens_gpu, stream);
+  auto const token_indices_gpu = cudf::detail::make_std_vector_async(d_token_indices_gpu, stream);
 
   // Golden token stream sample
   using token_t = cuio_json::token_t;
@@ -400,10 +454,8 @@ TEST_F(JsonTest, TokenStream2)
   auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream(
     d_input, default_options, stream, rmm::mr::get_current_device_resource());
   // Copy back the number of tokens that were written
-  thrust::host_vector<PdaTokenT> const tokens_gpu =
-    cudf::detail::make_host_vector_async(d_tokens_gpu, stream);
-  thrust::host_vector<SymbolOffsetT> const token_indices_gpu =
-    cudf::detail::make_host_vector_async(d_token_indices_gpu, stream);
+  auto const tokens_gpu        = cudf::detail::make_std_vector_async(d_tokens_gpu, stream);
+  auto const token_indices_gpu = cudf::detail::make_std_vector_async(d_token_indices_gpu, stream);
 
   // Golden token stream sample
   using token_t = cuio_json::token_t;
@@ -487,6 +539,228 @@ TEST_P(JsonParserTest, ExtractColumn)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col2, parsed_col2);
 }
 
+TEST_F(JsonTest, RecoveringTokenStream)
+{
+  // Test input. Inline comments used to indicate character indexes
+  //                           012345678 <= line 0
+  std::string const input = R"({"a":-2},)"
+                            // 9
+                            "\n"
+                            // 01234 <= line 1
+                            R"({"a":)"
+                            // 5
+                            "\n"
+                            // 67890123456789 <= line 2
+                            R"({"a":{"a":[321)"
+                            // 0
+                            "\n"
+                            // 123456789 <= line 3
+                            R"({"a":[1]})"
+                            // 0
+                            "\n"
+                            // 1  <= line 4
+                            "\n"
+                            // 234567890 <= line 5
+                            R"({"b":123})";
+
+  // Golden token stream sample
+  using token_t = cuio_json::token_t;
+  std::vector<std::pair<std::size_t, cuio_json::PdaTokenT>> const golden_token_stream = {
+    // Line 0 (invalid)
+    {0, token_t::StructBegin},
+    {0, token_t::StructEnd},
+    // Line 1 (valid)
+    {10, token_t::StructBegin},
+    {11, token_t::StructMemberBegin},
+    {11, token_t::FieldNameBegin},
+    {13, token_t::FieldNameEnd},
+    // Line 2 (valid)
+    {16, token_t::StructBegin},
+    {17, token_t::StructMemberBegin},
+    {17, token_t::FieldNameBegin},
+    {19, token_t::FieldNameEnd},
+    {21, token_t::StructBegin},
+    {22, token_t::StructMemberBegin},
+    {22, token_t::FieldNameBegin},
+    {24, token_t::FieldNameEnd},
+    {26, token_t::ListBegin},
+    {27, token_t::ValueBegin},
+    {30, token_t::ValueEnd},
+    // Line 3 (valid)
+    {31, token_t::StructBegin},
+    {32, token_t::StructMemberBegin},
+    {32, token_t::FieldNameBegin},
+    {34, token_t::FieldNameEnd},
+    {36, token_t::ListBegin},
+    {37, token_t::ValueBegin},
+    {38, token_t::ValueEnd},
+    {38, token_t::ListEnd},
+    {39, token_t::StructMemberEnd},
+    {39, token_t::StructEnd},
+    // Line 4 (empty)
+    // Line 5 (valid)
+    {42, token_t::StructBegin},
+    {43, token_t::StructMemberBegin},
+    {43, token_t::FieldNameBegin},
+    {45, token_t::FieldNameEnd},
+    {47, token_t::ValueBegin},
+    {50, token_t::ValueEnd},
+    {50, token_t::StructMemberEnd},
+    {50, token_t::StructEnd}};
+
+  auto const stream = cudf::get_default_stream();
+
+  // Parsing options: JSON lines with recovery mode RECOVER_WITH_NULL
+  cudf::io::json_reader_options default_options{};
+  default_options.set_recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL);
+  default_options.enable_lines(true);
+
+  // Prepare input & output buffers
+  cudf::string_scalar const d_scalar(input, true, stream);
+  auto const d_input = cudf::device_span<cuio_json::SymbolT const>{
+    d_scalar.data(), static_cast<size_t>(d_scalar.size())};
+
+  // Parse the JSON and get the token stream
+  auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream(
+    d_input, default_options, stream, rmm::mr::get_current_device_resource());
+  // Copy the tokens and token indices back to the host
+  auto const tokens_gpu        = cudf::detail::make_std_vector_async(d_tokens_gpu, stream);
+  auto const token_indices_gpu = cudf::detail::make_std_vector_async(d_token_indices_gpu, stream);
+
+  // Verify the number of tokens matches
+  ASSERT_EQ(golden_token_stream.size(), tokens_gpu.size());
+  ASSERT_EQ(golden_token_stream.size(), token_indices_gpu.size());
+
+  for (std::size_t i = 0; i < tokens_gpu.size(); i++) {
+    // Ensure the indexes the tokens are pointing to match
+    EXPECT_EQ(golden_token_stream[i].first, token_indices_gpu[i]) << "Mismatch at #" << i;
+    // Ensure the token category is correct
+    EXPECT_EQ(golden_token_stream[i].second, tokens_gpu[i]) << "Mismatch at #" << i;
+  }
+}
+
+TEST_F(JsonTest, PostProcessTokenStream)
+{
+  // Aliases for the token, token index, and (index, token) tuple types used below
+  using token_t       = cuio_json::token_t;
+  using token_index_t = cuio_json::SymbolOffsetT;
+  using tuple_t       = thrust::tuple<token_index_t, cuio_json::PdaTokenT>;
+
+  std::vector<tuple_t> const input = {// Line 0 (invalid)
+                                      {0, token_t::LineEnd},
+                                      {0, token_t::StructBegin},
+                                      {1, token_t::StructMemberBegin},
+                                      {1, token_t::FieldNameBegin},
+                                      {3, token_t::FieldNameEnd},
+                                      {5, token_t::ValueBegin},
+                                      {7, token_t::ValueEnd},
+                                      {7, token_t::StructMemberEnd},
+                                      {7, token_t::StructEnd},
+                                      {8, token_t::ErrorBegin},
+                                      {9, token_t::LineEnd},
+                                      // Line 1
+                                      {10, token_t::StructBegin},
+                                      {11, token_t::StructMemberBegin},
+                                      {11, token_t::FieldNameBegin},
+                                      {13, token_t::FieldNameEnd},
+                                      {15, token_t::LineEnd},
+                                      // Line 2 (invalid)
+                                      {16, token_t::StructBegin},
+                                      {17, token_t::StructMemberBegin},
+                                      {17, token_t::FieldNameBegin},
+                                      {19, token_t::FieldNameEnd},
+                                      {21, token_t::StructBegin},
+                                      {22, token_t::StructMemberBegin},
+                                      {22, token_t::FieldNameBegin},
+                                      {24, token_t::FieldNameEnd},
+                                      {26, token_t::ListBegin},
+                                      {27, token_t::ValueBegin},
+                                      {29, token_t::ErrorBegin},
+                                      {30, token_t::LineEnd},
+                                      // Line 3 (invalid)
+                                      {31, token_t::StructBegin},
+                                      {32, token_t::StructMemberBegin},
+                                      {32, token_t::FieldNameBegin},
+                                      {34, token_t::FieldNameEnd},
+                                      {36, token_t::ListBegin},
+                                      {37, token_t::ValueBegin},
+                                      {38, token_t::ValueEnd},
+                                      {38, token_t::ListEnd},
+                                      {39, token_t::StructMemberEnd},
+                                      {39, token_t::StructEnd},
+                                      {40, token_t::ErrorBegin},
+                                      {40, token_t::LineEnd},
+                                      // Line 4
+                                      {41, token_t::LineEnd},
+                                      // Line 5
+                                      {42, token_t::StructBegin},
+                                      {43, token_t::StructMemberBegin},
+                                      {43, token_t::FieldNameBegin},
+                                      {45, token_t::FieldNameEnd},
+                                      {47, token_t::ValueBegin},
+                                      {50, token_t::ValueEnd},
+                                      {50, token_t::StructMemberEnd},
+                                      {50, token_t::StructEnd}};
+
+  std::vector<tuple_t> const expected_output = {// Line 0 (invalid)
+                                                {0, token_t::StructBegin},
+                                                {0, token_t::StructEnd},
+                                                // Line 1
+                                                {10, token_t::StructBegin},
+                                                {11, token_t::StructMemberBegin},
+                                                {11, token_t::FieldNameBegin},
+                                                {13, token_t::FieldNameEnd},
+                                                // Line 2 (invalid)
+                                                {0, token_t::StructBegin},
+                                                {0, token_t::StructEnd},
+                                                // Line 3 (invalid)
+                                                {0, token_t::StructBegin},
+                                                {0, token_t::StructEnd},
+                                                // Line 4 (empty)
+                                                // Line 5
+                                                {42, token_t::StructBegin},
+                                                {43, token_t::StructMemberBegin},
+                                                {43, token_t::FieldNameBegin},
+                                                {45, token_t::FieldNameEnd},
+                                                {47, token_t::ValueBegin},
+                                                {50, token_t::ValueEnd},
+                                                {50, token_t::StructMemberEnd},
+                                                {50, token_t::StructEnd}};
+
+  // Decompose tuples into separate offset and token vectors
+  auto const stream = cudf::get_default_stream();
+  std::vector<token_index_t> offsets(input.size());
+  std::vector<cuio_json::PdaTokenT> tokens(input.size());
+  auto token_tuples = thrust::make_zip_iterator(offsets.begin(), tokens.begin());
+  thrust::copy(input.cbegin(), input.cend(), token_tuples);
+
+  // Initialize device-side test data
+  auto const d_offsets = cudf::detail::make_device_uvector_async(
+    cudf::host_span<token_index_t const>{offsets.data(), offsets.size()},
+    stream,
+    rmm::mr::get_current_device_resource());
+  auto const d_tokens =
+    cudf::detail::make_device_uvector_async(tokens, stream, rmm::mr::get_current_device_resource());
+
+  // Run system-under-test
+  auto [d_filtered_tokens, d_filtered_indices] =
+    cuio_json::detail::process_token_stream(d_tokens, d_offsets, stream);
+
+  auto const filtered_tokens  = cudf::detail::make_std_vector_async(d_filtered_tokens, stream);
+  auto const filtered_indices = cudf::detail::make_std_vector_async(d_filtered_indices, stream);
+
+  // Verify the number of tokens matches
+  ASSERT_EQ(filtered_tokens.size(), expected_output.size());
+  ASSERT_EQ(filtered_indices.size(), expected_output.size());
+
+  for (std::size_t i = 0; i < filtered_tokens.size(); i++) {
+    // Ensure the indexes the tokens are pointing to match
+    EXPECT_EQ(thrust::get<0>(expected_output[i]), filtered_indices[i]) << "Mismatch at #" << i;
+    // Ensure the token category is correct
+    EXPECT_EQ(thrust::get<1>(expected_output[i]), filtered_tokens[i]) << "Mismatch at #" << i;
+  }
+}
+
 TEST_P(JsonParserTest, UTF_JSON)
 {
   // Prepare cuda stream for data transfers & kernels