From cac92f8ba75269e888e58a82bc4ac5f8b29e73d7 Mon Sep 17 00:00:00 2001 From: Julio Perez <37191411+jperez999@users.noreply.github.com> Date: Sun, 20 Mar 2022 16:29:48 -0400 Subject: [PATCH] Remove Systems library from nvtabular (#1456) --- merlin/systems/dag/__init__.py | 21 - merlin/systems/dag/ensemble.py | 125 - merlin/systems/dag/node.py | 62 - merlin/systems/dag/op_runner.py | 43 - merlin/systems/dag/ops/__init__.py | 15 - merlin/systems/dag/ops/faiss.py | 108 - merlin/systems/dag/ops/feast.py | 243 - merlin/systems/dag/ops/operator.py | 138 - merlin/systems/dag/ops/session_filter.py | 97 - merlin/systems/dag/ops/softmax_sampling.py | 108 - merlin/systems/dag/ops/tensorflow.py | 163 - merlin/systems/dag/ops/unroll_features.py | 85 - merlin/systems/dag/ops/workflow.py | 72 - merlin/systems/triton/__init__.py | 82 - merlin/systems/triton/conversions.py | 150 - merlin/systems/triton/export.py | 795 --- merlin/systems/triton/model_config.proto | 1660 ------- merlin/systems/triton/model_config_pb2.py | 4564 ------------------ merlin/systems/triton/oprunner_model.py | 88 - merlin/systems/triton/workflow_model.py | 121 - merlin/systems/workflow/__init__.py | 65 - merlin/systems/workflow/base.py | 209 - merlin/systems/workflow/hugectr.py | 87 - merlin/systems/workflow/pytorch.py | 46 - merlin/systems/workflow/tensorflow.py | 68 - tests/unit/systems/__init__.py | 0 tests/unit/systems/inf_test_ops.py | 24 - tests/unit/systems/inference_utils.py | 81 - tests/unit/systems/test_ensemble.py | 236 - tests/unit/systems/test_ensemble_ops.py | 88 - tests/unit/systems/test_export.py | 63 - tests/unit/systems/test_graph.py | 27 - tests/unit/systems/test_inference_ops.py | 82 - tests/unit/systems/test_op_runner.py | 163 - tests/unit/systems/test_tensorflow_inf_op.py | 113 - 35 files changed, 10092 deletions(-) delete mode 100644 merlin/systems/dag/__init__.py delete mode 100644 merlin/systems/dag/ensemble.py delete mode 100644 merlin/systems/dag/node.py delete mode 100644 merlin/systems/dag/op_runner.py delete mode 100644 merlin/systems/dag/ops/__init__.py delete mode 100644 merlin/systems/dag/ops/faiss.py delete mode 100644 merlin/systems/dag/ops/feast.py delete mode 100644 merlin/systems/dag/ops/operator.py delete mode 100644 merlin/systems/dag/ops/session_filter.py delete mode 100644 merlin/systems/dag/ops/softmax_sampling.py delete mode 100644 merlin/systems/dag/ops/tensorflow.py delete mode 100644 merlin/systems/dag/ops/unroll_features.py delete mode 100644 merlin/systems/dag/ops/workflow.py delete mode 100644 merlin/systems/triton/__init__.py delete mode 100644 merlin/systems/triton/conversions.py delete mode 100644 merlin/systems/triton/export.py delete mode 100644 merlin/systems/triton/model_config.proto delete mode 100644 merlin/systems/triton/model_config_pb2.py delete mode 100644 merlin/systems/triton/oprunner_model.py delete mode 100644 merlin/systems/triton/workflow_model.py delete mode 100644 merlin/systems/workflow/__init__.py delete mode 100644 merlin/systems/workflow/base.py delete mode 100644 merlin/systems/workflow/hugectr.py delete mode 100644 merlin/systems/workflow/pytorch.py delete mode 100644 merlin/systems/workflow/tensorflow.py delete mode 100644 tests/unit/systems/__init__.py delete mode 100644 tests/unit/systems/inf_test_ops.py delete mode 100644 tests/unit/systems/inference_utils.py delete mode 100644 tests/unit/systems/test_ensemble.py delete mode 100644 tests/unit/systems/test_ensemble_ops.py delete mode 100644 tests/unit/systems/test_export.py delete mode 100644 
tests/unit/systems/test_graph.py delete mode 100644 tests/unit/systems/test_inference_ops.py delete mode 100644 tests/unit/systems/test_op_runner.py delete mode 100644 tests/unit/systems/test_tensorflow_inf_op.py diff --git a/merlin/systems/dag/__init__.py b/merlin/systems/dag/__init__.py deleted file mode 100644 index 80bef9b2086..00000000000 --- a/merlin/systems/dag/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# alias submodules here to avoid breaking everything with moving to submodules -# flake8: noqa -from .ensemble import Ensemble -from .node import Node -from .op_runner import OperatorRunner diff --git a/merlin/systems/dag/ensemble.py b/merlin/systems/dag/ensemble.py deleted file mode 100644 index d0bf5db9bdf..00000000000 --- a/merlin/systems/dag/ensemble.py +++ /dev/null @@ -1,125 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import os - -from merlin.dag import postorder_iter_nodes - -# this needs to be before any modules that import protobuf -os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" - -from google.protobuf import text_format # noqa - -import merlin.systems.triton.model_config_pb2 as model_config # noqa -from merlin.dag import Graph # noqa -from merlin.systems.triton.export import _convert_dtype # noqa - - -class Ensemble: - def __init__(self, ops, schema, name="ensemble_model", label_columns=None): - self.graph = Graph(ops) - self.graph.construct_schema(schema) - self.name = name - self.label_columns = label_columns or [] - - def export(self, export_path, version=1): - # Create ensemble config - ensemble_config = model_config.ModelConfig( - name=self.name, - platform="ensemble", - # max_batch_size=configs[0].max_batch_size - ) - - for col_name, col_schema in self.graph.input_schema.column_schemas.items(): - ensemble_config.input.append( - model_config.ModelInput( - name=col_name, data_type=_convert_dtype(col_schema.dtype), dims=[-1, -1] - ) - ) - - for col_name, col_schema in self.graph.output_schema.column_schemas.items(): - ensemble_config.output.append( - model_config.ModelOutput( - name=col_name, data_type=_convert_dtype(col_schema.dtype), dims=[-1, -1] - ) - ) - - # Build node id lookup table - postorder_nodes = list(postorder_iter_nodes(self.graph.output_node)) - - node_idx = 0 - node_id_lookup = {} - for node in postorder_nodes: - if node.exportable: - node_id_lookup[node] = node_idx - node_idx += 1 - - node_configs = [] - # Export node configs and add ensemble steps - for node in postorder_nodes: - if node.exportable: - node_id = node_id_lookup.get(node, None) - node_name = f"{node_id}_{node.export_name}" - - found = False - for step in ensemble_config.ensemble_scheduling.step: - if step.model_name == node_name: - found = True - if found: - continue - - node_config = node.export(export_path, node_id=node_id, version=version) - - config_step = model_config.ModelEnsembling.Step( - model_name=node_name, model_version=-1 - ) - - for input_col_name in node.input_schema.column_names: - source = _find_column_source(node.parents_with_dependencies, input_col_name) - source_id = node_id_lookup.get(source, None) - in_suffix = f"_{source_id}" if source_id is not None else "" - config_step.input_map[input_col_name] = input_col_name + in_suffix - - for output_col_name in node.output_schema.column_names: - out_suffix = ( - f"_{node_id}" if node_id is not None and node_id < node_idx - 1 else "" - ) - config_step.output_map[output_col_name] = output_col_name + out_suffix - - ensemble_config.ensemble_scheduling.step.append(config_step) - node_configs.append(node_config) - - # Write the ensemble config file - ensemble_path = os.path.join(export_path, self.name) - os.makedirs(ensemble_path, exist_ok=True) - os.makedirs(os.path.join(ensemble_path, str(version)), exist_ok=True) - - with open(os.path.join(ensemble_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(ensemble_config, o) - - return (ensemble_config, node_configs) - - -def _find_column_source(upstream_nodes, column_name): - source_node = None - for upstream_node in upstream_nodes: - if column_name in upstream_node.output_columns.names: - source_node = upstream_node - break - - if source_node and not source_node.exportable: - return _find_column_source(source_node.parents_with_dependencies, column_name) - else: - return source_node diff --git a/merlin/systems/dag/node.py b/merlin/systems/dag/node.py deleted file mode 100644 
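For reference, the Ensemble class removed above was typically driven roughly as follows. This is a minimal usage sketch against the pre-removal package layout; `workflow` (an NVTabular Workflow), `model` (a trained Keras model), and the export path are assumed placeholders rather than anything defined in this patch.

from merlin.systems.dag.ensemble import Ensemble
from merlin.systems.dag.ops.tensorflow import PredictTensorflow
from merlin.systems.dag.ops.workflow import TransformWorkflow

# `workflow` is an nvtabular.Workflow and `model` a trained Keras model (assumed to exist)
pipeline = (
    workflow.input_schema.column_names
    >> TransformWorkflow(workflow)
    >> PredictTensorflow(model)
)
ensemble = Ensemble(pipeline, workflow.input_schema, name="ensemble_model")
ensemble_config, node_configs = ensemble.export("/tmp/model_repository", version=1)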
index b4825205286..00000000000 --- a/merlin/systems/dag/node.py +++ /dev/null @@ -1,62 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from merlin.dag import Node -from merlin.schema import Schema - - -class InferenceNode(Node): - def export(self, output_path, node_id=None, version=1): - return self.op.export( - output_path, self.input_schema, self.output_schema, node_id=node_id, version=version - ) - - @property - def export_name(self): - return self.op.export_name - - def match_descendant_dtypes(self, source_node): - self.output_schema = _match_dtypes(source_node.input_schema, self.output_schema) - return self - - def match_ancestor_dtypes(self, source_node): - self.input_schema = _match_dtypes(source_node.output_schema, self.input_schema) - return self - - def validate_schemas(self, root_schema, strict_dtypes=False): - super().validate_schemas(root_schema, strict_dtypes) - - if self.children: - childrens_schema = Schema() - for elem in self.children: - childrens_schema += elem.input_schema - - for col_name, col_schema in self.output_schema.column_schemas.items(): - sink_col_schema = childrens_schema.get(col_name) - - if not sink_col_schema: - raise ValueError( - f"Output column '{col_name}' not detected in any " - f"child inputs for '{self.op.__class__.__name__}'." - ) - - -def _match_dtypes(source_schema, dest_schema): - matched = Schema() - for col_name, col_schema in dest_schema.column_schemas.items(): - source_dtype = source_schema.get(col_name, col_schema).dtype - matched[col_name] = col_schema.with_dtype(source_dtype) - - return matched diff --git a/merlin/systems/dag/op_runner.py b/merlin/systems/dag/op_runner.py deleted file mode 100644 index 2db6247fb43..00000000000 --- a/merlin/systems/dag/op_runner.py +++ /dev/null @@ -1,43 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
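The OperatorRunner removed below rebuilds operators from JSON stored in the Triton model config's string parameters. The sketch that follows shows the expected structure, inferred from op_runner.py together with PipelineableInferenceOperator.export; the operator name and index path are hypothetical.

import json

op_config = {
    "module_name": "merlin.systems.dag.ops.faiss",
    "class_name": "QueryFaiss",
    "params": json.dumps({"index_path": "/models/index.faiss", "topk": 10}),
}
config = {
    "parameters": {
        "operator_names": {"string_value": json.dumps(["0_queryfaiss"])},
        "0_queryfaiss": {"string_value": json.dumps(op_config)},
    }
}
# OperatorRunner(config) would import QueryFaiss and call QueryFaiss.from_config(op_config)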
-# -import importlib -import json - - -class OperatorRunner: - def __init__(self, config, repository="./", version=1, kind=""): - operator_names = self.fetch_json_param(config, "operator_names") - op_configs = [self.fetch_json_param(config, op_name) for op_name in operator_names] - - self.operators = [] - for op_config in op_configs: - module_name = op_config["module_name"] - class_name = op_config["class_name"] - - op_module = importlib.import_module(module_name) - op_class = getattr(op_module, class_name) - - operator = op_class.from_config(op_config) - self.operators.append(operator) - - def execute(self, tensors): - for operator in self.operators: - tensors = operator.transform(tensors) - return tensors - - def fetch_json_param(self, model_config, param_name): - string_value = model_config["parameters"][param_name]["string_value"] - return json.loads(string_value) diff --git a/merlin/systems/dag/ops/__init__.py b/merlin/systems/dag/ops/__init__.py deleted file mode 100644 index 5d9909dec4e..00000000000 --- a/merlin/systems/dag/ops/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/merlin/systems/dag/ops/faiss.py b/merlin/systems/dag/ops/faiss.py deleted file mode 100644 index 70dd019054a..00000000000 --- a/merlin/systems/dag/ops/faiss.py +++ /dev/null @@ -1,108 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import json -import os -from shutil import copy2 - -import faiss -import numpy as np - -from merlin.dag import ColumnSelector -from merlin.schema import ColumnSchema, Schema -from merlin.systems.dag.ops.operator import InferenceDataFrame, PipelineableInferenceOperator - - -class QueryFaiss(PipelineableInferenceOperator): - def __init__(self, index_path, topk=10): - self.index_path = str(index_path) - self.topk = topk - self._index = None - super().__init__() - - @classmethod - def from_config(cls, config): - parameters = json.loads(config.get("params", "")) - index_path = parameters["index_path"] - topk = parameters["topk"] - - operator = QueryFaiss(index_path, topk=topk) - operator._index = faiss.read_index(str(index_path)) - - return operator - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - params = params or {} - - # TODO: Copy the index into the export directory - - self_params = { - # TODO: Write the (relative) path from inside the export directory - "index_path": self.index_path, - "topk": self.topk, - } - self_params.update(params) - index_filename = os.path.basename(os.path.realpath(self.index_path)) - - # set index path to new path after export - new_index_path = os.path.join( - path, f"{node_id}_{QueryFaiss.__name__.lower()}", str(version), index_filename - ) - copy2(self.index_path, new_index_path) - self.index_path = new_index_path - return super().export(path, input_schema, output_schema, self_params, node_id, version) - - def transform(self, df: InferenceDataFrame): - user_vector = list(df.tensors.values())[0] - - _, indices = self._index.search(user_vector, self.topk) - # distances, indices = self.index.search(user_vector, self.topk) - - candidate_ids = np.array(indices).T.astype(np.int32) - - return InferenceDataFrame({"candidate_ids": candidate_ids}) - - def compute_input_schema( - self, - root_schema: Schema, - parents_schema: Schema, - deps_schema: Schema, - selector: ColumnSelector, - ) -> Schema: - input_schema = super().compute_input_schema( - root_schema, parents_schema, deps_schema, selector - ) - if len(input_schema.column_schemas) > 1: - raise ValueError( - "More than one input has been detected for this node," - / f"inputs received: {input_schema.column_names}" - ) - return input_schema - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - return Schema( - [ - ColumnSchema("candidate_ids", dtype=np.int32), - ] - ) - - -def setup_faiss(item_vector, output_path): - index = faiss.IndexFlatL2(item_vector[0].shape[0]) - index.add(item_vector) - faiss.write_index(index, str(output_path)) diff --git a/merlin/systems/dag/ops/feast.py b/merlin/systems/dag/ops/feast.py deleted file mode 100644 index e9d4770bc4a..00000000000 --- a/merlin/systems/dag/ops/feast.py +++ /dev/null @@ -1,243 +0,0 @@ -import json - -import numpy as np -from feast import FeatureStore, ValueType - -from merlin.dag import ColumnSelector -from merlin.schema import ColumnSchema, Schema -from merlin.systems.dag.ops.operator import InferenceDataFrame, PipelineableInferenceOperator - -# Feast_key: (numpy dtype, is_list, is_ragged) -feast_2_numpy = { - ValueType.INT64: (np.int64, False, False), - ValueType.INT32: (np.int32, False, False), - ValueType.FLOAT: (np.float, False, False), - ValueType.INT64_LIST: (np.int64, True, True), - ValueType.INT32_LIST: (np.int32, True, True), - ValueType.FLOAT_LIST: (np.float, True, True), -} - - -class 
QueryFeast(PipelineableInferenceOperator): - @classmethod - def from_feature_view(cls, store, path, view, column, output_prefix=None, include_id=False): - feature_view = store.get_feature_view(view) - entity_id = feature_view.entities[0] - - features = [] - mh_features = [] - - input_schema = Schema([ColumnSchema(column, dtype=np.int32)]) - - output_schema = Schema([]) - for feature in feature_view.features: - feature_dtype, is_list, is_ragged = feast_2_numpy[feature.dtype] - - if is_list: - mh_features.append(feature.name) - - values_name = cls._prefixed_name(output_prefix, f"{feature.name}_1") - nnzs_name = cls._prefixed_name(output_prefix, f"{feature.name}_2") - output_schema[values_name] = ColumnSchema( - values_name, dtype=feature_dtype, is_list=is_list, is_ragged=is_ragged - ) - output_schema[nnzs_name] = ColumnSchema( - nnzs_name, dtype=np.int64, is_list=True, is_ragged=False - ) - else: - features.append(feature.name) - - name = cls._prefixed_name(output_prefix, feature.name) - output_schema[name] = ColumnSchema( - name, dtype=feature_dtype, is_list=is_list, is_ragged=is_ragged - ) - - if include_id: - output_schema[entity_id] = ColumnSchema(entity_id, dtype=np.int32) - - return QueryFeast( - path, - entity_id, - view, - column, - features, - mh_features, - input_schema, - output_schema, - include_id=include_id, - output_prefix=output_prefix or "", - suffix_int=1, - ) - - def __init__( - self, - repo_path, - entity_id, - entity_view, - entity_column, - features, - mh_features, - input_schema, - output_schema, - include_id=False, - output_prefix="", - suffix_int=1, - ): - self.repo_path = repo_path - self.entity_id = entity_id - self.entity_view = entity_view - self.entity_column = entity_column - - self.features = features - self.mh_features = mh_features - self.input_schema = input_schema - self.output_schema = output_schema - self.include_id = include_id - self.output_prefix = output_prefix - self.suffix_int = suffix_int - - self.store = FeatureStore(repo_path=repo_path) - super().__init__() - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - return self.output_schema - - def compute_input_schema( - self, - root_schema: Schema, - parents_schema: Schema, - deps_schema: Schema, - selector: ColumnSelector, - ) -> Schema: - return self.input_schema - - @classmethod - def from_config(cls, config): - parameters = json.loads(config.get("params", "")) - entity_id = parameters["entity_id"] - entity_view = parameters["entity_view"] - entity_column = parameters["entity_column"] - repo_path = parameters["feast_repo_path"] - features = parameters["features"] - mh_features = parameters["mh_features"] - in_dict = json.loads(config.get("input_dict", "{}")) - out_dict = json.loads(config.get("output_dict", "{}")) - include_id = parameters["include_id"] - output_prefix = parameters["output_prefix"] - suffix_int = parameters["suffix_int"] - - in_schema = Schema([]) - for col_name, col_rep in in_dict.items(): - in_schema[col_name] = ColumnSchema( - col_name, - dtype=col_rep["dtype"], - is_list=col_rep["is_list"], - is_ragged=col_rep["is_ragged"], - ) - out_schema = Schema([]) - for col_name, col_rep in out_dict.items(): - out_schema[col_name] = ColumnSchema( - col_name, - dtype=col_rep["dtype"], - is_list=col_rep["is_list"], - is_ragged=col_rep["is_ragged"], - ) - - return QueryFeast( - repo_path, - entity_id, - entity_view, - entity_column, - features, - mh_features, - in_schema, - out_schema, - include_id, - 
output_prefix, - suffix_int, - ) - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - params = params or {} - self_params = { - "entity_id": self.entity_id, - "entity_view": self.entity_view, - "entity_column": self.entity_column, - "features": self.features, - "mh_features": self.mh_features, - "feast_repo_path": self.repo_path, - "include_id": self.include_id, - "output_prefix": self.output_prefix, - "suffix_int": self.suffix_int, - } - self_params.update(params) - return super().export(path, input_schema, output_schema, self_params, node_id, version) - - def transform(self, df: InferenceDataFrame) -> InferenceDataFrame: - entity_ids = df[self.entity_column] - entity_rows = [{self.entity_id: int(entity_id)} for entity_id in entity_ids] - - feature_names = self.features + self.mh_features - feature_refs = [ - ":".join([self.entity_view, feature_name]) for feature_name in feature_names - ] - - feast_response = self.store.get_online_features( - features=feature_refs, - entity_rows=entity_rows, - ).to_dict() - - output_tensors = {} - if self.include_id: - output_tensors[self.entity_id] = entity_ids - - # Numerical and single-hot categorical - for feature_name in self.features: - prefixed_name = self.__class__._prefixed_name(self.output_prefix, feature_name) - - feature_value = feast_response[feature_name] - feature_array = np.array([feature_value]).T.astype( - self.output_schema[prefixed_name].dtype - ) - output_tensors[prefixed_name] = feature_array - - # Multi-hot categorical - for feature_name in self.mh_features: - feature_value = feast_response[feature_name] - - prefixed_name = self.__class__._prefixed_name(self.output_prefix, feature_name) - feature_out_name = f"{prefixed_name}_{self.suffix_int}" - - nnzs = None - if ( - isinstance(feature_value[0], list) - and self.output_schema[feature_out_name].is_ragged - ): - flattened_value = [] - for val in feature_value: - flattened_value.extend(val) - - nnzs = [len(vals) for vals in feature_value] - feature_value = [flattened_value] - - feature_array = np.array(feature_value).T.astype( - self.output_schema[feature_out_name].dtype - ) - if not nnzs: - nnzs = [len(feature_array)] - feature_out_nnz = f"{prefixed_name}_{self.suffix_int+1}" - feature_nnzs = np.array([nnzs], dtype=self.output_schema[feature_out_nnz].dtype).T - - output_tensors[feature_out_name] = feature_array - output_tensors[feature_out_nnz] = feature_nnzs - - return InferenceDataFrame(output_tensors) - - @classmethod - def _prefixed_name(cls, output_prefix, col_name): - if output_prefix and col_name and not col_name.startswith(output_prefix): - return f"{output_prefix}_{col_name}" - else: - return col_name diff --git a/merlin/systems/dag/ops/operator.py b/merlin/systems/dag/ops/operator.py deleted file mode 100644 index c81ddee4837..00000000000 --- a/merlin/systems/dag/ops/operator.py +++ /dev/null @@ -1,138 +0,0 @@ -import json -import os -import pathlib -from abc import abstractclassmethod, abstractmethod -from shutil import copyfile - -# this needs to be before any modules that import protobuf -os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" - -from google.protobuf import text_format # noqa - -import merlin.systems.triton.model_config_pb2 as model_config # noqa -from merlin.dag import BaseOperator # noqa -from merlin.systems.dag.node import InferenceNode # noqa -from merlin.systems.triton.export import _convert_dtype # noqa - - -class InferenceDataFrame: - def __init__(self, tensors=None): - self.tensors = tensors or 
{} - - def __getitem__(self, col_items): - if isinstance(col_items, list): - results = {name: self.tensors[name] for name in col_items} - return InferenceDataFrame(results) - else: - return self.tensors[col_items] - - def __len__(self): - return len(self.tensors) - - def __iter__(self): - for name, tensor in self.tensors.items(): - yield name, tensor - - def __repr__(self): - dict_rep = {} - for k, v in self.tensors.items(): - dict_rep[k] = v - return str(dict_rep) - - -class InferenceOperator(BaseOperator): - @property - def export_name(self): - return self.__class__.__name__.lower() - - @abstractmethod - def export(self, export_path, input_schema, output_schema, node_id=None, version=1): - pass - - def create_node(self, selector): - return InferenceNode(selector) - - -class PipelineableInferenceOperator(InferenceOperator): - @abstractclassmethod - def from_config(cls, config): - pass - - @abstractmethod - def transform(self, df: InferenceDataFrame) -> InferenceDataFrame: - """Transform the dataframe by applying this operator to the set of input columns - - Parameters - ----------- - df: Dataframe - A pandas or cudf dataframe that this operator will work on - - Returns - ------- - DataFrame - Returns a transformed dataframe for this operator - """ - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - params = params or {} - - node_name = f"{node_id}_{self.export_name}" if node_id is not None else self.export_name - - node_export_path = pathlib.Path(path) / node_name - node_export_path.mkdir(exist_ok=True) - - config = model_config.ModelConfig(name=node_name, backend="nvtabular", platform="op_runner") - - config.parameters["operator_names"].string_value = json.dumps([node_name]) - - config.parameters[node_name].string_value = json.dumps( - { - "module_name": self.__class__.__module__, - "class_name": self.__class__.__name__, - "input_dict": json.dumps(_schema_to_dict(input_schema)), - "output_dict": json.dumps(_schema_to_dict(output_schema)), - "params": json.dumps(params), - } - ) - - for col_name, col_dict in _schema_to_dict(input_schema).items(): - config.input.append( - model_config.ModelInput( - name=col_name, data_type=_convert_dtype(col_dict["dtype"]), dims=[-1, -1] - ) - ) - - for col_name, col_dict in _schema_to_dict(output_schema).items(): - # this assumes the list columns are 1D tensors both for cats and conts - config.output.append( - model_config.ModelOutput( - name=col_name.split("/")[0], - data_type=_convert_dtype(col_dict["dtype"]), - dims=[-1, -1], - ) - ) - - with open(os.path.join(node_export_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - - os.makedirs(node_export_path, exist_ok=True) - os.makedirs(os.path.join(node_export_path, str(version)), exist_ok=True) - copyfile( - os.path.join(os.path.dirname(__file__), "..", "..", "triton", "oprunner_model.py"), - os.path.join(node_export_path, str(version), "model.py"), - ) - - return config - - -def _schema_to_dict(schema): - # TODO: Write the conversion - schema_dict = {} - for col_name, col_schema in schema.column_schemas.items(): - schema_dict[col_name] = { - "dtype": col_schema.dtype.name, - "is_list": col_schema.is_list, - "is_ragged": col_schema.is_ragged, - } - - return schema_dict diff --git a/merlin/systems/dag/ops/session_filter.py b/merlin/systems/dag/ops/session_filter.py deleted file mode 100644 index ead50284321..00000000000 --- a/merlin/systems/dag/ops/session_filter.py +++ /dev/null @@ -1,97 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA 
CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import json - -import numpy as np - -from merlin.dag import ColumnSelector, Node -from merlin.schema import ColumnSchema, Schema -from merlin.systems.dag.ops.operator import InferenceDataFrame, PipelineableInferenceOperator - - -class FilterCandidates(PipelineableInferenceOperator): - def __init__(self, filter_out, input_col=None): - self.filter_out = Node.construct_from(filter_out) - self._input_col = input_col - self._filter_out_col = filter_out - super().__init__() - - @classmethod - def from_config(cls, config): - parameters = json.loads(config.get("params", "")) - filter_out_col = parameters["filter_out_col"] - input_col = parameters["input_col"] - return FilterCandidates(filter_out_col, input_col) - - @property - def dependencies(self): - return self.filter_out - - def compute_input_schema( - self, - root_schema: Schema, - parents_schema: Schema, - deps_schema: Schema, - selector: ColumnSelector, - ) -> Schema: - input_schema = super().compute_input_schema( - root_schema, parents_schema, deps_schema, selector - ) - - if len(parents_schema.column_schemas) > 1: - raise ValueError( - "More than one input has been detected for this node," - / f"inputs received: {input_schema.column_names}" - ) - if len(deps_schema.column_schemas) > 1: - raise ValueError( - "More than one dependency input has been detected" - / f"for this node, inputs received: {input_schema.column_names}" - ) - - # 1 for deps and 1 for parents - if len(input_schema.column_schemas) > 2: - raise ValueError( - "More than one input has been detected for this node," - / f"inputs received: {input_schema.column_names}" - ) - - self._input_col = parents_schema.column_names[0] - self._filter_out_col = deps_schema.column_names[0] - - return input_schema - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - return Schema([ColumnSchema("filtered_ids", dtype=np.int32, is_list=False)]) - - def transform(self, df: InferenceDataFrame): - candidate_ids = df[self._input_col] - filter_ids = df[self._filter_out_col] - - filtered_results = np.array([candidate_ids[~np.isin(candidate_ids, filter_ids)]]).T - return InferenceDataFrame({"filtered_ids": filtered_results}) - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - params = params or {} - self_params = { - "input_col": self._input_col, - "filter_out_col": self._filter_out_col, - } - self_params.update(params) - return super().export(path, input_schema, output_schema, self_params, node_id, version) diff --git a/merlin/systems/dag/ops/softmax_sampling.py b/merlin/systems/dag/ops/softmax_sampling.py deleted file mode 100644 index 04063584a37..00000000000 --- a/merlin/systems/dag/ops/softmax_sampling.py +++ /dev/null @@ -1,108 +0,0 @@ -import json - -import numpy as np - -from merlin.dag.node import Node -from merlin.dag.selector import ColumnSelector -from merlin.schema import ColumnSchema, Schema 
-from merlin.systems.dag.ops.operator import InferenceDataFrame, PipelineableInferenceOperator - - -class SoftmaxSampling(PipelineableInferenceOperator): - def __init__(self, relevance_col, temperature=20.0, topk=10, _input_col=None): - self.relevance_col = Node.construct_from(relevance_col) - self.temperature = temperature - self.topk = topk - self._input_col_name = _input_col - self._relevance_col_name = relevance_col - super().__init__() - - @classmethod - def from_config(cls, config): - """Load operator and properties from Triton config""" - parameters = json.loads(config.get("params", "")) - relevance_col = parameters["relevance_col"] - input_col = parameters["input_col"] - temperature = parameters["temperature"] - topk = parameters["topk"] - - return SoftmaxSampling( - relevance_col, temperature=temperature, topk=topk, _input_col=input_col - ) - - @property - def dependencies(self): - return self.relevance_col - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - """Write out a Triton model config directory""" - params = params or {} - self_params = { - "input_col": self._input_col_name, - "relevance_col": self._relevance_col_name, - "temperature": self.temperature, - "topk": self.topk, - } - self_params.update(params) - return super().export(path, input_schema, output_schema, self_params, node_id, version) - - def compute_input_schema( - self, - root_schema: Schema, - parents_schema: Schema, - deps_schema: Schema, - selector: ColumnSelector, - ) -> Schema: - input_schema = super().compute_input_schema( - root_schema, parents_schema, deps_schema, selector - ) - if len(parents_schema.column_schemas) > 1: - raise ValueError( - "More than one input has been detected for this node," - f" inputs received: {input_schema.column_names}" - ) - - self._input_col_name = parents_schema.column_names[0] - self._relevance_col_name = deps_schema.column_names[0] - return input_schema - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - """Describe the operator's outputs""" - return Schema([ColumnSchema("ordered_ids", dtype=np.int32, is_list=True, is_ragged=True)]) - - def transform(self, df: InferenceDataFrame) -> InferenceDataFrame: - """Transform the dataframe by applying this operator to the set of input columns""" - # Extract parameters from the request - candidate_ids = df[self._input_col_name].reshape(-1) - - predicted_scores = df[self._relevance_col_name].reshape(-1) - - # Exponential sort trick for sampling from a distribution without replacement from: - - # Pavlos S. Efraimidis, Paul G. Spirakis, Weighted random sampling with a reservoir, - # Information Processing Letters, Volume 97, Issue 5, 2006, Pages 181-185, ISSN 0020-0190, - # https://doi.org/10.1016/j.ipl.2005.11.003. 
- - # As implemented by Tim Vieira in "Algorithms for sampling without replacement" - # https://timvieira.github.io/blog/post/2019/09/16/algorithms-for-sampling-without-replacement/ - - # The weights for the sampling distribution are the softmax of the scores - weights = np.exp(self.temperature * predicted_scores) / np.sum(predicted_scores) - - # This is the core of the exponential sampling trick, which creates a - # set of values that depend on both the predicted scores and random - # variables, resulting in a set of values that will sort into an order - # that reflects sampling without replacement according to the weight - # distribution - num_items = candidate_ids.shape[0] - exponentials = -np.log(np.random.uniform(0, 1, size=(num_items,))) - exponentials /= weights - - # This is just bookkeeping to produce the final ordered list of recs - sorted_indices = np.argsort(exponentials) - topk_movie_ids = candidate_ids[sorted_indices][: self.topk] - ordered_movie_ids = topk_movie_ids.reshape(1, -1).T - - return InferenceDataFrame({"ordered_ids": ordered_movie_ids}) diff --git a/merlin/systems/dag/ops/tensorflow.py b/merlin/systems/dag/ops/tensorflow.py deleted file mode 100644 index 68c5dc1d058..00000000000 --- a/merlin/systems/dag/ops/tensorflow.py +++ /dev/null @@ -1,163 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
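The sampling logic in SoftmaxSampling.transform above can be exercised in isolation. Below is a standalone NumPy sketch of the same exponential-sort trick; note that it normalizes the weights with a standard softmax (sum of exponentials), whereas the removed code divides by the sum of the raw scores.

import numpy as np

def softmax_sample(candidate_ids, scores, temperature=20.0, topk=10, seed=None):
    # Weighted sampling without replacement via the exponential-sort trick
    # (Efraimidis & Spirakis 2006; Vieira's post cited above)
    rng = np.random.default_rng(seed)
    weights = np.exp(temperature * scores)
    weights = weights / weights.sum()  # softmax of the relevance scores
    keys = -np.log(rng.uniform(size=candidate_ids.shape[0])) / weights
    return candidate_ids[np.argsort(keys)][:topk]  # smallest key is drawn first

ids = np.arange(100, dtype=np.int32)
scores = np.linspace(0.0, 1.0, 100, dtype=np.float32)
print(softmax_sample(ids, scores, topk=5))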
-# -import os -import pathlib -import tempfile -from shutil import copytree - -# this needs to be before any modules that import protobuf -os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" - -import tensorflow as tf # noqa -from google.protobuf import text_format # noqa - -import merlin.systems.triton.model_config_pb2 as model_config # noqa -from merlin.dag import ColumnSelector # noqa -from merlin.schema import ColumnSchema, Schema # noqa -from merlin.systems.dag.ops.operator import InferenceOperator # noqa -from merlin.systems.triton.export import _convert_dtype # noqa - - -class PredictTensorflow(InferenceOperator): - def __init__(self, model_or_path, custom_objects=None): - custom_objects = custom_objects or {} - - if isinstance(model_or_path, (str, os.PathLike)): - self.path = model_or_path - self.model = tf.keras.models.load_model(self.path, custom_objects=custom_objects) - else: - self.path = None - self.model = model_or_path - - signatures = getattr(self.model, "signatures", {}) or {} - default_signature = signatures.get("serving_default") - - if not default_signature: - # roundtrip saved self.model to disk to generate signature if it doesn't exist - - with tempfile.TemporaryDirectory() as tmp_dir: - tf_model_path = pathlib.Path(tmp_dir) / "model.savedmodel" - self.model.save(tf_model_path, include_optimizer=False) - reloaded = tf.keras.models.load_model(tf_model_path) - default_signature = reloaded.signatures["serving_default"] - - inputs = list(default_signature.structured_input_signature[1].values()) - outputs = list(default_signature.structured_outputs.values()) - - input_col_names = [col.name.split("/")[0] for col in inputs] - output_col_names = [col.name.split("/")[0] for col in outputs] - - self.input_schema = Schema() - for col, input_col in zip(input_col_names, inputs): - self.input_schema.column_schemas[col] = ColumnSchema( - col, dtype=input_col.dtype.as_numpy_dtype - ) - - self.output_schema = Schema() - for col, output_col in zip(output_col_names, outputs): - self.output_schema.column_schemas[col] = ColumnSchema( - col, dtype=output_col.dtype.as_numpy_dtype - ) - super().__init__() - - def export(self, path, input_schema, output_schema, node_id=None, version=1): - """Create a directory inside supplied path based on our export name""" - node_name = f"{node_id}_{self.export_name}" if node_id is not None else self.export_name - - node_export_path = pathlib.Path(path) / node_name - node_export_path.mkdir(exist_ok=True) - - tf_model_path = pathlib.Path(node_export_path) / str(version) / "model.savedmodel" - - if self.path: - copytree( - str(self.path), - tf_model_path, - dirs_exist_ok=True, - ) - else: - self.model.save(tf_model_path, include_optimizer=False) - - return self._export_model(self.model, node_name, node_export_path, version=version) - - def compute_input_schema( - self, - root_schema: Schema, - parents_schema: Schema, - deps_schema: Schema, - selector: ColumnSelector, - ) -> Schema: - return self.input_schema - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - return self.output_schema - - def _export_model(self, model, name, output_path, version=1): - """Exports a TensorFlow model for serving with Triton - - Parameters - ---------- - model: - The tensorflow model that should be served - name: - The name of the triton model to export - output_path: - The path to write the exported model to - """ - tf_model_path = os.path.join(output_path, str(version), 
"model.savedmodel") - config = model_config.ModelConfig( - name=name, backend="tensorflow", platform="tensorflow_savedmodel" - ) - - inputs, outputs = model.inputs, model.outputs - - if not inputs or not outputs: - signatures = getattr(model, "signatures", {}) or {} - default_signature = signatures.get("serving_default") - if not default_signature: - # roundtrip saved model to disk to generate signature if it doesn't exist - - reloaded = tf.keras.models.load_model(tf_model_path) - default_signature = reloaded.signatures["serving_default"] - - inputs = list(default_signature.structured_input_signature[1].values()) - outputs = list(default_signature.structured_outputs.values()) - - config.parameters["TF_GRAPH_TAG"].string_value = "serve" - config.parameters["TF_SIGNATURE_DEF"].string_value = "serving_default" - - for col in inputs: - config.input.append( - model_config.ModelInput( - name=f"{col.name}", data_type=_convert_dtype(col.dtype), dims=[-1, col.shape[1]] - ) - ) - - for col in outputs: - # this assumes the list columns are 1D tensors both for cats and conts - config.output.append( - model_config.ModelOutput( - name=col.name.split("/")[0], - data_type=_convert_dtype(col.dtype), - dims=[-1, col.shape[1]], - ) - ) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config diff --git a/merlin/systems/dag/ops/unroll_features.py b/merlin/systems/dag/ops/unroll_features.py deleted file mode 100644 index 7d908069c37..00000000000 --- a/merlin/systems/dag/ops/unroll_features.py +++ /dev/null @@ -1,85 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import json - -import numpy as np - -from merlin.dag import Node -from merlin.dag.selector import ColumnSelector -from merlin.schema import Schema -from merlin.systems.dag.ops.operator import InferenceDataFrame, PipelineableInferenceOperator - - -class UnrollFeatures(PipelineableInferenceOperator): - def __init__(self, item_id_col, unroll_cols, unrolled_prefix=""): - self.item_id_col = item_id_col - self.unroll_cols = Node.construct_from(unroll_cols) - self.unrolled_prefix = unrolled_prefix - super().__init__() - - @classmethod - def from_config(cls, config): - parameters = json.loads(config.get("params", "")) - candidate_col = parameters["item_id_col"] - unroll_cols = parameters["unroll_cols"] - unrolled_prefix = parameters["unrolled_prefix"] - return UnrollFeatures(candidate_col, unroll_cols, unrolled_prefix) - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - params = params or {} - self_params = { - "item_id_col": self.item_id_col, - "unroll_cols": self._unroll_col_names, - "unrolled_prefix": self.unrolled_prefix, - } - self_params.update(params) - return super().export(path, input_schema, output_schema, self_params, node_id, version) - - @property - def dependencies(self): - return self.unroll_cols - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - schema = super().compute_output_schema(input_schema, col_selector, prev_output_schema) - - for col_name, col_schema in self.unroll_cols.output_schema.column_schemas.items(): - schema.column_schemas.pop(col_name, None) - col_name = f"{self.unrolled_prefix}_{col_name}" if self.unrolled_prefix else col_name - schema[col_name] = col_schema.with_name(col_name) - - return schema - - def transform(self, df: InferenceDataFrame): - num_items = df[self.item_id_col].shape[0] - outputs = {} - for col_name, col_value in df.tensors.items(): - outputs[col_name] = col_value - - for col in self._unroll_col_names: - target = outputs.pop(col) - col_name = f"{self.unrolled_prefix}_{col}" if self.unrolled_prefix else col - outputs[col_name] = np.repeat(target, num_items, axis=0) - - return InferenceDataFrame(outputs) - - @property - def _unroll_col_names(self): - if self.unroll_cols.selector: - return self.unroll_cols.selector.names - else: - return self.unroll_cols.output_columns.names diff --git a/merlin/systems/dag/ops/workflow.py b/merlin/systems/dag/ops/workflow.py deleted file mode 100644 index 3c839f08357..00000000000 --- a/merlin/systems/dag/ops/workflow.py +++ /dev/null @@ -1,72 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import pathlib - -from merlin.dag import ColumnSelector -from merlin.schema import Schema -from merlin.systems.dag.ops.operator import InferenceOperator -from merlin.systems.triton.export import _generate_nvtabular_config - - -class TransformWorkflow(InferenceOperator): - def __init__( - self, - workflow, - sparse_max=None, - max_batch_size=None, - label_columns=None, - model_framework=None, - cats=None, - conts=None, - ): - super().__init__() - - self.workflow = workflow - self.sparse_max = sparse_max or {} - self.max_batch_size = max_batch_size - self.label_columns = label_columns or [] - self.model_framework = model_framework or "" - self.cats = cats or [] - self.conts = conts or [] - super().__init__() - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - return self.workflow.output_schema - - def export(self, path, input_schema, output_schema, node_id=None, version=1): - """Create a directory inside supplied path based on our export name""" - modified_workflow = self.workflow.remove_inputs(self.label_columns) - - node_name = f"{node_id}_{self.export_name}" if node_id is not None else self.export_name - - node_export_path = pathlib.Path(path) / node_name - node_export_path.mkdir(exist_ok=True) - - workflow_export_path = node_export_path / str(version) / "workflow" - modified_workflow.save(str(workflow_export_path)) - - return _generate_nvtabular_config( - modified_workflow, - node_name, - node_export_path, - backend="nvtabular", - sparse_max=self.sparse_max, - max_batch_size=self.max_batch_size, - cats=self.cats, - conts=self.conts, - ) diff --git a/merlin/systems/triton/__init__.py b/merlin/systems/triton/__init__.py deleted file mode 100644 index 8d11484134f..00000000000 --- a/merlin/systems/triton/__init__.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import json -import os - -import pandas as pd - -# this needs to be before any modules that import protobuf -os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" - -import tritonclient.grpc as grpcclient # noqa -from tritonclient.utils import np_to_triton_dtype # noqa - -from merlin.core.dispatch import is_list_dtype, is_string_dtype, make_df # noqa -from merlin.systems.triton.export import ( # noqa - _convert_string2pytorch_dtype, - export_hugectr_ensemble, - export_pytorch_ensemble, - export_tensorflow_ensemble, - generate_hugectr_model, - generate_nvtabular_model, -) - - -def convert_df_to_triton_input(column_names, batch, input_class=grpcclient.InferInput): - columns = [(col, batch[col]) for col in column_names] - inputs = [] - for i, (name, col) in enumerate(columns): - if is_list_dtype(col): - if isinstance(col, pd.Series): - raise ValueError("this function doesn't support CPU list values yet") - inputs.append( - _convert_column_to_triton_input( - col._column.offsets.values_host.astype("int64"), name + "__nnzs", input_class - ) - ) - inputs.append( - _convert_column_to_triton_input( - col.list.leaves.values_host.astype("int64"), name + "__values", input_class - ) - ) - else: - values = col.values if isinstance(col, pd.Series) else col.values_host - inputs.append(_convert_column_to_triton_input(values, name, input_class)) - return inputs - - -def _convert_column_to_triton_input(col, name, input_class=grpcclient.InferInput): - col = col.reshape(len(col), 1) - input_tensor = input_class(name, col.shape, np_to_triton_dtype(col.dtype)) - input_tensor.set_data_from_numpy(col) - return input_tensor - - -def convert_triton_output_to_df(columns, response): - return make_df({col: response.as_numpy(col) for col in columns}) - - -def get_column_types(path): - return json.load(open(os.path.join(path, "column_types.json"))) - - -def _convert_tensor(t): - out = t.as_numpy() - if len(out.shape) == 2: - out = out[:, 0] - # cudf doesn't seem to handle dtypes like |S15 or object that well - if is_string_dtype(out.dtype): - out = out.astype("str") - return out diff --git a/merlin/systems/triton/conversions.py b/merlin/systems/triton/conversions.py deleted file mode 100644 index 123f467be1e..00000000000 --- a/merlin/systems/triton/conversions.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
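The helpers deleted above from merlin/systems/triton/__init__.py were typically used on the client side roughly as follows. This is a hedged sketch: the server URL, model name, output column, and the `batch` dataframe are placeholders, not part of this patch.

import tritonclient.grpc as grpcclient
from merlin.systems.triton import convert_df_to_triton_input, convert_triton_output_to_df

# `batch` is a cudf/pandas DataFrame holding the request features (assumed to exist)
inputs = convert_df_to_triton_input(batch.columns, batch)
client = grpcclient.InferenceServerClient("localhost:8001")
response = client.infer("ensemble_model", inputs)
result = convert_triton_output_to_df(["ordered_ids"], response)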
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import itertools - -try: - import cudf - import cupy as cp -except ImportError: - cudf = cp = None - -import numpy as np -import pandas as pd - -from merlin.core.dispatch import build_cudf_list_column, is_list_dtype -from merlin.dag import Supports - - -def convert_format(tensors, kind, target_kind): - """Converts data from format 'kind' to one of the formats specified in 'target_kind' - This allows us to convert data to/from dataframe representations for operators that - only support certain reprentations - """ - - # this is all much more difficult because of multihot columns, which don't have - # great representations in dicts of cpu/gpu arrays. we're representing multihots - # as tuples of (values, offsets) tensors in this case - but have to do work at - # each step in terms of converting. - if kind & target_kind: - return tensors, kind - - elif target_kind & Supports.GPU_DICT_ARRAY: - if kind == Supports.CPU_DICT_ARRAY: - return _convert_array(tensors, cp.array), Supports.GPU_DICT_ARRAY - elif kind == Supports.CPU_DATAFRAME: - return _pandas_to_array(tensors, False), Supports.GPU_DICT_ARRAY - elif kind == Supports.GPU_DATAFRAME: - return _cudf_to_array(tensors, False), Supports.GPU_DICT_ARRAY - - elif target_kind & Supports.CPU_DICT_ARRAY: - if kind == Supports.GPU_DICT_ARRAY: - return _convert_array(tensors, cp.asnumpy), Supports.CPU_DICT_ARRAY - elif kind == Supports.CPU_DATAFRAME: - return _pandas_to_array(tensors, True), Supports.CPU_DICT_ARRAY - elif kind == Supports.GPU_DATAFRAME: - return _cudf_to_array(tensors, True), Supports.CPU_DICT_ARRAY - - elif target_kind & Supports.GPU_DATAFRAME: - if kind == Supports.CPU_DATAFRAME: - return cudf.DataFrame(tensors), Supports.GPU_DATAFRAME - return _array_to_cudf(tensors), Supports.GPU_DATAFRAME - - elif target_kind & Supports.CPU_DATAFRAME: - if kind == Supports.GPU_DATAFRAME: - return tensors.to_pandas(), Supports.CPU_DATAFRAME - elif kind == Supports.CPU_DICT_ARRAY: - return _array_to_pandas(tensors), Supports.CPU_DATAFRAME - elif kind == Supports.GPU_DICT_ARRAY: - return _array_to_pandas(_convert_array(tensors, cp.asnumpy)), Supports.CPU_DATAFRAME - - raise ValueError("unsupported target for converting tensors", target_kind) - - -def _convert_array(tensors, converter): - output = {} - for name, tensor in tensors.items(): - if isinstance(tensor, tuple): - output[name] = tuple(converter(t) for t in tensor) - else: - output[name] = converter(tensor) - return output - - -def _array_to_pandas(tensors): - output = pd.DataFrame() - for name, tensor in tensors.items(): - if isinstance(tensor, tuple): - values, offsets = tensor - output[name] = [values[offsets[i] : offsets[i + 1]] for i in range(len(offsets) - 1)] - else: - output[name] = tensor - return output - - -def _array_to_cudf(tensors): - output = cudf.DataFrame() - for name, tensor in tensors.items(): - if isinstance(tensor, tuple): - output[name] = build_cudf_list_column(tensor[0], tensor[1].astype("int32")) - else: - output[name] = tensor - return output 
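The comment in convert_format above describes multi-hot columns as (values, offsets) tensor pairs. The following small sketch illustrates that layout and the row reconstruction performed by _array_to_pandas; the column names and values are made up.

import numpy as np
import pandas as pd

tensors = {
    "user_id": np.array([1, 2]),
    # multi-hot column: row 0 -> [10, 11, 12], row 1 -> [20]
    "item_ids": (np.array([10, 11, 12, 20]), np.array([0, 3, 4])),
}
values, offsets = tensors["item_ids"]
rows = [values[offsets[i] : offsets[i + 1]] for i in range(len(offsets) - 1)]
df = pd.DataFrame({"user_id": tensors["user_id"], "item_ids": rows})
print(df)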
- - -def _pandas_to_array(df, cpu=True): - array_type = np.array if cpu else cp.array - - output = {} - for name in df.columns: - col = df[name] - if pd.api.types.is_list_like(col.values[0]): - offsets = pd.Series([0]).append(col.map(len).cumsum()).values - if not cpu: - offsets = cp.array(offsets) - values = array_type(list(itertools.chain(*col))) - output[name] = (values, offsets) - else: - values = col.values - if not cpu: - values = cp.array(values) - output[name] = values - - return output - - -def _cudf_to_array(df, cpu=True): - output = {} - for name in df.columns: - col = df[name] - if is_list_dtype(col.dtype): - offsets = col._column.offsets.values_host if cpu else col._column.offsets.values - values = col.list.leaves.values_host if cpu else col.list.leaves.values - output[name] = (values, offsets) - else: - output[name] = col.values_host if cpu else col.values - - return output diff --git a/merlin/systems/triton/export.py b/merlin/systems/triton/export.py deleted file mode 100644 index bced3607c3c..00000000000 --- a/merlin/systems/triton/export.py +++ /dev/null @@ -1,795 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import json -import os -import warnings -from shutil import copyfile, copytree - -import numpy as np - -# this needs to be before any modules that import protobuf -os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" - -from google.protobuf import text_format # noqa - -import merlin.systems.triton.model_config_pb2 as model_config # noqa -from merlin.core.dispatch import is_string_dtype # noqa -from merlin.dag import ColumnSelector # noqa -from merlin.schema import Tags # noqa - - -def export_tensorflow_ensemble( - model, - workflow, - name, - model_path, - label_columns=None, - sparse_max=None, - version=1, - nvtabular_backend="nvtabular", - cats=None, - conts=None, -): - """Creates an ensemble triton server model, with the first model being a nvtabular - preprocessing, and the second by a tensorflow savedmodel - - Parameters - ---------- - model: - The tensorflow model that should be served - workflow: - The nvtabular workflow used in preprocessing - name: - The base name of the various triton models - model_path: - The root path to write out files to - cats: - Names of the categorical columns - conts: - Names of the continuous columns - label_columns: - Labels in the dataset (will be removed from the dataset) - sparse_max: - Max length of the each row when the sparse data is converted to dense - version: - Version of the model - nvtabular_backend: "python" or "nvtabular" - The backend that will be used for inference in Triton. 
- """ - labels = ( - label_columns - or workflow.output_schema.apply(ColumnSelector(tags=[Tags.TARGET])).column_names - ) - workflow = workflow.remove_inputs(labels) - - # generate the TF saved model - tf_path = os.path.join(model_path, name + "_tf") - tf_config = export_tensorflow_model(model, name + "_tf", tf_path, version=version) - - # override the output dtype of the nvtabular model if necessary (fixes mismatches - # in dtypes between tf inputs and nvt outputs) - for column in tf_config.input: - tf_dtype = _triton_datatype_to_dtype(column.data_type) - nvt_col_name = column.name.replace("__values", "").replace("__nnzs", "") - col_schema = workflow.output_schema[nvt_col_name] - if col_schema.dtype and col_schema.dtype != tf_dtype: - warnings.warn( - f"TF model expects {tf_dtype} for column {col_schema.name}, but workflow " - f" is producing type {col_schema.dtype}. Overriding dtype in NVTabular workflow." - ) - workflow.output_schema.column_schemas[col_schema.name] = col_schema.with_dtype(tf_dtype) - - # generate the nvtabular triton model - preprocessing_path = os.path.join(model_path, name + "_nvt") - nvt_config = generate_nvtabular_model( - workflow, - name + "_nvt", - preprocessing_path, - sparse_max=sparse_max, - backend=nvtabular_backend, - cats=cats, - conts=conts, - ) - - # generate the triton ensemble - ensemble_path = os.path.join(model_path, name) - os.makedirs(ensemble_path, exist_ok=True) - os.makedirs(os.path.join(ensemble_path, str(version)), exist_ok=True) - _generate_ensemble_config(name, ensemble_path, nvt_config, tf_config) - - -def export_pytorch_ensemble( - model, - workflow, - sparse_max, - name, - model_path, - label_columns=None, - use_fix_dtypes=True, - version=1, - nvtabular_backend="python", - cats=None, - conts=None, -): - """Creates an ensemble triton server model, with the first model being a nvtabular - preprocessing, and the second by a pytorch savedmodel - - Parameters - ---------- - model: - The pytorch model that should be served - workflow: - The nvtabular workflow used in preprocessing - sparse_max: - Max length of the each row when the sparse data is converted to dense - name: - The base name of the various triton models - model_path: - The root path to write out files to - cats: - Names of the categorical columns - conts: - Names of the continuous columns - label_columns: - Labels in the dataset (will be removed from the dataset) - use_fix_dtypes: - Transformers4Rec is using fixed dtypes and this option is - whether to use fixed dtypes in inference or not - version: - Version of the model - nvtabular_backend: "python" or "nvtabular" - The backend that will be used for inference in Triton. - """ - labels = ( - label_columns - or workflow.output_schema.apply(ColumnSelector(tags=[Tags.TARGET])).column_names - ) - workflow = workflow.remove_inputs(labels) - - # generate the TF saved model - pt_path = os.path.join(model_path, name + "_pt") - pt_config = export_pytorch_model( - model, workflow, sparse_max, name + "_pt", pt_path, use_fix_dtypes, version=version - ) - - # override the output dtype of the nvtabular model if necessary (fixes mismatches - # in dtypes between tf inputs and nvt outputs) - for column in pt_config.input: - pt_dtype = _triton_datatype_to_dtype(column.data_type) - nvt_dtype = workflow.output_dtypes.get(column.name) - if nvt_dtype and nvt_dtype != pt_dtype: - warnings.warn( - f"PyTorch model expects {pt_dtype} for column {column.name}, but workflow " - f" is producing type {nvt_dtype}. Overriding dtype in NVTabular workflow." 
- ) - workflow.output_dtypes[column.name] = pt_dtype - - # generate the nvtabular triton model - preprocessing_path = os.path.join(model_path, name + "_nvt") - nvt_config = generate_nvtabular_model( - workflow, - name + "_nvt", - preprocessing_path, - backend=nvtabular_backend, - cats=cats, - conts=conts, - ) - - # generate the triton ensemble - ensemble_path = os.path.join(model_path, name) - os.makedirs(ensemble_path, exist_ok=True) - os.makedirs(os.path.join(ensemble_path, str(version)), exist_ok=True) - _generate_ensemble_config(name, ensemble_path, nvt_config, pt_config) - - -def export_hugectr_ensemble( - workflow, - hugectr_model_path, - hugectr_params, - name, - output_path, - version=1, - max_batch_size=None, - nvtabular_backend="python", - cats=None, - conts=None, - label_columns=None, -): - """Creates an ensemble hugectr server model, with the first model being a nvtabular - preprocessing, and the second by a hugectr savedmodel - - Parameters - ---------- - workflow: - The nvtabular workflow used in preprocessing - hugectr_model_path: - The path of the trained model files - hugectr_params: - HugeCTR specific parameters - name: - The base name of the various triton models - output_path: - The path where the models will be served - version: - The version of the model - max_batch_size: - Max batch size that Triton can receive - nvtabular_backend: "python" or "nvtabular" - The backend that will be used for inference in Triton. - cats: - Names of the categorical columns - conts: - Names of the continuous columns - label_columns: - Labels in the dataset (will be removed from the dataset) - """ - cats = cats or workflow.output_schema.apply(ColumnSelector(tags=[Tags.CATEGORICAL])) - conts = conts or workflow.output_schema.apply(ColumnSelector(tags=[Tags.CONTINUOUS])) - labels = label_columns or workflow.output_schema.apply(ColumnSelector(tags=[Tags.TARGET])) - - if not cats and not conts: - raise ValueError("Either cats or conts has to have a value.") - - workflow = workflow.remove_inputs(labels) - - # generate the nvtabular triton model - preprocessing_path = os.path.join(output_path, name + "_nvt") - nvt_config = generate_nvtabular_model( - workflow=workflow, - name=name + "_nvt", - output_path=preprocessing_path, - version=version, - output_model="hugectr", - max_batch_size=max_batch_size, - backend=nvtabular_backend, - cats=cats, - conts=conts, - ) - - hugectr_params["label_dim"] = len(labels) - if conts is None: - hugectr_params["des_feature_num"] = 0 - else: - hugectr_params["des_feature_num"] = len(conts) - - if cats is None: - hugectr_params["cat_feature_num"] = 0 - else: - hugectr_params["cat_feature_num"] = len(cats) - - # generate the HugeCTR saved model - hugectr_config = generate_hugectr_model( - trained_model_path=hugectr_model_path, - hugectr_params=hugectr_params, - name=name, - output_path=output_path, - version=version, - max_batch_size=max_batch_size, - ) - - # generate the triton ensemble - ensemble_path = os.path.join(output_path, name + "_ens") - os.makedirs(ensemble_path, exist_ok=True) - os.makedirs(os.path.join(ensemble_path, str(version)), exist_ok=True) - _generate_ensemble_config(name, ensemble_path, nvt_config, hugectr_config, "_ens") - - -def _generate_ensemble_config(name, output_path, nvt_config, nn_config, name_ext=""): - config = model_config.ModelConfig( - name=name + name_ext, platform="ensemble", max_batch_size=nvt_config.max_batch_size - ) - config.input.extend(nvt_config.input) - config.output.extend(nn_config.output) - - nn_input_cols = 
set(col.name for col in nn_config.input) - - nvt_step = model_config.ModelEnsembling.Step(model_name=nvt_config.name, model_version=-1) - for input_col in nvt_config.input: - nvt_step.input_map[input_col.name] = input_col.name - for output_col in nvt_config.output: - if output_col.name not in nn_input_cols: - warnings.warn( - f"Column {output_col.name} is being generated by NVTabular workflow " - f" but is unused in {nn_config.name} model" - ) - continue - nvt_step.output_map[output_col.name] = output_col.name + "_nvt" - - tf_step = model_config.ModelEnsembling.Step(model_name=nn_config.name, model_version=-1) - for input_col in nn_config.input: - tf_step.input_map[input_col.name] = input_col.name + "_nvt" - for output_col in nn_config.output: - tf_step.output_map[output_col.name] = output_col.name - - config.ensemble_scheduling.step.append(nvt_step) - config.ensemble_scheduling.step.append(tf_step) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def generate_nvtabular_model( - workflow, - name, - output_path, - version=1, - output_model=None, - max_batch_size=None, - sparse_max=None, - backend="python", - cats=None, - conts=None, -): - """converts a workflow to a triton mode - Parameters - ---------- - sparse_max: - Max length of the each row when the sparse data is converted to dense - cats: - Names of the categorical columns - conts: - Names of the continuous columns - """ - workflow.save(os.path.join(output_path, str(version), "workflow")) - config = _generate_nvtabular_config( - workflow, - name, - output_path, - output_model, - max_batch_size, - sparse_max=sparse_max, - backend=backend, - cats=cats, - conts=conts, - ) - - # copy the model file over. note that this isn't necessary with the c++ backend, but - # does provide us to use the python backend with just changing the 'backend' parameter - copyfile( - os.path.join(os.path.dirname(__file__), "workflow_model.py"), - os.path.join(output_path, str(version), "model.py"), - ) - - return config - - -def generate_hugectr_model( - trained_model_path, - hugectr_params, - name, - output_path, - version=1, - max_batch_size=None, -): - """converts a trained HugeCTR model to a triton mode""" - - out_path = os.path.join(output_path, name) - os.makedirs(os.path.join(output_path, name), exist_ok=True) - out_path_version = os.path.join(out_path, str(version)) - os.makedirs(out_path_version, exist_ok=True) - - config = _generate_hugectr_config(name, out_path, hugectr_params, max_batch_size=max_batch_size) - copytree(trained_model_path, out_path_version, dirs_exist_ok=True) - - return config - - -def _generate_nvtabular_config( - workflow, - name, - output_path, - output_model=None, - max_batch_size=None, - sparse_max=None, - backend="python", - cats=None, - conts=None, -): - """given a workflow generates the trton modelconfig proto object describing the inputs - and outputs to that workflow""" - config = model_config.ModelConfig(name=name, backend=backend, max_batch_size=max_batch_size) - - config.parameters["python_module"].string_value = "merlin.systems.triton.workflow_model" - config.parameters["output_model"].string_value = output_model if output_model else "" - - config.parameters["cats"].string_value = json.dumps(cats) if cats else "" - config.parameters["conts"].string_value = json.dumps(conts) if conts else "" - - if sparse_max: - # this assumes seq_length is same for each list column - config.parameters["sparse_max"].string_value = json.dumps(sparse_max) - - if 
output_model == "hugectr": - config.instance_group.append(model_config.ModelInstanceGroup(kind=2)) - - for column in workflow.output_node.input_columns.names: - dtype = workflow.input_dtypes[column] - config.input.append( - model_config.ModelInput(name=column, data_type=_convert_dtype(dtype), dims=[-1]) - ) - - config.output.append( - model_config.ModelOutput(name="DES", data_type=model_config.TYPE_FP32, dims=[-1]) - ) - - config.output.append( - model_config.ModelOutput(name="CATCOLUMN", data_type=model_config.TYPE_INT64, dims=[-1]) - ) - - config.output.append( - model_config.ModelOutput(name="ROWINDEX", data_type=model_config.TYPE_INT32, dims=[-1]) - ) - elif output_model == "pytorch": - for col_name, col_schema in workflow.input_schema.column_schemas.items(): - _add_model_param(col_schema, model_config.ModelInput, config.input) - - for col_name, col_schema in workflow.output_schema.column_schemas.items(): - _add_model_param( - col_schema, - model_config.ModelOutput, - config.output, - [-1, 1], - ) - else: - for col_name, col_schema in workflow.input_schema.column_schemas.items(): - _add_model_param(col_schema, model_config.ModelInput, config.input) - - for col_name, col_schema in workflow.output_schema.column_schemas.items(): - if sparse_max and col_name in sparse_max.keys(): - # this assumes max_sequence_length is equal for all output columns - dim = sparse_max[col_name] - _add_model_param(col_schema, model_config.ModelOutput, config.output, [-1, dim]) - else: - _add_model_param(col_schema, model_config.ModelOutput, config.output) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def export_tensorflow_model(model, name, output_path, version=1): - """Exports a TensorFlow model for serving with Triton - - Parameters - ---------- - model: - The tensorflow model that should be served - name: - The name of the triton model to export - output_path: - The path to write the exported model to - """ - tf_model_path = os.path.join(output_path, str(version), "model.savedmodel") - model.save(tf_model_path, include_optimizer=False) - config = model_config.ModelConfig( - name=name, backend="tensorflow", platform="tensorflow_savedmodel" - ) - - inputs, outputs = model.inputs, model.outputs - - if not inputs or not outputs: - signatures = getattr(model, "signatures", {}) or {} - default_signature = signatures.get("serving_default") - if not default_signature: - # roundtrip saved model to disk to generate signature if it doesn't exist - import tensorflow as tf - - reloaded = tf.keras.models.load_model(tf_model_path) - default_signature = reloaded.signatures["serving_default"] - - inputs = list(default_signature.structured_input_signature[1].values()) - outputs = list(default_signature.structured_outputs.values()) - - config.parameters["TF_GRAPH_TAG"].string_value = "serve" - config.parameters["TF_SIGNATURE_DEF"].string_value = "serving_default" - - for col in inputs: - config.input.append( - model_config.ModelInput( - name=col.name, data_type=_convert_dtype(col.dtype), dims=[-1, col.shape[1]] - ) - ) - - for col in outputs: - # this assumes the list columns are 1D tensors both for cats and conts - config.output.append( - model_config.ModelOutput( - name=col.name.split("/")[0], - data_type=_convert_dtype(col.dtype), - dims=[-1, col.shape[1]], - ) - ) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def export_pytorch_model( - model, workflow, 
sparse_max, name, output_path, use_fix_dtypes=True, version=1, backend="python" -): - """Exports a PyTorch model for serving with Triton - - Parameters - ---------- - model: - The PyTorch model that should be served - workflow: - The nvtabular workflow used in preprocessing - sparse_max: - Max length of the each row when the sparse data is converted to dense - name: - The name of the triton model to export - output_path: - The path to write the exported model to - use_fix_dtypes: - Transformers4Rec is using fixed dtypes and this option is - whether to use fixed dtypes in inference or not - version: - Version of the model - backend: "python" or "nvtabular" - The backend that will be used for inference in Triton. - """ - import cloudpickle - import torch - - os.makedirs(os.path.join(output_path, str(version)), exist_ok=True) - - pt_model_path = os.path.join(output_path, str(version), "model.pth") - torch.save(model.state_dict(), pt_model_path) - - pt_model_path = os.path.join(output_path, str(version), "model.pkl") - with open(pt_model_path, "wb") as o: - cloudpickle.dump(model, o) - - copyfile( - os.path.join(os.path.dirname(__file__), "model", "model_pt.py"), - os.path.join(output_path, str(version), "model.py"), - ) - - config = model_config.ModelConfig(name=name, backend=backend) - - for col_name, col_schema in workflow.output_schema.column_schemas.items(): - _add_model_param(col_schema, model_config.ModelInput, config.input) - - *_, last_layer = model.parameters() - dims = last_layer.shape[0] - dtype = last_layer.dtype - config.output.append( - model_config.ModelOutput( - name="output", data_type=_convert_pytorch_dtype(dtype), dims=[-1, dims] - ) - ) - - if sparse_max: - with open(os.path.join(output_path, str(version), "model_info.json"), "w") as o: - model_info = dict() - model_info["sparse_max"] = sparse_max - model_info["use_fix_dtypes"] = use_fix_dtypes - json.dump(model_info, o) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def _generate_pytorch_config(model, name, output_path, max_batch_size=None): - """given a workflow generates the trton modelconfig proto object describing the inputs - and outputs to that workflow""" - config = model_config.ModelConfig(name=name, backend="python", max_batch_size=max_batch_size) - - for col in model.inputs: - config.input.append( - model_config.ModelInput(name=col.name, data_type=_convert_dtype(col.dtype), dims=[-1]) - ) - - for col in model.outputs: - config.output.append( - model_config.ModelOutput( - name=col.name.split("/")[0], data_type=_convert_dtype(col.dtype), dims=[-1] - ) - ) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def _generate_hugectr_config(name, output_path, hugectr_params, max_batch_size=None): - config = model_config.ModelConfig(name=name, backend="hugectr", max_batch_size=max_batch_size) - - config.input.append( - model_config.ModelInput(name="DES", data_type=model_config.TYPE_FP32, dims=[-1]) - ) - - config.input.append( - model_config.ModelInput(name="CATCOLUMN", data_type=model_config.TYPE_INT64, dims=[-1]) - ) - - config.input.append( - model_config.ModelInput(name="ROWINDEX", data_type=model_config.TYPE_INT32, dims=[-1]) - ) - - for i in range(hugectr_params["n_outputs"]): - config.output.append( - model_config.ModelOutput( - name="OUTPUT" + str(i), data_type=model_config.TYPE_FP32, dims=[-1] - ) - ) - - 
config.instance_group.append(model_config.ModelInstanceGroup(gpus=[0], count=1, kind=1)) - - config_hugectr = model_config.ModelParameter(string_value=hugectr_params["config"]) - config.parameters["config"].CopyFrom(config_hugectr) - - gpucache_val = hugectr_params.get("gpucache", "true") - - gpucache = model_config.ModelParameter(string_value=gpucache_val) - config.parameters["gpucache"].CopyFrom(gpucache) - - gpucacheper_val = str(hugectr_params.get("gpucacheper_val", "0.5")) - - gpucacheper = model_config.ModelParameter(string_value=gpucacheper_val) - config.parameters["gpucacheper"].CopyFrom(gpucacheper) - - label_dim = model_config.ModelParameter(string_value=str(hugectr_params["label_dim"])) - config.parameters["label_dim"].CopyFrom(label_dim) - - slots = model_config.ModelParameter(string_value=str(hugectr_params["slots"])) - config.parameters["slots"].CopyFrom(slots) - - des_feature_num = model_config.ModelParameter( - string_value=str(hugectr_params["des_feature_num"]) - ) - config.parameters["des_feature_num"].CopyFrom(des_feature_num) - - cat_feature_num = model_config.ModelParameter( - string_value=str(hugectr_params["cat_feature_num"]) - ) - config.parameters["cat_feature_num"].CopyFrom(cat_feature_num) - - max_nnz = model_config.ModelParameter(string_value=str(hugectr_params["max_nnz"])) - config.parameters["max_nnz"].CopyFrom(max_nnz) - - embedding_vector_size = model_config.ModelParameter( - string_value=str(hugectr_params["embedding_vector_size"]) - ) - config.parameters["embedding_vector_size"].CopyFrom(embedding_vector_size) - - embeddingkey_long_type_val = hugectr_params.get("embeddingkey_long_type", "true") - - embeddingkey_long_type = model_config.ModelParameter(string_value=embeddingkey_long_type_val) - config.parameters["embeddingkey_long_type"].CopyFrom(embeddingkey_long_type) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def _add_model_param(col_schema, paramclass, params, dims=None): - dims = dims if dims is not None else [-1, 1] - if col_schema.is_list and col_schema.is_ragged: - params.append( - paramclass( - name=col_schema.name + "__values", - data_type=_convert_dtype(col_schema.dtype), - dims=dims, - ) - ) - params.append( - paramclass( - name=col_schema.name + "__nnzs", data_type=model_config.TYPE_INT64, dims=dims - ) - ) - else: - params.append( - paramclass(name=col_schema.name, data_type=_convert_dtype(col_schema.dtype), dims=dims) - ) - - -def _convert_dtype(dtype): - """converts a dtype to the appropriate triton proto type""" - - if dtype and not isinstance(dtype, str): - dtype_name = dtype.name if hasattr(dtype, "name") else dtype.__name__ - else: - dtype_name = dtype - - dtypes = { - "float64": model_config.TYPE_FP64, - "float32": model_config.TYPE_FP32, - "float16": model_config.TYPE_FP16, - "int64": model_config.TYPE_INT64, - "int32": model_config.TYPE_INT32, - "int16": model_config.TYPE_INT16, - "int8": model_config.TYPE_INT8, - "uint64": model_config.TYPE_UINT64, - "uint32": model_config.TYPE_UINT32, - "uint16": model_config.TYPE_UINT16, - "uint8": model_config.TYPE_UINT8, - "bool": model_config.TYPE_BOOL, - } - - if is_string_dtype(dtype): - return model_config.TYPE_STRING - elif dtype_name in dtypes: - return dtypes[dtype_name] - else: - raise ValueError(f"Can't convert {dtype} to a Triton dtype") - - -def _convert_pytorch_dtype(dtype): - """converts a dtype to the appropriate triton proto type""" - - import torch - - dtypes = { - torch.float64: 
model_config.TYPE_FP64, - torch.float32: model_config.TYPE_FP32, - torch.float16: model_config.TYPE_FP16, - torch.int64: model_config.TYPE_INT64, - torch.int32: model_config.TYPE_INT32, - torch.int16: model_config.TYPE_INT16, - torch.int8: model_config.TYPE_INT8, - torch.uint8: model_config.TYPE_UINT8, - torch.bool: model_config.TYPE_BOOL, - } - - if is_string_dtype(dtype): - return model_config.TYPE_STRING - elif dtype in dtypes: - return dtypes[dtype] - else: - raise ValueError(f"Can't convert dtype {dtype})") - - -def _convert_string2pytorch_dtype(dtype): - """converts a dtype to the appropriate torch type""" - - import torch - - if not isinstance(dtype, str): - dtype_name = dtype.name - else: - dtype_name = dtype - - dtypes = { - "TYPE_FP64": torch.float64, - "TYPE_FP32": torch.float32, - "TYPE_FP16": torch.float16, - "TYPE_INT64": torch.int64, - "TYPE_INT32": torch.int32, - "TYPE_INT16": torch.int16, - "TYPE_INT8": torch.int8, - "TYPE_UINT8": torch.uint8, - "TYPE_BOOL": torch.bool, - } - - if is_string_dtype(dtype): - return model_config.TYPE_STRING - elif dtype_name in dtypes: - return dtypes[dtype_name] - else: - raise ValueError(f"Can't convert dtype {dtype})") - - -def _triton_datatype_to_dtype(data_type): - """the reverse of _convert_dtype: converts a triton proto data_type to a numpy dtype""" - name = model_config._DATATYPE.values[data_type].name[5:].lower() - if name == "string": - return np.dtype("str") - return np.dtype(name.replace("fp", "float")) diff --git a/merlin/systems/triton/model_config.proto b/merlin/systems/triton/model_config.proto deleted file mode 100644 index 61e00cd8268..00000000000 --- a/merlin/systems/triton/model_config.proto +++ /dev/null @@ -1,1660 +0,0 @@ -// Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Copyright (c) 2018, TensorFlow Authors. All rights reserved. - -syntax = "proto3"; - -package inference; - -//@@.. cpp:namespace:: inference - -//@@ -//@@.. cpp:enum:: DataType -//@@ -//@@ Data types supported for input and output tensors. 
-//@@ -enum DataType { - //@@ .. cpp:enumerator:: DataType::INVALID = 0 - TYPE_INVALID = 0; - - //@@ .. cpp:enumerator:: DataType::BOOL = 1 - TYPE_BOOL = 1; - - //@@ .. cpp:enumerator:: DataType::UINT8 = 2 - TYPE_UINT8 = 2; - //@@ .. cpp:enumerator:: DataType::UINT16 = 3 - TYPE_UINT16 = 3; - //@@ .. cpp:enumerator:: DataType::UINT32 = 4 - TYPE_UINT32 = 4; - //@@ .. cpp:enumerator:: DataType::UINT64 = 5 - TYPE_UINT64 = 5; - - //@@ .. cpp:enumerator:: DataType::INT8 = 6 - TYPE_INT8 = 6; - //@@ .. cpp:enumerator:: DataType::INT16 = 7 - TYPE_INT16 = 7; - //@@ .. cpp:enumerator:: DataType::INT32 = 8 - TYPE_INT32 = 8; - //@@ .. cpp:enumerator:: DataType::INT64 = 9 - TYPE_INT64 = 9; - - //@@ .. cpp:enumerator:: DataType::FP16 = 10 - TYPE_FP16 = 10; - //@@ .. cpp:enumerator:: DataType::FP32 = 11 - TYPE_FP32 = 11; - //@@ .. cpp:enumerator:: DataType::FP64 = 12 - TYPE_FP64 = 12; - - //@@ .. cpp:enumerator:: DataType::STRING = 13 - TYPE_STRING = 13; -} - -//@@ -//@@ .. cpp:var:: message ModelRateLimiter -//@@ -//@@ The specifications required by the rate limiter to properly -//@@ schedule the inference requests across the different models -//@@ and their instances. -//@@ -message ModelRateLimiter -{ - //@@ .. cpp:var:: message Resource - //@@ - //@@ The resource property. - //@@ - message Resource - { - //@@ .. cpp:var:: string name - //@@ - //@@ The name associated with the resource. - //@@ - string name = 1; - - //@@ .. cpp:var:: bool global - //@@ - //@@ Whether or not the resource is global. If true then the resource - //@@ is assumed to be shared among the devices otherwise specified - //@@ count of the resource is assumed for each device associated - //@@ with the instance. - //@@ - bool global = 2; - - //@@ .. cpp:var:: uint32 count - //@@ - //@@ The number of resources required for the execution of the model - //@@ instance. - //@@ - uint32 count = 3; - } - - //@@ .. cpp:var:: Resource resources (repeated) - //@@ - //@@ The resources required to execute the request on a model instance. - //@@ Resources are just names with a corresponding count. The execution - //@@ of the instance will be blocked until the specified resources are - //@@ available. By default an instance uses no rate-limiter resources. - //@@ - repeated Resource resources = 1; - - //@@ .. cpp:var:: uint32 priority - //@@ - //@@ The weighting value to be used for prioritizing across instances. - //@@ An instance with priority 2 will be given 1/2 the number of - //@@ scheduling chances as an instance_group with priority 1. The - //@@ default priority is 1. - //@@ - uint32 priority = 2; -} - -//@@ -//@@.. cpp:var:: message ModelInstanceGroup -//@@ -//@@ A group of one or more instances of a model and resources made -//@@ available for those instances. -//@@ -message ModelInstanceGroup -{ - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ Kind of this instance group. - //@@ - enum Kind { - //@@ .. cpp:enumerator:: Kind::KIND_AUTO = 0 - //@@ - //@@ This instance group represents instances that can run on either - //@@ CPU or GPU. If all GPUs listed in 'gpus' are available then - //@@ instances will be created on GPU(s), otherwise instances will - //@@ be created on CPU. - //@@ - KIND_AUTO = 0; - - //@@ .. cpp:enumerator:: Kind::KIND_GPU = 1 - //@@ - //@@ This instance group represents instances that must run on the - //@@ GPU. - //@@ - KIND_GPU = 1; - - //@@ .. cpp:enumerator:: Kind::KIND_CPU = 2 - //@@ - //@@ This instance group represents instances that must run on the - //@@ CPU. - //@@ - KIND_CPU = 2; - - //@@ .. 
cpp:enumerator:: Kind::KIND_MODEL = 3 - //@@ - //@@ This instance group represents instances that should run on the - //@@ CPU and/or GPU(s) as specified by the model or backend itself. - //@@ The inference server will not override the model/backend - //@@ settings. - //@@ Currently, this option is supported only for Tensorflow models. - //@@ - KIND_MODEL = 3; - } - - //@@ .. cpp:var:: string name - //@@ - //@@ Optional name of this group of instances. If not specified the - //@@ name will be formed as _. The name of - //@@ individual instances will be further formed by a unique instance - //@@ number and GPU index: - //@@ - string name = 1; - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this instance group. Default is KIND_AUTO. If - //@@ KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and - //@@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid - //@@ and 'gpu' cannot be specified. - //@@ - Kind kind = 4; - - //@@ .. cpp:var:: int32 count - //@@ - //@@ For a group assigned to GPU, the number of instances created for - //@@ each GPU listed in 'gpus'. For a group assigned to CPU the number - //@@ of instances created. Default is 1. - int32 count = 2; - - //@@ .. cpp:var:: ModelRateLimiter rate_limiter - //@@ - //@@ The rate limiter specific settings to be associated with this - //@@ instance group. Optional, if not specified no rate limiting - //@@ will be applied to this instance group. - //@@ - ModelRateLimiter rate_limiter = 6; - - //@@ .. cpp:var:: int32 gpus (repeated) - //@@ - //@@ GPU(s) where instances should be available. For each GPU listed, - //@@ 'count' instances of the model will be available. Setting 'gpus' - //@@ to empty (or not specifying at all) is equivalent to listing all - //@@ available GPUs. - //@@ - repeated int32 gpus = 3; - - //@@ .. cpp:var:: string profile (repeated) - //@@ - //@@ For TensorRT models containing multiple optimization profile, this - //@@ parameter specifies a set of optimization profiles available to this - //@@ instance group. The inference server will choose the optimal profile - //@@ based on the shapes of the input tensors. This field should lie - //@@ between 0 and - 1 - //@@ and be specified only for TensorRT backend, otherwise an error will - //@@ be generated. If not specified, the server will select the first - //@@ optimization profile by default. - //@@ - repeated string profile = 5; -} - -//@@ -//@@.. cpp:var:: message ModelTensorReshape -//@@ -//@@ Reshape specification for input and output tensors. -//@@ -message ModelTensorReshape -{ - //@@ .. cpp:var:: int64 shape (repeated) - //@@ - //@@ The shape to use for reshaping. - //@@ - repeated int64 shape = 1; -} - -//@@ -//@@.. cpp:var:: message ModelInput -//@@ -//@@ An input required by the model. -//@@ -message ModelInput -{ - //@@ - //@@ .. cpp:enum:: Format - //@@ - //@@ The format for the input. - //@@ - enum Format { - //@@ .. cpp:enumerator:: Format::FORMAT_NONE = 0 - //@@ - //@@ The input has no specific format. This is the default. - //@@ - FORMAT_NONE = 0; - - //@@ .. cpp:enumerator:: Format::FORMAT_NHWC = 1 - //@@ - //@@ HWC image format. Tensors with this format require 3 dimensions - //@@ if the model does not support batching (max_batch_size = 0) or 4 - //@@ dimensions if the model does support batching (max_batch_size - //@@ >= 1). In either case the 'dims' below should only specify the - //@@ 3 non-batch dimensions (i.e. HWC or CHW). - //@@ - FORMAT_NHWC = 1; - - //@@ .. 
cpp:enumerator:: Format::FORMAT_NCHW = 2 - //@@ - //@@ CHW image format. Tensors with this format require 3 dimensions - //@@ if the model does not support batching (max_batch_size = 0) or 4 - //@@ dimensions if the model does support batching (max_batch_size - //@@ >= 1). In either case the 'dims' below should only specify the - //@@ 3 non-batch dimensions (i.e. HWC or CHW). - //@@ - FORMAT_NCHW = 2; - } - - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the input. - //@@ - string name = 1; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The data-type of the input. - //@@ - DataType data_type = 2; - - //@@ .. cpp:var:: Format format - //@@ - //@@ The format of the input. Optional. - //@@ - Format format = 3; - - //@@ .. cpp:var:: int64 dims (repeated) - //@@ - //@@ The dimensions/shape of the input tensor that must be provided - //@@ when invoking the inference API for this model. - //@@ - repeated int64 dims = 4; - - //@@ .. cpp:var:: ModelTensorReshape reshape - //@@ - //@@ The shape expected for this input by the backend. The input will - //@@ be reshaped to this before being presented to the backend. The - //@@ reshape must have the same number of elements as the input shape - //@@ specified by 'dims'. Optional. - //@@ - ModelTensorReshape reshape = 5; - - //@@ .. cpp:var:: bool is_shape_tensor - //@@ - //@@ Whether or not the input is a shape tensor to the model. This field - //@@ is currently supported only for the TensorRT model. An error will be - //@@ generated if this specification does not comply with underlying - //@@ model. - //@@ - bool is_shape_tensor = 6; - - //@@ .. cpp:var:: bool allow_ragged_batch - //@@ - //@@ Whether or not the input is allowed to be "ragged" in a dynamically - //@@ created batch. Default is false indicating that two requests will - //@@ only be batched if this tensor has the same shape in both requests. - //@@ True indicates that two requests can be batched even if this tensor - //@@ has a different shape in each request. A true value is currently - //@@ supported only for custom models. - //@@ - bool allow_ragged_batch = 7; -} - -//@@ -//@@.. cpp:var:: message ModelOutput -//@@ -//@@ An output produced by the model. -//@@ -message ModelOutput -{ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the output. - //@@ - string name = 1; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The data-type of the output. - //@@ - DataType data_type = 2; - - //@@ .. cpp:var:: int64 dims (repeated) - //@@ - //@@ The dimensions/shape of the output tensor. - //@@ - repeated int64 dims = 3; - - //@@ .. cpp:var:: ModelTensorReshape reshape - //@@ - //@@ The shape produced for this output by the backend. The output will - //@@ be reshaped from this to the shape specified in 'dims' before being - //@@ returned in the inference response. The reshape must have the same - //@@ number of elements as the output shape specified by 'dims'. Optional. - //@@ - ModelTensorReshape reshape = 5; - - //@@ .. cpp:var:: string label_filename - //@@ - //@@ The label file associated with this output. Should be specified only - //@@ for outputs that represent classifications. Optional. - //@@ - string label_filename = 4; - - - //@@ .. cpp:var:: bool is_shape_tensor - //@@ - //@@ Whether or not the output is a shape tensor to the model. This field - //@@ is currently supported only for the TensorRT model. An error will be - //@@ generated if this specification does not comply with underlying - //@@ model. - //@@ - bool is_shape_tensor = 6; -} - -//@@ .. 
cpp:var:: message BatchInput -//@@ -//@@ A batch input is an additional input that must be added by -//@@ the backend based on all the requests in a batch. -//@@ -message BatchInput -{ - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ The kind of the batch input. - //@@ - enum Kind { - //@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0 - //@@ - //@@ The element count of the 'source_input' will be added as - //@@ input with shape [1]. - //@@ - BATCH_ELEMENT_COUNT = 0; - - //@@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1 - //@@ - //@@ The accumulated element count of the 'source_input' will be - //@@ added as input with shape [1]. For example, if there is a - //@@ batch of two request, each with 2 elements, an input of value - //@@ 2 will be added to the first request, and an input of value - //@@ 4 will be added to the second request. - //@@ - BATCH_ACCUMULATED_ELEMENT_COUNT = 1; - - //@@ .. cpp:enumerator:: - //@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2 - //@@ - //@@ The accumulated element count of the 'source_input' will be - //@@ added as input with shape [1], except for the first request - //@@ in the batch. For the first request in the batch, the input - //@@ will have shape [2] where the first element is value 0. - //@@ - BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2; - - //@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3 - //@@ - //@@ Among the requests in the batch, the max element count of the - //@@ 'source_input' will be added as input with shape - //@@ [max_element_count] for the first request in the batch. - //@@ For other requests, such input will be with shape [0]. - //@@ The data of the tensor will be uninitialized. - //@@ - BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3; - } - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this batch input. - //@@ - Kind kind = 1; - - //@@ .. cpp:var:: string target_name (repeated) - //@@ - //@@ The name of the model inputs that the backend will create - //@@ for this batch input. - //@@ - repeated string target_name = 2; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The input's datatype. The data type can be TYPE_INT32 or - //@@ TYPE_FP32. - //@@ - DataType data_type = 3; - - //@@ .. cpp:var:: string source_input (repeated) - //@@ - //@@ The backend derives the value for each batch input from one or - //@@ more other inputs. 'source_input' gives the names of those - //@@ inputs. - //@@ - repeated string source_input = 4; -} - -//@@.. cpp:var:: message BatchOutput -//@@ -//@@ A batch output is an output produced by the model that must be handled -//@@ differently by the backend based on all the requests in a batch. -//@@ -message BatchOutput -{ - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ The kind of the batch output. - //@@ - enum Kind { - //@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0 - //@@ - //@@ The output should be scattered according to the shape of - //@@ 'source_input'. The dynamic dimension of the output will - //@@ be set to the value of the same dimension in the input. - //@@ - BATCH_SCATTER_WITH_INPUT_SHAPE = 0; - } - - //@@ .. cpp:var:: string target_name (repeated) - //@@ - //@@ The name of the outputs to be produced by this batch output - //@@ specification. - //@@ - repeated string target_name = 1; - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this batch output. - //@@ - Kind kind = 2; - - //@@ .. cpp:var:: string source_input (repeated) - //@@ - //@@ The backend derives each batch output from one or more inputs. 
- //@@ 'source_input' gives the names of those inputs. - //@@ - repeated string source_input = 3; -} - -//@@ -//@@.. cpp:var:: message ModelVersionPolicy -//@@ -//@@ Policy indicating which versions of a model should be made -//@@ available by the inference server. -//@@ -message ModelVersionPolicy -{ - //@@ .. cpp:var:: message Latest - //@@ - //@@ Serve only the latest version(s) of a model. This is - //@@ the default policy. - //@@ - message Latest - { - //@@ .. cpp:var:: uint32 num_versions - //@@ - //@@ Serve only the 'num_versions' highest-numbered versions. T - //@@ The default value of 'num_versions' is 1, indicating that by - //@@ default only the single highest-number version of a - //@@ model will be served. - //@@ - uint32 num_versions = 1; - } - - //@@ .. cpp:var:: message All - //@@ - //@@ Serve all versions of the model. - //@@ - message All {} - - //@@ .. cpp:var:: message Specific - //@@ - //@@ Serve only specific versions of the model. - //@@ - message Specific - { - //@@ .. cpp:var:: int64 versions (repeated) - //@@ - //@@ The specific versions of the model that will be served. - //@@ - repeated int64 versions = 1; - } - - //@@ .. cpp:var:: oneof policy_choice - //@@ - //@@ Each model must implement only a single version policy. The - //@@ default policy is 'Latest'. - //@@ - oneof policy_choice - { - //@@ .. cpp:var:: Latest latest - //@@ - //@@ Serve only latest version(s) of the model. - //@@ - Latest latest = 1; - - //@@ .. cpp:var:: All all - //@@ - //@@ Serve all versions of the model. - //@@ - All all = 2; - - //@@ .. cpp:var:: Specific specific - //@@ - //@@ Serve only specific version(s) of the model. - //@@ - Specific specific = 3; - } -} - -//@@ -//@@.. cpp:var:: message ModelOptimizationPolicy -//@@ -//@@ Optimization settings for a model. These settings control if/how a -//@@ model is optimized and prioritized by the backend framework when -//@@ it is loaded. -//@@ -message ModelOptimizationPolicy -{ - //@@ - //@@ .. cpp:var:: message Graph - //@@ - //@@ Enable generic graph optimization of the model. If not specified - //@@ the framework's default level of optimization is used. Supports - //@@ TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow - //@@ causes XLA to be enabled/disabled for the model. For Onnx defaults - //@@ to enabling all optimizations, -1 enables only basic optimizations, - //@@ +1 enables only basic and extended optimizations. - //@@ - message Graph - { - //@@ .. cpp:var:: int32 level - //@@ - //@@ The optimization level. Defaults to 0 (zero) if not specified. - //@@ - //@@ - -1: Disabled - //@@ - 0: Framework default - //@@ - 1+: Enable optimization level (greater values indicate - //@@ higher optimization levels) - //@@ - int32 level = 1; - } - - //@@ - //@@ .. cpp:enum:: ModelPriority - //@@ - //@@ Model priorities. A model will be given scheduling and execution - //@@ preference over models at lower priorities. Current model - //@@ priorities only work for TensorRT models. - //@@ - enum ModelPriority { - //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0 - //@@ - //@@ The default model priority. - //@@ - PRIORITY_DEFAULT = 0; - - //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1 - //@@ - //@@ The maximum model priority. - //@@ - PRIORITY_MAX = 1; - - //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2 - //@@ - //@@ The minimum model priority. - //@@ - PRIORITY_MIN = 2; - } - - //@@ - //@@ .. cpp:var:: message Cuda - //@@ - //@@ CUDA-specific optimization settings. 
- //@@ - message Cuda - { - //@@ .. cpp:var:: message GraphSpec - //@@ - //@@ Specification of the CUDA graph to be captured. - //@@ - message GraphSpec - { - //@@ .. cpp:var:: message Dims - //@@ - //@@ Specification of tensor dimension. - //@@ - message Shape - { - //@@ .. cpp:var:: int64 dim (repeated) - //@@ - //@@ The dimension. - //@@ - repeated int64 dim = 1; - } - - message LowerBound - { - //@@ .. cpp:var:: int32 batch_size - //@@ - //@@ The batch size of the CUDA graph. If 'max_batch_size' is 0, - //@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must - //@@ be set to value between 1 and 'max_batch_size'. - //@@ - int32 batch_size = 1; - - //@@ .. cpp:var:: map input - //@@ - //@@ The specification of the inputs. 'Shape' is the shape of - //@@ the input without batching dimension. - //@@ - map input = 2; - } - - //@@ .. cpp:var:: int32 batch_size - //@@ - //@@ The batch size of the CUDA graph. If 'max_batch_size' is 0, - //@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must - //@@ be set to value between 1 and 'max_batch_size'. - //@@ - int32 batch_size = 1; - - //@@ .. cpp:var:: map input - //@@ - //@@ The specification of the inputs. 'Shape' is the shape of the - //@@ input without batching dimension. - //@@ - map input = 2; - - //@@ .. cpp:var:: LowerBound graph_lower_bound - //@@ - //@@ Specify the lower bound of the CUDA graph. Optional. - //@@ If specified, the graph can be used for input shapes and - //@@ batch sizes that are in closed interval between the lower - //@@ bound specification and graph specification. For dynamic - //@@ shape model, this allows CUDA graphs to be launched - //@@ frequently without capturing all possible shape combinations. - //@@ However, using graph for shape combinations different from - //@@ the one used for capturing introduces uninitialized data for - //@@ execution and it may distort the inference result if - //@@ the model is sensitive to uninitialized data. - //@@ - LowerBound graph_lower_bound = 3; - } - - //@@ .. cpp:var:: bool graphs - //@@ - //@@ Use CUDA graphs API to capture model operations and execute - //@@ them more efficiently. Default value is false. - //@@ Currently only recognized by TensorRT backend. - //@@ - bool graphs = 1; - - //@@ .. cpp:var:: bool busy_wait_events - //@@ - //@@ Use busy-waiting to synchronize CUDA events to achieve minimum - //@@ latency from event complete to host thread to be notified, with - //@@ the cost of high CPU load. Default value is false. - //@@ Currently only recognized by TensorRT backend. - //@@ - bool busy_wait_events = 2; - - //@@ .. cpp:var:: GraphSpec graph_spec (repeated) - //@@ - //@@ Specification of the CUDA graph to be captured. If not specified - //@@ and 'graphs' is true, the default CUDA graphs will be captured - //@@ based on model settings. - //@@ Currently only recognized by TensorRT backend. - //@@ - repeated GraphSpec graph_spec = 3; - } - - //@@ - //@@ .. cpp:var:: message ExecutionAccelerators - //@@ - //@@ Specify the preferred execution accelerators to be used to execute - //@@ the model. Currently only recognized by ONNX Runtime backend and - //@@ TensorFlow backend. - //@@ - //@@ For ONNX Runtime backend, it will deploy the model with the execution - //@@ accelerators by priority, the priority is determined based on the - //@@ order that they are set, i.e. the provider at the front has highest - //@@ priority. 
Overall, the priority will be in the following order: - //@@ (if instance is on GPU) - //@@ CUDA Execution Provider (if instance is on GPU) - //@@ - //@@ Default CPU Execution Provider - //@@ - message ExecutionAccelerators - { - //@@ - //@@ .. cpp:var:: message Accelerator - //@@ - //@@ Specify the accelerator to be used to execute the model. - //@@ Accelerator with the same name may accept different parameters - //@@ depending on the backends. - //@@ - message Accelerator - { - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the execution accelerator. - //@@ - string name = 1; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Additional parameters used to configure the accelerator. - //@@ - map parameters = 2; - } - - //@@ .. cpp:var:: Accelerator gpu_execution_accelerator (repeated) - //@@ - //@@ The preferred execution provider to be used if the model instance - //@@ is deployed on GPU. - //@@ - //@@ For ONNX Runtime backend, possible value is "tensorrt" as name, - //@@ and no parameters are required. - //@@ - //@@ For TensorFlow backend, possible values are "tensorrt", - //@@ "auto_mixed_precision", "gpu_io". - //@@ - //@@ For "tensorrt", the following parameters can be specified: - //@@ "precision_mode": The precision used for optimization. - //@@ Allowed values are "FP32" and "FP16". Default value is "FP32". - //@@ - //@@ "max_cached_engines": The maximum number of cached TensorRT - //@@ engines in dynamic TensorRT ops. Default value is 100. - //@@ - //@@ "minimum_segment_size": The smallest model subgraph that will - //@@ be considered for optimization by TensorRT. Default value is 3. - //@@ - //@@ "max_workspace_size_bytes": The maximum GPU memory the model - //@@ can use temporarily during execution. Default value is 1GB. - //@@ - //@@ For "auto_mixed_precision", no parameters are required. If set, - //@@ the model will try to use FP16 for better performance. - //@@ This optimization can not be set with "tensorrt". - //@@ - //@@ For "gpu_io", no parameters are required. If set, the model will - //@@ be executed using TensorFlow Callable API to set input and output - //@@ tensors in GPU memory if possible, which can reduce data transfer - //@@ overhead if the model is used in ensemble. However, the Callable - //@@ object will be created on model creation and it will request all - //@@ outputs for every model execution, which may impact the - //@@ performance if a request does not require all outputs. This - //@@ optimization will only take affect if the model instance is - //@@ created with KIND_GPU. - //@@ - repeated Accelerator gpu_execution_accelerator = 1; - - //@@ .. cpp:var:: Accelerator cpu_execution_accelerator (repeated) - //@@ - //@@ The preferred execution provider to be used if the model instance - //@@ is deployed on CPU. - //@@ - //@@ For ONNX Runtime backend, possible value is "openvino" as name, - //@@ and no parameters are required. - //@@ - repeated Accelerator cpu_execution_accelerator = 2; - } - - //@@ - //@@ .. cpp:var:: message PinnedMemoryBuffer - //@@ - //@@ Specify whether to use a pinned memory buffer when transferring data - //@@ between non-pinned system memory and GPU memory. Using a pinned - //@@ memory buffer for system from/to GPU transfers will typically provide - //@@ increased performance. 
For example, in the common use case where the - //@@ request provides inputs and delivers outputs via non-pinned system - //@@ memory, if the model instance accepts GPU IOs, the inputs will be - //@@ processed by two copies: from non-pinned system memory to pinned - //@@ memory, and from pinned memory to GPU memory. Similarly, pinned - //@@ memory will be used for delivering the outputs. - //@@ - message PinnedMemoryBuffer - { - //@@ .. cpp:var:: bool enable - //@@ - //@@ Use pinned memory buffer. Default is true. - //@@ - bool enable = 1; - } - - //@@ .. cpp:var:: Graph graph - //@@ - //@@ The graph optimization setting for the model. Optional. - //@@ - Graph graph = 1; - - //@@ .. cpp:var:: ModelPriority priority - //@@ - //@@ The priority setting for the model. Optional. - //@@ - ModelPriority priority = 2; - - //@@ .. cpp:var:: Cuda cuda - //@@ - //@@ CUDA-specific optimization settings. Optional. - //@@ - Cuda cuda = 3; - - //@@ .. cpp:var:: ExecutionAccelerators execution_accelerators - //@@ - //@@ The accelerators used for the model. Optional. - //@@ - ExecutionAccelerators execution_accelerators = 4; - - //@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory - //@@ - //@@ Use pinned memory buffer when the data transfer for inputs - //@@ is between GPU memory and non-pinned system memory. - //@@ Default is true. - //@@ - PinnedMemoryBuffer input_pinned_memory = 5; - - //@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory - //@@ - //@@ Use pinned memory buffer when the data transfer for outputs - //@@ is between GPU memory and non-pinned system memory. - //@@ Default is true. - //@@ - PinnedMemoryBuffer output_pinned_memory = 6; -} - -//@@ -//@@.. cpp:var:: message ModelQueuePolicy -//@@ -//@@ Queue policy for inference requests. -//@@ -message ModelQueuePolicy -{ - //@@ - //@@ .. cpp:enum:: TimeoutAction - //@@ - //@@ The action applied to timed-out requests. - //@@ - enum TimeoutAction { - //@@ .. cpp:enumerator:: Action::REJECT = 0 - //@@ - //@@ Reject the request and return error message accordingly. - //@@ - REJECT = 0; - - //@@ .. cpp:enumerator:: Action::DELAY = 1 - //@@ - //@@ Delay the request until all other requests at the same - //@@ (or higher) priority levels that have not reached their timeouts - //@@ are processed. A delayed request will eventually be processed, - //@@ but may be delayed indefinitely due to newly arriving requests. - //@@ - DELAY = 1; - } - - //@@ - //@@ .. cpp:var:: TimeoutAction timeout_action - //@@ - //@@ The action applied to timed-out request. - //@@ The default action is REJECT. - //@@ - TimeoutAction timeout_action = 1; - - //@@ - //@@ .. cpp:var:: uint64 default_timeout_microseconds - //@@ - //@@ The default timeout for every request, in microseconds. - //@@ The default value is 0 which indicates that no timeout is set. - //@@ - uint64 default_timeout_microseconds = 2; - - //@@ - //@@ .. cpp:var:: bool allow_timeout_override - //@@ - //@@ Whether individual request can override the default timeout value. - //@@ When true, individual requests can set a timeout that is less than - //@@ the default timeout value but may not increase the timeout. - //@@ The default value is false. - //@@ - bool allow_timeout_override = 3; - - //@@ - //@@ .. cpp:var:: uint32 max_queue_size - //@@ - //@@ The maximum queue size for holding requests. A request will be - //@@ rejected immediately if it can't be enqueued because the queue is - //@@ full. The default value is 0 which indicates that no maximum - //@@ queue size is enforced. 
- //@@ - uint32 max_queue_size = 4; -} - -//@@ -//@@.. cpp:var:: message ModelDynamicBatching -//@@ -//@@ Dynamic batching configuration. These settings control how dynamic -//@@ batching operates for the model. -//@@ -message ModelDynamicBatching -{ - //@@ .. cpp:var:: int32 preferred_batch_size (repeated) - //@@ - //@@ Preferred batch sizes for dynamic batching. If a batch of one of - //@@ these sizes can be formed it will be executed immediately. If - //@@ not specified a preferred batch size will be chosen automatically - //@@ based on model and GPU characteristics. - //@@ - repeated int32 preferred_batch_size = 1; - - //@@ .. cpp:var:: uint64 max_queue_delay_microseconds - //@@ - //@@ The maximum time, in microseconds, a request will be delayed in - //@@ the scheduling queue to wait for additional requests for - //@@ batching. Default is 0. - //@@ - uint64 max_queue_delay_microseconds = 2; - - //@@ .. cpp:var:: bool preserve_ordering - //@@ - //@@ Should the dynamic batcher preserve the ordering of responses to - //@@ match the order of requests received by the scheduler. Default is - //@@ false. If true, the responses will be returned in the same order as - //@@ the order of requests sent to the scheduler. If false, the responses - //@@ may be returned in arbitrary order. This option is specifically - //@@ needed when a sequence of related inference requests (i.e. inference - //@@ requests with the same correlation ID) are sent to the dynamic - //@@ batcher to ensure that the sequence responses are in the correct - //@@ order. - //@@ - bool preserve_ordering = 3; - - //@@ .. cpp:var:: uint32 priority_levels - //@@ - //@@ The number of priority levels to be enabled for the model, - //@@ the priority level starts from 1 and 1 is the highest priority. - //@@ Requests are handled in priority order with all priority 1 requests - //@@ processed before priority 2, all priority 2 requests processed before - //@@ priority 3, etc. Requests with the same priority level will be - //@@ handled in the order that they are received. - //@@ - uint32 priority_levels = 4; - - //@@ .. cpp:var:: uint32 default_priority_level - //@@ - //@@ The priority level used for requests that don't specify their - //@@ priority. The value must be in the range [ 1, 'priority_levels' ]. - //@@ - uint32 default_priority_level = 5; - - //@@ .. cpp:var:: ModelQueuePolicy default_queue_policy - //@@ - //@@ The default queue policy used for requests that don't require - //@@ priority handling and requests that specify priority levels where - //@@ there is no specific policy given. If not specified, a policy with - //@@ default field values will be used. - //@@ - ModelQueuePolicy default_queue_policy = 6; - - //@@ .. cpp:var:: map priority_queue_policy - //@@ - //@@ Specify the queue policy for the priority level. The default queue - //@@ policy will be used if a priority level doesn't specify a queue - //@@ policy. - //@@ - map priority_queue_policy = 7; -} - -//@@ -//@@.. cpp:var:: message ModelSequenceBatching -//@@ -//@@ Sequence batching configuration. These settings control how sequence -//@@ batching operates for the model. -//@@ -message ModelSequenceBatching -{ - //@@ .. cpp:var:: message Control - //@@ - //@@ A control is a signal that the sequence batcher uses to - //@@ communicate with a backend. - //@@ - message Control - { - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ The kind of the control. - //@@ - enum Kind { - //@@ .. 
cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0 - //@@ - //@@ A new sequence is/is-not starting. If true a sequence is - //@@ starting, if false a sequence is continuing. Must - //@@ specify either int32_false_true or fp32_false_true for - //@@ this control. This control is optional. - //@@ - CONTROL_SEQUENCE_START = 0; - - //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1 - //@@ - //@@ A sequence is/is-not ready for inference. If true the - //@@ input tensor data is valid and should be used. If false - //@@ the input tensor data is invalid and inferencing should - //@@ be "skipped". Must specify either int32_false_true or - //@@ fp32_false_true for this control. This control is optional. - //@@ - CONTROL_SEQUENCE_READY = 1; - - //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2 - //@@ - //@@ A sequence is/is-not ending. If true a sequence is - //@@ ending, if false a sequence is continuing. Must - //@@ specify either int32_false_true or fp32_false_true for - //@@ this control. This control is optional. - //@@ - CONTROL_SEQUENCE_END = 2; - - //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3 - //@@ - //@@ The correlation ID of the sequence. The correlation ID - //@@ is an uint64_t value that is communicated in whole or - //@@ in part by the tensor. The tensor's datatype must be - //@@ specified by data_type and must be TYPE_UINT64, TYPE_INT64, - //@@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified - //@@ the correlation ID will be truncated to the low-order 32 - //@@ bits. This control is optional. - //@@ - CONTROL_SEQUENCE_CORRID = 3; - } - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this control. - //@@ - Kind kind = 1; - - //@@ .. cpp:var:: int32 int32_false_true (repeated) - //@@ - //@@ The control's true and false setting is indicated by setting - //@@ a value in an int32 tensor. The tensor must be a - //@@ 1-dimensional tensor with size equal to the batch size of - //@@ the request. 'int32_false_true' must have two entries: the - //@@ first the false value and the second the true value. - //@@ - repeated int32 int32_false_true = 2; - - //@@ .. cpp:var:: float fp32_false_true (repeated) - //@@ - //@@ The control's true and false setting is indicated by setting - //@@ a value in a fp32 tensor. The tensor must be a - //@@ 1-dimensional tensor with size equal to the batch size of - //@@ the request. 'fp32_false_true' must have two entries: the - //@@ first the false value and the second the true value. - //@@ - repeated float fp32_false_true = 3; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The control's datatype. - //@@ - DataType data_type = 4; - } - - //@@ .. cpp:var:: message ControlInput - //@@ - //@@ The sequence control values to communicate by a model input. - //@@ - message ControlInput - { - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model input. - //@@ - string name = 1; - - //@@ .. cpp:var:: Control control (repeated) - //@@ - //@@ The control value(s) that should be communicated to the - //@@ model using this model input. - //@@ - repeated Control control = 2; - } - - //@@ .. cpp:var:: message StrategyDirect - //@@ - //@@ The sequence batcher uses a specific, unique batch - //@@ slot for each sequence. All inference requests in a - //@@ sequence are directed to the same batch slot in the same - //@@ model instance over the lifetime of the sequence. This - //@@ is the default strategy. - //@@ - message StrategyDirect - { - //@@ .. 
cpp:var:: uint64 max_queue_delay_microseconds - //@@ - //@@ The maximum time, in microseconds, a candidate request - //@@ will be delayed in the sequence batch scheduling queue to - //@@ wait for additional requests for batching. Default is 0. - //@@ - uint64 max_queue_delay_microseconds = 1; - - //@@ .. cpp:var:: float minimum_slot_utilization - //@@ - //@@ The minimum slot utilization that must be satisfied to - //@@ execute the batch before 'max_queue_delay_microseconds' expires. - //@@ For example, a value of 0.5 indicates that the batch should be - //@@ executed as soon as 50% or more of the slots are ready even if - //@@ the 'max_queue_delay_microseconds' timeout has not expired. - //@@ The default is 0.0, indicating that a batch will be executed - //@@ before 'max_queue_delay_microseconds' timeout expires if at least - //@@ one batch slot is ready. 'max_queue_delay_microseconds' will be - //@@ ignored unless minimum_slot_utilization is set to a non-zero - //@@ value. - //@@ - float minimum_slot_utilization = 2; - } - - //@@ .. cpp:var:: message StrategyOldest - //@@ - //@@ The sequence batcher maintains up to 'max_candidate_sequences' - //@@ candidate sequences. 'max_candidate_sequences' can be greater - //@@ than the model's 'max_batch_size'. For inferencing the batcher - //@@ chooses from the candidate sequences up to 'max_batch_size' - //@@ inference requests. Requests are chosen in an oldest-first - //@@ manner across all candidate sequences. A given sequence is - //@@ not guaranteed to be assigned to the same batch slot for - //@@ all inference requests of that sequence. - //@@ - message StrategyOldest - { - //@@ .. cpp:var:: int32 max_candidate_sequences - //@@ - //@@ Maximum number of candidate sequences that the batcher - //@@ maintains. Excess sequences are kept in an ordered backlog - //@@ and become candidates when existing candidate sequences - //@@ complete. - //@@ - int32 max_candidate_sequences = 1; - - //@@ .. cpp:var:: int32 preferred_batch_size (repeated) - //@@ - //@@ Preferred batch sizes for dynamic batching of candidate - //@@ sequences. If a batch of one of these sizes can be formed - //@@ it will be executed immediately. If not specified a - //@@ preferred batch size will be chosen automatically - //@@ based on model and GPU characteristics. - //@@ - repeated int32 preferred_batch_size = 2; - - //@@ .. cpp:var:: uint64 max_queue_delay_microseconds - //@@ - //@@ The maximum time, in microseconds, a candidate request - //@@ will be delayed in the dynamic batch scheduling queue to - //@@ wait for additional requests for batching. Default is 0. - //@@ - uint64 max_queue_delay_microseconds = 3; - } - - //@@ .. cpp:var:: oneof strategy_choice - //@@ - //@@ The strategy used by the sequence batcher. Default strategy - //@@ is 'direct'. - //@@ - oneof strategy_choice - { - //@@ .. cpp:var:: StrategyDirect direct - //@@ - //@@ StrategyDirect scheduling strategy. - //@@ - StrategyDirect direct = 3; - - //@@ .. cpp:var:: StrategyOldest oldest - //@@ - //@@ StrategyOldest scheduling strategy. - //@@ - StrategyOldest oldest = 4; - } - - //@@ .. cpp:var:: uint64 max_sequence_idle_microseconds - //@@ - //@@ The maximum time, in microseconds, that a sequence is allowed to - //@@ be idle before it is aborted. The inference server considers a - //@@ sequence idle when it does not have any inference request queued - //@@ for the sequence. 
If this limit is exceeded, the inference server - //@@ will free the sequence slot allocated by the sequence and make it - //@@ available for another sequence. If not specified (or specified as - //@@ zero) a default value of 1000000 (1 second) is used. - //@@ - uint64 max_sequence_idle_microseconds = 1; - - //@@ .. cpp:var:: ControlInput control_input (repeated) - //@@ - //@@ The model input(s) that the server should use to communicate - //@@ sequence start, stop, ready and similar control values to the - //@@ model. - //@@ - repeated ControlInput control_input = 2; -} - -//@@ -//@@.. cpp:var:: message ModelEnsembling -//@@ -//@@ Model ensembling configuration. These settings specify the models that -//@@ compose the ensemble and how data flows between the models. -//@@ -message ModelEnsembling -{ - //@@ .. cpp:var:: message Step - //@@ - //@@ Each step specifies a model included in the ensemble, - //@@ maps ensemble tensor names to the model input tensors, - //@@ and maps model output tensors to ensemble tensor names - //@@ - message Step - { - //@@ .. cpp:var:: string model_name - //@@ - //@@ The name of the model to execute for this step of the ensemble. - //@@ - string model_name = 1; - - //@@ .. cpp:var:: int64 model_version - //@@ - //@@ The version of the model to use for inference. If -1 - //@@ the latest/most-recent version of the model is used. - //@@ - int64 model_version = 2; - - //@@ .. cpp:var:: map input_map - //@@ - //@@ Map from name of an input tensor on this step's model to ensemble - //@@ tensor name. The ensemble tensor must have the same data type and - //@@ shape as the model input. Each model input must be assigned to - //@@ one ensemble tensor, but the same ensemble tensor can be assigned - //@@ to multiple model inputs. - //@@ - map input_map = 3; - - //@@ .. cpp:var:: map output_map - //@@ - //@@ Map from name of an output tensor on this step's model to ensemble - //@@ tensor name. The data type and shape of the ensemble tensor will - //@@ be inferred from the model output. It is optional to assign all - //@@ model outputs to ensemble tensors. One ensemble tensor name - //@@ can appear in an output map only once. - //@@ - map output_map = 4; - } - - //@@ .. cpp:var:: Step step (repeated) - //@@ - //@@ The models and the input / output mappings used within the ensemble. - //@@ - repeated Step step = 1; -} - -//@@ -//@@.. cpp:var:: message ModelParameter -//@@ -//@@ A model parameter. -//@@ -message ModelParameter -{ - //@@ .. cpp:var:: string string_value - //@@ - //@@ The string value of the parameter. - //@@ - string string_value = 1; -} - -//@@ -//@@.. cpp:var:: message ModelWarmup -//@@ -//@@ Settings used to construct the request sample for model warmup. -//@@ -message ModelWarmup -{ - //@@ - //@@ .. cpp:var:: message Input - //@@ - //@@ Meta data associated with an input. - //@@ - message Input - { - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The data-type of the input. - //@@ - DataType data_type = 1; - - //@@ .. cpp:var:: int64 dims (repeated) - //@@ - //@@ The shape of the input tensor, not including the batch dimension. - //@@ - repeated int64 dims = 2; - - //@@ .. cpp:var:: oneof input_data_type - //@@ - //@@ Specify how the input data is generated. If the input has STRING - //@@ data type and 'random_data' is set, the data generation will fall - //@@ back to 'zero_data'. - //@@ - oneof input_data_type - { - //@@ - //@@ .. cpp:var:: bool zero_data - //@@ - //@@ The identifier for using zeros as input data. 
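The Step/input_map/output_map wiring above is roughly the structure the removed ensemble export code assembles; a hedged sketch of building one step programmatically, with hypothetical model and tensor names (input_map keys are model input names, values are ensemble tensor names; output_map maps model outputs to ensemble tensors).

    from merlin.systems.triton import model_config_pb2 as model_config  # removed by this patch

    ensemble = model_config.ModelConfig(name="ensemble_model", max_batch_size=64)
    step = ensemble.ensemble_scheduling.step.add(
        model_name="preprocess_workflow",
        model_version=-1,  # -1 selects the latest version
    )
    # ensemble tensor "user_id" feeds this step's model input "user_id"
    step.input_map["user_id"] = "user_id"
    # this step's model output "user_id_transformed" becomes ensemble tensor "user_id_out"
    step.output_map["user_id_transformed"] = "user_id_out"
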
Note that the - //@@ value of 'zero_data' will not be checked, instead, zero data - //@@ will be used as long as the field is set. - //@@ - bool zero_data = 3; - - //@@ - //@@ .. cpp:var:: bool random_data - //@@ - //@@ The identifier for using random data as input data. Note that - //@@ the value of 'random_data' will not be checked, instead, - //@@ random data will be used as long as the field is set. - //@@ - bool random_data = 4; - - //@@ .. cpp:var:: string input_data_file - //@@ - //@@ The file whose content will be used as raw input data in - //@@ row-major order. The file must be provided in a sub-directory - //@@ 'warmup' under the model directory. - //@@ - string input_data_file = 5; - } - } - - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the request sample. - //@@ - string name = 1; - - //@@ .. cpp:var:: uint32 batch_size - //@@ - //@@ The batch size of the inference request. This must be >= 1. For - //@@ models that don't support batching, batch_size must be 1. If - //@@ batch_size > 1, the 'inputs' specified below will be duplicated to - //@@ match the batch size requested. - //@@ - uint32 batch_size = 2; - - //@@ .. cpp:var:: map inputs - //@@ - //@@ The warmup meta data associated with every model input, including - //@@ control tensors. - //@@ - map inputs = 3; -} - -//@@ -//@@ .. cpp:var:: message ModelOperations -//@@ -//@@ The metadata of libraries providing custom operations for this model. -//@@ -message ModelOperations -{ - //@@ .. cpp:var:: string op_library_filename - //@@ - //@@ Optional paths of the libraries providing custom operations for - //@@ this model. Valid only for ONNX models. - //@@ - repeated string op_library_filename = 1; -} - -//@@ -//@@ .. cpp:var:: message ModelTransactionPolicy -//@@ -//@@ The specification that describes the nature of transactions -//@@ to be expected from the model. -//@@ -message ModelTransactionPolicy -{ - //@@ .. cpp:var:: bool decoupled - //@@ - //@@ Indicates whether responses generated by the model are decoupled with - //@@ the requests issued to it, which means the number of responses - //@@ generated by model may differ from number of requests issued, and - //@@ that the responses may be out of order relative to the order of - //@@ requests. The default is false, which means the model will generate - //@@ exactly one response for each request. - //@@ - bool decoupled = 1; -} - -//@@ -//@@.. cpp:var:: message ModelConfig -//@@ -//@@ A model configuration. -//@@ -message ModelConfig -{ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. - //@@ - string name = 1; - - //@@ .. cpp:var:: string platform - //@@ - //@@ The framework for the model. Possible values are - //@@ "tensorrt_plan", "tensorflow_graphdef", - //@@ "tensorflow_savedmodel", "onnxruntime_onnx", - //@@ "pytorch_libtorch" and "custom". - //@@ - string platform = 2; - - //@@ .. cpp:var:: string backend - //@@ - //@@ The backend used by the model. - //@@ - string backend = 17; - - //@@ .. cpp:var:: ModelVersionPolicy version_policy - //@@ - //@@ Policy indicating which version(s) of the model will be served. - //@@ - ModelVersionPolicy version_policy = 3; - - //@@ .. cpp:var:: int32 max_batch_size - //@@ - //@@ Maximum batch size allowed for inference. This can only decrease - //@@ what is allowed by the model itself. 
A max_batch_size value of 0 - //@@ indicates that batching is not allowed for the model and the - //@@ dimension/shape of the input and output tensors must exactly - //@@ match what is specified in the input and output configuration. A - //@@ max_batch_size value > 0 indicates that batching is allowed and - //@@ so the model expects the input tensors to have an additional - //@@ initial dimension for the batching that is not specified in the - //@@ input (for example, if the model supports batched inputs of - //@@ 2-dimensional tensors then the model configuration will specify - //@@ the input shape as [ X, Y ] but the model will expect the actual - //@@ input tensors to have shape [ N, X, Y ]). For max_batch_size > 0 - //@@ returned outputs will also have an additional initial dimension - //@@ for the batch. - //@@ - int32 max_batch_size = 4; - - //@@ .. cpp:var:: ModelInput input (repeated) - //@@ - //@@ The inputs request by the model. - //@@ - repeated ModelInput input = 5; - - //@@ .. cpp:var:: ModelOutput output (repeated) - //@@ - //@@ The outputs produced by the model. - //@@ - repeated ModelOutput output = 6; - - //@@ .. cpp:var:: BatchInput batch_input (repeated) - //@@ - //@@ The model input(s) that the server should use to communicate - //@@ batch related values to the model. - //@@ - repeated BatchInput batch_input = 20; - - //@@ .. cpp:var:: BatchOutput batch_output (repeated) - //@@ - //@@ The outputs produced by the model that requires special handling - //@@ by the model backend. - //@@ - repeated BatchOutput batch_output = 21; - - //@@ .. cpp:var:: ModelOptimizationPolicy optimization - //@@ - //@@ Optimization configuration for the model. If not specified - //@@ then default optimization policy is used. - //@@ - ModelOptimizationPolicy optimization = 12; - - //@@ .. cpp:var:: oneof scheduling_choice - //@@ - //@@ The scheduling policy for the model. If not specified the - //@@ default scheduling policy is used for the model. The default - //@@ policy is to execute each inference request independently. - //@@ - oneof scheduling_choice - { - //@@ .. cpp:var:: ModelDynamicBatching dynamic_batching - //@@ - //@@ If specified, enables the dynamic-batching scheduling - //@@ policy. With dynamic-batching the scheduler may group - //@@ together independent requests into a single batch to - //@@ improve inference throughput. - //@@ - ModelDynamicBatching dynamic_batching = 11; - - //@@ .. cpp:var:: ModelSequenceBatching sequence_batching - //@@ - //@@ If specified, enables the sequence-batching scheduling - //@@ policy. With sequence-batching, inference requests - //@@ with the same correlation ID are routed to the same - //@@ model instance. Multiple sequences of inference requests - //@@ may be batched together into a single batch to - //@@ improve inference throughput. - //@@ - ModelSequenceBatching sequence_batching = 13; - - //@@ .. cpp:var:: ModelEnsembling ensemble_scheduling - //@@ - //@@ If specified, enables the model-ensembling scheduling - //@@ policy. With model-ensembling, inference requests - //@@ will be processed according to the specification, such as an - //@@ execution sequence of models. The input specified in this model - //@@ config will be the input for the ensemble, and the output - //@@ specified will be the output of the ensemble. - //@@ - ModelEnsembling ensemble_scheduling = 15; - } - - //@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated) - //@@ - //@@ Instances of this model. 
If not specified, one instance - //@@ of the model will be instantiated on each available GPU. - //@@ - repeated ModelInstanceGroup instance_group = 7; - - //@@ .. cpp:var:: string default_model_filename - //@@ - //@@ Optional filename of the model file to use if a - //@@ compute-capability specific model is not specified in - //@@ :cpp:var:`cc_model_filenames`. If not specified the default name - //@@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or - //@@ 'model.pt' depending on the model type. - //@@ - string default_model_filename = 8; - - //@@ .. cpp:var:: map cc_model_filenames - //@@ - //@@ Optional map from CUDA compute capability to the filename of - //@@ the model that supports that compute capability. The filename - //@@ refers to a file within the model version directory. - //@@ - map cc_model_filenames = 9; - - //@@ .. cpp:var:: map metric_tags - //@@ - //@@ Optional metric tags. User-specific key-value pairs for metrics - //@@ reported for this model. These tags are applied to the metrics - //@@ reported on the HTTP metrics port. - //@@ - map metric_tags = 10; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional model parameters. User-specified parameter values that - //@@ are made available to custom backends. - //@@ - map parameters = 14; - - //@@ .. cpp:var:: ModelWarmup model_warmup (repeated) - //@@ - //@@ Warmup setting of this model. If specified, all instances - //@@ will be run with the request samples in sequence before - //@@ serving the model. - //@@ This field can only be specified if the model is not an ensemble - //@@ model. - //@@ - repeated ModelWarmup model_warmup = 16; - - //@@ .. cpp:var:: ModelOperations model_operations - //@@ - //@@ Optional metadata of the libraries providing custom operations for - //@@ this model. - //@@ - ModelOperations model_operations = 18; - - //@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy - //@@ - //@@ Optional specification that describes the nature of transactions - //@@ to be expected from the model. - //@@ - ModelTransactionPolicy model_transaction_policy = 19; -} diff --git a/merlin/systems/triton/model_config_pb2.py b/merlin/systems/triton/model_config_pb2.py deleted file mode 100644 index 362d46a6a61..00000000000 --- a/merlin/systems/triton/model_config_pb2.py +++ /dev/null @@ -1,4564 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
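With the proto definition above now fully quoted, here is a short, hedged example of how a complete ModelConfig is typically built and written out as config.pbtxt using the generated bindings whose removal follows; backend, paths, and tensor names are illustrative only, and dims exclude the leading batch dimension implied by max_batch_size > 0.

    from google.protobuf import text_format
    from merlin.systems.triton import model_config_pb2 as model_config  # removed by this patch

    config = model_config.ModelConfig(
        name="example_workflow",
        backend="python",
        max_batch_size=64,  # actual request tensors carry a leading batch dim: [N, ...]
    )
    config.input.add(name="user_id", data_type=model_config.TYPE_INT64, dims=[1])
    config.output.add(name="user_id_out", data_type=model_config.TYPE_INT64, dims=[1])
    config.instance_group.add(kind=model_config.ModelInstanceGroup.KIND_CPU, count=1)

    with open("config.pbtxt", "w") as f:
        f.write(text_format.MessageToString(config))
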
-# source: model_config.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) -from google.protobuf.internal import enum_type_wrapper -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="model_config.proto", - package="inference", - syntax="proto3", - serialized_options=None, - serialized_pb=_b( - '\n\x12model_config.proto\x12\tinference"\x96\x01\n\x10ModelRateLimiter\x12\x37\n\tresources\x18\x01 \x03(\x0b\x32$.inference.ModelRateLimiter.Resource\x12\x10\n\x08priority\x18\x02 \x01(\r\x1a\x37\n\x08Resource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06global\x18\x02 \x01(\x08\x12\r\n\x05\x63ount\x18\x03 \x01(\r"\xf8\x01\n\x12ModelInstanceGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x30\n\x04kind\x18\x04 \x01(\x0e\x32".inference.ModelInstanceGroup.Kind\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x31\n\x0crate_limiter\x18\x06 \x01(\x0b\x32\x1b.inference.ModelRateLimiter\x12\x0c\n\x04gpus\x18\x03 \x03(\x05\x12\x0f\n\x07profile\x18\x05 \x03(\t"A\n\x04Kind\x12\r\n\tKIND_AUTO\x10\x00\x12\x0c\n\x08KIND_GPU\x10\x01\x12\x0c\n\x08KIND_CPU\x10\x02\x12\x0e\n\nKIND_MODEL\x10\x03"#\n\x12ModelTensorReshape\x12\r\n\x05shape\x18\x01 \x03(\x03"\xa0\x02\n\nModelInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12,\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1c.inference.ModelInput.Format\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\x12\x1a\n\x12\x61llow_ragged_batch\x18\x07 \x01(\x08";\n\x06\x46ormat\x12\x0f\n\x0b\x46ORMAT_NONE\x10\x00\x12\x0f\n\x0b\x46ORMAT_NHWC\x10\x01\x12\x0f\n\x0b\x46ORMAT_NCHW\x10\x02"\xb2\x01\n\x0bModelOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x03 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x16\n\x0elabel_filename\x18\x04 \x01(\t\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08"\xa5\x02\n\nBatchInput\x12(\n\x04kind\x18\x01 \x01(\x0e\x32\x1a.inference.BatchInput.Kind\x12\x13\n\x0btarget_name\x18\x02 \x03(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x14\n\x0csource_input\x18\x04 \x03(\t"\x99\x01\n\x04Kind\x12\x17\n\x13\x42\x41TCH_ELEMENT_COUNT\x10\x00\x12#\n\x1f\x42\x41TCH_ACCUMULATED_ELEMENT_COUNT\x10\x01\x12-\n)BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO\x10\x02\x12$\n BATCH_MAX_ELEMENT_COUNT_AS_SHAPE\x10\x03"\x8f\x01\n\x0b\x42\x61tchOutput\x12\x13\n\x0btarget_name\x18\x01 \x03(\t\x12)\n\x04kind\x18\x02 \x01(\x0e\x32\x1b.inference.BatchOutput.Kind\x12\x14\n\x0csource_input\x18\x03 \x03(\t"*\n\x04Kind\x12"\n\x1e\x42\x41TCH_SCATTER_WITH_INPUT_SHAPE\x10\x00"\x90\x02\n\x12ModelVersionPolicy\x12\x36\n\x06latest\x18\x01 \x01(\x0b\x32$.inference.ModelVersionPolicy.LatestH\x00\x12\x30\n\x03\x61ll\x18\x02 \x01(\x0b\x32!.inference.ModelVersionPolicy.AllH\x00\x12:\n\x08specific\x18\x03 \x01(\x0b\x32&.inference.ModelVersionPolicy.SpecificH\x00\x1a\x1e\n\x06Latest\x12\x14\n\x0cnum_versions\x18\x01 \x01(\r\x1a\x05\n\x03\x41ll\x1a\x1c\n\x08Specific\x12\x10\n\x08versions\x18\x01 
\x03(\x03\x42\x0f\n\rpolicy_choice"\xa1\r\n\x17ModelOptimizationPolicy\x12\x37\n\x05graph\x18\x01 \x01(\x0b\x32(.inference.ModelOptimizationPolicy.Graph\x12\x42\n\x08priority\x18\x02 \x01(\x0e\x32\x30.inference.ModelOptimizationPolicy.ModelPriority\x12\x35\n\x04\x63uda\x18\x03 \x01(\x0b\x32\'.inference.ModelOptimizationPolicy.Cuda\x12X\n\x16\x65xecution_accelerators\x18\x04 \x01(\x0b\x32\x38.inference.ModelOptimizationPolicy.ExecutionAccelerators\x12R\n\x13input_pinned_memory\x18\x05 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12S\n\x14output_pinned_memory\x18\x06 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x1a\x16\n\x05Graph\x12\r\n\x05level\x18\x01 \x01(\x05\x1a\x9e\x05\n\x04\x43uda\x12\x0e\n\x06graphs\x18\x01 \x01(\x08\x12\x18\n\x10\x62usy_wait_events\x18\x02 \x01(\x08\x12\x45\n\ngraph_spec\x18\x03 \x03(\x0b\x32\x31.inference.ModelOptimizationPolicy.Cuda.GraphSpec\x1a\xa4\x04\n\tGraphSpec\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12K\n\x05input\x18\x02 \x03(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry\x12W\n\x11graph_lower_bound\x18\x03 \x01(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound\x1a\x14\n\x05Shape\x12\x0b\n\x03\x64im\x18\x01 \x03(\x03\x1a\xdf\x01\n\nLowerBound\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12V\n\x05input\x18\x02 \x03(\x0b\x32G.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\xa4\x03\n\x15\x45xecutionAccelerators\x12g\n\x19gpu_execution_accelerator\x18\x01 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x12g\n\x19\x63pu_execution_accelerator\x18\x02 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x1a\xb8\x01\n\x0b\x41\x63\x63\x65lerator\x12\x0c\n\x04name\x18\x01 \x01(\t\x12h\n\nparameters\x18\x02 \x03(\x0b\x32T.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a$\n\x12PinnedMemoryBuffer\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08"I\n\rModelPriority\x12\x14\n\x10PRIORITY_DEFAULT\x10\x00\x12\x10\n\x0cPRIORITY_MAX\x10\x01\x12\x10\n\x0cPRIORITY_MIN\x10\x02"\xdb\x01\n\x10ModelQueuePolicy\x12\x41\n\x0etimeout_action\x18\x01 \x01(\x0e\x32).inference.ModelQueuePolicy.TimeoutAction\x12$\n\x1c\x64\x65\x66\x61ult_timeout_microseconds\x18\x02 \x01(\x04\x12\x1e\n\x16\x61llow_timeout_override\x18\x03 \x01(\x08\x12\x16\n\x0emax_queue_size\x18\x04 \x01(\r"&\n\rTimeoutAction\x12\n\n\x06REJECT\x10\x00\x12\t\n\x05\x44\x45LAY\x10\x01"\x9b\x03\n\x14ModelDynamicBatching\x12\x1c\n\x14preferred_batch_size\x18\x01 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x02 \x01(\x04\x12\x19\n\x11preserve_ordering\x18\x03 \x01(\x08\x12\x17\n\x0fpriority_levels\x18\x04 \x01(\r\x12\x1e\n\x16\x64\x65\x66\x61ult_priority_level\x18\x05 \x01(\r\x12\x39\n\x14\x64\x65\x66\x61ult_queue_policy\x18\x06 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy\x12W\n\x15priority_queue_policy\x18\x07 \x03(\x0b\x32\x38.inference.ModelDynamicBatching.PriorityQueuePolicyEntry\x1aW\n\x18PriorityQueuePolicyEntry\x12\x0b\n\x03key\x18\x01 
\x01(\r\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy:\x02\x38\x01"\xe3\x06\n\x15ModelSequenceBatching\x12\x41\n\x06\x64irect\x18\x03 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyDirectH\x00\x12\x41\n\x06oldest\x18\x04 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyOldestH\x00\x12&\n\x1emax_sequence_idle_microseconds\x18\x01 \x01(\x04\x12\x44\n\rcontrol_input\x18\x02 \x03(\x0b\x32-.inference.ModelSequenceBatching.ControlInput\x1a\x98\x02\n\x07\x43ontrol\x12;\n\x04kind\x18\x01 \x01(\x0e\x32-.inference.ModelSequenceBatching.Control.Kind\x12\x18\n\x10int32_false_true\x18\x02 \x03(\x05\x12\x17\n\x0f\x66p32_false_true\x18\x03 \x03(\x02\x12&\n\tdata_type\x18\x04 \x01(\x0e\x32\x13.inference.DataType"u\n\x04Kind\x12\x1a\n\x16\x43ONTROL_SEQUENCE_START\x10\x00\x12\x1a\n\x16\x43ONTROL_SEQUENCE_READY\x10\x01\x12\x18\n\x14\x43ONTROL_SEQUENCE_END\x10\x02\x12\x1b\n\x17\x43ONTROL_SEQUENCE_CORRID\x10\x03\x1aW\n\x0c\x43ontrolInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x07\x63ontrol\x18\x02 \x03(\x0b\x32(.inference.ModelSequenceBatching.Control\x1aX\n\x0eStrategyDirect\x12$\n\x1cmax_queue_delay_microseconds\x18\x01 \x01(\x04\x12 \n\x18minimum_slot_utilization\x18\x02 \x01(\x02\x1au\n\x0eStrategyOldest\x12\x1f\n\x17max_candidate_sequences\x18\x01 \x01(\x05\x12\x1c\n\x14preferred_batch_size\x18\x02 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x03 \x01(\x04\x42\x11\n\x0fstrategy_choice"\xdd\x02\n\x0fModelEnsembling\x12-\n\x04step\x18\x01 \x03(\x0b\x32\x1f.inference.ModelEnsembling.Step\x1a\x9a\x02\n\x04Step\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\x03\x12@\n\tinput_map\x18\x03 \x03(\x0b\x32-.inference.ModelEnsembling.Step.InputMapEntry\x12\x42\n\noutput_map\x18\x04 \x03(\x0b\x32..inference.ModelEnsembling.Step.OutputMapEntry\x1a/\n\rInputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x0eOutputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"&\n\x0eModelParameter\x12\x14\n\x0cstring_value\x18\x01 \x01(\t"\xca\x02\n\x0bModelWarmup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x32\n\x06inputs\x18\x03 \x03(\x0b\x32".inference.ModelWarmup.InputsEntry\x1a\x97\x01\n\x05Input\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x15\n\x0brandom_data\x18\x04 \x01(\x08H\x00\x12\x19\n\x0finput_data_file\x18\x05 \x01(\tH\x00\x42\x11\n\x0finput_data_type\x1aK\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1c.inference.ModelWarmup.Input:\x02\x38\x01".\n\x0fModelOperations\x12\x1b\n\x13op_library_filename\x18\x01 \x03(\t"+\n\x16ModelTransactionPolicy\x12\x11\n\tdecoupled\x18\x01 \x01(\x08"\xb8\t\n\x0bModelConfig\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08platform\x18\x02 \x01(\t\x12\x0f\n\x07\x62\x61\x63kend\x18\x11 \x01(\t\x12\x35\n\x0eversion_policy\x18\x03 \x01(\x0b\x32\x1d.inference.ModelVersionPolicy\x12\x16\n\x0emax_batch_size\x18\x04 \x01(\x05\x12$\n\x05input\x18\x05 \x03(\x0b\x32\x15.inference.ModelInput\x12&\n\x06output\x18\x06 \x03(\x0b\x32\x16.inference.ModelOutput\x12*\n\x0b\x62\x61tch_input\x18\x14 \x03(\x0b\x32\x15.inference.BatchInput\x12,\n\x0c\x62\x61tch_output\x18\x15 \x03(\x0b\x32\x16.inference.BatchOutput\x12\x38\n\x0coptimization\x18\x0c \x01(\x0b\x32".inference.ModelOptimizationPolicy\x12;\n\x10\x64ynamic_batching\x18\x0b 
\x01(\x0b\x32\x1f.inference.ModelDynamicBatchingH\x00\x12=\n\x11sequence_batching\x18\r \x01(\x0b\x32 .inference.ModelSequenceBatchingH\x00\x12\x39\n\x13\x65nsemble_scheduling\x18\x0f \x01(\x0b\x32\x1a.inference.ModelEnsemblingH\x00\x12\x35\n\x0einstance_group\x18\x07 \x03(\x0b\x32\x1d.inference.ModelInstanceGroup\x12\x1e\n\x16\x64\x65\x66\x61ult_model_filename\x18\x08 \x01(\t\x12H\n\x12\x63\x63_model_filenames\x18\t \x03(\x0b\x32,.inference.ModelConfig.CcModelFilenamesEntry\x12;\n\x0bmetric_tags\x18\n \x03(\x0b\x32&.inference.ModelConfig.MetricTagsEntry\x12:\n\nparameters\x18\x0e \x03(\x0b\x32&.inference.ModelConfig.ParametersEntry\x12,\n\x0cmodel_warmup\x18\x10 \x03(\x0b\x32\x16.inference.ModelWarmup\x12\x34\n\x10model_operations\x18\x12 \x01(\x0b\x32\x1a.inference.ModelOperations\x12\x43\n\x18model_transaction_policy\x18\x13 \x01(\x0b\x32!.inference.ModelTransactionPolicy\x1a\x37\n\x15\x43\x63ModelFilenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x31\n\x0fMetricTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.ModelParameter:\x02\x38\x01\x42\x13\n\x11scheduling_choice*\xeb\x01\n\x08\x44\x61taType\x12\x10\n\x0cTYPE_INVALID\x10\x00\x12\r\n\tTYPE_BOOL\x10\x01\x12\x0e\n\nTYPE_UINT8\x10\x02\x12\x0f\n\x0bTYPE_UINT16\x10\x03\x12\x0f\n\x0bTYPE_UINT32\x10\x04\x12\x0f\n\x0bTYPE_UINT64\x10\x05\x12\r\n\tTYPE_INT8\x10\x06\x12\x0e\n\nTYPE_INT16\x10\x07\x12\x0e\n\nTYPE_INT32\x10\x08\x12\x0e\n\nTYPE_INT64\x10\t\x12\r\n\tTYPE_FP16\x10\n\x12\r\n\tTYPE_FP32\x10\x0b\x12\r\n\tTYPE_FP64\x10\x0c\x12\x0f\n\x0bTYPE_STRING\x10\rb\x06proto3' - ), -) - -_DATATYPE = _descriptor.EnumDescriptor( - name="DataType", - full_name="inference.DataType", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="TYPE_INVALID", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_BOOL", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_UINT8", index=2, number=2, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_UINT16", index=3, number=3, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_UINT32", index=4, number=4, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_UINT64", index=5, number=5, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_INT8", index=6, number=6, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_INT16", index=7, number=7, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_INT32", index=8, number=8, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_INT64", index=9, number=9, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_FP16", index=10, number=10, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_FP32", index=11, number=11, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_FP64", index=12, number=12, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_STRING", index=13, number=13, serialized_options=None, type=None - ), - ], - containing_type=None, - 
serialized_options=None, - serialized_start=6899, - serialized_end=7134, -) -_sym_db.RegisterEnumDescriptor(_DATATYPE) - -DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE) -TYPE_INVALID = 0 -TYPE_BOOL = 1 -TYPE_UINT8 = 2 -TYPE_UINT16 = 3 -TYPE_UINT32 = 4 -TYPE_UINT64 = 5 -TYPE_INT8 = 6 -TYPE_INT16 = 7 -TYPE_INT32 = 8 -TYPE_INT64 = 9 -TYPE_FP16 = 10 -TYPE_FP32 = 11 -TYPE_FP64 = 12 -TYPE_STRING = 13 - - -_MODELINSTANCEGROUP_KIND = _descriptor.EnumDescriptor( - name="Kind", - full_name="inference.ModelInstanceGroup.Kind", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="KIND_AUTO", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="KIND_GPU", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="KIND_CPU", index=2, number=2, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="KIND_MODEL", index=3, number=3, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=370, - serialized_end=435, -) -_sym_db.RegisterEnumDescriptor(_MODELINSTANCEGROUP_KIND) - -_MODELINPUT_FORMAT = _descriptor.EnumDescriptor( - name="Format", - full_name="inference.ModelInput.Format", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="FORMAT_NONE", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="FORMAT_NHWC", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="FORMAT_NCHW", index=2, number=2, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=704, - serialized_end=763, -) -_sym_db.RegisterEnumDescriptor(_MODELINPUT_FORMAT) - -_BATCHINPUT_KIND = _descriptor.EnumDescriptor( - name="Kind", - full_name="inference.BatchInput.Kind", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="BATCH_ELEMENT_COUNT", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="BATCH_ACCUMULATED_ELEMENT_COUNT", - index=1, - number=1, - serialized_options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name="BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO", - index=2, - number=2, - serialized_options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name="BATCH_MAX_ELEMENT_COUNT_AS_SHAPE", - index=3, - number=3, - serialized_options=None, - type=None, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=1087, - serialized_end=1240, -) -_sym_db.RegisterEnumDescriptor(_BATCHINPUT_KIND) - -_BATCHOUTPUT_KIND = _descriptor.EnumDescriptor( - name="Kind", - full_name="inference.BatchOutput.Kind", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="BATCH_SCATTER_WITH_INPUT_SHAPE", - index=0, - number=0, - serialized_options=None, - type=None, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=1344, - serialized_end=1386, -) -_sym_db.RegisterEnumDescriptor(_BATCHOUTPUT_KIND) - -_MODELOPTIMIZATIONPOLICY_MODELPRIORITY = _descriptor.EnumDescriptor( - name="ModelPriority", - full_name="inference.ModelOptimizationPolicy.ModelPriority", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="PRIORITY_DEFAULT", index=0, number=0, serialized_options=None, type=None - ), - 
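As a small usage note on the generated enum plumbing above: DataType values are exposed both as module-level constants and through the EnumTypeWrapper, so they can be mapped back and forth by name; a hedged sketch, assuming the removed module is importable.

    from merlin.systems.triton import model_config_pb2 as model_config  # removed by this patch

    assert model_config.TYPE_FP32 == 11
    assert model_config.DataType.Name(model_config.TYPE_FP32) == "TYPE_FP32"
    assert model_config.DataType.Value("TYPE_INT64") == model_config.TYPE_INT64
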
_descriptor.EnumValueDescriptor( - name="PRIORITY_MAX", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="PRIORITY_MIN", index=2, number=2, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=3288, - serialized_end=3361, -) -_sym_db.RegisterEnumDescriptor(_MODELOPTIMIZATIONPOLICY_MODELPRIORITY) - -_MODELQUEUEPOLICY_TIMEOUTACTION = _descriptor.EnumDescriptor( - name="TimeoutAction", - full_name="inference.ModelQueuePolicy.TimeoutAction", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="REJECT", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="DELAY", index=1, number=1, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=3545, - serialized_end=3583, -) -_sym_db.RegisterEnumDescriptor(_MODELQUEUEPOLICY_TIMEOUTACTION) - -_MODELSEQUENCEBATCHING_CONTROL_KIND = _descriptor.EnumDescriptor( - name="Kind", - full_name="inference.ModelSequenceBatching.Control.Kind", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="CONTROL_SEQUENCE_START", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="CONTROL_SEQUENCE_READY", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="CONTROL_SEQUENCE_END", index=2, number=2, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="CONTROL_SEQUENCE_CORRID", index=3, number=3, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=4433, - serialized_end=4550, -) -_sym_db.RegisterEnumDescriptor(_MODELSEQUENCEBATCHING_CONTROL_KIND) - - -_MODELRATELIMITER_RESOURCE = _descriptor.Descriptor( - name="Resource", - full_name="inference.ModelRateLimiter.Resource", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelRateLimiter.Resource.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="global", - full_name="inference.ModelRateLimiter.Resource.global", - index=1, - number=2, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="count", - full_name="inference.ModelRateLimiter.Resource.count", - index=2, - number=3, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=129, - serialized_end=184, -) - -_MODELRATELIMITER = _descriptor.Descriptor( - name="ModelRateLimiter", - full_name="inference.ModelRateLimiter", - filename=None, - 
file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="resources", - full_name="inference.ModelRateLimiter.resources", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="priority", - full_name="inference.ModelRateLimiter.priority", - index=1, - number=2, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELRATELIMITER_RESOURCE, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=34, - serialized_end=184, -) - - -_MODELINSTANCEGROUP = _descriptor.Descriptor( - name="ModelInstanceGroup", - full_name="inference.ModelInstanceGroup", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelInstanceGroup.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="kind", - full_name="inference.ModelInstanceGroup.kind", - index=1, - number=4, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="count", - full_name="inference.ModelInstanceGroup.count", - index=2, - number=2, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="rate_limiter", - full_name="inference.ModelInstanceGroup.rate_limiter", - index=3, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="gpus", - full_name="inference.ModelInstanceGroup.gpus", - index=4, - number=3, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="profile", - full_name="inference.ModelInstanceGroup.profile", - index=5, - number=5, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _MODELINSTANCEGROUP_KIND, - ], - serialized_options=None, - 
is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=187, - serialized_end=435, -) - - -_MODELTENSORRESHAPE = _descriptor.Descriptor( - name="ModelTensorReshape", - full_name="inference.ModelTensorReshape", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="shape", - full_name="inference.ModelTensorReshape.shape", - index=0, - number=1, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=437, - serialized_end=472, -) - - -_MODELINPUT = _descriptor.Descriptor( - name="ModelInput", - full_name="inference.ModelInput", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelInput.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="data_type", - full_name="inference.ModelInput.data_type", - index=1, - number=2, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="format", - full_name="inference.ModelInput.format", - index=2, - number=3, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="dims", - full_name="inference.ModelInput.dims", - index=3, - number=4, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="reshape", - full_name="inference.ModelInput.reshape", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="is_shape_tensor", - full_name="inference.ModelInput.is_shape_tensor", - index=5, - number=6, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="allow_ragged_batch", - full_name="inference.ModelInput.allow_ragged_batch", - index=6, - number=7, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, 
- extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _MODELINPUT_FORMAT, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=475, - serialized_end=763, -) - - -_MODELOUTPUT = _descriptor.Descriptor( - name="ModelOutput", - full_name="inference.ModelOutput", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelOutput.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="data_type", - full_name="inference.ModelOutput.data_type", - index=1, - number=2, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="dims", - full_name="inference.ModelOutput.dims", - index=2, - number=3, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="reshape", - full_name="inference.ModelOutput.reshape", - index=3, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="label_filename", - full_name="inference.ModelOutput.label_filename", - index=4, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="is_shape_tensor", - full_name="inference.ModelOutput.is_shape_tensor", - index=5, - number=6, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=766, - serialized_end=944, -) - - -_BATCHINPUT = _descriptor.Descriptor( - name="BatchInput", - full_name="inference.BatchInput", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="kind", - full_name="inference.BatchInput.kind", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="target_name", - full_name="inference.BatchInput.target_name", - 
index=1, - number=2, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="data_type", - full_name="inference.BatchInput.data_type", - index=2, - number=3, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="source_input", - full_name="inference.BatchInput.source_input", - index=3, - number=4, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _BATCHINPUT_KIND, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=947, - serialized_end=1240, -) - - -_BATCHOUTPUT = _descriptor.Descriptor( - name="BatchOutput", - full_name="inference.BatchOutput", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="target_name", - full_name="inference.BatchOutput.target_name", - index=0, - number=1, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="kind", - full_name="inference.BatchOutput.kind", - index=1, - number=2, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="source_input", - full_name="inference.BatchOutput.source_input", - index=2, - number=3, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _BATCHOUTPUT_KIND, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1243, - serialized_end=1386, -) - - -_MODELVERSIONPOLICY_LATEST = _descriptor.Descriptor( - name="Latest", - full_name="inference.ModelVersionPolicy.Latest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="num_versions", - full_name="inference.ModelVersionPolicy.Latest.num_versions", - index=0, - number=1, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1577, - serialized_end=1607, -) - -_MODELVERSIONPOLICY_ALL = 
_descriptor.Descriptor( - name="All", - full_name="inference.ModelVersionPolicy.All", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1609, - serialized_end=1614, -) - -_MODELVERSIONPOLICY_SPECIFIC = _descriptor.Descriptor( - name="Specific", - full_name="inference.ModelVersionPolicy.Specific", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="versions", - full_name="inference.ModelVersionPolicy.Specific.versions", - index=0, - number=1, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1616, - serialized_end=1644, -) - -_MODELVERSIONPOLICY = _descriptor.Descriptor( - name="ModelVersionPolicy", - full_name="inference.ModelVersionPolicy", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="latest", - full_name="inference.ModelVersionPolicy.latest", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="all", - full_name="inference.ModelVersionPolicy.all", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="specific", - full_name="inference.ModelVersionPolicy.specific", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELVERSIONPOLICY_LATEST, - _MODELVERSIONPOLICY_ALL, - _MODELVERSIONPOLICY_SPECIFIC, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="policy_choice", - full_name="inference.ModelVersionPolicy.policy_choice", - index=0, - containing_type=None, - fields=[], - ), - ], - serialized_start=1389, - serialized_end=1661, -) - - -_MODELOPTIMIZATIONPOLICY_GRAPH = _descriptor.Descriptor( - name="Graph", - full_name="inference.ModelOptimizationPolicy.Graph", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="level", - full_name="inference.ModelOptimizationPolicy.Graph.level", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - 
enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2130, - serialized_end=2152, -) - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE = _descriptor.Descriptor( - name="Shape", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="dim", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape.dim", - index=0, - number=1, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2476, - serialized_end=2496, -) - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY = _descriptor.Descriptor( - name="InputEntry", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2621, - serialized_end=2722, -) - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND = _descriptor.Descriptor( - name="LowerBound", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="batch_size", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.batch_size", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="input", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.input", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - 
syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2499, - serialized_end=2722, -) - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY = _descriptor.Descriptor( - name="InputEntry", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2621, - serialized_end=2722, -) - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC = _descriptor.Descriptor( - name="GraphSpec", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="batch_size", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.batch_size", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="input", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.input", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="graph_lower_bound", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.graph_lower_bound", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE, - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND, - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2277, - serialized_end=2825, -) - -_MODELOPTIMIZATIONPOLICY_CUDA = _descriptor.Descriptor( - name="Cuda", - full_name="inference.ModelOptimizationPolicy.Cuda", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="graphs", - full_name="inference.ModelOptimizationPolicy.Cuda.graphs", - index=0, - number=1, - type=8, - cpp_type=7, - label=1, - has_default_value=False, 
- default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="busy_wait_events", - full_name="inference.ModelOptimizationPolicy.Cuda.busy_wait_events", - index=1, - number=2, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="graph_spec", - full_name="inference.ModelOptimizationPolicy.Cuda.graph_spec", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2155, - serialized_end=2825, -) - -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY = _descriptor.Descriptor( - name="ParametersEntry", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3199, - serialized_end=3248, -) - -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR = _descriptor.Descriptor( - name="Accelerator", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="parameters", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.parameters", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3064, - serialized_end=3248, -) - -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS = _descriptor.Descriptor( - name="ExecutionAccelerators", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="gpu_execution_accelerator", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.gpu_execution_accelerator", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="cpu_execution_accelerator", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.cpu_execution_accelerator", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2828, - serialized_end=3248, -) - -_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER = _descriptor.Descriptor( - name="PinnedMemoryBuffer", - full_name="inference.ModelOptimizationPolicy.PinnedMemoryBuffer", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="enable", - full_name="inference.ModelOptimizationPolicy.PinnedMemoryBuffer.enable", - index=0, - number=1, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3250, - serialized_end=3286, -) - -_MODELOPTIMIZATIONPOLICY = _descriptor.Descriptor( - name="ModelOptimizationPolicy", - full_name="inference.ModelOptimizationPolicy", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="graph", - full_name="inference.ModelOptimizationPolicy.graph", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="priority", - full_name="inference.ModelOptimizationPolicy.priority", - index=1, - number=2, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - 
is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="cuda", - full_name="inference.ModelOptimizationPolicy.cuda", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="execution_accelerators", - full_name="inference.ModelOptimizationPolicy.execution_accelerators", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="input_pinned_memory", - full_name="inference.ModelOptimizationPolicy.input_pinned_memory", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="output_pinned_memory", - full_name="inference.ModelOptimizationPolicy.output_pinned_memory", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_GRAPH, - _MODELOPTIMIZATIONPOLICY_CUDA, - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS, - _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER, - ], - enum_types=[ - _MODELOPTIMIZATIONPOLICY_MODELPRIORITY, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1664, - serialized_end=3361, -) - - -_MODELQUEUEPOLICY = _descriptor.Descriptor( - name="ModelQueuePolicy", - full_name="inference.ModelQueuePolicy", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="timeout_action", - full_name="inference.ModelQueuePolicy.timeout_action", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="default_timeout_microseconds", - full_name="inference.ModelQueuePolicy.default_timeout_microseconds", - index=1, - number=2, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="allow_timeout_override", - full_name="inference.ModelQueuePolicy.allow_timeout_override", - index=2, - number=3, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="max_queue_size", - 
full_name="inference.ModelQueuePolicy.max_queue_size", - index=3, - number=4, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _MODELQUEUEPOLICY_TIMEOUTACTION, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3364, - serialized_end=3583, -) - - -_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY = _descriptor.Descriptor( - name="PriorityQueuePolicyEntry", - full_name="inference.ModelDynamicBatching.PriorityQueuePolicyEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelDynamicBatching.PriorityQueuePolicyEntry.key", - index=0, - number=1, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelDynamicBatching.PriorityQueuePolicyEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3910, - serialized_end=3997, -) - -_MODELDYNAMICBATCHING = _descriptor.Descriptor( - name="ModelDynamicBatching", - full_name="inference.ModelDynamicBatching", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="preferred_batch_size", - full_name="inference.ModelDynamicBatching.preferred_batch_size", - index=0, - number=1, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="max_queue_delay_microseconds", - full_name="inference.ModelDynamicBatching.max_queue_delay_microseconds", - index=1, - number=2, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="preserve_ordering", - full_name="inference.ModelDynamicBatching.preserve_ordering", - index=2, - number=3, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="priority_levels", - full_name="inference.ModelDynamicBatching.priority_levels", - index=3, - number=4, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="default_priority_level", - full_name="inference.ModelDynamicBatching.default_priority_level", - index=4, - number=5, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="default_queue_policy", - full_name="inference.ModelDynamicBatching.default_queue_policy", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="priority_queue_policy", - full_name="inference.ModelDynamicBatching.priority_queue_policy", - index=6, - number=7, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3586, - serialized_end=3997, -) - - -_MODELSEQUENCEBATCHING_CONTROL = _descriptor.Descriptor( - name="Control", - full_name="inference.ModelSequenceBatching.Control", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="kind", - full_name="inference.ModelSequenceBatching.Control.kind", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="int32_false_true", - full_name="inference.ModelSequenceBatching.Control.int32_false_true", - index=1, - number=2, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="fp32_false_true", - full_name="inference.ModelSequenceBatching.Control.fp32_false_true", - index=2, - number=3, - type=2, - cpp_type=6, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="data_type", - full_name="inference.ModelSequenceBatching.Control.data_type", - index=3, - number=4, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _MODELSEQUENCEBATCHING_CONTROL_KIND, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4270, - serialized_end=4550, -) - -_MODELSEQUENCEBATCHING_CONTROLINPUT = 
_descriptor.Descriptor( - name="ControlInput", - full_name="inference.ModelSequenceBatching.ControlInput", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelSequenceBatching.ControlInput.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="control", - full_name="inference.ModelSequenceBatching.ControlInput.control", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4552, - serialized_end=4639, -) - -_MODELSEQUENCEBATCHING_STRATEGYDIRECT = _descriptor.Descriptor( - name="StrategyDirect", - full_name="inference.ModelSequenceBatching.StrategyDirect", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="max_queue_delay_microseconds", - full_name="inference.ModelSequenceBatching.StrategyDirect.max_queue_delay_microseconds", - index=0, - number=1, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="minimum_slot_utilization", - full_name="inference.ModelSequenceBatching.StrategyDirect.minimum_slot_utilization", - index=1, - number=2, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4641, - serialized_end=4729, -) - -_MODELSEQUENCEBATCHING_STRATEGYOLDEST = _descriptor.Descriptor( - name="StrategyOldest", - full_name="inference.ModelSequenceBatching.StrategyOldest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="max_candidate_sequences", - full_name="inference.ModelSequenceBatching.StrategyOldest.max_candidate_sequences", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="preferred_batch_size", - full_name="inference.ModelSequenceBatching.StrategyOldest.preferred_batch_size", - index=1, - number=2, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - 
name="max_queue_delay_microseconds", - full_name="inference.ModelSequenceBatching.StrategyOldest.max_queue_delay_microseconds", - index=2, - number=3, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4731, - serialized_end=4848, -) - -_MODELSEQUENCEBATCHING = _descriptor.Descriptor( - name="ModelSequenceBatching", - full_name="inference.ModelSequenceBatching", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="direct", - full_name="inference.ModelSequenceBatching.direct", - index=0, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="oldest", - full_name="inference.ModelSequenceBatching.oldest", - index=1, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="max_sequence_idle_microseconds", - full_name="inference.ModelSequenceBatching.max_sequence_idle_microseconds", - index=2, - number=1, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="control_input", - full_name="inference.ModelSequenceBatching.control_input", - index=3, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELSEQUENCEBATCHING_CONTROL, - _MODELSEQUENCEBATCHING_CONTROLINPUT, - _MODELSEQUENCEBATCHING_STRATEGYDIRECT, - _MODELSEQUENCEBATCHING_STRATEGYOLDEST, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="strategy_choice", - full_name="inference.ModelSequenceBatching.strategy_choice", - index=0, - containing_type=None, - fields=[], - ), - ], - serialized_start=4000, - serialized_end=4867, -) - - -_MODELENSEMBLING_STEP_INPUTMAPENTRY = _descriptor.Descriptor( - name="InputMapEntry", - full_name="inference.ModelEnsembling.Step.InputMapEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelEnsembling.Step.InputMapEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - 
name="value", - full_name="inference.ModelEnsembling.Step.InputMapEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5122, - serialized_end=5169, -) - -_MODELENSEMBLING_STEP_OUTPUTMAPENTRY = _descriptor.Descriptor( - name="OutputMapEntry", - full_name="inference.ModelEnsembling.Step.OutputMapEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelEnsembling.Step.OutputMapEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelEnsembling.Step.OutputMapEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5171, - serialized_end=5219, -) - -_MODELENSEMBLING_STEP = _descriptor.Descriptor( - name="Step", - full_name="inference.ModelEnsembling.Step", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="model_name", - full_name="inference.ModelEnsembling.Step.model_name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="model_version", - full_name="inference.ModelEnsembling.Step.model_version", - index=1, - number=2, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="input_map", - full_name="inference.ModelEnsembling.Step.input_map", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="output_map", - full_name="inference.ModelEnsembling.Step.output_map", - index=3, - number=4, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - 
nested_types=[ - _MODELENSEMBLING_STEP_INPUTMAPENTRY, - _MODELENSEMBLING_STEP_OUTPUTMAPENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4937, - serialized_end=5219, -) - -_MODELENSEMBLING = _descriptor.Descriptor( - name="ModelEnsembling", - full_name="inference.ModelEnsembling", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="step", - full_name="inference.ModelEnsembling.step", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELENSEMBLING_STEP, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4870, - serialized_end=5219, -) - - -_MODELPARAMETER = _descriptor.Descriptor( - name="ModelParameter", - full_name="inference.ModelParameter", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="string_value", - full_name="inference.ModelParameter.string_value", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5221, - serialized_end=5259, -) - - -_MODELWARMUP_INPUT = _descriptor.Descriptor( - name="Input", - full_name="inference.ModelWarmup.Input", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="data_type", - full_name="inference.ModelWarmup.Input.data_type", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="dims", - full_name="inference.ModelWarmup.Input.dims", - index=1, - number=2, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="zero_data", - full_name="inference.ModelWarmup.Input.zero_data", - index=2, - number=3, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="random_data", - full_name="inference.ModelWarmup.Input.random_data", - index=3, - number=4, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - 
name="input_data_file", - full_name="inference.ModelWarmup.Input.input_data_file", - index=4, - number=5, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="input_data_type", - full_name="inference.ModelWarmup.Input.input_data_type", - index=0, - containing_type=None, - fields=[], - ), - ], - serialized_start=5364, - serialized_end=5515, -) - -_MODELWARMUP_INPUTSENTRY = _descriptor.Descriptor( - name="InputsEntry", - full_name="inference.ModelWarmup.InputsEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelWarmup.InputsEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelWarmup.InputsEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5517, - serialized_end=5592, -) - -_MODELWARMUP = _descriptor.Descriptor( - name="ModelWarmup", - full_name="inference.ModelWarmup", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelWarmup.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="batch_size", - full_name="inference.ModelWarmup.batch_size", - index=1, - number=2, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="inputs", - full_name="inference.ModelWarmup.inputs", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELWARMUP_INPUT, - _MODELWARMUP_INPUTSENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5262, - serialized_end=5592, -) - - -_MODELOPERATIONS = _descriptor.Descriptor( - name="ModelOperations", - 
full_name="inference.ModelOperations", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="op_library_filename", - full_name="inference.ModelOperations.op_library_filename", - index=0, - number=1, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5594, - serialized_end=5640, -) - - -_MODELTRANSACTIONPOLICY = _descriptor.Descriptor( - name="ModelTransactionPolicy", - full_name="inference.ModelTransactionPolicy", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="decoupled", - full_name="inference.ModelTransactionPolicy.decoupled", - index=0, - number=1, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5642, - serialized_end=5685, -) - - -_MODELCONFIG_CCMODELFILENAMESENTRY = _descriptor.Descriptor( - name="CcModelFilenamesEntry", - full_name="inference.ModelConfig.CcModelFilenamesEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelConfig.CcModelFilenamesEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelConfig.CcModelFilenamesEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6691, - serialized_end=6746, -) - -_MODELCONFIG_METRICTAGSENTRY = _descriptor.Descriptor( - name="MetricTagsEntry", - full_name="inference.ModelConfig.MetricTagsEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelConfig.MetricTagsEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelConfig.MetricTagsEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - 
default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6748, - serialized_end=6797, -) - -_MODELCONFIG_PARAMETERSENTRY = _descriptor.Descriptor( - name="ParametersEntry", - full_name="inference.ModelConfig.ParametersEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelConfig.ParametersEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelConfig.ParametersEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6799, - serialized_end=6875, -) - -_MODELCONFIG = _descriptor.Descriptor( - name="ModelConfig", - full_name="inference.ModelConfig", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelConfig.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="platform", - full_name="inference.ModelConfig.platform", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="backend", - full_name="inference.ModelConfig.backend", - index=2, - number=17, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="version_policy", - full_name="inference.ModelConfig.version_policy", - index=3, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="max_batch_size", - full_name="inference.ModelConfig.max_batch_size", - index=4, - number=4, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - 
containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="input", - full_name="inference.ModelConfig.input", - index=5, - number=5, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="output", - full_name="inference.ModelConfig.output", - index=6, - number=6, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="batch_input", - full_name="inference.ModelConfig.batch_input", - index=7, - number=20, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="batch_output", - full_name="inference.ModelConfig.batch_output", - index=8, - number=21, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="optimization", - full_name="inference.ModelConfig.optimization", - index=9, - number=12, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="dynamic_batching", - full_name="inference.ModelConfig.dynamic_batching", - index=10, - number=11, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="sequence_batching", - full_name="inference.ModelConfig.sequence_batching", - index=11, - number=13, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="ensemble_scheduling", - full_name="inference.ModelConfig.ensemble_scheduling", - index=12, - number=15, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="instance_group", - full_name="inference.ModelConfig.instance_group", - index=13, - number=7, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="default_model_filename", - 
full_name="inference.ModelConfig.default_model_filename", - index=14, - number=8, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="cc_model_filenames", - full_name="inference.ModelConfig.cc_model_filenames", - index=15, - number=9, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="metric_tags", - full_name="inference.ModelConfig.metric_tags", - index=16, - number=10, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="parameters", - full_name="inference.ModelConfig.parameters", - index=17, - number=14, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="model_warmup", - full_name="inference.ModelConfig.model_warmup", - index=18, - number=16, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="model_operations", - full_name="inference.ModelConfig.model_operations", - index=19, - number=18, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="model_transaction_policy", - full_name="inference.ModelConfig.model_transaction_policy", - index=20, - number=19, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELCONFIG_CCMODELFILENAMESENTRY, - _MODELCONFIG_METRICTAGSENTRY, - _MODELCONFIG_PARAMETERSENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="scheduling_choice", - full_name="inference.ModelConfig.scheduling_choice", - index=0, - containing_type=None, - fields=[], - ), - ], - serialized_start=5688, - serialized_end=6896, -) - -_MODELRATELIMITER_RESOURCE.containing_type = _MODELRATELIMITER -_MODELRATELIMITER.fields_by_name["resources"].message_type = _MODELRATELIMITER_RESOURCE -_MODELINSTANCEGROUP.fields_by_name["kind"].enum_type = _MODELINSTANCEGROUP_KIND -_MODELINSTANCEGROUP.fields_by_name["rate_limiter"].message_type = _MODELRATELIMITER -_MODELINSTANCEGROUP_KIND.containing_type = _MODELINSTANCEGROUP -_MODELINPUT.fields_by_name["data_type"].enum_type = 
_DATATYPE -_MODELINPUT.fields_by_name["format"].enum_type = _MODELINPUT_FORMAT -_MODELINPUT.fields_by_name["reshape"].message_type = _MODELTENSORRESHAPE -_MODELINPUT_FORMAT.containing_type = _MODELINPUT -_MODELOUTPUT.fields_by_name["data_type"].enum_type = _DATATYPE -_MODELOUTPUT.fields_by_name["reshape"].message_type = _MODELTENSORRESHAPE -_BATCHINPUT.fields_by_name["kind"].enum_type = _BATCHINPUT_KIND -_BATCHINPUT.fields_by_name["data_type"].enum_type = _DATATYPE -_BATCHINPUT_KIND.containing_type = _BATCHINPUT -_BATCHOUTPUT.fields_by_name["kind"].enum_type = _BATCHOUTPUT_KIND -_BATCHOUTPUT_KIND.containing_type = _BATCHOUTPUT -_MODELVERSIONPOLICY_LATEST.containing_type = _MODELVERSIONPOLICY -_MODELVERSIONPOLICY_ALL.containing_type = _MODELVERSIONPOLICY -_MODELVERSIONPOLICY_SPECIFIC.containing_type = _MODELVERSIONPOLICY -_MODELVERSIONPOLICY.fields_by_name["latest"].message_type = _MODELVERSIONPOLICY_LATEST -_MODELVERSIONPOLICY.fields_by_name["all"].message_type = _MODELVERSIONPOLICY_ALL -_MODELVERSIONPOLICY.fields_by_name["specific"].message_type = _MODELVERSIONPOLICY_SPECIFIC -_MODELVERSIONPOLICY.oneofs_by_name["policy_choice"].fields.append( - _MODELVERSIONPOLICY.fields_by_name["latest"] -) -_MODELVERSIONPOLICY.fields_by_name["latest"].containing_oneof = _MODELVERSIONPOLICY.oneofs_by_name[ - "policy_choice" -] -_MODELVERSIONPOLICY.oneofs_by_name["policy_choice"].fields.append( - _MODELVERSIONPOLICY.fields_by_name["all"] -) -_MODELVERSIONPOLICY.fields_by_name["all"].containing_oneof = _MODELVERSIONPOLICY.oneofs_by_name[ - "policy_choice" -] -_MODELVERSIONPOLICY.oneofs_by_name["policy_choice"].fields.append( - _MODELVERSIONPOLICY.fields_by_name["specific"] -) -_MODELVERSIONPOLICY.fields_by_name[ - "specific" -].containing_oneof = _MODELVERSIONPOLICY.oneofs_by_name["policy_choice"] -_MODELOPTIMIZATIONPOLICY_GRAPH.containing_type = _MODELOPTIMIZATIONPOLICY -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE.containing_type = ( - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC -) -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY.fields_by_name[ - "value" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY.containing_type = ( - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND -) -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.fields_by_name[ - "input" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.containing_type = ( - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC -) -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY.fields_by_name[ - "value" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY.containing_type = ( - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC -) -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.fields_by_name[ - "input" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.fields_by_name[ - "graph_lower_bound" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.containing_type = _MODELOPTIMIZATIONPOLICY_CUDA -_MODELOPTIMIZATIONPOLICY_CUDA.fields_by_name[ - "graph_spec" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC -_MODELOPTIMIZATIONPOLICY_CUDA.containing_type = _MODELOPTIMIZATIONPOLICY -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY.containing_type = ( - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR -) 
-_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.fields_by_name[ - "parameters" -].message_type = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.containing_type = ( - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS -) -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.fields_by_name[ - "gpu_execution_accelerator" -].message_type = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.fields_by_name[ - "cpu_execution_accelerator" -].message_type = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.containing_type = _MODELOPTIMIZATIONPOLICY -_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER.containing_type = _MODELOPTIMIZATIONPOLICY -_MODELOPTIMIZATIONPOLICY.fields_by_name["graph"].message_type = _MODELOPTIMIZATIONPOLICY_GRAPH -_MODELOPTIMIZATIONPOLICY.fields_by_name[ - "priority" -].enum_type = _MODELOPTIMIZATIONPOLICY_MODELPRIORITY -_MODELOPTIMIZATIONPOLICY.fields_by_name["cuda"].message_type = _MODELOPTIMIZATIONPOLICY_CUDA -_MODELOPTIMIZATIONPOLICY.fields_by_name[ - "execution_accelerators" -].message_type = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS -_MODELOPTIMIZATIONPOLICY.fields_by_name[ - "input_pinned_memory" -].message_type = _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER -_MODELOPTIMIZATIONPOLICY.fields_by_name[ - "output_pinned_memory" -].message_type = _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER -_MODELOPTIMIZATIONPOLICY_MODELPRIORITY.containing_type = _MODELOPTIMIZATIONPOLICY -_MODELQUEUEPOLICY.fields_by_name["timeout_action"].enum_type = _MODELQUEUEPOLICY_TIMEOUTACTION -_MODELQUEUEPOLICY_TIMEOUTACTION.containing_type = _MODELQUEUEPOLICY -_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY.fields_by_name[ - "value" -].message_type = _MODELQUEUEPOLICY -_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY.containing_type = _MODELDYNAMICBATCHING -_MODELDYNAMICBATCHING.fields_by_name["default_queue_policy"].message_type = _MODELQUEUEPOLICY -_MODELDYNAMICBATCHING.fields_by_name[ - "priority_queue_policy" -].message_type = _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY -_MODELSEQUENCEBATCHING_CONTROL.fields_by_name[ - "kind" -].enum_type = _MODELSEQUENCEBATCHING_CONTROL_KIND -_MODELSEQUENCEBATCHING_CONTROL.fields_by_name["data_type"].enum_type = _DATATYPE -_MODELSEQUENCEBATCHING_CONTROL.containing_type = _MODELSEQUENCEBATCHING -_MODELSEQUENCEBATCHING_CONTROL_KIND.containing_type = _MODELSEQUENCEBATCHING_CONTROL -_MODELSEQUENCEBATCHING_CONTROLINPUT.fields_by_name[ - "control" -].message_type = _MODELSEQUENCEBATCHING_CONTROL -_MODELSEQUENCEBATCHING_CONTROLINPUT.containing_type = _MODELSEQUENCEBATCHING -_MODELSEQUENCEBATCHING_STRATEGYDIRECT.containing_type = _MODELSEQUENCEBATCHING -_MODELSEQUENCEBATCHING_STRATEGYOLDEST.containing_type = _MODELSEQUENCEBATCHING -_MODELSEQUENCEBATCHING.fields_by_name["direct"].message_type = _MODELSEQUENCEBATCHING_STRATEGYDIRECT -_MODELSEQUENCEBATCHING.fields_by_name["oldest"].message_type = _MODELSEQUENCEBATCHING_STRATEGYOLDEST -_MODELSEQUENCEBATCHING.fields_by_name[ - "control_input" -].message_type = _MODELSEQUENCEBATCHING_CONTROLINPUT -_MODELSEQUENCEBATCHING.oneofs_by_name["strategy_choice"].fields.append( - _MODELSEQUENCEBATCHING.fields_by_name["direct"] -) -_MODELSEQUENCEBATCHING.fields_by_name[ - "direct" -].containing_oneof = _MODELSEQUENCEBATCHING.oneofs_by_name["strategy_choice"] -_MODELSEQUENCEBATCHING.oneofs_by_name["strategy_choice"].fields.append( - 
_MODELSEQUENCEBATCHING.fields_by_name["oldest"] -) -_MODELSEQUENCEBATCHING.fields_by_name[ - "oldest" -].containing_oneof = _MODELSEQUENCEBATCHING.oneofs_by_name["strategy_choice"] -_MODELENSEMBLING_STEP_INPUTMAPENTRY.containing_type = _MODELENSEMBLING_STEP -_MODELENSEMBLING_STEP_OUTPUTMAPENTRY.containing_type = _MODELENSEMBLING_STEP -_MODELENSEMBLING_STEP.fields_by_name["input_map"].message_type = _MODELENSEMBLING_STEP_INPUTMAPENTRY -_MODELENSEMBLING_STEP.fields_by_name[ - "output_map" -].message_type = _MODELENSEMBLING_STEP_OUTPUTMAPENTRY -_MODELENSEMBLING_STEP.containing_type = _MODELENSEMBLING -_MODELENSEMBLING.fields_by_name["step"].message_type = _MODELENSEMBLING_STEP -_MODELWARMUP_INPUT.fields_by_name["data_type"].enum_type = _DATATYPE -_MODELWARMUP_INPUT.containing_type = _MODELWARMUP -_MODELWARMUP_INPUT.oneofs_by_name["input_data_type"].fields.append( - _MODELWARMUP_INPUT.fields_by_name["zero_data"] -) -_MODELWARMUP_INPUT.fields_by_name["zero_data"].containing_oneof = _MODELWARMUP_INPUT.oneofs_by_name[ - "input_data_type" -] -_MODELWARMUP_INPUT.oneofs_by_name["input_data_type"].fields.append( - _MODELWARMUP_INPUT.fields_by_name["random_data"] -) -_MODELWARMUP_INPUT.fields_by_name[ - "random_data" -].containing_oneof = _MODELWARMUP_INPUT.oneofs_by_name["input_data_type"] -_MODELWARMUP_INPUT.oneofs_by_name["input_data_type"].fields.append( - _MODELWARMUP_INPUT.fields_by_name["input_data_file"] -) -_MODELWARMUP_INPUT.fields_by_name[ - "input_data_file" -].containing_oneof = _MODELWARMUP_INPUT.oneofs_by_name["input_data_type"] -_MODELWARMUP_INPUTSENTRY.fields_by_name["value"].message_type = _MODELWARMUP_INPUT -_MODELWARMUP_INPUTSENTRY.containing_type = _MODELWARMUP -_MODELWARMUP.fields_by_name["inputs"].message_type = _MODELWARMUP_INPUTSENTRY -_MODELCONFIG_CCMODELFILENAMESENTRY.containing_type = _MODELCONFIG -_MODELCONFIG_METRICTAGSENTRY.containing_type = _MODELCONFIG -_MODELCONFIG_PARAMETERSENTRY.fields_by_name["value"].message_type = _MODELPARAMETER -_MODELCONFIG_PARAMETERSENTRY.containing_type = _MODELCONFIG -_MODELCONFIG.fields_by_name["version_policy"].message_type = _MODELVERSIONPOLICY -_MODELCONFIG.fields_by_name["input"].message_type = _MODELINPUT -_MODELCONFIG.fields_by_name["output"].message_type = _MODELOUTPUT -_MODELCONFIG.fields_by_name["batch_input"].message_type = _BATCHINPUT -_MODELCONFIG.fields_by_name["batch_output"].message_type = _BATCHOUTPUT -_MODELCONFIG.fields_by_name["optimization"].message_type = _MODELOPTIMIZATIONPOLICY -_MODELCONFIG.fields_by_name["dynamic_batching"].message_type = _MODELDYNAMICBATCHING -_MODELCONFIG.fields_by_name["sequence_batching"].message_type = _MODELSEQUENCEBATCHING -_MODELCONFIG.fields_by_name["ensemble_scheduling"].message_type = _MODELENSEMBLING -_MODELCONFIG.fields_by_name["instance_group"].message_type = _MODELINSTANCEGROUP -_MODELCONFIG.fields_by_name["cc_model_filenames"].message_type = _MODELCONFIG_CCMODELFILENAMESENTRY -_MODELCONFIG.fields_by_name["metric_tags"].message_type = _MODELCONFIG_METRICTAGSENTRY -_MODELCONFIG.fields_by_name["parameters"].message_type = _MODELCONFIG_PARAMETERSENTRY -_MODELCONFIG.fields_by_name["model_warmup"].message_type = _MODELWARMUP -_MODELCONFIG.fields_by_name["model_operations"].message_type = _MODELOPERATIONS -_MODELCONFIG.fields_by_name["model_transaction_policy"].message_type = _MODELTRANSACTIONPOLICY -_MODELCONFIG.oneofs_by_name["scheduling_choice"].fields.append( - _MODELCONFIG.fields_by_name["dynamic_batching"] -) -_MODELCONFIG.fields_by_name["dynamic_batching"].containing_oneof = 
_MODELCONFIG.oneofs_by_name[ - "scheduling_choice" -] -_MODELCONFIG.oneofs_by_name["scheduling_choice"].fields.append( - _MODELCONFIG.fields_by_name["sequence_batching"] -) -_MODELCONFIG.fields_by_name["sequence_batching"].containing_oneof = _MODELCONFIG.oneofs_by_name[ - "scheduling_choice" -] -_MODELCONFIG.oneofs_by_name["scheduling_choice"].fields.append( - _MODELCONFIG.fields_by_name["ensemble_scheduling"] -) -_MODELCONFIG.fields_by_name["ensemble_scheduling"].containing_oneof = _MODELCONFIG.oneofs_by_name[ - "scheduling_choice" -] -DESCRIPTOR.message_types_by_name["ModelRateLimiter"] = _MODELRATELIMITER -DESCRIPTOR.message_types_by_name["ModelInstanceGroup"] = _MODELINSTANCEGROUP -DESCRIPTOR.message_types_by_name["ModelTensorReshape"] = _MODELTENSORRESHAPE -DESCRIPTOR.message_types_by_name["ModelInput"] = _MODELINPUT -DESCRIPTOR.message_types_by_name["ModelOutput"] = _MODELOUTPUT -DESCRIPTOR.message_types_by_name["BatchInput"] = _BATCHINPUT -DESCRIPTOR.message_types_by_name["BatchOutput"] = _BATCHOUTPUT -DESCRIPTOR.message_types_by_name["ModelVersionPolicy"] = _MODELVERSIONPOLICY -DESCRIPTOR.message_types_by_name["ModelOptimizationPolicy"] = _MODELOPTIMIZATIONPOLICY -DESCRIPTOR.message_types_by_name["ModelQueuePolicy"] = _MODELQUEUEPOLICY -DESCRIPTOR.message_types_by_name["ModelDynamicBatching"] = _MODELDYNAMICBATCHING -DESCRIPTOR.message_types_by_name["ModelSequenceBatching"] = _MODELSEQUENCEBATCHING -DESCRIPTOR.message_types_by_name["ModelEnsembling"] = _MODELENSEMBLING -DESCRIPTOR.message_types_by_name["ModelParameter"] = _MODELPARAMETER -DESCRIPTOR.message_types_by_name["ModelWarmup"] = _MODELWARMUP -DESCRIPTOR.message_types_by_name["ModelOperations"] = _MODELOPERATIONS -DESCRIPTOR.message_types_by_name["ModelTransactionPolicy"] = _MODELTRANSACTIONPOLICY -DESCRIPTOR.message_types_by_name["ModelConfig"] = _MODELCONFIG -DESCRIPTOR.enum_types_by_name["DataType"] = _DATATYPE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ModelRateLimiter = _reflection.GeneratedProtocolMessageType( - "ModelRateLimiter", - (_message.Message,), - dict( - Resource=_reflection.GeneratedProtocolMessageType( - "Resource", - (_message.Message,), - dict( - DESCRIPTOR=_MODELRATELIMITER_RESOURCE, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter.Resource) - ), - ), - DESCRIPTOR=_MODELRATELIMITER, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter) - ), -) -_sym_db.RegisterMessage(ModelRateLimiter) -_sym_db.RegisterMessage(ModelRateLimiter.Resource) - -ModelInstanceGroup = _reflection.GeneratedProtocolMessageType( - "ModelInstanceGroup", - (_message.Message,), - dict( - DESCRIPTOR=_MODELINSTANCEGROUP, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup) - ), -) -_sym_db.RegisterMessage(ModelInstanceGroup) - -ModelTensorReshape = _reflection.GeneratedProtocolMessageType( - "ModelTensorReshape", - (_message.Message,), - dict( - DESCRIPTOR=_MODELTENSORRESHAPE, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelTensorReshape) - ), -) -_sym_db.RegisterMessage(ModelTensorReshape) - -ModelInput = _reflection.GeneratedProtocolMessageType( - "ModelInput", - (_message.Message,), - dict( - DESCRIPTOR=_MODELINPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelInput) - ), -) -_sym_db.RegisterMessage(ModelInput) - -ModelOutput = _reflection.GeneratedProtocolMessageType( - "ModelOutput", - 
(_message.Message,), - dict( - DESCRIPTOR=_MODELOUTPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOutput) - ), -) -_sym_db.RegisterMessage(ModelOutput) - -BatchInput = _reflection.GeneratedProtocolMessageType( - "BatchInput", - (_message.Message,), - dict( - DESCRIPTOR=_BATCHINPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.BatchInput) - ), -) -_sym_db.RegisterMessage(BatchInput) - -BatchOutput = _reflection.GeneratedProtocolMessageType( - "BatchOutput", - (_message.Message,), - dict( - DESCRIPTOR=_BATCHOUTPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.BatchOutput) - ), -) -_sym_db.RegisterMessage(BatchOutput) - -ModelVersionPolicy = _reflection.GeneratedProtocolMessageType( - "ModelVersionPolicy", - (_message.Message,), - dict( - Latest=_reflection.GeneratedProtocolMessageType( - "Latest", - (_message.Message,), - dict( - DESCRIPTOR=_MODELVERSIONPOLICY_LATEST, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Latest) - ), - ), - All=_reflection.GeneratedProtocolMessageType( - "All", - (_message.Message,), - dict( - DESCRIPTOR=_MODELVERSIONPOLICY_ALL, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.All) - ), - ), - Specific=_reflection.GeneratedProtocolMessageType( - "Specific", - (_message.Message,), - dict( - DESCRIPTOR=_MODELVERSIONPOLICY_SPECIFIC, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Specific) - ), - ), - DESCRIPTOR=_MODELVERSIONPOLICY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy) - ), -) -_sym_db.RegisterMessage(ModelVersionPolicy) -_sym_db.RegisterMessage(ModelVersionPolicy.Latest) -_sym_db.RegisterMessage(ModelVersionPolicy.All) -_sym_db.RegisterMessage(ModelVersionPolicy.Specific) - -ModelOptimizationPolicy = _reflection.GeneratedProtocolMessageType( - "ModelOptimizationPolicy", - (_message.Message,), - dict( - Graph=_reflection.GeneratedProtocolMessageType( - "Graph", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_GRAPH, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Graph) - ), - ), - Cuda=_reflection.GeneratedProtocolMessageType( - "Cuda", - (_message.Message,), - dict( - GraphSpec=_reflection.GeneratedProtocolMessageType( - "GraphSpec", - (_message.Message,), - dict( - Shape=_reflection.GeneratedProtocolMessageType( - "Shape", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape) - ), - ), - LowerBound=_reflection.GeneratedProtocolMessageType( - "LowerBound", - (_message.Message,), - dict( - InputEntry=_reflection.GeneratedProtocolMessageType( - "InputEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) - ), - ), - InputEntry=_reflection.GeneratedProtocolMessageType( - 
"InputEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda) - ), - ), - ExecutionAccelerators=_reflection.GeneratedProtocolMessageType( - "ExecutionAccelerators", - (_message.Message,), - dict( - Accelerator=_reflection.GeneratedProtocolMessageType( - "Accelerator", - (_message.Message,), - dict( - ParametersEntry=_reflection.GeneratedProtocolMessageType( - "ParametersEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators) - ), - ), - PinnedMemoryBuffer=_reflection.GeneratedProtocolMessageType( - "PinnedMemoryBuffer", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.PinnedMemoryBuffer) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy) - ), -) -_sym_db.RegisterMessage(ModelOptimizationPolicy) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Graph) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.Shape) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators) -_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) -_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.PinnedMemoryBuffer) - -ModelQueuePolicy = _reflection.GeneratedProtocolMessageType( - "ModelQueuePolicy", - (_message.Message,), - dict( - DESCRIPTOR=_MODELQUEUEPOLICY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelQueuePolicy) - ), -) -_sym_db.RegisterMessage(ModelQueuePolicy) - -ModelDynamicBatching = _reflection.GeneratedProtocolMessageType( - "ModelDynamicBatching", - (_message.Message,), - dict( - PriorityQueuePolicyEntry=_reflection.GeneratedProtocolMessageType( - "PriorityQueuePolicyEntry", - (_message.Message,), - dict( - 
DESCRIPTOR=_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching.PriorityQueuePolicyEntry) - ), - ), - DESCRIPTOR=_MODELDYNAMICBATCHING, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching) - ), -) -_sym_db.RegisterMessage(ModelDynamicBatching) -_sym_db.RegisterMessage(ModelDynamicBatching.PriorityQueuePolicyEntry) - -ModelSequenceBatching = _reflection.GeneratedProtocolMessageType( - "ModelSequenceBatching", - (_message.Message,), - dict( - Control=_reflection.GeneratedProtocolMessageType( - "Control", - (_message.Message,), - dict( - DESCRIPTOR=_MODELSEQUENCEBATCHING_CONTROL, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.Control) - ), - ), - ControlInput=_reflection.GeneratedProtocolMessageType( - "ControlInput", - (_message.Message,), - dict( - DESCRIPTOR=_MODELSEQUENCEBATCHING_CONTROLINPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.ControlInput) - ), - ), - StrategyDirect=_reflection.GeneratedProtocolMessageType( - "StrategyDirect", - (_message.Message,), - dict( - DESCRIPTOR=_MODELSEQUENCEBATCHING_STRATEGYDIRECT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyDirect) - ), - ), - StrategyOldest=_reflection.GeneratedProtocolMessageType( - "StrategyOldest", - (_message.Message,), - dict( - DESCRIPTOR=_MODELSEQUENCEBATCHING_STRATEGYOLDEST, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyOldest) - ), - ), - DESCRIPTOR=_MODELSEQUENCEBATCHING, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching) - ), -) -_sym_db.RegisterMessage(ModelSequenceBatching) -_sym_db.RegisterMessage(ModelSequenceBatching.Control) -_sym_db.RegisterMessage(ModelSequenceBatching.ControlInput) -_sym_db.RegisterMessage(ModelSequenceBatching.StrategyDirect) -_sym_db.RegisterMessage(ModelSequenceBatching.StrategyOldest) - -ModelEnsembling = _reflection.GeneratedProtocolMessageType( - "ModelEnsembling", - (_message.Message,), - dict( - Step=_reflection.GeneratedProtocolMessageType( - "Step", - (_message.Message,), - dict( - InputMapEntry=_reflection.GeneratedProtocolMessageType( - "InputMapEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELENSEMBLING_STEP_INPUTMAPENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.InputMapEntry) - ), - ), - OutputMapEntry=_reflection.GeneratedProtocolMessageType( - "OutputMapEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELENSEMBLING_STEP_OUTPUTMAPENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.OutputMapEntry) - ), - ), - DESCRIPTOR=_MODELENSEMBLING_STEP, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step) - ), - ), - DESCRIPTOR=_MODELENSEMBLING, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling) - ), -) -_sym_db.RegisterMessage(ModelEnsembling) -_sym_db.RegisterMessage(ModelEnsembling.Step) -_sym_db.RegisterMessage(ModelEnsembling.Step.InputMapEntry) -_sym_db.RegisterMessage(ModelEnsembling.Step.OutputMapEntry) - -ModelParameter = _reflection.GeneratedProtocolMessageType( - "ModelParameter", - 
(_message.Message,), - dict( - DESCRIPTOR=_MODELPARAMETER, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelParameter) - ), -) -_sym_db.RegisterMessage(ModelParameter) - -ModelWarmup = _reflection.GeneratedProtocolMessageType( - "ModelWarmup", - (_message.Message,), - dict( - Input=_reflection.GeneratedProtocolMessageType( - "Input", - (_message.Message,), - dict( - DESCRIPTOR=_MODELWARMUP_INPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelWarmup.Input) - ), - ), - InputsEntry=_reflection.GeneratedProtocolMessageType( - "InputsEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELWARMUP_INPUTSENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelWarmup.InputsEntry) - ), - ), - DESCRIPTOR=_MODELWARMUP, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelWarmup) - ), -) -_sym_db.RegisterMessage(ModelWarmup) -_sym_db.RegisterMessage(ModelWarmup.Input) -_sym_db.RegisterMessage(ModelWarmup.InputsEntry) - -ModelOperations = _reflection.GeneratedProtocolMessageType( - "ModelOperations", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPERATIONS, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOperations) - ), -) -_sym_db.RegisterMessage(ModelOperations) - -ModelTransactionPolicy = _reflection.GeneratedProtocolMessageType( - "ModelTransactionPolicy", - (_message.Message,), - dict( - DESCRIPTOR=_MODELTRANSACTIONPOLICY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelTransactionPolicy) - ), -) -_sym_db.RegisterMessage(ModelTransactionPolicy) - -ModelConfig = _reflection.GeneratedProtocolMessageType( - "ModelConfig", - (_message.Message,), - dict( - CcModelFilenamesEntry=_reflection.GeneratedProtocolMessageType( - "CcModelFilenamesEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELCONFIG_CCMODELFILENAMESENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelConfig.CcModelFilenamesEntry) - ), - ), - MetricTagsEntry=_reflection.GeneratedProtocolMessageType( - "MetricTagsEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELCONFIG_METRICTAGSENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelConfig.MetricTagsEntry) - ), - ), - ParametersEntry=_reflection.GeneratedProtocolMessageType( - "ParametersEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELCONFIG_PARAMETERSENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelConfig.ParametersEntry) - ), - ), - DESCRIPTOR=_MODELCONFIG, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelConfig) - ), -) -_sym_db.RegisterMessage(ModelConfig) -_sym_db.RegisterMessage(ModelConfig.CcModelFilenamesEntry) -_sym_db.RegisterMessage(ModelConfig.MetricTagsEntry) -_sym_db.RegisterMessage(ModelConfig.ParametersEntry) - - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._options = None -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._options = None -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._options = None -_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._options = None -_MODELENSEMBLING_STEP_INPUTMAPENTRY._options = None -_MODELENSEMBLING_STEP_OUTPUTMAPENTRY._options = None -_MODELWARMUP_INPUTSENTRY._options = None -_MODELCONFIG_CCMODELFILENAMESENTRY._options = None -_MODELCONFIG_METRICTAGSENTRY._options = None 
-_MODELCONFIG_PARAMETERSENTRY._options = None -# @@protoc_insertion_point(module_scope) diff --git a/merlin/systems/triton/oprunner_model.py b/merlin/systems/triton/oprunner_model.py deleted file mode 100644 index 1b5e36e27cb..00000000000 --- a/merlin/systems/triton/oprunner_model.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import json -import sys -import traceback -from typing import List - -import triton_python_backend_utils as pb_utils -from triton_python_backend_utils import ( - InferenceRequest, - InferenceResponse, - Tensor, - get_input_tensor_by_name, -) - -from merlin.systems.dag.op_runner import OperatorRunner -from merlin.systems.dag.ops.operator import InferenceDataFrame - - -class TritonPythonModel: - def initialize(self, args): - self.model_config = json.loads(args["model_config"]) - self.runner = OperatorRunner(self.model_config) - - def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]: - params = self.model_config["parameters"] - op_names = json.loads(params["operator_names"]["string_value"]) - first_operator_name = op_names[0] - operator_params = json.loads(params[first_operator_name]["string_value"]) - input_column_names = list(json.loads(operator_params["input_dict"]).keys()) - - responses = [] - - for request in requests: - try: - # transform the triton tensors to a dict of name:numpy tensor - input_tensors = { - name: get_input_tensor_by_name(request, name).as_numpy() - for name in input_column_names - } - - inf_df = InferenceDataFrame(input_tensors) - - raw_tensor_tuples = self.runner.execute(inf_df) - - tensors = { - name: (data.get() if hasattr(data, "get") else data) - for name, data in raw_tensor_tuples - } - - result = [Tensor(name, data) for name, data in tensors.items()] - - responses.append(InferenceResponse(result)) - - except Exception: # pylint: disable=broad-except - exc_type, exc_value, exc_traceback = sys.exc_info() - tb_string = repr(traceback.extract_tb(exc_traceback)) - responses.append( - pb_utils.InferenceResponse( - tensors=[], error=f"{exc_type}, {exc_value}, {tb_string}" - ) - ) - - return responses diff --git a/merlin/systems/triton/workflow_model.py b/merlin/systems/triton/workflow_model.py deleted file mode 100644 index a0de2514b91..00000000000 --- a/merlin/systems/triton/workflow_model.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import json -import os -from typing import List - -from triton_python_backend_utils import ( - InferenceRequest, - InferenceResponse, - Tensor, - get_input_tensor_by_name, - get_output_config_by_name, - triton_string_to_numpy, -) - -import nvtabular -from merlin.core.dispatch import is_list_dtype -from merlin.systems.triton import _convert_tensor -from merlin.systems.workflow.hugectr import HugeCTRWorkflowRunner -from merlin.systems.workflow.pytorch import PyTorchWorkflowRunner -from merlin.systems.workflow.tensorflow import TensorflowWorkflowRunner - - -class TritonPythonModel: - def initialize(self, args): - # Arg parsing - workflow_path = os.path.join( - args["model_repository"], str(args["model_version"]), "workflow" - ) - model_device = args["model_instance_kind"] - - # Workflow instantiation - self.workflow = nvtabular.Workflow.load(workflow_path) - - # Config loading and parsing - self.model_config = json.loads(args["model_config"]) - model_framework = self.model_config["parameters"]["output_model"]["string_value"] - - # Dtype parsing - input_dtypes = self.workflow.input_dtypes.items() - self.input_dtypes, self.input_multihots = _parse_input_dtypes(input_dtypes) - - self.output_dtypes = dict() - for col_name, col_schema in self.workflow.output_schema.column_schemas.items(): - if col_schema.is_list and col_schema.is_ragged: - self._set_output_dtype(col_name + "__nnzs") - self._set_output_dtype(col_name + "__values") - else: - self._set_output_dtype(col_name) - - if model_framework == "hugectr": - runner_class = HugeCTRWorkflowRunner - elif model_framework == "pytorch": - runner_class = PyTorchWorkflowRunner - else: - runner_class = TensorflowWorkflowRunner - - self.runner = runner_class( - self.workflow, self.output_dtypes, self.model_config, model_device - ) - - def _set_output_dtype(self, name): - conf = get_output_config_by_name(self.model_config, name) - self.output_dtypes[name] = triton_string_to_numpy(conf["data_type"]) - - def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]: - """Transforms the input batches by running through a NVTabular workflow.transform - function. - """ - responses = [] - for request in requests: - # transform the triton tensors to a dict of name:numpy tensor - input_tensors = { - name: _convert_tensor(get_input_tensor_by_name(request, name)) - for name in self.input_dtypes - } - - # multihots are represented as a tuple of (values, offsets) - for name, dtype in self.input_multihots.items(): - values = _convert_tensor(get_input_tensor_by_name(request, name + "__values")) - offsets = _convert_tensor(get_input_tensor_by_name(request, name + "__nnzs")) - input_tensors[name] = (values, offsets) - - raw_tensor_tuples = self.runner.run_workflow(input_tensors) - - result = [Tensor(name, data) for name, data in raw_tensor_tuples] - - responses.append(InferenceResponse(result)) - - return responses - - -def _parse_input_dtypes(dtypes): - input_dtypes = {col: dtype for col, dtype in dtypes if not is_list_dtype(dtype)} - input_multihots = {col: dtype for col, dtype in dtypes if is_list_dtype(dtype)} - - return input_dtypes, input_multihots diff --git a/merlin/systems/workflow/__init__.py b/merlin/systems/workflow/__init__.py deleted file mode 100644 index c1bff8b30ac..00000000000 --- a/merlin/systems/workflow/__init__.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from merlin.schema import Tags - - -def get_embedding_sizes(source, output_dtypes=None): - """Returns a dictionary of embedding sizes from a workflow or workflow_node - - Parameters - ---------- - source : Workflow or ColumnSelector - Either a nvtabular Workflow or ColumnSelector object that we should use to find - embedding sizes - output_dtypes : dict, optional - Optional dictionary of column_name:dtype. If passing a workflow object dtypes - will be read from the workflow. This is used to figure out which columns - are multihot-categorical, which are split out by this function. If passed a workflow_node - and this parameter isn't set, you won't have multihot columns returned separately - """ - # TODO: do we need to distinguish multihot columns here? (if so why? ) - - # have to lazy import Workflow to avoid circular import errors - from nvtabular.workflow import Workflow - - output_node = source.output_node if isinstance(source, Workflow) else source - - if isinstance(source, Workflow): - output_dtypes = output_dtypes or source.output_dtypes - else: - # passed in a column group - output_dtypes = output_dtypes or {} - - output = {} - multihot_columns = set() - cats_schema = output_node.output_schema.select_by_tag(Tags.CATEGORICAL) - for col_name, col_schema in cats_schema.column_schemas.items(): - if col_schema.dtype and col_schema.is_list and col_schema.is_ragged: - # multi hot so remove from output and add to multihot - multihot_columns.add(col_name) - - embeddings_sizes = col_schema.properties.get("embedding_sizes", {}) - cardinality = embeddings_sizes["cardinality"] - dimensions = embeddings_sizes["dimension"] - output[col_name] = (cardinality, dimensions) - - # TODO: returning different return types like this (based off the presence - # of multihot features) is pretty janky. fix. - if not multihot_columns: - return output - - single_hots = {k: v for k, v in output.items() if k not in multihot_columns} - multi_hots = {k: v for k, v in output.items() if k in multihot_columns} - return single_hots, multi_hots diff --git a/merlin/systems/workflow/base.py b/merlin/systems/workflow/base.py deleted file mode 100644 index b19e576df62..00000000000 --- a/merlin/systems/workflow/base.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import functools -import json -import logging -from abc import ABC, abstractmethod - -import numpy as np - -from merlin.core.dispatch import concat_columns -from merlin.dag import ColumnSelector, Supports -from merlin.schema import Tags -from merlin.systems.triton.conversions import convert_format - -LOG = logging.getLogger("merlin-systems") - - -class WorkflowRunner(ABC): - def __init__(self, workflow, output_dtypes, model_config, model_device): - self.workflow = workflow - self.output_dtypes = output_dtypes - self.model_config = model_config - self.device = model_device - - output_schema = self.workflow.output_schema - - schema_cats = output_schema.apply(ColumnSelector(tags=[Tags.CATEGORICAL])).column_names - schema_conts = output_schema.apply(ColumnSelector(tags=[Tags.CONTINUOUS])).column_names - - mc_cats = json.loads(self._get_param(model_config, "cats", "string_value", default="[]")) - mc_conts = json.loads(self._get_param(model_config, "conts", "string_value", default="[]")) - - self.cats = mc_cats or schema_cats - self.conts = mc_conts or schema_conts - - workflow_outputs = set(workflow.output_schema.column_names) - requested_cols = set(self.cats + self.conts) - missing_cols = requested_cols - workflow_outputs - extra_cols = workflow_outputs - requested_cols - - if missing_cols: - raise ValueError( - f"The following columns were not found in the workflow's output: {missing_cols}" - ) - if extra_cols: - raise ValueError( - f"The following extra columns were found in the workflow's output: {extra_cols}" - ) - - # recurse over all column groups, initializing operators for inference pipeline - self._initialize_ops(self.workflow.output_node) - - def _initialize_ops(self, workflow_node, visited=None): - if visited is None: - visited = set() - - if workflow_node.op and hasattr(workflow_node.op, "inference_initialize"): - inference_op = workflow_node.op.inference_initialize( - workflow_node.selector, self.model_config - ) - if inference_op: - workflow_node.op = inference_op - - supported = workflow_node.op.supports - - # if we're running on the CPU only, mask off support for GPU data formats - if self.device == "CPU": - supported = functools.reduce( - lambda a, b: a | b, - (v for v in list(Supports) if v & supported and "CPU" in str(v)), - ) - # the 'supports' property is readonly, and we can't always attach a new property - # to some of the operators (C++ categorify etc). 
set on the workflow_node instead - workflow_node.inference_supports = supported - - for parent in workflow_node.parents_with_dependencies: - if parent not in visited: - visited.add(parent) - self._initialize_ops(parent, visited) - - def run_workflow(self, input_tensors): - # use our NVTabular workflow to transform the dataset - transformed, kind = self._transform_tensors(input_tensors, self.workflow.output_node) - - # if we don't have tensors in numpy format, convert back so that the we can return - # to triton - if kind != Supports.CPU_DICT_ARRAY: - transformed, kind = convert_format(transformed, kind, Supports.CPU_DICT_ARRAY) - - # convert to the format expected by the DL models - return self._transform_outputs(transformed) - - @abstractmethod - def _transform_outputs(self, tensors): - pass - - def _convert_to_np(self, columns, tensors, dtype, rows): - """converts outputs to a numpy input compatible with pytorch""" - d = np.empty((rows, len(columns)), dtype=dtype) - for i, name in enumerate(columns): - d[:, i] = tensors[name].astype(dtype) - return d - - def _transform_tensors(self, input_tensors, workflow_node): - upstream_inputs = [] - - # Gather inputs from the parents and dependency nodes - if workflow_node.parents_with_dependencies: - for parent in workflow_node.parents_with_dependencies: - upstream_tensors, upstream_kind = self._transform_tensors(input_tensors, parent) - if upstream_tensors is not None and upstream_kind: - upstream_inputs.append((upstream_tensors, upstream_kind)) - - # Gather additional input columns from the original input tensors - if workflow_node.selector: - selector_columns = workflow_node.selector.names - to_remove = [] - for upstream_tensors, upstream_kind in upstream_inputs: - for col in selector_columns: - if col in upstream_tensors: - to_remove.append(col) - for col in set(to_remove): - selector_columns.remove(col) - - if selector_columns: - selected_tensors = {c: input_tensors[c] for c in selector_columns} - selected_kinds = Supports.CPU_DICT_ARRAY - upstream_inputs.append((selected_tensors, selected_kinds)) - - # Standardize the formats - tensors, kind = None, None - for upstream_tensors, upstream_kind in upstream_inputs: - if tensors is None: - tensors, kind = upstream_tensors, upstream_kind - else: - if kind != upstream_kind: - # we have multiple different kinds of data here (dataframe/array on cpu/gpu) - # we need to convert to a common format here first before concatenating. 
- op = workflow_node.op - if op and hasattr(op, "inference_supports"): - target_kind = op.inference_supports - else: - target_kind = Supports.CPU_DICT_ARRAY - # note : the 2nd convert_format call needs to be stricter in what the kind is - # (exact match rather than a bitmask of values) - tensors, kind = convert_format(tensors, kind, target_kind) - upstream_tensors, _ = convert_format(upstream_tensors, upstream_kind, kind) - - tensors = self.concat_tensors([tensors, upstream_tensors], kind) - - # Run the transform - if tensors is not None and kind and workflow_node.op: - try: - # if the op doesn't support the current kind - we need to convert - if ( - hasattr(workflow_node, "inference_supports") - and not workflow_node.inference_supports & kind - ): - tensors, kind = convert_format(tensors, kind, workflow_node.inference_supports) - - tensors = workflow_node.op.transform( - workflow_node.input_columns, - tensors, - ) - - except Exception: - LOG.exception("Failed to transform operator %s", workflow_node.op) - raise - - return tensors, kind - - def concat_tensors(self, tensors, kind): - if kind & (Supports.GPU_DATAFRAME | Supports.CPU_DATAFRAME): - return concat_columns(tensors) - else: - output = tensors[0] - for tensor in tensors[1:]: - output.update(tensor) - return output - - def _get_param(self, config, *args, default=None): - config_element = config["parameters"] - for key in args: - config_element = config_element.get(key, {}) - return config_element or default diff --git a/merlin/systems/workflow/hugectr.py b/merlin/systems/workflow/hugectr.py deleted file mode 100644 index 5d10f79b23f..00000000000 --- a/merlin/systems/workflow/hugectr.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import numpy as np - -from merlin.systems.workflow import get_embedding_sizes -from merlin.systems.workflow.base import WorkflowRunner - - -class HugeCTRWorkflowRunner(WorkflowRunner): - def __init__(self, workflow, output_dtypes, model_config, model_device): - super().__init__(workflow, output_dtypes, model_config, model_device) - - if self.cats: - self.offsets = self.get_offsets(self.workflow, self.cats) - - def _transform_outputs(self, tensors): - output_tensors = [] - if self.conts: - output_tensors.append( - ( - "DES", - self._convert(self.conts, tensors, np.float32), - ) - ) - else: - output_tensors.append(("DES", np.array([[]], np.float32))) - - if self.cats: - for name in self.cats: - tensors[name] += self.offsets[name] - cats_np = self._convert(self.cats, tensors, np.int64) - output_tensors.append( - ( - "CATCOLUMN", - cats_np, - ) - ) - else: - output_tensors.append(("CATCOLUMN", np.array([[]], np.int64))) - - len_cats_np = cats_np.shape[1] - row_index = np.arange(len_cats_np + 1, dtype=np.int32).reshape(1, len_cats_np + 1) - output_tensors.append(("ROWINDEX", row_index)) - - return output_tensors - - def _convert(self, columns, tensors, dtype): - """converts outputs to a numpy input compatible with hugectr""" - rows = max(len(tensors[name]) for name in columns) - d = self._convert_to_np(columns, tensors, dtype, rows) - return d.reshape(1, len(columns) * rows) - - def get_offsets(self, workflow, categorical_cols): - embeddings = get_embedding_sizes(workflow) - if embeddings is None: - raise Exception("embeddings cannot be None") - else: - offsets = dict() - curr_offset = 0 - for name in categorical_cols: - offsets[name] = curr_offset - curr_offset += embeddings[name][0] - return offsets diff --git a/merlin/systems/workflow/pytorch.py b/merlin/systems/workflow/pytorch.py deleted file mode 100644 index 2475fce02b6..00000000000 --- a/merlin/systems/workflow/pytorch.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -from merlin.systems.workflow.base import WorkflowRunner - - -class PyTorchWorkflowRunner(WorkflowRunner): - def _transform_outputs(self, tensors): - output_tensors = [] - for col_name in self.cats + self.conts: - output_tensors.append( - ( - col_name, - self._convert([col_name], tensors, self.workflow.output_dtypes[col_name]), - ) - ) - - return output_tensors - - def _convert(self, columns, tensors, dtype): - """converts outputs to a numpy input compatible with pytorch""" - rows = max(len(tensors[name]) for name in columns) - return self._convert_to_np(columns, tensors, dtype, rows) diff --git a/merlin/systems/workflow/tensorflow.py b/merlin/systems/workflow/tensorflow.py deleted file mode 100644 index 05a7b2e7e94..00000000000 --- a/merlin/systems/workflow/tensorflow.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import json - -from merlin.systems.workflow.base import WorkflowRunner - - -class TensorflowWorkflowRunner(WorkflowRunner): - def __init__(self, workflow, output_dtypes, model_config, model_device): - super().__init__(workflow, output_dtypes, model_config, model_device) - - self.offsets = None - - def _transform_outputs(self, tensors): - # Load extra info needed for the Transformer4Rec (if exists) - sparse_feat = None - params = self.model_config["parameters"] - if "sparse_max" in params.keys(): - sparse_feat = json.loads(self.model_config["parameters"]["sparse_max"]["string_value"]) - # transforms outputs for both pytorch and tensorflow - output_tensors = [] - for name in self.cats + self.conts: - value = tensors[name] - if sparse_feat and name in sparse_feat.keys(): - # convert sparse tensors to dense representations - d = value[0].astype(self.output_dtypes[name]) - col_dim = sparse_feat[name] - row_dim = d.shape[0] // col_dim - d = d.reshape(row_dim, col_dim) - output_tensors.append((name, d)) - elif isinstance(value, tuple): - # convert list values to match TF dataloader - values = value[0].astype(self.output_dtypes[name + "__values"]) - values = values.reshape(len(values), 1) - output_tensors.append((name + "__values", values)) - - offsets = value[1].astype(self.output_dtypes[name + "__nnzs"]) - nnzs = offsets[1:] - offsets[:-1] - nnzs = nnzs.reshape(len(nnzs), 1) - output_tensors.append((name + "__nnzs", nnzs)) - else: - d = value.astype(self.output_dtypes[name]) - d = d.reshape(len(d), 1) - output_tensors.append((name, d)) - return output_tensors diff --git a/tests/unit/systems/__init__.py b/tests/unit/systems/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/unit/systems/inf_test_ops.py b/tests/unit/systems/inf_test_ops.py deleted file mode 100644 index e6bfeb61312..00000000000 --- a/tests/unit/systems/inf_test_ops.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -inf_op = pytest.importorskip("merlin.systems.dag.ops.operator") - - -class PlusTwoOp(inf_op.PipelineableInferenceOperator): - def transform(self, df: inf_op.InferenceDataFrame) -> inf_op.InferenceDataFrame: - focus_df = df - new_df = inf_op.InferenceDataFrame() - - for name, data in focus_df: - new_df.tensors[f"{name}_plus_2"] = data + 2 - - return new_df - - def column_mapping(self, col_selector): - column_mapping = {} - for col_name in col_selector.names: - column_mapping[f"{col_name}_plus_2"] = [col_name] - return column_mapping - - @classmethod - def from_config(cls, config): - return PlusTwoOp() diff --git a/tests/unit/systems/inference_utils.py b/tests/unit/systems/inference_utils.py deleted file mode 100644 index e0d4894c398..00000000000 --- a/tests/unit/systems/inference_utils.py +++ /dev/null @@ -1,81 +0,0 @@ -from distutils.spawn import find_executable - -import pytest - -torch = pytest.importorskip("torch") # noqa -loader_tf_utils = pytest.importorskip("nvtabular.loader.tf_utils") # noqa -loader_tf_utils.configure_tensorflow() - -import nvtabular.framework_utils.tensorflow.layers as layers # noqa -from nvtabular.framework_utils.torch.models import Model # noqa - -triton = pytest.importorskip("merlin.systems.triton") -data_conversions = pytest.importorskip("merlin.systems.triton.conversions") - -tritonclient = pytest.importorskip("tritonclient") -grpcclient = pytest.importorskip("tritonclient.grpc") - -TRITON_SERVER_PATH = find_executable("tritonserver") -from tests.unit.test_triton_inference import run_triton_server # noqa - -tf = pytest.importorskip("tensorflow") - - 
-def create_tf_model(cat_columns: list, cat_mh_columns: list, embed_tbl_shapes: dict): - inputs = {} # tf.keras.Input placeholders for each feature to be used - emb_layers = [] # output of all embedding layers, which will be concatenated - for col in cat_columns: - inputs[col] = tf.keras.Input(name=col, dtype=tf.int64, shape=(1,)) - # Note that we need two input tensors for multi-hot categorical features - for col in cat_mh_columns: - inputs[col] = ( - tf.keras.Input(name=f"{col}__values", dtype=tf.int64, shape=(1,)), - tf.keras.Input(name=f"{col}__nnzs", dtype=tf.int64, shape=(1,)), - ) - for col in cat_columns + cat_mh_columns: - emb_layers.append( - tf.feature_column.embedding_column( - tf.feature_column.categorical_column_with_identity( - col, embed_tbl_shapes[col][0] - ), # Input dimension (vocab size) - embed_tbl_shapes[col][1], # Embedding output dimension - ) - ) - emb_layer = layers.DenseFeatures(emb_layers) - x_emb_output = emb_layer(inputs) - x = tf.keras.layers.Dense(128, activation="relu")(x_emb_output) - x = tf.keras.layers.Dense(128, activation="relu")(x) - x = tf.keras.layers.Dense(128, activation="relu")(x) - x = tf.keras.layers.Dense(1, activation="sigmoid", name="output")(x) - - model = tf.keras.Model(inputs=inputs, outputs=x) - model.compile("sgd", "binary_crossentropy") - return model - - -def create_pytorch_model(cat_columns: list, cat_mh_columns: list, embed_tbl_shapes: dict): - single_hot = {k: v for k, v in embed_tbl_shapes.items() if k in cat_columns} - multi_hot = {k: v for k, v in embed_tbl_shapes.items() if k in cat_mh_columns} - model = Model( - embedding_table_shapes=(single_hot, multi_hot), - num_continuous=0, - emb_dropout=0.0, - layer_hidden_dims=[128, 128, 128], - layer_dropout_rates=[0.0, 0.0, 0.0], - ).to("cuda") - return model - - -def _run_ensemble_on_tritonserver( - tmpdir, - output_columns, - df, - model_name, -): - inputs = triton.convert_df_to_triton_input(df.columns, df) - outputs = [grpcclient.InferRequestedOutput(col) for col in output_columns] - response = None - with run_triton_server(tmpdir) as client: - response = client.infer(model_name, inputs, outputs=outputs) - - return response diff --git a/tests/unit/systems/test_ensemble.py b/tests/unit/systems/test_ensemble.py deleted file mode 100644 index 6a8d5cc7c6b..00000000000 --- a/tests/unit/systems/test_ensemble.py +++ /dev/null @@ -1,236 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-import os
-from distutils.spawn import find_executable
-
-import pytest
-
-os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
-
-from google.protobuf import text_format  # noqa
-
-from merlin.core.dispatch import make_df  # noqa
-from merlin.dag import ColumnSelector  # noqa
-from merlin.dag.node import postorder_iter_nodes  # noqa
-from merlin.dag.ops.concat_columns import ConcatColumns  # noqa
-from merlin.dag.ops.selection import SelectionOp  # noqa
-from merlin.schema import Tags  # noqa
-from nvtabular import Workflow  # noqa
-from nvtabular import ops as wf_ops  # noqa
-
-loader_tf_utils = pytest.importorskip("nvtabular.loader.tf_utils")
-
-# everything tensorflow related must be imported after this.
-loader_tf_utils.configure_tensorflow()
-tf = pytest.importorskip("tensorflow")
-
-triton = pytest.importorskip("merlin.systems.triton")
-export = pytest.importorskip("merlin.systems.dag.ensemble")
-
-from merlin.systems.dag.ensemble import Ensemble  # noqa
-from merlin.systems.dag.ops.tensorflow import PredictTensorflow  # noqa
-from merlin.systems.dag.ops.workflow import TransformWorkflow  # noqa
-from tests.unit.systems.inf_test_ops import PlusTwoOp  # noqa
-from tests.unit.systems.inference_utils import (  # noqa
-    _run_ensemble_on_tritonserver,
-    create_tf_model,
-)
-
-tritonclient = pytest.importorskip("tritonclient")
-import merlin.systems.triton.model_config_pb2 as model_config  # noqa
-
-grpcclient = pytest.importorskip("tritonclient.grpc")
-
-TRITON_SERVER_PATH = find_executable("tritonserver")
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_workflow_tf_e2e_config_verification(tmpdir, dataset, engine):
-    # Create a Workflow
-    schema = dataset.schema
-    for name in ["x", "y", "id"]:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-    selector = ColumnSelector(["x", "y", "id"])
-
-    workflow_ops = selector >> wf_ops.Rename(postfix="_nvt")
-    workflow = Workflow(workflow_ops["x_nvt"])
-    workflow.fit(dataset)
-
-    # Create Tensorflow Model
-    model = tf.keras.models.Sequential(
-        [
-            tf.keras.Input(name="x_nvt", dtype=tf.float64, shape=(1,)),
-            tf.keras.layers.Dense(16, activation="relu"),
-            tf.keras.layers.Dropout(0.2),
-            tf.keras.layers.Dense(1, name="output"),
-        ]
-    )
-    model.compile(
-        optimizer="adam",
-        loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
-        metrics=[tf.metrics.SparseCategoricalAccuracy()],
-    )
-
-    # Creating Triton Ensemble
-    triton_chain = (
-        selector >> TransformWorkflow(workflow, cats=["x_nvt"]) >> PredictTensorflow(model)
-    )
-    triton_ens = Ensemble(triton_chain, schema)
-
-    # Creating Triton Ensemble Config
-    ensemble_config, node_configs = triton_ens.export(str(tmpdir))
-
-    config_path = tmpdir / "ensemble_model" / "config.pbtxt"
-
-    # Checking Triton Ensemble Config
-    with open(config_path, "rb") as f:
-        config = model_config.ModelConfig()
-        raw_config = f.read()
-        parsed = text_format.Parse(raw_config, config)
-
-        # The config file contents are correct
-        assert parsed.name == "ensemble_model"
-        assert parsed.platform == "ensemble"
-        assert hasattr(parsed, "ensemble_scheduling")
-
-    df = make_df({"x": [1.0, 2.0, 3.0], "y": [4.0, 5.0, 6.0], "id": [7, 8, 9]})
-
-    output_columns = triton_ens.graph.output_schema.column_names
-    response = _run_ensemble_on_tritonserver(str(tmpdir), output_columns, df, triton_ens.name)
-    assert len(response.as_numpy("output")) == df.shape[0]
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_workflow_tf_e2e_multi_op_run(tmpdir, dataset, engine):
-    # Create a Workflow
-    schema = dataset.schema
-    for name in ["x", "y", "id"]:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-
-    workflow_ops = ["name-cat"] >> wf_ops.Categorify(cat_cache="host")
-    workflow = Workflow(workflow_ops)
-    workflow.fit(dataset)
-
-    embedding_shapes_1 = wf_ops.get_embedding_sizes(workflow)
-
-    cats = ["name-string"] >> wf_ops.Categorify(cat_cache="host")
-    workflow_2 = Workflow(cats)
-    workflow_2.fit(dataset)
-
-    embedding_shapes = wf_ops.get_embedding_sizes(workflow_2)
-    embedding_shapes_1.update(embedding_shapes)
-    # Create Tensorflow Model
-    model = create_tf_model(["name-cat", "name-string"], [], embedding_shapes_1)
-
-    # Creating Triton Ensemble
-    triton_chain_1 = ["name-cat"] >> TransformWorkflow(workflow)
-    triton_chain_2 = ["name-string"] >> TransformWorkflow(workflow_2)
-    triton_chain = (triton_chain_1 + triton_chain_2) >> PredictTensorflow(model)
-
-    triton_ens = Ensemble(triton_chain, schema)
-
-    # Creating Triton Ensemble Config
-    ensemble_config, nodes_config = triton_ens.export(str(tmpdir))
-    config_path = tmpdir / "ensemble_model" / "config.pbtxt"
-
-    # Checking Triton Ensemble Config
-    with open(config_path, "rb") as f:
-        config = model_config.ModelConfig()
-        raw_config = f.read()
-        parsed = text_format.Parse(raw_config, config)
-
-        # The config file contents are correct
-        assert parsed.name == "ensemble_model"
-        assert parsed.platform == "ensemble"
-        assert hasattr(parsed, "ensemble_scheduling")
-
-    df = dataset.to_ddf().compute()[["name-string", "name-cat"]].iloc[:3]
-
-    response = _run_ensemble_on_tritonserver(str(tmpdir), ["output"], df, triton_ens.name)
-    assert len(response.as_numpy("output")) == df.shape[0]
-
-
-def test_graph_traverse_algo():
-    chain_1 = ["name-cat"] >> TransformWorkflow(Workflow(["name-cat"] >> wf_ops.Categorify()))
-    chain_2 = ["name-string"] >> TransformWorkflow(Workflow(["name-string"] >> wf_ops.Categorify()))
-
-    triton_chain = chain_1 + chain_2
-
-    ordered_list = list(postorder_iter_nodes(triton_chain))
-    assert len(ordered_list) == 5
-    assert isinstance(ordered_list[0].op, SelectionOp)
-    assert isinstance(ordered_list[-1].op, ConcatColumns)
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_workflow_tf_e2e_multi_op_plus_2_run(tmpdir, dataset, engine):
-    # Create a Workflow
-    schema = dataset.schema
-    for name in ["x", "y", "id"]:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-
-    workflow_ops = ["name-cat"] >> wf_ops.Categorify(cat_cache="host")
-    workflow = Workflow(workflow_ops)
-    workflow.fit(dataset)
-
-    embedding_shapes_1 = wf_ops.get_embedding_sizes(workflow)
-
-    cats = ["name-string"] >> wf_ops.Categorify(cat_cache="host")
-    workflow_2 = Workflow(cats)
-    workflow_2.fit(dataset)
-
-    embedding_shapes = wf_ops.get_embedding_sizes(workflow_2)
-    embedding_shapes_1.update(embedding_shapes)
-    embedding_shapes_1["name-string_plus_2"] = embedding_shapes_1["name-string"]
-
-    # Create Tensorflow Model
-    model = create_tf_model(["name-cat", "name-string_plus_2"], [], embedding_shapes_1)
-
-    # Creating Triton Ensemble
-    triton_chain_1 = ["name-cat"] >> TransformWorkflow(workflow)
-    triton_chain_2 = ["name-string"] >> TransformWorkflow(workflow_2) >> PlusTwoOp()
-    triton_chain = (triton_chain_1 + triton_chain_2) >> PredictTensorflow(model)
-
-    triton_ens = Ensemble(triton_chain, schema)
-
-    # Creating Triton Ensemble Config
-    ensemble_config, nodes_config = triton_ens.export(str(tmpdir))
-    config_path = tmpdir / "ensemble_model" / "config.pbtxt"
-
-    # Checking Triton Ensemble Config
-    with open(config_path, "rb") as f:
-        config = model_config.ModelConfig()
-        raw_config = f.read()
-        parsed = text_format.Parse(raw_config, config)
-
-        # The config file contents are correct
-        assert parsed.name == "ensemble_model"
-        assert parsed.platform == "ensemble"
-        assert hasattr(parsed, "ensemble_scheduling")
-
-    df = dataset.to_ddf().compute()[["name-string", "name-cat"]].iloc[:3]
-
-    response = _run_ensemble_on_tritonserver(str(tmpdir), ["output"], df, triton_ens.name)
-    assert len(response.as_numpy("output")) == df.shape[0]
diff --git a/tests/unit/systems/test_ensemble_ops.py b/tests/unit/systems/test_ensemble_ops.py
deleted file mode 100644
index 528ac27caae..00000000000
--- a/tests/unit/systems/test_ensemble_ops.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#
-# Copyright (c) 2022, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-from distutils.spawn import find_executable
-
-import numpy as np
-import pytest
-
-from merlin.core.dispatch import make_df
-from merlin.schema import ColumnSchema, Schema
-from merlin.systems.dag.ensemble import Ensemble
-from merlin.systems.dag.ops.session_filter import FilterCandidates
-from merlin.systems.dag.ops.softmax_sampling import SoftmaxSampling
-from tests.unit.systems.inference_utils import _run_ensemble_on_tritonserver  # noqa
-
-TRITON_SERVER_PATH = find_executable("tritonserver")
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-def test_softmax_sampling(tmpdir):
-    request_schema = Schema(
-        [
-            ColumnSchema("movie_ids", dtype=np.int32),
-            ColumnSchema("output_1", dtype=np.float32),
-        ]
-    )
-
-    combined_features = {
-        "movie_ids": np.random.randint(0, 10000, 100).astype(np.int32),
-        "output_1": np.random.random(100).astype(np.float32),
-    }
-
-    request = make_df(combined_features)
-
-    ordering = ["movie_ids"] >> SoftmaxSampling(relevance_col="output_1", topk=10, temperature=20.0)
-
-    ensemble = Ensemble(ordering, request_schema)
-    ens_config, node_configs = ensemble.export(tmpdir)
-
-    response = _run_ensemble_on_tritonserver(
-        tmpdir, ensemble.graph.output_schema.column_names, request, "ensemble_model"
-    )
-    assert response is not None
-    assert len(response.as_numpy("ordered_ids")) == 10
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-def test_filter_candidates(tmpdir):
-    request_schema = Schema(
-        [
-            ColumnSchema("candidate_ids", dtype=np.int32),
-            ColumnSchema("movie_ids", dtype=np.int32),
-        ]
-    )
-
-    candidate_ids = np.random.randint(1, 100000, 100).astype(np.int32)
-    movie_ids_1 = np.zeros(100, dtype=np.int32)
-    movie_ids_1[:20] = np.unique(candidate_ids)[:20]
-
-    combined_features = {
-        "candidate_ids": candidate_ids,
-        "movie_ids": movie_ids_1,
-    }
-
-    request = make_df(combined_features)
-
-    filtering = ["candidate_ids"] >> FilterCandidates(filter_out=["movie_ids"])
-
-    ensemble = Ensemble(filtering, request_schema)
-    ens_config, node_configs = ensemble.export(tmpdir)
-
-    response = _run_ensemble_on_tritonserver(
-        tmpdir, ensemble.graph.output_schema.column_names, request, "ensemble_model"
-    )
-    assert response is not None
-    assert len(response.as_numpy("filtered_ids")) == 80
diff --git a/tests/unit/systems/test_export.py b/tests/unit/systems/test_export.py
deleted file mode 100644
index 6d70696f8a6..00000000000
--- a/tests/unit/systems/test_export.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from distutils.spawn import find_executable
-
-import pytest
-
-from merlin.io import Dataset
-from merlin.systems.workflow import get_embedding_sizes
-from nvtabular import Workflow, ops
-
-tf_utils = pytest.importorskip("nvtabular.loader.tf_utils")  # noqa
-
-triton = pytest.importorskip("merlin.systems.triton")
-data_conversions = pytest.importorskip("merlin.systems.triton.conversions")
-ensemble = pytest.importorskip("merlin.systems.triton.export")
-
-torch = pytest.importorskip("torch")  # noqa
-
-from merlin.systems.triton.export import export_pytorch_ensemble, export_tensorflow_ensemble  # noqa
-from tests.unit.systems.inference_utils import (  # noqa
-    _run_ensemble_on_tritonserver,
-    create_pytorch_model,
-    create_tf_model,
-)
-
-tritonclient = pytest.importorskip("tritonclient")
-grpcclient = pytest.importorskip("tritonclient.grpc")
-
-TRITON_SERVER_PATH = find_executable("tritonserver")
-tf_utils.configure_tensorflow()
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-@pytest.mark.parametrize("engine", ["parquet"])
-@pytest.mark.parametrize("output_model", ["tensorflow"])
-def test_export_run_ensemble_triton(tmpdir, engine, output_model, df):
-    conts = ["x", "y", "id"] >> ops.FillMissing() >> ops.Normalize()
-    cats = ["name-cat", "name-string"] >> ops.Categorify(cat_cache="host")
-    workflow = Workflow(conts + cats)
-    dataset = Dataset(df)
-    workflow.fit(dataset)
-
-    embed_shapes = get_embedding_sizes(workflow)
-    cat_cols = list(embed_shapes.keys())
-
-    if output_model == "tensorflow":
-        tf_model = create_tf_model(cat_cols, [], embed_shapes)
-        export_tensorflow_ensemble(tf_model, workflow, "test_name", tmpdir, [])
-    elif output_model == "pytorch":
-        torch_model = create_pytorch_model(cat_cols, [], embed_shapes)
-        export_pytorch_ensemble(
-            torch_model,
-            workflow,
-            {},
-            "test_name",
-            tmpdir,
-            [],
-        )
-
-    # assert os.path.exists(os.path.join(repo, "config.pbtxt"))
-    tri_df = df.iloc[:10]
-    tri_df = tri_df[["x", "y", "id", "name-cat", "name-string"]]
-    response = _run_ensemble_on_tritonserver(str(tmpdir), ["output"], tri_df, "test_name")
-    assert response is not None
-    assert len(response.as_numpy("output")) == 10
diff --git a/tests/unit/systems/test_graph.py b/tests/unit/systems/test_graph.py
deleted file mode 100644
index c943620e033..00000000000
--- a/tests/unit/systems/test_graph.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import pytest
-
-from merlin.schema import Schema
-from nvtabular import Workflow
-from nvtabular import ops as wf_ops
-
-ensemble = pytest.importorskip("merlin.systems.dag.ensemble")
-workflow_op = pytest.importorskip("merlin.systems.dag.ops.workflow")
-
-
-def test_inference_schema_propagation():
-    input_columns = ["a", "b", "c"]
-    request_schema = Schema(input_columns)
-    expected_schema = Schema(["a_nvt", "b_nvt", "c_nvt"])
-
-    # NVT
-    workflow_ops = input_columns >> wf_ops.Rename(postfix="_nvt")
-    workflow = Workflow(workflow_ops)
-    workflow.fit_schema(request_schema)
-
-    assert workflow.graph.output_schema == expected_schema
-
-    # Triton
-    triton_ops = input_columns >> workflow_op.TransformWorkflow(workflow)
-    ensemble_out = ensemble.Ensemble(triton_ops, request_schema)
-
-    assert ensemble_out.graph.output_schema == expected_schema
diff --git a/tests/unit/systems/test_inference_ops.py b/tests/unit/systems/test_inference_ops.py
deleted file mode 100644
index 1023b6ad0bf..00000000000
--- a/tests/unit/systems/test_inference_ops.py
+++ /dev/null
@@ -1,82 +0,0 @@
-#
-# Copyright (c) 2021, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-import os
-import pathlib
-
-import pytest
-
-# this needs to be before any modules that import protobuf
-os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
-
-from google.protobuf import text_format  # noqa
-
-from merlin.schema import Schema  # noqa
-from nvtabular import Workflow  # noqa
-from nvtabular import ops as wf_ops  # noqa
-
-ensemble = pytest.importorskip("merlin.systems.dag.ensemble")
-model_config = pytest.importorskip("nvtabular.inference.triton.model_config_pb2")
-workflow_op = pytest.importorskip("merlin.systems.dag.ops.workflow")
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_workflow_op_validates_schemas(dataset, engine):
-    input_columns = ["x", "y", "id"]
-    request_schema = Schema(input_columns)
-
-    # NVT
-    workflow_ops = input_columns >> wf_ops.Rename(postfix="_nvt")
-    workflow = Workflow(workflow_ops)
-    workflow.fit(dataset)
-
-    # Triton
-    triton_ops = ["a", "b", "c"] >> workflow_op.TransformWorkflow(workflow)
-
-    with pytest.raises(ValueError) as exc_info:
-        ensemble.Ensemble(triton_ops, request_schema)
-    assert "Missing column" in str(exc_info.value)
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_workflow_op_exports_own_config(tmpdir, dataset, engine):
-    input_columns = ["x", "y", "id"]
-
-    # NVT
-    workflow_ops = input_columns >> wf_ops.Rename(postfix="_nvt")
-    workflow = Workflow(workflow_ops)
-    workflow.fit(dataset)
-
-    # Triton
-    triton_op = workflow_op.TransformWorkflow(workflow)
-    triton_op.export(tmpdir, None, None)
-
-    # Export creates directory
-    export_path = pathlib.Path(tmpdir) / triton_op.export_name
-    assert export_path.exists()
-
-    # Export creates the config file
-    config_path = export_path / "config.pbtxt"
-    assert config_path.exists()
-
-    # Read the config file back in from proto
-    with open(config_path, "rb") as f:
-        config = model_config.ModelConfig()
-        raw_config = f.read()
-        parsed = text_format.Parse(raw_config, config)
-
-        # The config file contents are correct
-        assert parsed.name == triton_op.export_name
-        assert parsed.backend == "nvtabular"
diff --git a/tests/unit/systems/test_op_runner.py b/tests/unit/systems/test_op_runner.py
deleted file mode 100644
index fad865a575f..00000000000
--- a/tests/unit/systems/test_op_runner.py
+++ /dev/null
@@ -1,163 +0,0 @@
-import json
-import os
-
-import numpy as np
-import pytest
-
-import nvtabular as nvt
-import nvtabular.ops as wf_ops
-from merlin.dag import Graph
-from merlin.schema import Tags
-from tests.unit.systems.inf_test_ops import PlusTwoOp
-
-op_runner = pytest.importorskip("merlin.systems.dag.op_runner")
-inf_op = pytest.importorskip("merlin.systems.dag.ops.operator")
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_op_runner_loads_config(tmpdir, dataset, engine):
-    input_columns = ["x", "y", "id"]
-
-    # NVT
-    workflow_ops = input_columns >> wf_ops.Rename(postfix="_nvt")
-    workflow = nvt.Workflow(workflow_ops)
-    workflow.fit(dataset)
-    workflow.save(str(tmpdir))
-
-    repository = "repository_path/"
-    version = 1
-    kind = ""
-    config = {
-        "parameters": {
-            "operator_names": {"string_value": json.dumps(["PlusTwoOp_1"])},
-            "PlusTwoOp_1": {
-                "string_value": json.dumps(
-                    {
-                        "module_name": PlusTwoOp.__module__,
-                        "class_name": "PlusTwoOp",
-                    }
-                )
-            },
-        }
-    }
-
-    runner = op_runner.OperatorRunner(config, repository, version, kind)
-
-    loaded_op = runner.operators[0]
-    assert isinstance(loaded_op, PlusTwoOp)
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_op_runner_loads_multiple_ops_same(tmpdir, dataset, engine):
-    # NVT
-    schema = dataset.schema
-    for name in schema.column_names:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-
-    repository = "repository_path/"
-    version = 1
-    kind = ""
-    config = {
-        "parameters": {
-            "operator_names": {"string_value": json.dumps(["PlusTwoOp_1", "PlusTwoOp_2"])},
-            "PlusTwoOp_1": {
-                "string_value": json.dumps(
-                    {
-                        "module_name": PlusTwoOp.__module__,
-                        "class_name": "PlusTwoOp",
-                    }
-                )
-            },
-            "PlusTwoOp_2": {
-                "string_value": json.dumps(
-                    {
-                        "module_name": PlusTwoOp.__module__,
-                        "class_name": "PlusTwoOp",
-                    }
-                )
-            },
-        }
-    }
-
-    runner = op_runner.OperatorRunner(config, repository, version, kind)
-
-    assert len(runner.operators) == 2
-
-    for idx, loaded_op in enumerate(runner.operators):
-        assert isinstance(loaded_op, PlusTwoOp)
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_op_runner_loads_multiple_ops_same_execute(tmpdir, dataset, engine):
-    # NVT
-    schema = dataset.schema
-    for name in schema.column_names:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-
-    repository = "repository_path/"
-    version = 1
-    kind = ""
-    config = {
-        "parameters": {
-            "operator_names": {"string_value": json.dumps(["PlusTwoOp_1", "PlusTwoOp_2"])},
-            "PlusTwoOp_1": {
-                "string_value": json.dumps(
-                    {
-                        "module_name": PlusTwoOp.__module__,
-                        "class_name": "PlusTwoOp",
-                    }
-                )
-            },
-            "PlusTwoOp_2": {
-                "string_value": json.dumps(
-                    {
-                        "module_name": PlusTwoOp.__module__,
-                        "class_name": "PlusTwoOp",
-                    }
-                )
-            },
-        }
-    }
-
-    runner = op_runner.OperatorRunner(config, repository, version, kind)
-
-    inputs = {}
-    for col_name in schema.column_names:
-        inputs[col_name] = np.random.randint(10)
-
-    outputs = runner.execute(inf_op.InferenceDataFrame(inputs))
-
-    assert outputs["x_plus_2_plus_2"] == inputs["x"] + 4
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_op_runner_single_node_export(tmpdir, dataset, engine):
-    # assert against produced config
-    schema = dataset.schema
-    for name in schema.column_names:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-
-    inputs = ["x", "y"]
-
-    node = inputs >> PlusTwoOp()
-
-    graph = Graph(node)
-    graph.construct_schema(dataset.schema)
-
-    config = node.export(tmpdir)
-
-    file_path = os.path.join(str(tmpdir), node.export_name, "config.pbtxt")
-
-    assert os.path.exists(file_path)
-    config_file = open(file_path, "r").read()
-    assert config_file == str(config)
-    assert len(config.input) == len(inputs)
-    assert len(config.output) == len(inputs)
-    for idx, conf in enumerate(config.output):
-        assert conf.name == inputs[idx] + "_plus_2"
diff --git a/tests/unit/systems/test_tensorflow_inf_op.py b/tests/unit/systems/test_tensorflow_inf_op.py
deleted file mode 100644
index 2c76395bd49..00000000000
--- a/tests/unit/systems/test_tensorflow_inf_op.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import os
-import pathlib
-from copy import deepcopy
-
-import pytest
-
-from merlin.dag import ColumnSelector, Graph
-from merlin.schema import Schema
-
-# this needs to be before any modules that import protobuf
-os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
-
-from google.protobuf import text_format  # noqa
-
-model_config = pytest.importorskip("nvtabular.inference.triton.model_config_pb2")
-tf_op = pytest.importorskip("merlin.systems.dag.ops.tensorflow")
-
-tf = pytest.importorskip("tensorflow")
-
-
-def test_tf_op_exports_own_config(tmpdir):
-    model = tf.keras.models.Sequential(
-        [
-            tf.keras.Input(name="input", dtype=tf.int32, shape=(784,)),
-            tf.keras.layers.Dense(512, activation="relu"),
-            tf.keras.layers.Dropout(0.2),
-            tf.keras.layers.Dense(10, name="output"),
-        ]
-    )
-
-    model.compile(
-        optimizer="adam",
-        loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
-        metrics=[tf.metrics.SparseCategoricalAccuracy()],
-    )
-
-    # Triton
-    triton_op = tf_op.PredictTensorflow(model)
-    triton_op.export(tmpdir, None, None)
-
-    # Export creates directory
-    export_path = pathlib.Path(tmpdir) / triton_op.export_name
-    assert export_path.exists()
-
-    # Export creates the config file
-    config_path = export_path / "config.pbtxt"
-    assert config_path.exists()
-
-    # Read the config file back in from proto
-    with open(config_path, "rb") as f:
-        config = model_config.ModelConfig()
-        raw_config = f.read()
-        parsed = text_format.Parse(raw_config, config)
-
-        # The config file contents are correct
-        assert parsed.name == triton_op.export_name
-        assert parsed.backend == "tensorflow"
-
-
-def test_tf_op_compute_schema():
-    model = tf.keras.models.Sequential(
-        [
-            tf.keras.Input(name="input", dtype=tf.int32, shape=(784,)),
-            tf.keras.layers.Dense(512, activation="relu"),
-            tf.keras.layers.Dropout(0.2),
-            tf.keras.layers.Dense(10, name="output"),
-        ]
-    )
-
-    model.compile(
-        optimizer="adam",
-        loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
-        metrics=[tf.metrics.SparseCategoricalAccuracy()],
-    )
-
-    # Triton
-    triton_op = tf_op.PredictTensorflow(model)
-
-    out_schema = triton_op.compute_output_schema(Schema(["input"]), ColumnSelector(["input"]), None)
-    assert out_schema.column_names == ["output"]
-
-
-def test_tf_schema_validation():
-    model = tf.keras.models.Sequential(
-        [
-            tf.keras.Input(name="input", dtype=tf.int32, shape=(784,)),
-            tf.keras.layers.Dense(512, activation="relu"),
-            tf.keras.layers.Dropout(0.2),
-            tf.keras.layers.Dense(10, name="output"),
-        ]
-    )
-
-    model.compile(
-        optimizer="adam",
-        loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
-        metrics=[tf.metrics.SparseCategoricalAccuracy()],
-    )
-
-    # Triton
-    tf_node = [] >> tf_op.PredictTensorflow(model)
-    tf_graph = Graph(tf_node)
-
-    with pytest.raises(ValueError) as exception_info:
-        deepcopy(tf_graph).construct_schema(Schema([]))
-    assert "Missing column 'input'" in str(exception_info.value)
-
-    with pytest.raises(ValueError) as exception_info:
-        deepcopy(tf_graph).construct_schema(Schema(["not_input"]))
-    assert "Missing column 'input'" in str(exception_info.value)
-
-    with pytest.raises(ValueError) as exception_info:
-        deepcopy(tf_graph).construct_schema(Schema(["input", "not_input"]))
-    assert "Mismatched dtypes for column 'input'" in str(exception_info.value)