From cac92f8ba75269e888e58a82bc4ac5f8b29e73d7 Mon Sep 17 00:00:00 2001 From: Julio Perez <37191411+jperez999@users.noreply.github.com> Date: Sun, 20 Mar 2022 16:29:48 -0400 Subject: [PATCH] Remove Systems library from nvtabular (#1456) --- merlin/systems/dag/__init__.py | 21 - merlin/systems/dag/ensemble.py | 125 - merlin/systems/dag/node.py | 62 - merlin/systems/dag/op_runner.py | 43 - merlin/systems/dag/ops/__init__.py | 15 - merlin/systems/dag/ops/faiss.py | 108 - merlin/systems/dag/ops/feast.py | 243 - merlin/systems/dag/ops/operator.py | 138 - merlin/systems/dag/ops/session_filter.py | 97 - merlin/systems/dag/ops/softmax_sampling.py | 108 - merlin/systems/dag/ops/tensorflow.py | 163 - merlin/systems/dag/ops/unroll_features.py | 85 - merlin/systems/dag/ops/workflow.py | 72 - merlin/systems/triton/__init__.py | 82 - merlin/systems/triton/conversions.py | 150 - merlin/systems/triton/export.py | 795 --- merlin/systems/triton/model_config.proto | 1660 ------- merlin/systems/triton/model_config_pb2.py | 4564 ------------------ merlin/systems/triton/oprunner_model.py | 88 - merlin/systems/triton/workflow_model.py | 121 - merlin/systems/workflow/__init__.py | 65 - merlin/systems/workflow/base.py | 209 - merlin/systems/workflow/hugectr.py | 87 - merlin/systems/workflow/pytorch.py | 46 - merlin/systems/workflow/tensorflow.py | 68 - tests/unit/systems/__init__.py | 0 tests/unit/systems/inf_test_ops.py | 24 - tests/unit/systems/inference_utils.py | 81 - tests/unit/systems/test_ensemble.py | 236 - tests/unit/systems/test_ensemble_ops.py | 88 - tests/unit/systems/test_export.py | 63 - tests/unit/systems/test_graph.py | 27 - tests/unit/systems/test_inference_ops.py | 82 - tests/unit/systems/test_op_runner.py | 163 - tests/unit/systems/test_tensorflow_inf_op.py | 113 - 35 files changed, 10092 deletions(-) delete mode 100644 merlin/systems/dag/__init__.py delete mode 100644 merlin/systems/dag/ensemble.py delete mode 100644 merlin/systems/dag/node.py delete mode 100644 merlin/systems/dag/op_runner.py delete mode 100644 merlin/systems/dag/ops/__init__.py delete mode 100644 merlin/systems/dag/ops/faiss.py delete mode 100644 merlin/systems/dag/ops/feast.py delete mode 100644 merlin/systems/dag/ops/operator.py delete mode 100644 merlin/systems/dag/ops/session_filter.py delete mode 100644 merlin/systems/dag/ops/softmax_sampling.py delete mode 100644 merlin/systems/dag/ops/tensorflow.py delete mode 100644 merlin/systems/dag/ops/unroll_features.py delete mode 100644 merlin/systems/dag/ops/workflow.py delete mode 100644 merlin/systems/triton/__init__.py delete mode 100644 merlin/systems/triton/conversions.py delete mode 100644 merlin/systems/triton/export.py delete mode 100644 merlin/systems/triton/model_config.proto delete mode 100644 merlin/systems/triton/model_config_pb2.py delete mode 100644 merlin/systems/triton/oprunner_model.py delete mode 100644 merlin/systems/triton/workflow_model.py delete mode 100644 merlin/systems/workflow/__init__.py delete mode 100644 merlin/systems/workflow/base.py delete mode 100644 merlin/systems/workflow/hugectr.py delete mode 100644 merlin/systems/workflow/pytorch.py delete mode 100644 merlin/systems/workflow/tensorflow.py delete mode 100644 tests/unit/systems/__init__.py delete mode 100644 tests/unit/systems/inf_test_ops.py delete mode 100644 tests/unit/systems/inference_utils.py delete mode 100644 tests/unit/systems/test_ensemble.py delete mode 100644 tests/unit/systems/test_ensemble_ops.py delete mode 100644 tests/unit/systems/test_export.py delete mode 100644 
tests/unit/systems/test_graph.py delete mode 100644 tests/unit/systems/test_inference_ops.py delete mode 100644 tests/unit/systems/test_op_runner.py delete mode 100644 tests/unit/systems/test_tensorflow_inf_op.py diff --git a/merlin/systems/dag/__init__.py b/merlin/systems/dag/__init__.py deleted file mode 100644 index 80bef9b2086..00000000000 --- a/merlin/systems/dag/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# alias submodules here to avoid breaking everything with moving to submodules -# flake8: noqa -from .ensemble import Ensemble -from .node import Node -from .op_runner import OperatorRunner diff --git a/merlin/systems/dag/ensemble.py b/merlin/systems/dag/ensemble.py deleted file mode 100644 index d0bf5db9bdf..00000000000 --- a/merlin/systems/dag/ensemble.py +++ /dev/null @@ -1,125 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import os - -from merlin.dag import postorder_iter_nodes - -# this needs to be before any modules that import protobuf -os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" - -from google.protobuf import text_format # noqa - -import merlin.systems.triton.model_config_pb2 as model_config # noqa -from merlin.dag import Graph # noqa -from merlin.systems.triton.export import _convert_dtype # noqa - - -class Ensemble: - def __init__(self, ops, schema, name="ensemble_model", label_columns=None): - self.graph = Graph(ops) - self.graph.construct_schema(schema) - self.name = name - self.label_columns = label_columns or [] - - def export(self, export_path, version=1): - # Create ensemble config - ensemble_config = model_config.ModelConfig( - name=self.name, - platform="ensemble", - # max_batch_size=configs[0].max_batch_size - ) - - for col_name, col_schema in self.graph.input_schema.column_schemas.items(): - ensemble_config.input.append( - model_config.ModelInput( - name=col_name, data_type=_convert_dtype(col_schema.dtype), dims=[-1, -1] - ) - ) - - for col_name, col_schema in self.graph.output_schema.column_schemas.items(): - ensemble_config.output.append( - model_config.ModelOutput( - name=col_name, data_type=_convert_dtype(col_schema.dtype), dims=[-1, -1] - ) - ) - - # Build node id lookup table - postorder_nodes = list(postorder_iter_nodes(self.graph.output_node)) - - node_idx = 0 - node_id_lookup = {} - for node in postorder_nodes: - if node.exportable: - node_id_lookup[node] = node_idx - node_idx += 1 - - node_configs = [] - # Export node configs and add ensemble steps - for node in postorder_nodes: - if node.exportable: - node_id = node_id_lookup.get(node, None) - node_name = f"{node_id}_{node.export_name}" - - found = False - for step in ensemble_config.ensemble_scheduling.step: - if step.model_name == node_name: - found = True - if found: - continue - - node_config = node.export(export_path, node_id=node_id, version=version) - - config_step = model_config.ModelEnsembling.Step( - model_name=node_name, model_version=-1 - ) - - for input_col_name in node.input_schema.column_names: - source = _find_column_source(node.parents_with_dependencies, input_col_name) - source_id = node_id_lookup.get(source, None) - in_suffix = f"_{source_id}" if source_id is not None else "" - config_step.input_map[input_col_name] = input_col_name + in_suffix - - for output_col_name in node.output_schema.column_names: - out_suffix = ( - f"_{node_id}" if node_id is not None and node_id < node_idx - 1 else "" - ) - config_step.output_map[output_col_name] = output_col_name + out_suffix - - ensemble_config.ensemble_scheduling.step.append(config_step) - node_configs.append(node_config) - - # Write the ensemble config file - ensemble_path = os.path.join(export_path, self.name) - os.makedirs(ensemble_path, exist_ok=True) - os.makedirs(os.path.join(ensemble_path, str(version)), exist_ok=True) - - with open(os.path.join(ensemble_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(ensemble_config, o) - - return (ensemble_config, node_configs) - - -def _find_column_source(upstream_nodes, column_name): - source_node = None - for upstream_node in upstream_nodes: - if column_name in upstream_node.output_columns.names: - source_node = upstream_node - break - - if source_node and not source_node.exportable: - return _find_column_source(source_node.parents_with_dependencies, column_name) - else: - return source_node diff --git a/merlin/systems/dag/node.py b/merlin/systems/dag/node.py deleted file mode 100644 
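For reference, the Ensemble class removed above was typically driven roughly as follows. This is a minimal usage sketch against the pre-removal package layout; `workflow` (an NVTabular Workflow), `model` (a trained Keras model), and the export path are assumed placeholders rather than anything defined in this patch.

from merlin.systems.dag.ensemble import Ensemble
from merlin.systems.dag.ops.tensorflow import PredictTensorflow
from merlin.systems.dag.ops.workflow import TransformWorkflow

# `workflow` is an nvtabular.Workflow and `model` a trained Keras model (assumed to exist)
pipeline = (
    workflow.input_schema.column_names
    >> TransformWorkflow(workflow)
    >> PredictTensorflow(model)
)
ensemble = Ensemble(pipeline, workflow.input_schema, name="ensemble_model")
ensemble_config, node_configs = ensemble.export("/tmp/model_repository", version=1)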
index b4825205286..00000000000 --- a/merlin/systems/dag/node.py +++ /dev/null @@ -1,62 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from merlin.dag import Node -from merlin.schema import Schema - - -class InferenceNode(Node): - def export(self, output_path, node_id=None, version=1): - return self.op.export( - output_path, self.input_schema, self.output_schema, node_id=node_id, version=version - ) - - @property - def export_name(self): - return self.op.export_name - - def match_descendant_dtypes(self, source_node): - self.output_schema = _match_dtypes(source_node.input_schema, self.output_schema) - return self - - def match_ancestor_dtypes(self, source_node): - self.input_schema = _match_dtypes(source_node.output_schema, self.input_schema) - return self - - def validate_schemas(self, root_schema, strict_dtypes=False): - super().validate_schemas(root_schema, strict_dtypes) - - if self.children: - childrens_schema = Schema() - for elem in self.children: - childrens_schema += elem.input_schema - - for col_name, col_schema in self.output_schema.column_schemas.items(): - sink_col_schema = childrens_schema.get(col_name) - - if not sink_col_schema: - raise ValueError( - f"Output column '{col_name}' not detected in any " - f"child inputs for '{self.op.__class__.__name__}'." - ) - - -def _match_dtypes(source_schema, dest_schema): - matched = Schema() - for col_name, col_schema in dest_schema.column_schemas.items(): - source_dtype = source_schema.get(col_name, col_schema).dtype - matched[col_name] = col_schema.with_dtype(source_dtype) - - return matched diff --git a/merlin/systems/dag/op_runner.py b/merlin/systems/dag/op_runner.py deleted file mode 100644 index 2db6247fb43..00000000000 --- a/merlin/systems/dag/op_runner.py +++ /dev/null @@ -1,43 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
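The OperatorRunner removed below rebuilds operators from JSON stored in the Triton model config's string parameters. The sketch that follows shows the expected structure, inferred from op_runner.py together with PipelineableInferenceOperator.export; the operator name and index path are hypothetical.

import json

op_config = {
    "module_name": "merlin.systems.dag.ops.faiss",
    "class_name": "QueryFaiss",
    "params": json.dumps({"index_path": "/models/index.faiss", "topk": 10}),
}
config = {
    "parameters": {
        "operator_names": {"string_value": json.dumps(["0_queryfaiss"])},
        "0_queryfaiss": {"string_value": json.dumps(op_config)},
    }
}
# OperatorRunner(config) would import QueryFaiss and call QueryFaiss.from_config(op_config)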
-# -import importlib -import json - - -class OperatorRunner: - def __init__(self, config, repository="./", version=1, kind=""): - operator_names = self.fetch_json_param(config, "operator_names") - op_configs = [self.fetch_json_param(config, op_name) for op_name in operator_names] - - self.operators = [] - for op_config in op_configs: - module_name = op_config["module_name"] - class_name = op_config["class_name"] - - op_module = importlib.import_module(module_name) - op_class = getattr(op_module, class_name) - - operator = op_class.from_config(op_config) - self.operators.append(operator) - - def execute(self, tensors): - for operator in self.operators: - tensors = operator.transform(tensors) - return tensors - - def fetch_json_param(self, model_config, param_name): - string_value = model_config["parameters"][param_name]["string_value"] - return json.loads(string_value) diff --git a/merlin/systems/dag/ops/__init__.py b/merlin/systems/dag/ops/__init__.py deleted file mode 100644 index 5d9909dec4e..00000000000 --- a/merlin/systems/dag/ops/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# diff --git a/merlin/systems/dag/ops/faiss.py b/merlin/systems/dag/ops/faiss.py deleted file mode 100644 index 70dd019054a..00000000000 --- a/merlin/systems/dag/ops/faiss.py +++ /dev/null @@ -1,108 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import json -import os -from shutil import copy2 - -import faiss -import numpy as np - -from merlin.dag import ColumnSelector -from merlin.schema import ColumnSchema, Schema -from merlin.systems.dag.ops.operator import InferenceDataFrame, PipelineableInferenceOperator - - -class QueryFaiss(PipelineableInferenceOperator): - def __init__(self, index_path, topk=10): - self.index_path = str(index_path) - self.topk = topk - self._index = None - super().__init__() - - @classmethod - def from_config(cls, config): - parameters = json.loads(config.get("params", "")) - index_path = parameters["index_path"] - topk = parameters["topk"] - - operator = QueryFaiss(index_path, topk=topk) - operator._index = faiss.read_index(str(index_path)) - - return operator - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - params = params or {} - - # TODO: Copy the index into the export directory - - self_params = { - # TODO: Write the (relative) path from inside the export directory - "index_path": self.index_path, - "topk": self.topk, - } - self_params.update(params) - index_filename = os.path.basename(os.path.realpath(self.index_path)) - - # set index path to new path after export - new_index_path = os.path.join( - path, f"{node_id}_{QueryFaiss.__name__.lower()}", str(version), index_filename - ) - copy2(self.index_path, new_index_path) - self.index_path = new_index_path - return super().export(path, input_schema, output_schema, self_params, node_id, version) - - def transform(self, df: InferenceDataFrame): - user_vector = list(df.tensors.values())[0] - - _, indices = self._index.search(user_vector, self.topk) - # distances, indices = self.index.search(user_vector, self.topk) - - candidate_ids = np.array(indices).T.astype(np.int32) - - return InferenceDataFrame({"candidate_ids": candidate_ids}) - - def compute_input_schema( - self, - root_schema: Schema, - parents_schema: Schema, - deps_schema: Schema, - selector: ColumnSelector, - ) -> Schema: - input_schema = super().compute_input_schema( - root_schema, parents_schema, deps_schema, selector - ) - if len(input_schema.column_schemas) > 1: - raise ValueError( - "More than one input has been detected for this node," - / f"inputs received: {input_schema.column_names}" - ) - return input_schema - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - return Schema( - [ - ColumnSchema("candidate_ids", dtype=np.int32), - ] - ) - - -def setup_faiss(item_vector, output_path): - index = faiss.IndexFlatL2(item_vector[0].shape[0]) - index.add(item_vector) - faiss.write_index(index, str(output_path)) diff --git a/merlin/systems/dag/ops/feast.py b/merlin/systems/dag/ops/feast.py deleted file mode 100644 index e9d4770bc4a..00000000000 --- a/merlin/systems/dag/ops/feast.py +++ /dev/null @@ -1,243 +0,0 @@ -import json - -import numpy as np -from feast import FeatureStore, ValueType - -from merlin.dag import ColumnSelector -from merlin.schema import ColumnSchema, Schema -from merlin.systems.dag.ops.operator import InferenceDataFrame, PipelineableInferenceOperator - -# Feast_key: (numpy dtype, is_list, is_ragged) -feast_2_numpy = { - ValueType.INT64: (np.int64, False, False), - ValueType.INT32: (np.int32, False, False), - ValueType.FLOAT: (np.float, False, False), - ValueType.INT64_LIST: (np.int64, True, True), - ValueType.INT32_LIST: (np.int32, True, True), - ValueType.FLOAT_LIST: (np.float, True, True), -} - - -class 
QueryFeast(PipelineableInferenceOperator): - @classmethod - def from_feature_view(cls, store, path, view, column, output_prefix=None, include_id=False): - feature_view = store.get_feature_view(view) - entity_id = feature_view.entities[0] - - features = [] - mh_features = [] - - input_schema = Schema([ColumnSchema(column, dtype=np.int32)]) - - output_schema = Schema([]) - for feature in feature_view.features: - feature_dtype, is_list, is_ragged = feast_2_numpy[feature.dtype] - - if is_list: - mh_features.append(feature.name) - - values_name = cls._prefixed_name(output_prefix, f"{feature.name}_1") - nnzs_name = cls._prefixed_name(output_prefix, f"{feature.name}_2") - output_schema[values_name] = ColumnSchema( - values_name, dtype=feature_dtype, is_list=is_list, is_ragged=is_ragged - ) - output_schema[nnzs_name] = ColumnSchema( - nnzs_name, dtype=np.int64, is_list=True, is_ragged=False - ) - else: - features.append(feature.name) - - name = cls._prefixed_name(output_prefix, feature.name) - output_schema[name] = ColumnSchema( - name, dtype=feature_dtype, is_list=is_list, is_ragged=is_ragged - ) - - if include_id: - output_schema[entity_id] = ColumnSchema(entity_id, dtype=np.int32) - - return QueryFeast( - path, - entity_id, - view, - column, - features, - mh_features, - input_schema, - output_schema, - include_id=include_id, - output_prefix=output_prefix or "", - suffix_int=1, - ) - - def __init__( - self, - repo_path, - entity_id, - entity_view, - entity_column, - features, - mh_features, - input_schema, - output_schema, - include_id=False, - output_prefix="", - suffix_int=1, - ): - self.repo_path = repo_path - self.entity_id = entity_id - self.entity_view = entity_view - self.entity_column = entity_column - - self.features = features - self.mh_features = mh_features - self.input_schema = input_schema - self.output_schema = output_schema - self.include_id = include_id - self.output_prefix = output_prefix - self.suffix_int = suffix_int - - self.store = FeatureStore(repo_path=repo_path) - super().__init__() - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - return self.output_schema - - def compute_input_schema( - self, - root_schema: Schema, - parents_schema: Schema, - deps_schema: Schema, - selector: ColumnSelector, - ) -> Schema: - return self.input_schema - - @classmethod - def from_config(cls, config): - parameters = json.loads(config.get("params", "")) - entity_id = parameters["entity_id"] - entity_view = parameters["entity_view"] - entity_column = parameters["entity_column"] - repo_path = parameters["feast_repo_path"] - features = parameters["features"] - mh_features = parameters["mh_features"] - in_dict = json.loads(config.get("input_dict", "{}")) - out_dict = json.loads(config.get("output_dict", "{}")) - include_id = parameters["include_id"] - output_prefix = parameters["output_prefix"] - suffix_int = parameters["suffix_int"] - - in_schema = Schema([]) - for col_name, col_rep in in_dict.items(): - in_schema[col_name] = ColumnSchema( - col_name, - dtype=col_rep["dtype"], - is_list=col_rep["is_list"], - is_ragged=col_rep["is_ragged"], - ) - out_schema = Schema([]) - for col_name, col_rep in out_dict.items(): - out_schema[col_name] = ColumnSchema( - col_name, - dtype=col_rep["dtype"], - is_list=col_rep["is_list"], - is_ragged=col_rep["is_ragged"], - ) - - return QueryFeast( - repo_path, - entity_id, - entity_view, - entity_column, - features, - mh_features, - in_schema, - out_schema, - include_id, - 
output_prefix, - suffix_int, - ) - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - params = params or {} - self_params = { - "entity_id": self.entity_id, - "entity_view": self.entity_view, - "entity_column": self.entity_column, - "features": self.features, - "mh_features": self.mh_features, - "feast_repo_path": self.repo_path, - "include_id": self.include_id, - "output_prefix": self.output_prefix, - "suffix_int": self.suffix_int, - } - self_params.update(params) - return super().export(path, input_schema, output_schema, self_params, node_id, version) - - def transform(self, df: InferenceDataFrame) -> InferenceDataFrame: - entity_ids = df[self.entity_column] - entity_rows = [{self.entity_id: int(entity_id)} for entity_id in entity_ids] - - feature_names = self.features + self.mh_features - feature_refs = [ - ":".join([self.entity_view, feature_name]) for feature_name in feature_names - ] - - feast_response = self.store.get_online_features( - features=feature_refs, - entity_rows=entity_rows, - ).to_dict() - - output_tensors = {} - if self.include_id: - output_tensors[self.entity_id] = entity_ids - - # Numerical and single-hot categorical - for feature_name in self.features: - prefixed_name = self.__class__._prefixed_name(self.output_prefix, feature_name) - - feature_value = feast_response[feature_name] - feature_array = np.array([feature_value]).T.astype( - self.output_schema[prefixed_name].dtype - ) - output_tensors[prefixed_name] = feature_array - - # Multi-hot categorical - for feature_name in self.mh_features: - feature_value = feast_response[feature_name] - - prefixed_name = self.__class__._prefixed_name(self.output_prefix, feature_name) - feature_out_name = f"{prefixed_name}_{self.suffix_int}" - - nnzs = None - if ( - isinstance(feature_value[0], list) - and self.output_schema[feature_out_name].is_ragged - ): - flattened_value = [] - for val in feature_value: - flattened_value.extend(val) - - nnzs = [len(vals) for vals in feature_value] - feature_value = [flattened_value] - - feature_array = np.array(feature_value).T.astype( - self.output_schema[feature_out_name].dtype - ) - if not nnzs: - nnzs = [len(feature_array)] - feature_out_nnz = f"{prefixed_name}_{self.suffix_int+1}" - feature_nnzs = np.array([nnzs], dtype=self.output_schema[feature_out_nnz].dtype).T - - output_tensors[feature_out_name] = feature_array - output_tensors[feature_out_nnz] = feature_nnzs - - return InferenceDataFrame(output_tensors) - - @classmethod - def _prefixed_name(cls, output_prefix, col_name): - if output_prefix and col_name and not col_name.startswith(output_prefix): - return f"{output_prefix}_{col_name}" - else: - return col_name diff --git a/merlin/systems/dag/ops/operator.py b/merlin/systems/dag/ops/operator.py deleted file mode 100644 index c81ddee4837..00000000000 --- a/merlin/systems/dag/ops/operator.py +++ /dev/null @@ -1,138 +0,0 @@ -import json -import os -import pathlib -from abc import abstractclassmethod, abstractmethod -from shutil import copyfile - -# this needs to be before any modules that import protobuf -os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" - -from google.protobuf import text_format # noqa - -import merlin.systems.triton.model_config_pb2 as model_config # noqa -from merlin.dag import BaseOperator # noqa -from merlin.systems.dag.node import InferenceNode # noqa -from merlin.systems.triton.export import _convert_dtype # noqa - - -class InferenceDataFrame: - def __init__(self, tensors=None): - self.tensors = tensors or 
{} - - def __getitem__(self, col_items): - if isinstance(col_items, list): - results = {name: self.tensors[name] for name in col_items} - return InferenceDataFrame(results) - else: - return self.tensors[col_items] - - def __len__(self): - return len(self.tensors) - - def __iter__(self): - for name, tensor in self.tensors.items(): - yield name, tensor - - def __repr__(self): - dict_rep = {} - for k, v in self.tensors.items(): - dict_rep[k] = v - return str(dict_rep) - - -class InferenceOperator(BaseOperator): - @property - def export_name(self): - return self.__class__.__name__.lower() - - @abstractmethod - def export(self, export_path, input_schema, output_schema, node_id=None, version=1): - pass - - def create_node(self, selector): - return InferenceNode(selector) - - -class PipelineableInferenceOperator(InferenceOperator): - @abstractclassmethod - def from_config(cls, config): - pass - - @abstractmethod - def transform(self, df: InferenceDataFrame) -> InferenceDataFrame: - """Transform the dataframe by applying this operator to the set of input columns - - Parameters - ----------- - df: Dataframe - A pandas or cudf dataframe that this operator will work on - - Returns - ------- - DataFrame - Returns a transformed dataframe for this operator - """ - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - params = params or {} - - node_name = f"{node_id}_{self.export_name}" if node_id is not None else self.export_name - - node_export_path = pathlib.Path(path) / node_name - node_export_path.mkdir(exist_ok=True) - - config = model_config.ModelConfig(name=node_name, backend="nvtabular", platform="op_runner") - - config.parameters["operator_names"].string_value = json.dumps([node_name]) - - config.parameters[node_name].string_value = json.dumps( - { - "module_name": self.__class__.__module__, - "class_name": self.__class__.__name__, - "input_dict": json.dumps(_schema_to_dict(input_schema)), - "output_dict": json.dumps(_schema_to_dict(output_schema)), - "params": json.dumps(params), - } - ) - - for col_name, col_dict in _schema_to_dict(input_schema).items(): - config.input.append( - model_config.ModelInput( - name=col_name, data_type=_convert_dtype(col_dict["dtype"]), dims=[-1, -1] - ) - ) - - for col_name, col_dict in _schema_to_dict(output_schema).items(): - # this assumes the list columns are 1D tensors both for cats and conts - config.output.append( - model_config.ModelOutput( - name=col_name.split("/")[0], - data_type=_convert_dtype(col_dict["dtype"]), - dims=[-1, -1], - ) - ) - - with open(os.path.join(node_export_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - - os.makedirs(node_export_path, exist_ok=True) - os.makedirs(os.path.join(node_export_path, str(version)), exist_ok=True) - copyfile( - os.path.join(os.path.dirname(__file__), "..", "..", "triton", "oprunner_model.py"), - os.path.join(node_export_path, str(version), "model.py"), - ) - - return config - - -def _schema_to_dict(schema): - # TODO: Write the conversion - schema_dict = {} - for col_name, col_schema in schema.column_schemas.items(): - schema_dict[col_name] = { - "dtype": col_schema.dtype.name, - "is_list": col_schema.is_list, - "is_ragged": col_schema.is_ragged, - } - - return schema_dict diff --git a/merlin/systems/dag/ops/session_filter.py b/merlin/systems/dag/ops/session_filter.py deleted file mode 100644 index ead50284321..00000000000 --- a/merlin/systems/dag/ops/session_filter.py +++ /dev/null @@ -1,97 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA 
CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import json - -import numpy as np - -from merlin.dag import ColumnSelector, Node -from merlin.schema import ColumnSchema, Schema -from merlin.systems.dag.ops.operator import InferenceDataFrame, PipelineableInferenceOperator - - -class FilterCandidates(PipelineableInferenceOperator): - def __init__(self, filter_out, input_col=None): - self.filter_out = Node.construct_from(filter_out) - self._input_col = input_col - self._filter_out_col = filter_out - super().__init__() - - @classmethod - def from_config(cls, config): - parameters = json.loads(config.get("params", "")) - filter_out_col = parameters["filter_out_col"] - input_col = parameters["input_col"] - return FilterCandidates(filter_out_col, input_col) - - @property - def dependencies(self): - return self.filter_out - - def compute_input_schema( - self, - root_schema: Schema, - parents_schema: Schema, - deps_schema: Schema, - selector: ColumnSelector, - ) -> Schema: - input_schema = super().compute_input_schema( - root_schema, parents_schema, deps_schema, selector - ) - - if len(parents_schema.column_schemas) > 1: - raise ValueError( - "More than one input has been detected for this node," - / f"inputs received: {input_schema.column_names}" - ) - if len(deps_schema.column_schemas) > 1: - raise ValueError( - "More than one dependency input has been detected" - / f"for this node, inputs received: {input_schema.column_names}" - ) - - # 1 for deps and 1 for parents - if len(input_schema.column_schemas) > 2: - raise ValueError( - "More than one input has been detected for this node," - / f"inputs received: {input_schema.column_names}" - ) - - self._input_col = parents_schema.column_names[0] - self._filter_out_col = deps_schema.column_names[0] - - return input_schema - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - return Schema([ColumnSchema("filtered_ids", dtype=np.int32, is_list=False)]) - - def transform(self, df: InferenceDataFrame): - candidate_ids = df[self._input_col] - filter_ids = df[self._filter_out_col] - - filtered_results = np.array([candidate_ids[~np.isin(candidate_ids, filter_ids)]]).T - return InferenceDataFrame({"filtered_ids": filtered_results}) - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - params = params or {} - self_params = { - "input_col": self._input_col, - "filter_out_col": self._filter_out_col, - } - self_params.update(params) - return super().export(path, input_schema, output_schema, self_params, node_id, version) diff --git a/merlin/systems/dag/ops/softmax_sampling.py b/merlin/systems/dag/ops/softmax_sampling.py deleted file mode 100644 index 04063584a37..00000000000 --- a/merlin/systems/dag/ops/softmax_sampling.py +++ /dev/null @@ -1,108 +0,0 @@ -import json - -import numpy as np - -from merlin.dag.node import Node -from merlin.dag.selector import ColumnSelector -from merlin.schema import ColumnSchema, Schema 
-from merlin.systems.dag.ops.operator import InferenceDataFrame, PipelineableInferenceOperator - - -class SoftmaxSampling(PipelineableInferenceOperator): - def __init__(self, relevance_col, temperature=20.0, topk=10, _input_col=None): - self.relevance_col = Node.construct_from(relevance_col) - self.temperature = temperature - self.topk = topk - self._input_col_name = _input_col - self._relevance_col_name = relevance_col - super().__init__() - - @classmethod - def from_config(cls, config): - """Load operator and properties from Triton config""" - parameters = json.loads(config.get("params", "")) - relevance_col = parameters["relevance_col"] - input_col = parameters["input_col"] - temperature = parameters["temperature"] - topk = parameters["topk"] - - return SoftmaxSampling( - relevance_col, temperature=temperature, topk=topk, _input_col=input_col - ) - - @property - def dependencies(self): - return self.relevance_col - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - """Write out a Triton model config directory""" - params = params or {} - self_params = { - "input_col": self._input_col_name, - "relevance_col": self._relevance_col_name, - "temperature": self.temperature, - "topk": self.topk, - } - self_params.update(params) - return super().export(path, input_schema, output_schema, self_params, node_id, version) - - def compute_input_schema( - self, - root_schema: Schema, - parents_schema: Schema, - deps_schema: Schema, - selector: ColumnSelector, - ) -> Schema: - input_schema = super().compute_input_schema( - root_schema, parents_schema, deps_schema, selector - ) - if len(parents_schema.column_schemas) > 1: - raise ValueError( - "More than one input has been detected for this node," - f" inputs received: {input_schema.column_names}" - ) - - self._input_col_name = parents_schema.column_names[0] - self._relevance_col_name = deps_schema.column_names[0] - return input_schema - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - """Describe the operator's outputs""" - return Schema([ColumnSchema("ordered_ids", dtype=np.int32, is_list=True, is_ragged=True)]) - - def transform(self, df: InferenceDataFrame) -> InferenceDataFrame: - """Transform the dataframe by applying this operator to the set of input columns""" - # Extract parameters from the request - candidate_ids = df[self._input_col_name].reshape(-1) - - predicted_scores = df[self._relevance_col_name].reshape(-1) - - # Exponential sort trick for sampling from a distribution without replacement from: - - # Pavlos S. Efraimidis, Paul G. Spirakis, Weighted random sampling with a reservoir, - # Information Processing Letters, Volume 97, Issue 5, 2006, Pages 181-185, ISSN 0020-0190, - # https://doi.org/10.1016/j.ipl.2005.11.003. 
- - # As implemented by Tim Vieira in "Algorithms for sampling without replacement" - # https://timvieira.github.io/blog/post/2019/09/16/algorithms-for-sampling-without-replacement/ - - # The weights for the sampling distribution are the softmax of the scores - weights = np.exp(self.temperature * predicted_scores) / np.sum(predicted_scores) - - # This is the core of the exponential sampling trick, which creates a - # set of values that depend on both the predicted scores and random - # variables, resulting in a set of values that will sort into an order - # that reflects sampling without replacement according to the weight - # distribution - num_items = candidate_ids.shape[0] - exponentials = -np.log(np.random.uniform(0, 1, size=(num_items,))) - exponentials /= weights - - # This is just bookkeeping to produce the final ordered list of recs - sorted_indices = np.argsort(exponentials) - topk_movie_ids = candidate_ids[sorted_indices][: self.topk] - ordered_movie_ids = topk_movie_ids.reshape(1, -1).T - - return InferenceDataFrame({"ordered_ids": ordered_movie_ids}) diff --git a/merlin/systems/dag/ops/tensorflow.py b/merlin/systems/dag/ops/tensorflow.py deleted file mode 100644 index 68c5dc1d058..00000000000 --- a/merlin/systems/dag/ops/tensorflow.py +++ /dev/null @@ -1,163 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
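The sampling logic in SoftmaxSampling.transform above can be exercised in isolation. Below is a standalone NumPy sketch of the same exponential-sort trick; note that it normalizes the weights with a standard softmax (sum of exponentials), whereas the removed code divides by the sum of the raw scores.

import numpy as np

def softmax_sample(candidate_ids, scores, temperature=20.0, topk=10, seed=None):
    # Weighted sampling without replacement via the exponential-sort trick
    # (Efraimidis & Spirakis 2006; Vieira's post cited above)
    rng = np.random.default_rng(seed)
    weights = np.exp(temperature * scores)
    weights = weights / weights.sum()  # softmax of the relevance scores
    keys = -np.log(rng.uniform(size=candidate_ids.shape[0])) / weights
    return candidate_ids[np.argsort(keys)][:topk]  # smallest key is drawn first

ids = np.arange(100, dtype=np.int32)
scores = np.linspace(0.0, 1.0, 100, dtype=np.float32)
print(softmax_sample(ids, scores, topk=5))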
-# -import os -import pathlib -import tempfile -from shutil import copytree - -# this needs to be before any modules that import protobuf -os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" - -import tensorflow as tf # noqa -from google.protobuf import text_format # noqa - -import merlin.systems.triton.model_config_pb2 as model_config # noqa -from merlin.dag import ColumnSelector # noqa -from merlin.schema import ColumnSchema, Schema # noqa -from merlin.systems.dag.ops.operator import InferenceOperator # noqa -from merlin.systems.triton.export import _convert_dtype # noqa - - -class PredictTensorflow(InferenceOperator): - def __init__(self, model_or_path, custom_objects=None): - custom_objects = custom_objects or {} - - if isinstance(model_or_path, (str, os.PathLike)): - self.path = model_or_path - self.model = tf.keras.models.load_model(self.path, custom_objects=custom_objects) - else: - self.path = None - self.model = model_or_path - - signatures = getattr(self.model, "signatures", {}) or {} - default_signature = signatures.get("serving_default") - - if not default_signature: - # roundtrip saved self.model to disk to generate signature if it doesn't exist - - with tempfile.TemporaryDirectory() as tmp_dir: - tf_model_path = pathlib.Path(tmp_dir) / "model.savedmodel" - self.model.save(tf_model_path, include_optimizer=False) - reloaded = tf.keras.models.load_model(tf_model_path) - default_signature = reloaded.signatures["serving_default"] - - inputs = list(default_signature.structured_input_signature[1].values()) - outputs = list(default_signature.structured_outputs.values()) - - input_col_names = [col.name.split("/")[0] for col in inputs] - output_col_names = [col.name.split("/")[0] for col in outputs] - - self.input_schema = Schema() - for col, input_col in zip(input_col_names, inputs): - self.input_schema.column_schemas[col] = ColumnSchema( - col, dtype=input_col.dtype.as_numpy_dtype - ) - - self.output_schema = Schema() - for col, output_col in zip(output_col_names, outputs): - self.output_schema.column_schemas[col] = ColumnSchema( - col, dtype=output_col.dtype.as_numpy_dtype - ) - super().__init__() - - def export(self, path, input_schema, output_schema, node_id=None, version=1): - """Create a directory inside supplied path based on our export name""" - node_name = f"{node_id}_{self.export_name}" if node_id is not None else self.export_name - - node_export_path = pathlib.Path(path) / node_name - node_export_path.mkdir(exist_ok=True) - - tf_model_path = pathlib.Path(node_export_path) / str(version) / "model.savedmodel" - - if self.path: - copytree( - str(self.path), - tf_model_path, - dirs_exist_ok=True, - ) - else: - self.model.save(tf_model_path, include_optimizer=False) - - return self._export_model(self.model, node_name, node_export_path, version=version) - - def compute_input_schema( - self, - root_schema: Schema, - parents_schema: Schema, - deps_schema: Schema, - selector: ColumnSelector, - ) -> Schema: - return self.input_schema - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - return self.output_schema - - def _export_model(self, model, name, output_path, version=1): - """Exports a TensorFlow model for serving with Triton - - Parameters - ---------- - model: - The tensorflow model that should be served - name: - The name of the triton model to export - output_path: - The path to write the exported model to - """ - tf_model_path = os.path.join(output_path, str(version), 
"model.savedmodel") - config = model_config.ModelConfig( - name=name, backend="tensorflow", platform="tensorflow_savedmodel" - ) - - inputs, outputs = model.inputs, model.outputs - - if not inputs or not outputs: - signatures = getattr(model, "signatures", {}) or {} - default_signature = signatures.get("serving_default") - if not default_signature: - # roundtrip saved model to disk to generate signature if it doesn't exist - - reloaded = tf.keras.models.load_model(tf_model_path) - default_signature = reloaded.signatures["serving_default"] - - inputs = list(default_signature.structured_input_signature[1].values()) - outputs = list(default_signature.structured_outputs.values()) - - config.parameters["TF_GRAPH_TAG"].string_value = "serve" - config.parameters["TF_SIGNATURE_DEF"].string_value = "serving_default" - - for col in inputs: - config.input.append( - model_config.ModelInput( - name=f"{col.name}", data_type=_convert_dtype(col.dtype), dims=[-1, col.shape[1]] - ) - ) - - for col in outputs: - # this assumes the list columns are 1D tensors both for cats and conts - config.output.append( - model_config.ModelOutput( - name=col.name.split("/")[0], - data_type=_convert_dtype(col.dtype), - dims=[-1, col.shape[1]], - ) - ) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config diff --git a/merlin/systems/dag/ops/unroll_features.py b/merlin/systems/dag/ops/unroll_features.py deleted file mode 100644 index 7d908069c37..00000000000 --- a/merlin/systems/dag/ops/unroll_features.py +++ /dev/null @@ -1,85 +0,0 @@ -# -# Copyright (c) 2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import json - -import numpy as np - -from merlin.dag import Node -from merlin.dag.selector import ColumnSelector -from merlin.schema import Schema -from merlin.systems.dag.ops.operator import InferenceDataFrame, PipelineableInferenceOperator - - -class UnrollFeatures(PipelineableInferenceOperator): - def __init__(self, item_id_col, unroll_cols, unrolled_prefix=""): - self.item_id_col = item_id_col - self.unroll_cols = Node.construct_from(unroll_cols) - self.unrolled_prefix = unrolled_prefix - super().__init__() - - @classmethod - def from_config(cls, config): - parameters = json.loads(config.get("params", "")) - candidate_col = parameters["item_id_col"] - unroll_cols = parameters["unroll_cols"] - unrolled_prefix = parameters["unrolled_prefix"] - return UnrollFeatures(candidate_col, unroll_cols, unrolled_prefix) - - def export(self, path, input_schema, output_schema, params=None, node_id=None, version=1): - params = params or {} - self_params = { - "item_id_col": self.item_id_col, - "unroll_cols": self._unroll_col_names, - "unrolled_prefix": self.unrolled_prefix, - } - self_params.update(params) - return super().export(path, input_schema, output_schema, self_params, node_id, version) - - @property - def dependencies(self): - return self.unroll_cols - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - schema = super().compute_output_schema(input_schema, col_selector, prev_output_schema) - - for col_name, col_schema in self.unroll_cols.output_schema.column_schemas.items(): - schema.column_schemas.pop(col_name, None) - col_name = f"{self.unrolled_prefix}_{col_name}" if self.unrolled_prefix else col_name - schema[col_name] = col_schema.with_name(col_name) - - return schema - - def transform(self, df: InferenceDataFrame): - num_items = df[self.item_id_col].shape[0] - outputs = {} - for col_name, col_value in df.tensors.items(): - outputs[col_name] = col_value - - for col in self._unroll_col_names: - target = outputs.pop(col) - col_name = f"{self.unrolled_prefix}_{col}" if self.unrolled_prefix else col - outputs[col_name] = np.repeat(target, num_items, axis=0) - - return InferenceDataFrame(outputs) - - @property - def _unroll_col_names(self): - if self.unroll_cols.selector: - return self.unroll_cols.selector.names - else: - return self.unroll_cols.output_columns.names diff --git a/merlin/systems/dag/ops/workflow.py b/merlin/systems/dag/ops/workflow.py deleted file mode 100644 index 3c839f08357..00000000000 --- a/merlin/systems/dag/ops/workflow.py +++ /dev/null @@ -1,72 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import pathlib - -from merlin.dag import ColumnSelector -from merlin.schema import Schema -from merlin.systems.dag.ops.operator import InferenceOperator -from merlin.systems.triton.export import _generate_nvtabular_config - - -class TransformWorkflow(InferenceOperator): - def __init__( - self, - workflow, - sparse_max=None, - max_batch_size=None, - label_columns=None, - model_framework=None, - cats=None, - conts=None, - ): - super().__init__() - - self.workflow = workflow - self.sparse_max = sparse_max or {} - self.max_batch_size = max_batch_size - self.label_columns = label_columns or [] - self.model_framework = model_framework or "" - self.cats = cats or [] - self.conts = conts or [] - super().__init__() - - def compute_output_schema( - self, input_schema: Schema, col_selector: ColumnSelector, prev_output_schema: Schema = None - ) -> Schema: - return self.workflow.output_schema - - def export(self, path, input_schema, output_schema, node_id=None, version=1): - """Create a directory inside supplied path based on our export name""" - modified_workflow = self.workflow.remove_inputs(self.label_columns) - - node_name = f"{node_id}_{self.export_name}" if node_id is not None else self.export_name - - node_export_path = pathlib.Path(path) / node_name - node_export_path.mkdir(exist_ok=True) - - workflow_export_path = node_export_path / str(version) / "workflow" - modified_workflow.save(str(workflow_export_path)) - - return _generate_nvtabular_config( - modified_workflow, - node_name, - node_export_path, - backend="nvtabular", - sparse_max=self.sparse_max, - max_batch_size=self.max_batch_size, - cats=self.cats, - conts=self.conts, - ) diff --git a/merlin/systems/triton/__init__.py b/merlin/systems/triton/__init__.py deleted file mode 100644 index 8d11484134f..00000000000 --- a/merlin/systems/triton/__init__.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import json -import os - -import pandas as pd - -# this needs to be before any modules that import protobuf -os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" - -import tritonclient.grpc as grpcclient # noqa -from tritonclient.utils import np_to_triton_dtype # noqa - -from merlin.core.dispatch import is_list_dtype, is_string_dtype, make_df # noqa -from merlin.systems.triton.export import ( # noqa - _convert_string2pytorch_dtype, - export_hugectr_ensemble, - export_pytorch_ensemble, - export_tensorflow_ensemble, - generate_hugectr_model, - generate_nvtabular_model, -) - - -def convert_df_to_triton_input(column_names, batch, input_class=grpcclient.InferInput): - columns = [(col, batch[col]) for col in column_names] - inputs = [] - for i, (name, col) in enumerate(columns): - if is_list_dtype(col): - if isinstance(col, pd.Series): - raise ValueError("this function doesn't support CPU list values yet") - inputs.append( - _convert_column_to_triton_input( - col._column.offsets.values_host.astype("int64"), name + "__nnzs", input_class - ) - ) - inputs.append( - _convert_column_to_triton_input( - col.list.leaves.values_host.astype("int64"), name + "__values", input_class - ) - ) - else: - values = col.values if isinstance(col, pd.Series) else col.values_host - inputs.append(_convert_column_to_triton_input(values, name, input_class)) - return inputs - - -def _convert_column_to_triton_input(col, name, input_class=grpcclient.InferInput): - col = col.reshape(len(col), 1) - input_tensor = input_class(name, col.shape, np_to_triton_dtype(col.dtype)) - input_tensor.set_data_from_numpy(col) - return input_tensor - - -def convert_triton_output_to_df(columns, response): - return make_df({col: response.as_numpy(col) for col in columns}) - - -def get_column_types(path): - return json.load(open(os.path.join(path, "column_types.json"))) - - -def _convert_tensor(t): - out = t.as_numpy() - if len(out.shape) == 2: - out = out[:, 0] - # cudf doesn't seem to handle dtypes like |S15 or object that well - if is_string_dtype(out.dtype): - out = out.astype("str") - return out diff --git a/merlin/systems/triton/conversions.py b/merlin/systems/triton/conversions.py deleted file mode 100644 index 123f467be1e..00000000000 --- a/merlin/systems/triton/conversions.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
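The helpers deleted above from merlin/systems/triton/__init__.py were typically used on the client side roughly as follows. This is a hedged sketch: the server URL, model name, output column, and the `batch` dataframe are placeholders, not part of this patch.

import tritonclient.grpc as grpcclient
from merlin.systems.triton import convert_df_to_triton_input, convert_triton_output_to_df

# `batch` is a cudf/pandas DataFrame holding the request features (assumed to exist)
inputs = convert_df_to_triton_input(batch.columns, batch)
client = grpcclient.InferenceServerClient("localhost:8001")
response = client.infer("ensemble_model", inputs)
result = convert_triton_output_to_df(["ordered_ids"], response)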
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import itertools - -try: - import cudf - import cupy as cp -except ImportError: - cudf = cp = None - -import numpy as np -import pandas as pd - -from merlin.core.dispatch import build_cudf_list_column, is_list_dtype -from merlin.dag import Supports - - -def convert_format(tensors, kind, target_kind): - """Converts data from format 'kind' to one of the formats specified in 'target_kind' - This allows us to convert data to/from dataframe representations for operators that - only support certain reprentations - """ - - # this is all much more difficult because of multihot columns, which don't have - # great representations in dicts of cpu/gpu arrays. we're representing multihots - # as tuples of (values, offsets) tensors in this case - but have to do work at - # each step in terms of converting. - if kind & target_kind: - return tensors, kind - - elif target_kind & Supports.GPU_DICT_ARRAY: - if kind == Supports.CPU_DICT_ARRAY: - return _convert_array(tensors, cp.array), Supports.GPU_DICT_ARRAY - elif kind == Supports.CPU_DATAFRAME: - return _pandas_to_array(tensors, False), Supports.GPU_DICT_ARRAY - elif kind == Supports.GPU_DATAFRAME: - return _cudf_to_array(tensors, False), Supports.GPU_DICT_ARRAY - - elif target_kind & Supports.CPU_DICT_ARRAY: - if kind == Supports.GPU_DICT_ARRAY: - return _convert_array(tensors, cp.asnumpy), Supports.CPU_DICT_ARRAY - elif kind == Supports.CPU_DATAFRAME: - return _pandas_to_array(tensors, True), Supports.CPU_DICT_ARRAY - elif kind == Supports.GPU_DATAFRAME: - return _cudf_to_array(tensors, True), Supports.CPU_DICT_ARRAY - - elif target_kind & Supports.GPU_DATAFRAME: - if kind == Supports.CPU_DATAFRAME: - return cudf.DataFrame(tensors), Supports.GPU_DATAFRAME - return _array_to_cudf(tensors), Supports.GPU_DATAFRAME - - elif target_kind & Supports.CPU_DATAFRAME: - if kind == Supports.GPU_DATAFRAME: - return tensors.to_pandas(), Supports.CPU_DATAFRAME - elif kind == Supports.CPU_DICT_ARRAY: - return _array_to_pandas(tensors), Supports.CPU_DATAFRAME - elif kind == Supports.GPU_DICT_ARRAY: - return _array_to_pandas(_convert_array(tensors, cp.asnumpy)), Supports.CPU_DATAFRAME - - raise ValueError("unsupported target for converting tensors", target_kind) - - -def _convert_array(tensors, converter): - output = {} - for name, tensor in tensors.items(): - if isinstance(tensor, tuple): - output[name] = tuple(converter(t) for t in tensor) - else: - output[name] = converter(tensor) - return output - - -def _array_to_pandas(tensors): - output = pd.DataFrame() - for name, tensor in tensors.items(): - if isinstance(tensor, tuple): - values, offsets = tensor - output[name] = [values[offsets[i] : offsets[i + 1]] for i in range(len(offsets) - 1)] - else: - output[name] = tensor - return output - - -def _array_to_cudf(tensors): - output = cudf.DataFrame() - for name, tensor in tensors.items(): - if isinstance(tensor, tuple): - output[name] = build_cudf_list_column(tensor[0], tensor[1].astype("int32")) - else: - output[name] = tensor - return output 
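The comment in convert_format above describes multi-hot columns as (values, offsets) tensor pairs. The following small sketch illustrates that layout and the row reconstruction performed by _array_to_pandas; the column names and values are made up.

import numpy as np
import pandas as pd

tensors = {
    "user_id": np.array([1, 2]),
    # multi-hot column: row 0 -> [10, 11, 12], row 1 -> [20]
    "item_ids": (np.array([10, 11, 12, 20]), np.array([0, 3, 4])),
}
values, offsets = tensors["item_ids"]
rows = [values[offsets[i] : offsets[i + 1]] for i in range(len(offsets) - 1)]
df = pd.DataFrame({"user_id": tensors["user_id"], "item_ids": rows})
print(df)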
- - -def _pandas_to_array(df, cpu=True): - array_type = np.array if cpu else cp.array - - output = {} - for name in df.columns: - col = df[name] - if pd.api.types.is_list_like(col.values[0]): - offsets = pd.Series([0]).append(col.map(len).cumsum()).values - if not cpu: - offsets = cp.array(offsets) - values = array_type(list(itertools.chain(*col))) - output[name] = (values, offsets) - else: - values = col.values - if not cpu: - values = cp.array(values) - output[name] = values - - return output - - -def _cudf_to_array(df, cpu=True): - output = {} - for name in df.columns: - col = df[name] - if is_list_dtype(col.dtype): - offsets = col._column.offsets.values_host if cpu else col._column.offsets.values - values = col.list.leaves.values_host if cpu else col.list.leaves.values - output[name] = (values, offsets) - else: - output[name] = col.values_host if cpu else col.values - - return output diff --git a/merlin/systems/triton/export.py b/merlin/systems/triton/export.py deleted file mode 100644 index bced3607c3c..00000000000 --- a/merlin/systems/triton/export.py +++ /dev/null @@ -1,795 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import json -import os -import warnings -from shutil import copyfile, copytree - -import numpy as np - -# this needs to be before any modules that import protobuf -os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python" - -from google.protobuf import text_format # noqa - -import merlin.systems.triton.model_config_pb2 as model_config # noqa -from merlin.core.dispatch import is_string_dtype # noqa -from merlin.dag import ColumnSelector # noqa -from merlin.schema import Tags # noqa - - -def export_tensorflow_ensemble( - model, - workflow, - name, - model_path, - label_columns=None, - sparse_max=None, - version=1, - nvtabular_backend="nvtabular", - cats=None, - conts=None, -): - """Creates an ensemble triton server model, with the first model being a nvtabular - preprocessing, and the second by a tensorflow savedmodel - - Parameters - ---------- - model: - The tensorflow model that should be served - workflow: - The nvtabular workflow used in preprocessing - name: - The base name of the various triton models - model_path: - The root path to write out files to - cats: - Names of the categorical columns - conts: - Names of the continuous columns - label_columns: - Labels in the dataset (will be removed from the dataset) - sparse_max: - Max length of the each row when the sparse data is converted to dense - version: - Version of the model - nvtabular_backend: "python" or "nvtabular" - The backend that will be used for inference in Triton. 
- """ - labels = ( - label_columns - or workflow.output_schema.apply(ColumnSelector(tags=[Tags.TARGET])).column_names - ) - workflow = workflow.remove_inputs(labels) - - # generate the TF saved model - tf_path = os.path.join(model_path, name + "_tf") - tf_config = export_tensorflow_model(model, name + "_tf", tf_path, version=version) - - # override the output dtype of the nvtabular model if necessary (fixes mismatches - # in dtypes between tf inputs and nvt outputs) - for column in tf_config.input: - tf_dtype = _triton_datatype_to_dtype(column.data_type) - nvt_col_name = column.name.replace("__values", "").replace("__nnzs", "") - col_schema = workflow.output_schema[nvt_col_name] - if col_schema.dtype and col_schema.dtype != tf_dtype: - warnings.warn( - f"TF model expects {tf_dtype} for column {col_schema.name}, but workflow " - f" is producing type {col_schema.dtype}. Overriding dtype in NVTabular workflow." - ) - workflow.output_schema.column_schemas[col_schema.name] = col_schema.with_dtype(tf_dtype) - - # generate the nvtabular triton model - preprocessing_path = os.path.join(model_path, name + "_nvt") - nvt_config = generate_nvtabular_model( - workflow, - name + "_nvt", - preprocessing_path, - sparse_max=sparse_max, - backend=nvtabular_backend, - cats=cats, - conts=conts, - ) - - # generate the triton ensemble - ensemble_path = os.path.join(model_path, name) - os.makedirs(ensemble_path, exist_ok=True) - os.makedirs(os.path.join(ensemble_path, str(version)), exist_ok=True) - _generate_ensemble_config(name, ensemble_path, nvt_config, tf_config) - - -def export_pytorch_ensemble( - model, - workflow, - sparse_max, - name, - model_path, - label_columns=None, - use_fix_dtypes=True, - version=1, - nvtabular_backend="python", - cats=None, - conts=None, -): - """Creates an ensemble triton server model, with the first model being a nvtabular - preprocessing, and the second by a pytorch savedmodel - - Parameters - ---------- - model: - The pytorch model that should be served - workflow: - The nvtabular workflow used in preprocessing - sparse_max: - Max length of the each row when the sparse data is converted to dense - name: - The base name of the various triton models - model_path: - The root path to write out files to - cats: - Names of the categorical columns - conts: - Names of the continuous columns - label_columns: - Labels in the dataset (will be removed from the dataset) - use_fix_dtypes: - Transformers4Rec is using fixed dtypes and this option is - whether to use fixed dtypes in inference or not - version: - Version of the model - nvtabular_backend: "python" or "nvtabular" - The backend that will be used for inference in Triton. - """ - labels = ( - label_columns - or workflow.output_schema.apply(ColumnSelector(tags=[Tags.TARGET])).column_names - ) - workflow = workflow.remove_inputs(labels) - - # generate the TF saved model - pt_path = os.path.join(model_path, name + "_pt") - pt_config = export_pytorch_model( - model, workflow, sparse_max, name + "_pt", pt_path, use_fix_dtypes, version=version - ) - - # override the output dtype of the nvtabular model if necessary (fixes mismatches - # in dtypes between tf inputs and nvt outputs) - for column in pt_config.input: - pt_dtype = _triton_datatype_to_dtype(column.data_type) - nvt_dtype = workflow.output_dtypes.get(column.name) - if nvt_dtype and nvt_dtype != pt_dtype: - warnings.warn( - f"PyTorch model expects {pt_dtype} for column {column.name}, but workflow " - f" is producing type {nvt_dtype}. Overriding dtype in NVTabular workflow." 
- ) - workflow.output_dtypes[column.name] = pt_dtype - - # generate the nvtabular triton model - preprocessing_path = os.path.join(model_path, name + "_nvt") - nvt_config = generate_nvtabular_model( - workflow, - name + "_nvt", - preprocessing_path, - backend=nvtabular_backend, - cats=cats, - conts=conts, - ) - - # generate the triton ensemble - ensemble_path = os.path.join(model_path, name) - os.makedirs(ensemble_path, exist_ok=True) - os.makedirs(os.path.join(ensemble_path, str(version)), exist_ok=True) - _generate_ensemble_config(name, ensemble_path, nvt_config, pt_config) - - -def export_hugectr_ensemble( - workflow, - hugectr_model_path, - hugectr_params, - name, - output_path, - version=1, - max_batch_size=None, - nvtabular_backend="python", - cats=None, - conts=None, - label_columns=None, -): - """Creates an ensemble hugectr server model, with the first model being a nvtabular - preprocessing, and the second by a hugectr savedmodel - - Parameters - ---------- - workflow: - The nvtabular workflow used in preprocessing - hugectr_model_path: - The path of the trained model files - hugectr_params: - HugeCTR specific parameters - name: - The base name of the various triton models - output_path: - The path where the models will be served - version: - The version of the model - max_batch_size: - Max batch size that Triton can receive - nvtabular_backend: "python" or "nvtabular" - The backend that will be used for inference in Triton. - cats: - Names of the categorical columns - conts: - Names of the continuous columns - label_columns: - Labels in the dataset (will be removed from the dataset) - """ - cats = cats or workflow.output_schema.apply(ColumnSelector(tags=[Tags.CATEGORICAL])) - conts = conts or workflow.output_schema.apply(ColumnSelector(tags=[Tags.CONTINUOUS])) - labels = label_columns or workflow.output_schema.apply(ColumnSelector(tags=[Tags.TARGET])) - - if not cats and not conts: - raise ValueError("Either cats or conts has to have a value.") - - workflow = workflow.remove_inputs(labels) - - # generate the nvtabular triton model - preprocessing_path = os.path.join(output_path, name + "_nvt") - nvt_config = generate_nvtabular_model( - workflow=workflow, - name=name + "_nvt", - output_path=preprocessing_path, - version=version, - output_model="hugectr", - max_batch_size=max_batch_size, - backend=nvtabular_backend, - cats=cats, - conts=conts, - ) - - hugectr_params["label_dim"] = len(labels) - if conts is None: - hugectr_params["des_feature_num"] = 0 - else: - hugectr_params["des_feature_num"] = len(conts) - - if cats is None: - hugectr_params["cat_feature_num"] = 0 - else: - hugectr_params["cat_feature_num"] = len(cats) - - # generate the HugeCTR saved model - hugectr_config = generate_hugectr_model( - trained_model_path=hugectr_model_path, - hugectr_params=hugectr_params, - name=name, - output_path=output_path, - version=version, - max_batch_size=max_batch_size, - ) - - # generate the triton ensemble - ensemble_path = os.path.join(output_path, name + "_ens") - os.makedirs(ensemble_path, exist_ok=True) - os.makedirs(os.path.join(ensemble_path, str(version)), exist_ok=True) - _generate_ensemble_config(name, ensemble_path, nvt_config, hugectr_config, "_ens") - - -def _generate_ensemble_config(name, output_path, nvt_config, nn_config, name_ext=""): - config = model_config.ModelConfig( - name=name + name_ext, platform="ensemble", max_batch_size=nvt_config.max_batch_size - ) - config.input.extend(nvt_config.input) - config.output.extend(nn_config.output) - - nn_input_cols = 
set(col.name for col in nn_config.input) - - nvt_step = model_config.ModelEnsembling.Step(model_name=nvt_config.name, model_version=-1) - for input_col in nvt_config.input: - nvt_step.input_map[input_col.name] = input_col.name - for output_col in nvt_config.output: - if output_col.name not in nn_input_cols: - warnings.warn( - f"Column {output_col.name} is being generated by NVTabular workflow " - f" but is unused in {nn_config.name} model" - ) - continue - nvt_step.output_map[output_col.name] = output_col.name + "_nvt" - - tf_step = model_config.ModelEnsembling.Step(model_name=nn_config.name, model_version=-1) - for input_col in nn_config.input: - tf_step.input_map[input_col.name] = input_col.name + "_nvt" - for output_col in nn_config.output: - tf_step.output_map[output_col.name] = output_col.name - - config.ensemble_scheduling.step.append(nvt_step) - config.ensemble_scheduling.step.append(tf_step) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def generate_nvtabular_model( - workflow, - name, - output_path, - version=1, - output_model=None, - max_batch_size=None, - sparse_max=None, - backend="python", - cats=None, - conts=None, -): - """converts a workflow to a triton mode - Parameters - ---------- - sparse_max: - Max length of the each row when the sparse data is converted to dense - cats: - Names of the categorical columns - conts: - Names of the continuous columns - """ - workflow.save(os.path.join(output_path, str(version), "workflow")) - config = _generate_nvtabular_config( - workflow, - name, - output_path, - output_model, - max_batch_size, - sparse_max=sparse_max, - backend=backend, - cats=cats, - conts=conts, - ) - - # copy the model file over. note that this isn't necessary with the c++ backend, but - # does provide us to use the python backend with just changing the 'backend' parameter - copyfile( - os.path.join(os.path.dirname(__file__), "workflow_model.py"), - os.path.join(output_path, str(version), "model.py"), - ) - - return config - - -def generate_hugectr_model( - trained_model_path, - hugectr_params, - name, - output_path, - version=1, - max_batch_size=None, -): - """converts a trained HugeCTR model to a triton mode""" - - out_path = os.path.join(output_path, name) - os.makedirs(os.path.join(output_path, name), exist_ok=True) - out_path_version = os.path.join(out_path, str(version)) - os.makedirs(out_path_version, exist_ok=True) - - config = _generate_hugectr_config(name, out_path, hugectr_params, max_batch_size=max_batch_size) - copytree(trained_model_path, out_path_version, dirs_exist_ok=True) - - return config - - -def _generate_nvtabular_config( - workflow, - name, - output_path, - output_model=None, - max_batch_size=None, - sparse_max=None, - backend="python", - cats=None, - conts=None, -): - """given a workflow generates the trton modelconfig proto object describing the inputs - and outputs to that workflow""" - config = model_config.ModelConfig(name=name, backend=backend, max_batch_size=max_batch_size) - - config.parameters["python_module"].string_value = "merlin.systems.triton.workflow_model" - config.parameters["output_model"].string_value = output_model if output_model else "" - - config.parameters["cats"].string_value = json.dumps(cats) if cats else "" - config.parameters["conts"].string_value = json.dumps(conts) if conts else "" - - if sparse_max: - # this assumes seq_length is same for each list column - config.parameters["sparse_max"].string_value = json.dumps(sparse_max) - - if 
output_model == "hugectr": - config.instance_group.append(model_config.ModelInstanceGroup(kind=2)) - - for column in workflow.output_node.input_columns.names: - dtype = workflow.input_dtypes[column] - config.input.append( - model_config.ModelInput(name=column, data_type=_convert_dtype(dtype), dims=[-1]) - ) - - config.output.append( - model_config.ModelOutput(name="DES", data_type=model_config.TYPE_FP32, dims=[-1]) - ) - - config.output.append( - model_config.ModelOutput(name="CATCOLUMN", data_type=model_config.TYPE_INT64, dims=[-1]) - ) - - config.output.append( - model_config.ModelOutput(name="ROWINDEX", data_type=model_config.TYPE_INT32, dims=[-1]) - ) - elif output_model == "pytorch": - for col_name, col_schema in workflow.input_schema.column_schemas.items(): - _add_model_param(col_schema, model_config.ModelInput, config.input) - - for col_name, col_schema in workflow.output_schema.column_schemas.items(): - _add_model_param( - col_schema, - model_config.ModelOutput, - config.output, - [-1, 1], - ) - else: - for col_name, col_schema in workflow.input_schema.column_schemas.items(): - _add_model_param(col_schema, model_config.ModelInput, config.input) - - for col_name, col_schema in workflow.output_schema.column_schemas.items(): - if sparse_max and col_name in sparse_max.keys(): - # this assumes max_sequence_length is equal for all output columns - dim = sparse_max[col_name] - _add_model_param(col_schema, model_config.ModelOutput, config.output, [-1, dim]) - else: - _add_model_param(col_schema, model_config.ModelOutput, config.output) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def export_tensorflow_model(model, name, output_path, version=1): - """Exports a TensorFlow model for serving with Triton - - Parameters - ---------- - model: - The tensorflow model that should be served - name: - The name of the triton model to export - output_path: - The path to write the exported model to - """ - tf_model_path = os.path.join(output_path, str(version), "model.savedmodel") - model.save(tf_model_path, include_optimizer=False) - config = model_config.ModelConfig( - name=name, backend="tensorflow", platform="tensorflow_savedmodel" - ) - - inputs, outputs = model.inputs, model.outputs - - if not inputs or not outputs: - signatures = getattr(model, "signatures", {}) or {} - default_signature = signatures.get("serving_default") - if not default_signature: - # roundtrip saved model to disk to generate signature if it doesn't exist - import tensorflow as tf - - reloaded = tf.keras.models.load_model(tf_model_path) - default_signature = reloaded.signatures["serving_default"] - - inputs = list(default_signature.structured_input_signature[1].values()) - outputs = list(default_signature.structured_outputs.values()) - - config.parameters["TF_GRAPH_TAG"].string_value = "serve" - config.parameters["TF_SIGNATURE_DEF"].string_value = "serving_default" - - for col in inputs: - config.input.append( - model_config.ModelInput( - name=col.name, data_type=_convert_dtype(col.dtype), dims=[-1, col.shape[1]] - ) - ) - - for col in outputs: - # this assumes the list columns are 1D tensors both for cats and conts - config.output.append( - model_config.ModelOutput( - name=col.name.split("/")[0], - data_type=_convert_dtype(col.dtype), - dims=[-1, col.shape[1]], - ) - ) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def export_pytorch_model( - model, workflow, 
sparse_max, name, output_path, use_fix_dtypes=True, version=1, backend="python" -): - """Exports a PyTorch model for serving with Triton - - Parameters - ---------- - model: - The PyTorch model that should be served - workflow: - The nvtabular workflow used in preprocessing - sparse_max: - Max length of the each row when the sparse data is converted to dense - name: - The name of the triton model to export - output_path: - The path to write the exported model to - use_fix_dtypes: - Transformers4Rec is using fixed dtypes and this option is - whether to use fixed dtypes in inference or not - version: - Version of the model - backend: "python" or "nvtabular" - The backend that will be used for inference in Triton. - """ - import cloudpickle - import torch - - os.makedirs(os.path.join(output_path, str(version)), exist_ok=True) - - pt_model_path = os.path.join(output_path, str(version), "model.pth") - torch.save(model.state_dict(), pt_model_path) - - pt_model_path = os.path.join(output_path, str(version), "model.pkl") - with open(pt_model_path, "wb") as o: - cloudpickle.dump(model, o) - - copyfile( - os.path.join(os.path.dirname(__file__), "model", "model_pt.py"), - os.path.join(output_path, str(version), "model.py"), - ) - - config = model_config.ModelConfig(name=name, backend=backend) - - for col_name, col_schema in workflow.output_schema.column_schemas.items(): - _add_model_param(col_schema, model_config.ModelInput, config.input) - - *_, last_layer = model.parameters() - dims = last_layer.shape[0] - dtype = last_layer.dtype - config.output.append( - model_config.ModelOutput( - name="output", data_type=_convert_pytorch_dtype(dtype), dims=[-1, dims] - ) - ) - - if sparse_max: - with open(os.path.join(output_path, str(version), "model_info.json"), "w") as o: - model_info = dict() - model_info["sparse_max"] = sparse_max - model_info["use_fix_dtypes"] = use_fix_dtypes - json.dump(model_info, o) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def _generate_pytorch_config(model, name, output_path, max_batch_size=None): - """given a workflow generates the trton modelconfig proto object describing the inputs - and outputs to that workflow""" - config = model_config.ModelConfig(name=name, backend="python", max_batch_size=max_batch_size) - - for col in model.inputs: - config.input.append( - model_config.ModelInput(name=col.name, data_type=_convert_dtype(col.dtype), dims=[-1]) - ) - - for col in model.outputs: - config.output.append( - model_config.ModelOutput( - name=col.name.split("/")[0], data_type=_convert_dtype(col.dtype), dims=[-1] - ) - ) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def _generate_hugectr_config(name, output_path, hugectr_params, max_batch_size=None): - config = model_config.ModelConfig(name=name, backend="hugectr", max_batch_size=max_batch_size) - - config.input.append( - model_config.ModelInput(name="DES", data_type=model_config.TYPE_FP32, dims=[-1]) - ) - - config.input.append( - model_config.ModelInput(name="CATCOLUMN", data_type=model_config.TYPE_INT64, dims=[-1]) - ) - - config.input.append( - model_config.ModelInput(name="ROWINDEX", data_type=model_config.TYPE_INT32, dims=[-1]) - ) - - for i in range(hugectr_params["n_outputs"]): - config.output.append( - model_config.ModelOutput( - name="OUTPUT" + str(i), data_type=model_config.TYPE_FP32, dims=[-1] - ) - ) - - 
config.instance_group.append(model_config.ModelInstanceGroup(gpus=[0], count=1, kind=1)) - - config_hugectr = model_config.ModelParameter(string_value=hugectr_params["config"]) - config.parameters["config"].CopyFrom(config_hugectr) - - gpucache_val = hugectr_params.get("gpucache", "true") - - gpucache = model_config.ModelParameter(string_value=gpucache_val) - config.parameters["gpucache"].CopyFrom(gpucache) - - gpucacheper_val = str(hugectr_params.get("gpucacheper_val", "0.5")) - - gpucacheper = model_config.ModelParameter(string_value=gpucacheper_val) - config.parameters["gpucacheper"].CopyFrom(gpucacheper) - - label_dim = model_config.ModelParameter(string_value=str(hugectr_params["label_dim"])) - config.parameters["label_dim"].CopyFrom(label_dim) - - slots = model_config.ModelParameter(string_value=str(hugectr_params["slots"])) - config.parameters["slots"].CopyFrom(slots) - - des_feature_num = model_config.ModelParameter( - string_value=str(hugectr_params["des_feature_num"]) - ) - config.parameters["des_feature_num"].CopyFrom(des_feature_num) - - cat_feature_num = model_config.ModelParameter( - string_value=str(hugectr_params["cat_feature_num"]) - ) - config.parameters["cat_feature_num"].CopyFrom(cat_feature_num) - - max_nnz = model_config.ModelParameter(string_value=str(hugectr_params["max_nnz"])) - config.parameters["max_nnz"].CopyFrom(max_nnz) - - embedding_vector_size = model_config.ModelParameter( - string_value=str(hugectr_params["embedding_vector_size"]) - ) - config.parameters["embedding_vector_size"].CopyFrom(embedding_vector_size) - - embeddingkey_long_type_val = hugectr_params.get("embeddingkey_long_type", "true") - - embeddingkey_long_type = model_config.ModelParameter(string_value=embeddingkey_long_type_val) - config.parameters["embeddingkey_long_type"].CopyFrom(embeddingkey_long_type) - - with open(os.path.join(output_path, "config.pbtxt"), "w") as o: - text_format.PrintMessage(config, o) - return config - - -def _add_model_param(col_schema, paramclass, params, dims=None): - dims = dims if dims is not None else [-1, 1] - if col_schema.is_list and col_schema.is_ragged: - params.append( - paramclass( - name=col_schema.name + "__values", - data_type=_convert_dtype(col_schema.dtype), - dims=dims, - ) - ) - params.append( - paramclass( - name=col_schema.name + "__nnzs", data_type=model_config.TYPE_INT64, dims=dims - ) - ) - else: - params.append( - paramclass(name=col_schema.name, data_type=_convert_dtype(col_schema.dtype), dims=dims) - ) - - -def _convert_dtype(dtype): - """converts a dtype to the appropriate triton proto type""" - - if dtype and not isinstance(dtype, str): - dtype_name = dtype.name if hasattr(dtype, "name") else dtype.__name__ - else: - dtype_name = dtype - - dtypes = { - "float64": model_config.TYPE_FP64, - "float32": model_config.TYPE_FP32, - "float16": model_config.TYPE_FP16, - "int64": model_config.TYPE_INT64, - "int32": model_config.TYPE_INT32, - "int16": model_config.TYPE_INT16, - "int8": model_config.TYPE_INT8, - "uint64": model_config.TYPE_UINT64, - "uint32": model_config.TYPE_UINT32, - "uint16": model_config.TYPE_UINT16, - "uint8": model_config.TYPE_UINT8, - "bool": model_config.TYPE_BOOL, - } - - if is_string_dtype(dtype): - return model_config.TYPE_STRING - elif dtype_name in dtypes: - return dtypes[dtype_name] - else: - raise ValueError(f"Can't convert {dtype} to a Triton dtype") - - -def _convert_pytorch_dtype(dtype): - """converts a dtype to the appropriate triton proto type""" - - import torch - - dtypes = { - torch.float64: 
model_config.TYPE_FP64, - torch.float32: model_config.TYPE_FP32, - torch.float16: model_config.TYPE_FP16, - torch.int64: model_config.TYPE_INT64, - torch.int32: model_config.TYPE_INT32, - torch.int16: model_config.TYPE_INT16, - torch.int8: model_config.TYPE_INT8, - torch.uint8: model_config.TYPE_UINT8, - torch.bool: model_config.TYPE_BOOL, - } - - if is_string_dtype(dtype): - return model_config.TYPE_STRING - elif dtype in dtypes: - return dtypes[dtype] - else: - raise ValueError(f"Can't convert dtype {dtype})") - - -def _convert_string2pytorch_dtype(dtype): - """converts a dtype to the appropriate torch type""" - - import torch - - if not isinstance(dtype, str): - dtype_name = dtype.name - else: - dtype_name = dtype - - dtypes = { - "TYPE_FP64": torch.float64, - "TYPE_FP32": torch.float32, - "TYPE_FP16": torch.float16, - "TYPE_INT64": torch.int64, - "TYPE_INT32": torch.int32, - "TYPE_INT16": torch.int16, - "TYPE_INT8": torch.int8, - "TYPE_UINT8": torch.uint8, - "TYPE_BOOL": torch.bool, - } - - if is_string_dtype(dtype): - return model_config.TYPE_STRING - elif dtype_name in dtypes: - return dtypes[dtype_name] - else: - raise ValueError(f"Can't convert dtype {dtype})") - - -def _triton_datatype_to_dtype(data_type): - """the reverse of _convert_dtype: converts a triton proto data_type to a numpy dtype""" - name = model_config._DATATYPE.values[data_type].name[5:].lower() - if name == "string": - return np.dtype("str") - return np.dtype(name.replace("fp", "float")) diff --git a/merlin/systems/triton/model_config.proto b/merlin/systems/triton/model_config.proto deleted file mode 100644 index 61e00cd8268..00000000000 --- a/merlin/systems/triton/model_config.proto +++ /dev/null @@ -1,1660 +0,0 @@ -// Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Copyright (c) 2018, TensorFlow Authors. All rights reserved. - -syntax = "proto3"; - -package inference; - -//@@.. cpp:namespace:: inference - -//@@ -//@@.. cpp:enum:: DataType -//@@ -//@@ Data types supported for input and output tensors. 
-//@@ -enum DataType { - //@@ .. cpp:enumerator:: DataType::INVALID = 0 - TYPE_INVALID = 0; - - //@@ .. cpp:enumerator:: DataType::BOOL = 1 - TYPE_BOOL = 1; - - //@@ .. cpp:enumerator:: DataType::UINT8 = 2 - TYPE_UINT8 = 2; - //@@ .. cpp:enumerator:: DataType::UINT16 = 3 - TYPE_UINT16 = 3; - //@@ .. cpp:enumerator:: DataType::UINT32 = 4 - TYPE_UINT32 = 4; - //@@ .. cpp:enumerator:: DataType::UINT64 = 5 - TYPE_UINT64 = 5; - - //@@ .. cpp:enumerator:: DataType::INT8 = 6 - TYPE_INT8 = 6; - //@@ .. cpp:enumerator:: DataType::INT16 = 7 - TYPE_INT16 = 7; - //@@ .. cpp:enumerator:: DataType::INT32 = 8 - TYPE_INT32 = 8; - //@@ .. cpp:enumerator:: DataType::INT64 = 9 - TYPE_INT64 = 9; - - //@@ .. cpp:enumerator:: DataType::FP16 = 10 - TYPE_FP16 = 10; - //@@ .. cpp:enumerator:: DataType::FP32 = 11 - TYPE_FP32 = 11; - //@@ .. cpp:enumerator:: DataType::FP64 = 12 - TYPE_FP64 = 12; - - //@@ .. cpp:enumerator:: DataType::STRING = 13 - TYPE_STRING = 13; -} - -//@@ -//@@ .. cpp:var:: message ModelRateLimiter -//@@ -//@@ The specifications required by the rate limiter to properly -//@@ schedule the inference requests across the different models -//@@ and their instances. -//@@ -message ModelRateLimiter -{ - //@@ .. cpp:var:: message Resource - //@@ - //@@ The resource property. - //@@ - message Resource - { - //@@ .. cpp:var:: string name - //@@ - //@@ The name associated with the resource. - //@@ - string name = 1; - - //@@ .. cpp:var:: bool global - //@@ - //@@ Whether or not the resource is global. If true then the resource - //@@ is assumed to be shared among the devices otherwise specified - //@@ count of the resource is assumed for each device associated - //@@ with the instance. - //@@ - bool global = 2; - - //@@ .. cpp:var:: uint32 count - //@@ - //@@ The number of resources required for the execution of the model - //@@ instance. - //@@ - uint32 count = 3; - } - - //@@ .. cpp:var:: Resource resources (repeated) - //@@ - //@@ The resources required to execute the request on a model instance. - //@@ Resources are just names with a corresponding count. The execution - //@@ of the instance will be blocked until the specified resources are - //@@ available. By default an instance uses no rate-limiter resources. - //@@ - repeated Resource resources = 1; - - //@@ .. cpp:var:: uint32 priority - //@@ - //@@ The weighting value to be used for prioritizing across instances. - //@@ An instance with priority 2 will be given 1/2 the number of - //@@ scheduling chances as an instance_group with priority 1. The - //@@ default priority is 1. - //@@ - uint32 priority = 2; -} - -//@@ -//@@.. cpp:var:: message ModelInstanceGroup -//@@ -//@@ A group of one or more instances of a model and resources made -//@@ available for those instances. -//@@ -message ModelInstanceGroup -{ - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ Kind of this instance group. - //@@ - enum Kind { - //@@ .. cpp:enumerator:: Kind::KIND_AUTO = 0 - //@@ - //@@ This instance group represents instances that can run on either - //@@ CPU or GPU. If all GPUs listed in 'gpus' are available then - //@@ instances will be created on GPU(s), otherwise instances will - //@@ be created on CPU. - //@@ - KIND_AUTO = 0; - - //@@ .. cpp:enumerator:: Kind::KIND_GPU = 1 - //@@ - //@@ This instance group represents instances that must run on the - //@@ GPU. - //@@ - KIND_GPU = 1; - - //@@ .. cpp:enumerator:: Kind::KIND_CPU = 2 - //@@ - //@@ This instance group represents instances that must run on the - //@@ CPU. - //@@ - KIND_CPU = 2; - - //@@ .. 
cpp:enumerator:: Kind::KIND_MODEL = 3 - //@@ - //@@ This instance group represents instances that should run on the - //@@ CPU and/or GPU(s) as specified by the model or backend itself. - //@@ The inference server will not override the model/backend - //@@ settings. - //@@ Currently, this option is supported only for Tensorflow models. - //@@ - KIND_MODEL = 3; - } - - //@@ .. cpp:var:: string name - //@@ - //@@ Optional name of this group of instances. If not specified the - //@@ name will be formed as _. The name of - //@@ individual instances will be further formed by a unique instance - //@@ number and GPU index: - //@@ - string name = 1; - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this instance group. Default is KIND_AUTO. If - //@@ KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and - //@@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid - //@@ and 'gpu' cannot be specified. - //@@ - Kind kind = 4; - - //@@ .. cpp:var:: int32 count - //@@ - //@@ For a group assigned to GPU, the number of instances created for - //@@ each GPU listed in 'gpus'. For a group assigned to CPU the number - //@@ of instances created. Default is 1. - int32 count = 2; - - //@@ .. cpp:var:: ModelRateLimiter rate_limiter - //@@ - //@@ The rate limiter specific settings to be associated with this - //@@ instance group. Optional, if not specified no rate limiting - //@@ will be applied to this instance group. - //@@ - ModelRateLimiter rate_limiter = 6; - - //@@ .. cpp:var:: int32 gpus (repeated) - //@@ - //@@ GPU(s) where instances should be available. For each GPU listed, - //@@ 'count' instances of the model will be available. Setting 'gpus' - //@@ to empty (or not specifying at all) is equivalent to listing all - //@@ available GPUs. - //@@ - repeated int32 gpus = 3; - - //@@ .. cpp:var:: string profile (repeated) - //@@ - //@@ For TensorRT models containing multiple optimization profile, this - //@@ parameter specifies a set of optimization profiles available to this - //@@ instance group. The inference server will choose the optimal profile - //@@ based on the shapes of the input tensors. This field should lie - //@@ between 0 and - 1 - //@@ and be specified only for TensorRT backend, otherwise an error will - //@@ be generated. If not specified, the server will select the first - //@@ optimization profile by default. - //@@ - repeated string profile = 5; -} - -//@@ -//@@.. cpp:var:: message ModelTensorReshape -//@@ -//@@ Reshape specification for input and output tensors. -//@@ -message ModelTensorReshape -{ - //@@ .. cpp:var:: int64 shape (repeated) - //@@ - //@@ The shape to use for reshaping. - //@@ - repeated int64 shape = 1; -} - -//@@ -//@@.. cpp:var:: message ModelInput -//@@ -//@@ An input required by the model. -//@@ -message ModelInput -{ - //@@ - //@@ .. cpp:enum:: Format - //@@ - //@@ The format for the input. - //@@ - enum Format { - //@@ .. cpp:enumerator:: Format::FORMAT_NONE = 0 - //@@ - //@@ The input has no specific format. This is the default. - //@@ - FORMAT_NONE = 0; - - //@@ .. cpp:enumerator:: Format::FORMAT_NHWC = 1 - //@@ - //@@ HWC image format. Tensors with this format require 3 dimensions - //@@ if the model does not support batching (max_batch_size = 0) or 4 - //@@ dimensions if the model does support batching (max_batch_size - //@@ >= 1). In either case the 'dims' below should only specify the - //@@ 3 non-batch dimensions (i.e. HWC or CHW). - //@@ - FORMAT_NHWC = 1; - - //@@ .. 
cpp:enumerator:: Format::FORMAT_NCHW = 2 - //@@ - //@@ CHW image format. Tensors with this format require 3 dimensions - //@@ if the model does not support batching (max_batch_size = 0) or 4 - //@@ dimensions if the model does support batching (max_batch_size - //@@ >= 1). In either case the 'dims' below should only specify the - //@@ 3 non-batch dimensions (i.e. HWC or CHW). - //@@ - FORMAT_NCHW = 2; - } - - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the input. - //@@ - string name = 1; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The data-type of the input. - //@@ - DataType data_type = 2; - - //@@ .. cpp:var:: Format format - //@@ - //@@ The format of the input. Optional. - //@@ - Format format = 3; - - //@@ .. cpp:var:: int64 dims (repeated) - //@@ - //@@ The dimensions/shape of the input tensor that must be provided - //@@ when invoking the inference API for this model. - //@@ - repeated int64 dims = 4; - - //@@ .. cpp:var:: ModelTensorReshape reshape - //@@ - //@@ The shape expected for this input by the backend. The input will - //@@ be reshaped to this before being presented to the backend. The - //@@ reshape must have the same number of elements as the input shape - //@@ specified by 'dims'. Optional. - //@@ - ModelTensorReshape reshape = 5; - - //@@ .. cpp:var:: bool is_shape_tensor - //@@ - //@@ Whether or not the input is a shape tensor to the model. This field - //@@ is currently supported only for the TensorRT model. An error will be - //@@ generated if this specification does not comply with underlying - //@@ model. - //@@ - bool is_shape_tensor = 6; - - //@@ .. cpp:var:: bool allow_ragged_batch - //@@ - //@@ Whether or not the input is allowed to be "ragged" in a dynamically - //@@ created batch. Default is false indicating that two requests will - //@@ only be batched if this tensor has the same shape in both requests. - //@@ True indicates that two requests can be batched even if this tensor - //@@ has a different shape in each request. A true value is currently - //@@ supported only for custom models. - //@@ - bool allow_ragged_batch = 7; -} - -//@@ -//@@.. cpp:var:: message ModelOutput -//@@ -//@@ An output produced by the model. -//@@ -message ModelOutput -{ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the output. - //@@ - string name = 1; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The data-type of the output. - //@@ - DataType data_type = 2; - - //@@ .. cpp:var:: int64 dims (repeated) - //@@ - //@@ The dimensions/shape of the output tensor. - //@@ - repeated int64 dims = 3; - - //@@ .. cpp:var:: ModelTensorReshape reshape - //@@ - //@@ The shape produced for this output by the backend. The output will - //@@ be reshaped from this to the shape specified in 'dims' before being - //@@ returned in the inference response. The reshape must have the same - //@@ number of elements as the output shape specified by 'dims'. Optional. - //@@ - ModelTensorReshape reshape = 5; - - //@@ .. cpp:var:: string label_filename - //@@ - //@@ The label file associated with this output. Should be specified only - //@@ for outputs that represent classifications. Optional. - //@@ - string label_filename = 4; - - - //@@ .. cpp:var:: bool is_shape_tensor - //@@ - //@@ Whether or not the output is a shape tensor to the model. This field - //@@ is currently supported only for the TensorRT model. An error will be - //@@ generated if this specification does not comply with underlying - //@@ model. - //@@ - bool is_shape_tensor = 6; -} - -//@@ .. 
cpp:var:: message BatchInput -//@@ -//@@ A batch input is an additional input that must be added by -//@@ the backend based on all the requests in a batch. -//@@ -message BatchInput -{ - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ The kind of the batch input. - //@@ - enum Kind { - //@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0 - //@@ - //@@ The element count of the 'source_input' will be added as - //@@ input with shape [1]. - //@@ - BATCH_ELEMENT_COUNT = 0; - - //@@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1 - //@@ - //@@ The accumulated element count of the 'source_input' will be - //@@ added as input with shape [1]. For example, if there is a - //@@ batch of two request, each with 2 elements, an input of value - //@@ 2 will be added to the first request, and an input of value - //@@ 4 will be added to the second request. - //@@ - BATCH_ACCUMULATED_ELEMENT_COUNT = 1; - - //@@ .. cpp:enumerator:: - //@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2 - //@@ - //@@ The accumulated element count of the 'source_input' will be - //@@ added as input with shape [1], except for the first request - //@@ in the batch. For the first request in the batch, the input - //@@ will have shape [2] where the first element is value 0. - //@@ - BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2; - - //@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3 - //@@ - //@@ Among the requests in the batch, the max element count of the - //@@ 'source_input' will be added as input with shape - //@@ [max_element_count] for the first request in the batch. - //@@ For other requests, such input will be with shape [0]. - //@@ The data of the tensor will be uninitialized. - //@@ - BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3; - } - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this batch input. - //@@ - Kind kind = 1; - - //@@ .. cpp:var:: string target_name (repeated) - //@@ - //@@ The name of the model inputs that the backend will create - //@@ for this batch input. - //@@ - repeated string target_name = 2; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The input's datatype. The data type can be TYPE_INT32 or - //@@ TYPE_FP32. - //@@ - DataType data_type = 3; - - //@@ .. cpp:var:: string source_input (repeated) - //@@ - //@@ The backend derives the value for each batch input from one or - //@@ more other inputs. 'source_input' gives the names of those - //@@ inputs. - //@@ - repeated string source_input = 4; -} - -//@@.. cpp:var:: message BatchOutput -//@@ -//@@ A batch output is an output produced by the model that must be handled -//@@ differently by the backend based on all the requests in a batch. -//@@ -message BatchOutput -{ - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ The kind of the batch output. - //@@ - enum Kind { - //@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0 - //@@ - //@@ The output should be scattered according to the shape of - //@@ 'source_input'. The dynamic dimension of the output will - //@@ be set to the value of the same dimension in the input. - //@@ - BATCH_SCATTER_WITH_INPUT_SHAPE = 0; - } - - //@@ .. cpp:var:: string target_name (repeated) - //@@ - //@@ The name of the outputs to be produced by this batch output - //@@ specification. - //@@ - repeated string target_name = 1; - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this batch output. - //@@ - Kind kind = 2; - - //@@ .. cpp:var:: string source_input (repeated) - //@@ - //@@ The backend derives each batch output from one or more inputs. 
- //@@ 'source_input' gives the names of those inputs. - //@@ - repeated string source_input = 3; -} - -//@@ -//@@.. cpp:var:: message ModelVersionPolicy -//@@ -//@@ Policy indicating which versions of a model should be made -//@@ available by the inference server. -//@@ -message ModelVersionPolicy -{ - //@@ .. cpp:var:: message Latest - //@@ - //@@ Serve only the latest version(s) of a model. This is - //@@ the default policy. - //@@ - message Latest - { - //@@ .. cpp:var:: uint32 num_versions - //@@ - //@@ Serve only the 'num_versions' highest-numbered versions. T - //@@ The default value of 'num_versions' is 1, indicating that by - //@@ default only the single highest-number version of a - //@@ model will be served. - //@@ - uint32 num_versions = 1; - } - - //@@ .. cpp:var:: message All - //@@ - //@@ Serve all versions of the model. - //@@ - message All {} - - //@@ .. cpp:var:: message Specific - //@@ - //@@ Serve only specific versions of the model. - //@@ - message Specific - { - //@@ .. cpp:var:: int64 versions (repeated) - //@@ - //@@ The specific versions of the model that will be served. - //@@ - repeated int64 versions = 1; - } - - //@@ .. cpp:var:: oneof policy_choice - //@@ - //@@ Each model must implement only a single version policy. The - //@@ default policy is 'Latest'. - //@@ - oneof policy_choice - { - //@@ .. cpp:var:: Latest latest - //@@ - //@@ Serve only latest version(s) of the model. - //@@ - Latest latest = 1; - - //@@ .. cpp:var:: All all - //@@ - //@@ Serve all versions of the model. - //@@ - All all = 2; - - //@@ .. cpp:var:: Specific specific - //@@ - //@@ Serve only specific version(s) of the model. - //@@ - Specific specific = 3; - } -} - -//@@ -//@@.. cpp:var:: message ModelOptimizationPolicy -//@@ -//@@ Optimization settings for a model. These settings control if/how a -//@@ model is optimized and prioritized by the backend framework when -//@@ it is loaded. -//@@ -message ModelOptimizationPolicy -{ - //@@ - //@@ .. cpp:var:: message Graph - //@@ - //@@ Enable generic graph optimization of the model. If not specified - //@@ the framework's default level of optimization is used. Supports - //@@ TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow - //@@ causes XLA to be enabled/disabled for the model. For Onnx defaults - //@@ to enabling all optimizations, -1 enables only basic optimizations, - //@@ +1 enables only basic and extended optimizations. - //@@ - message Graph - { - //@@ .. cpp:var:: int32 level - //@@ - //@@ The optimization level. Defaults to 0 (zero) if not specified. - //@@ - //@@ - -1: Disabled - //@@ - 0: Framework default - //@@ - 1+: Enable optimization level (greater values indicate - //@@ higher optimization levels) - //@@ - int32 level = 1; - } - - //@@ - //@@ .. cpp:enum:: ModelPriority - //@@ - //@@ Model priorities. A model will be given scheduling and execution - //@@ preference over models at lower priorities. Current model - //@@ priorities only work for TensorRT models. - //@@ - enum ModelPriority { - //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0 - //@@ - //@@ The default model priority. - //@@ - PRIORITY_DEFAULT = 0; - - //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1 - //@@ - //@@ The maximum model priority. - //@@ - PRIORITY_MAX = 1; - - //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2 - //@@ - //@@ The minimum model priority. - //@@ - PRIORITY_MIN = 2; - } - - //@@ - //@@ .. cpp:var:: message Cuda - //@@ - //@@ CUDA-specific optimization settings. 
- //@@ - message Cuda - { - //@@ .. cpp:var:: message GraphSpec - //@@ - //@@ Specification of the CUDA graph to be captured. - //@@ - message GraphSpec - { - //@@ .. cpp:var:: message Dims - //@@ - //@@ Specification of tensor dimension. - //@@ - message Shape - { - //@@ .. cpp:var:: int64 dim (repeated) - //@@ - //@@ The dimension. - //@@ - repeated int64 dim = 1; - } - - message LowerBound - { - //@@ .. cpp:var:: int32 batch_size - //@@ - //@@ The batch size of the CUDA graph. If 'max_batch_size' is 0, - //@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must - //@@ be set to value between 1 and 'max_batch_size'. - //@@ - int32 batch_size = 1; - - //@@ .. cpp:var:: map input - //@@ - //@@ The specification of the inputs. 'Shape' is the shape of - //@@ the input without batching dimension. - //@@ - map input = 2; - } - - //@@ .. cpp:var:: int32 batch_size - //@@ - //@@ The batch size of the CUDA graph. If 'max_batch_size' is 0, - //@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must - //@@ be set to value between 1 and 'max_batch_size'. - //@@ - int32 batch_size = 1; - - //@@ .. cpp:var:: map input - //@@ - //@@ The specification of the inputs. 'Shape' is the shape of the - //@@ input without batching dimension. - //@@ - map input = 2; - - //@@ .. cpp:var:: LowerBound graph_lower_bound - //@@ - //@@ Specify the lower bound of the CUDA graph. Optional. - //@@ If specified, the graph can be used for input shapes and - //@@ batch sizes that are in closed interval between the lower - //@@ bound specification and graph specification. For dynamic - //@@ shape model, this allows CUDA graphs to be launched - //@@ frequently without capturing all possible shape combinations. - //@@ However, using graph for shape combinations different from - //@@ the one used for capturing introduces uninitialized data for - //@@ execution and it may distort the inference result if - //@@ the model is sensitive to uninitialized data. - //@@ - LowerBound graph_lower_bound = 3; - } - - //@@ .. cpp:var:: bool graphs - //@@ - //@@ Use CUDA graphs API to capture model operations and execute - //@@ them more efficiently. Default value is false. - //@@ Currently only recognized by TensorRT backend. - //@@ - bool graphs = 1; - - //@@ .. cpp:var:: bool busy_wait_events - //@@ - //@@ Use busy-waiting to synchronize CUDA events to achieve minimum - //@@ latency from event complete to host thread to be notified, with - //@@ the cost of high CPU load. Default value is false. - //@@ Currently only recognized by TensorRT backend. - //@@ - bool busy_wait_events = 2; - - //@@ .. cpp:var:: GraphSpec graph_spec (repeated) - //@@ - //@@ Specification of the CUDA graph to be captured. If not specified - //@@ and 'graphs' is true, the default CUDA graphs will be captured - //@@ based on model settings. - //@@ Currently only recognized by TensorRT backend. - //@@ - repeated GraphSpec graph_spec = 3; - } - - //@@ - //@@ .. cpp:var:: message ExecutionAccelerators - //@@ - //@@ Specify the preferred execution accelerators to be used to execute - //@@ the model. Currently only recognized by ONNX Runtime backend and - //@@ TensorFlow backend. - //@@ - //@@ For ONNX Runtime backend, it will deploy the model with the execution - //@@ accelerators by priority, the priority is determined based on the - //@@ order that they are set, i.e. the provider at the front has highest - //@@ priority. 
Overall, the priority will be in the following order: - //@@ (if instance is on GPU) - //@@ CUDA Execution Provider (if instance is on GPU) - //@@ - //@@ Default CPU Execution Provider - //@@ - message ExecutionAccelerators - { - //@@ - //@@ .. cpp:var:: message Accelerator - //@@ - //@@ Specify the accelerator to be used to execute the model. - //@@ Accelerator with the same name may accept different parameters - //@@ depending on the backends. - //@@ - message Accelerator - { - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the execution accelerator. - //@@ - string name = 1; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Additional parameters used to configure the accelerator. - //@@ - map parameters = 2; - } - - //@@ .. cpp:var:: Accelerator gpu_execution_accelerator (repeated) - //@@ - //@@ The preferred execution provider to be used if the model instance - //@@ is deployed on GPU. - //@@ - //@@ For ONNX Runtime backend, possible value is "tensorrt" as name, - //@@ and no parameters are required. - //@@ - //@@ For TensorFlow backend, possible values are "tensorrt", - //@@ "auto_mixed_precision", "gpu_io". - //@@ - //@@ For "tensorrt", the following parameters can be specified: - //@@ "precision_mode": The precision used for optimization. - //@@ Allowed values are "FP32" and "FP16". Default value is "FP32". - //@@ - //@@ "max_cached_engines": The maximum number of cached TensorRT - //@@ engines in dynamic TensorRT ops. Default value is 100. - //@@ - //@@ "minimum_segment_size": The smallest model subgraph that will - //@@ be considered for optimization by TensorRT. Default value is 3. - //@@ - //@@ "max_workspace_size_bytes": The maximum GPU memory the model - //@@ can use temporarily during execution. Default value is 1GB. - //@@ - //@@ For "auto_mixed_precision", no parameters are required. If set, - //@@ the model will try to use FP16 for better performance. - //@@ This optimization can not be set with "tensorrt". - //@@ - //@@ For "gpu_io", no parameters are required. If set, the model will - //@@ be executed using TensorFlow Callable API to set input and output - //@@ tensors in GPU memory if possible, which can reduce data transfer - //@@ overhead if the model is used in ensemble. However, the Callable - //@@ object will be created on model creation and it will request all - //@@ outputs for every model execution, which may impact the - //@@ performance if a request does not require all outputs. This - //@@ optimization will only take affect if the model instance is - //@@ created with KIND_GPU. - //@@ - repeated Accelerator gpu_execution_accelerator = 1; - - //@@ .. cpp:var:: Accelerator cpu_execution_accelerator (repeated) - //@@ - //@@ The preferred execution provider to be used if the model instance - //@@ is deployed on CPU. - //@@ - //@@ For ONNX Runtime backend, possible value is "openvino" as name, - //@@ and no parameters are required. - //@@ - repeated Accelerator cpu_execution_accelerator = 2; - } - - //@@ - //@@ .. cpp:var:: message PinnedMemoryBuffer - //@@ - //@@ Specify whether to use a pinned memory buffer when transferring data - //@@ between non-pinned system memory and GPU memory. Using a pinned - //@@ memory buffer for system from/to GPU transfers will typically provide - //@@ increased performance. 
For example, in the common use case where the - //@@ request provides inputs and delivers outputs via non-pinned system - //@@ memory, if the model instance accepts GPU IOs, the inputs will be - //@@ processed by two copies: from non-pinned system memory to pinned - //@@ memory, and from pinned memory to GPU memory. Similarly, pinned - //@@ memory will be used for delivering the outputs. - //@@ - message PinnedMemoryBuffer - { - //@@ .. cpp:var:: bool enable - //@@ - //@@ Use pinned memory buffer. Default is true. - //@@ - bool enable = 1; - } - - //@@ .. cpp:var:: Graph graph - //@@ - //@@ The graph optimization setting for the model. Optional. - //@@ - Graph graph = 1; - - //@@ .. cpp:var:: ModelPriority priority - //@@ - //@@ The priority setting for the model. Optional. - //@@ - ModelPriority priority = 2; - - //@@ .. cpp:var:: Cuda cuda - //@@ - //@@ CUDA-specific optimization settings. Optional. - //@@ - Cuda cuda = 3; - - //@@ .. cpp:var:: ExecutionAccelerators execution_accelerators - //@@ - //@@ The accelerators used for the model. Optional. - //@@ - ExecutionAccelerators execution_accelerators = 4; - - //@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory - //@@ - //@@ Use pinned memory buffer when the data transfer for inputs - //@@ is between GPU memory and non-pinned system memory. - //@@ Default is true. - //@@ - PinnedMemoryBuffer input_pinned_memory = 5; - - //@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory - //@@ - //@@ Use pinned memory buffer when the data transfer for outputs - //@@ is between GPU memory and non-pinned system memory. - //@@ Default is true. - //@@ - PinnedMemoryBuffer output_pinned_memory = 6; -} - -//@@ -//@@.. cpp:var:: message ModelQueuePolicy -//@@ -//@@ Queue policy for inference requests. -//@@ -message ModelQueuePolicy -{ - //@@ - //@@ .. cpp:enum:: TimeoutAction - //@@ - //@@ The action applied to timed-out requests. - //@@ - enum TimeoutAction { - //@@ .. cpp:enumerator:: Action::REJECT = 0 - //@@ - //@@ Reject the request and return error message accordingly. - //@@ - REJECT = 0; - - //@@ .. cpp:enumerator:: Action::DELAY = 1 - //@@ - //@@ Delay the request until all other requests at the same - //@@ (or higher) priority levels that have not reached their timeouts - //@@ are processed. A delayed request will eventually be processed, - //@@ but may be delayed indefinitely due to newly arriving requests. - //@@ - DELAY = 1; - } - - //@@ - //@@ .. cpp:var:: TimeoutAction timeout_action - //@@ - //@@ The action applied to timed-out request. - //@@ The default action is REJECT. - //@@ - TimeoutAction timeout_action = 1; - - //@@ - //@@ .. cpp:var:: uint64 default_timeout_microseconds - //@@ - //@@ The default timeout for every request, in microseconds. - //@@ The default value is 0 which indicates that no timeout is set. - //@@ - uint64 default_timeout_microseconds = 2; - - //@@ - //@@ .. cpp:var:: bool allow_timeout_override - //@@ - //@@ Whether individual request can override the default timeout value. - //@@ When true, individual requests can set a timeout that is less than - //@@ the default timeout value but may not increase the timeout. - //@@ The default value is false. - //@@ - bool allow_timeout_override = 3; - - //@@ - //@@ .. cpp:var:: uint32 max_queue_size - //@@ - //@@ The maximum queue size for holding requests. A request will be - //@@ rejected immediately if it can't be enqueued because the queue is - //@@ full. The default value is 0 which indicates that no maximum - //@@ queue size is enforced. 
- //@@ - uint32 max_queue_size = 4; -} - -//@@ -//@@.. cpp:var:: message ModelDynamicBatching -//@@ -//@@ Dynamic batching configuration. These settings control how dynamic -//@@ batching operates for the model. -//@@ -message ModelDynamicBatching -{ - //@@ .. cpp:var:: int32 preferred_batch_size (repeated) - //@@ - //@@ Preferred batch sizes for dynamic batching. If a batch of one of - //@@ these sizes can be formed it will be executed immediately. If - //@@ not specified a preferred batch size will be chosen automatically - //@@ based on model and GPU characteristics. - //@@ - repeated int32 preferred_batch_size = 1; - - //@@ .. cpp:var:: uint64 max_queue_delay_microseconds - //@@ - //@@ The maximum time, in microseconds, a request will be delayed in - //@@ the scheduling queue to wait for additional requests for - //@@ batching. Default is 0. - //@@ - uint64 max_queue_delay_microseconds = 2; - - //@@ .. cpp:var:: bool preserve_ordering - //@@ - //@@ Should the dynamic batcher preserve the ordering of responses to - //@@ match the order of requests received by the scheduler. Default is - //@@ false. If true, the responses will be returned in the same order as - //@@ the order of requests sent to the scheduler. If false, the responses - //@@ may be returned in arbitrary order. This option is specifically - //@@ needed when a sequence of related inference requests (i.e. inference - //@@ requests with the same correlation ID) are sent to the dynamic - //@@ batcher to ensure that the sequence responses are in the correct - //@@ order. - //@@ - bool preserve_ordering = 3; - - //@@ .. cpp:var:: uint32 priority_levels - //@@ - //@@ The number of priority levels to be enabled for the model, - //@@ the priority level starts from 1 and 1 is the highest priority. - //@@ Requests are handled in priority order with all priority 1 requests - //@@ processed before priority 2, all priority 2 requests processed before - //@@ priority 3, etc. Requests with the same priority level will be - //@@ handled in the order that they are received. - //@@ - uint32 priority_levels = 4; - - //@@ .. cpp:var:: uint32 default_priority_level - //@@ - //@@ The priority level used for requests that don't specify their - //@@ priority. The value must be in the range [ 1, 'priority_levels' ]. - //@@ - uint32 default_priority_level = 5; - - //@@ .. cpp:var:: ModelQueuePolicy default_queue_policy - //@@ - //@@ The default queue policy used for requests that don't require - //@@ priority handling and requests that specify priority levels where - //@@ there is no specific policy given. If not specified, a policy with - //@@ default field values will be used. - //@@ - ModelQueuePolicy default_queue_policy = 6; - - //@@ .. cpp:var:: map priority_queue_policy - //@@ - //@@ Specify the queue policy for the priority level. The default queue - //@@ policy will be used if a priority level doesn't specify a queue - //@@ policy. - //@@ - map priority_queue_policy = 7; -} - -//@@ -//@@.. cpp:var:: message ModelSequenceBatching -//@@ -//@@ Sequence batching configuration. These settings control how sequence -//@@ batching operates for the model. -//@@ -message ModelSequenceBatching -{ - //@@ .. cpp:var:: message Control - //@@ - //@@ A control is a signal that the sequence batcher uses to - //@@ communicate with a backend. - //@@ - message Control - { - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ The kind of the control. - //@@ - enum Kind { - //@@ .. 
cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0 - //@@ - //@@ A new sequence is/is-not starting. If true a sequence is - //@@ starting, if false a sequence is continuing. Must - //@@ specify either int32_false_true or fp32_false_true for - //@@ this control. This control is optional. - //@@ - CONTROL_SEQUENCE_START = 0; - - //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1 - //@@ - //@@ A sequence is/is-not ready for inference. If true the - //@@ input tensor data is valid and should be used. If false - //@@ the input tensor data is invalid and inferencing should - //@@ be "skipped". Must specify either int32_false_true or - //@@ fp32_false_true for this control. This control is optional. - //@@ - CONTROL_SEQUENCE_READY = 1; - - //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2 - //@@ - //@@ A sequence is/is-not ending. If true a sequence is - //@@ ending, if false a sequence is continuing. Must - //@@ specify either int32_false_true or fp32_false_true for - //@@ this control. This control is optional. - //@@ - CONTROL_SEQUENCE_END = 2; - - //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3 - //@@ - //@@ The correlation ID of the sequence. The correlation ID - //@@ is an uint64_t value that is communicated in whole or - //@@ in part by the tensor. The tensor's datatype must be - //@@ specified by data_type and must be TYPE_UINT64, TYPE_INT64, - //@@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified - //@@ the correlation ID will be truncated to the low-order 32 - //@@ bits. This control is optional. - //@@ - CONTROL_SEQUENCE_CORRID = 3; - } - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this control. - //@@ - Kind kind = 1; - - //@@ .. cpp:var:: int32 int32_false_true (repeated) - //@@ - //@@ The control's true and false setting is indicated by setting - //@@ a value in an int32 tensor. The tensor must be a - //@@ 1-dimensional tensor with size equal to the batch size of - //@@ the request. 'int32_false_true' must have two entries: the - //@@ first the false value and the second the true value. - //@@ - repeated int32 int32_false_true = 2; - - //@@ .. cpp:var:: float fp32_false_true (repeated) - //@@ - //@@ The control's true and false setting is indicated by setting - //@@ a value in a fp32 tensor. The tensor must be a - //@@ 1-dimensional tensor with size equal to the batch size of - //@@ the request. 'fp32_false_true' must have two entries: the - //@@ first the false value and the second the true value. - //@@ - repeated float fp32_false_true = 3; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The control's datatype. - //@@ - DataType data_type = 4; - } - - //@@ .. cpp:var:: message ControlInput - //@@ - //@@ The sequence control values to communicate by a model input. - //@@ - message ControlInput - { - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model input. - //@@ - string name = 1; - - //@@ .. cpp:var:: Control control (repeated) - //@@ - //@@ The control value(s) that should be communicated to the - //@@ model using this model input. - //@@ - repeated Control control = 2; - } - - //@@ .. cpp:var:: message StrategyDirect - //@@ - //@@ The sequence batcher uses a specific, unique batch - //@@ slot for each sequence. All inference requests in a - //@@ sequence are directed to the same batch slot in the same - //@@ model instance over the lifetime of the sequence. This - //@@ is the default strategy. - //@@ - message StrategyDirect - { - //@@ .. 
cpp:var:: uint64 max_queue_delay_microseconds - //@@ - //@@ The maximum time, in microseconds, a candidate request - //@@ will be delayed in the sequence batch scheduling queue to - //@@ wait for additional requests for batching. Default is 0. - //@@ - uint64 max_queue_delay_microseconds = 1; - - //@@ .. cpp:var:: float minimum_slot_utilization - //@@ - //@@ The minimum slot utilization that must be satisfied to - //@@ execute the batch before 'max_queue_delay_microseconds' expires. - //@@ For example, a value of 0.5 indicates that the batch should be - //@@ executed as soon as 50% or more of the slots are ready even if - //@@ the 'max_queue_delay_microseconds' timeout has not expired. - //@@ The default is 0.0, indicating that a batch will be executed - //@@ before 'max_queue_delay_microseconds' timeout expires if at least - //@@ one batch slot is ready. 'max_queue_delay_microseconds' will be - //@@ ignored unless minimum_slot_utilization is set to a non-zero - //@@ value. - //@@ - float minimum_slot_utilization = 2; - } - - //@@ .. cpp:var:: message StrategyOldest - //@@ - //@@ The sequence batcher maintains up to 'max_candidate_sequences' - //@@ candidate sequences. 'max_candidate_sequences' can be greater - //@@ than the model's 'max_batch_size'. For inferencing the batcher - //@@ chooses from the candidate sequences up to 'max_batch_size' - //@@ inference requests. Requests are chosen in an oldest-first - //@@ manner across all candidate sequences. A given sequence is - //@@ not guaranteed to be assigned to the same batch slot for - //@@ all inference requests of that sequence. - //@@ - message StrategyOldest - { - //@@ .. cpp:var:: int32 max_candidate_sequences - //@@ - //@@ Maximum number of candidate sequences that the batcher - //@@ maintains. Excess sequences are kept in an ordered backlog - //@@ and become candidates when existing candidate sequences - //@@ complete. - //@@ - int32 max_candidate_sequences = 1; - - //@@ .. cpp:var:: int32 preferred_batch_size (repeated) - //@@ - //@@ Preferred batch sizes for dynamic batching of candidate - //@@ sequences. If a batch of one of these sizes can be formed - //@@ it will be executed immediately. If not specified a - //@@ preferred batch size will be chosen automatically - //@@ based on model and GPU characteristics. - //@@ - repeated int32 preferred_batch_size = 2; - - //@@ .. cpp:var:: uint64 max_queue_delay_microseconds - //@@ - //@@ The maximum time, in microseconds, a candidate request - //@@ will be delayed in the dynamic batch scheduling queue to - //@@ wait for additional requests for batching. Default is 0. - //@@ - uint64 max_queue_delay_microseconds = 3; - } - - //@@ .. cpp:var:: oneof strategy_choice - //@@ - //@@ The strategy used by the sequence batcher. Default strategy - //@@ is 'direct'. - //@@ - oneof strategy_choice - { - //@@ .. cpp:var:: StrategyDirect direct - //@@ - //@@ StrategyDirect scheduling strategy. - //@@ - StrategyDirect direct = 3; - - //@@ .. cpp:var:: StrategyOldest oldest - //@@ - //@@ StrategyOldest scheduling strategy. - //@@ - StrategyOldest oldest = 4; - } - - //@@ .. cpp:var:: uint64 max_sequence_idle_microseconds - //@@ - //@@ The maximum time, in microseconds, that a sequence is allowed to - //@@ be idle before it is aborted. The inference server considers a - //@@ sequence idle when it does not have any inference request queued - //@@ for the sequence. 
If this limit is exceeded, the inference server - //@@ will free the sequence slot allocated by the sequence and make it - //@@ available for another sequence. If not specified (or specified as - //@@ zero) a default value of 1000000 (1 second) is used. - //@@ - uint64 max_sequence_idle_microseconds = 1; - - //@@ .. cpp:var:: ControlInput control_input (repeated) - //@@ - //@@ The model input(s) that the server should use to communicate - //@@ sequence start, stop, ready and similar control values to the - //@@ model. - //@@ - repeated ControlInput control_input = 2; -} - -//@@ -//@@.. cpp:var:: message ModelEnsembling -//@@ -//@@ Model ensembling configuration. These settings specify the models that -//@@ compose the ensemble and how data flows between the models. -//@@ -message ModelEnsembling -{ - //@@ .. cpp:var:: message Step - //@@ - //@@ Each step specifies a model included in the ensemble, - //@@ maps ensemble tensor names to the model input tensors, - //@@ and maps model output tensors to ensemble tensor names - //@@ - message Step - { - //@@ .. cpp:var:: string model_name - //@@ - //@@ The name of the model to execute for this step of the ensemble. - //@@ - string model_name = 1; - - //@@ .. cpp:var:: int64 model_version - //@@ - //@@ The version of the model to use for inference. If -1 - //@@ the latest/most-recent version of the model is used. - //@@ - int64 model_version = 2; - - //@@ .. cpp:var:: map input_map - //@@ - //@@ Map from name of an input tensor on this step's model to ensemble - //@@ tensor name. The ensemble tensor must have the same data type and - //@@ shape as the model input. Each model input must be assigned to - //@@ one ensemble tensor, but the same ensemble tensor can be assigned - //@@ to multiple model inputs. - //@@ - map input_map = 3; - - //@@ .. cpp:var:: map output_map - //@@ - //@@ Map from name of an output tensor on this step's model to ensemble - //@@ tensor name. The data type and shape of the ensemble tensor will - //@@ be inferred from the model output. It is optional to assign all - //@@ model outputs to ensemble tensors. One ensemble tensor name - //@@ can appear in an output map only once. - //@@ - map output_map = 4; - } - - //@@ .. cpp:var:: Step step (repeated) - //@@ - //@@ The models and the input / output mappings used within the ensemble. - //@@ - repeated Step step = 1; -} - -//@@ -//@@.. cpp:var:: message ModelParameter -//@@ -//@@ A model parameter. -//@@ -message ModelParameter -{ - //@@ .. cpp:var:: string string_value - //@@ - //@@ The string value of the parameter. - //@@ - string string_value = 1; -} - -//@@ -//@@.. cpp:var:: message ModelWarmup -//@@ -//@@ Settings used to construct the request sample for model warmup. -//@@ -message ModelWarmup -{ - //@@ - //@@ .. cpp:var:: message Input - //@@ - //@@ Meta data associated with an input. - //@@ - message Input - { - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The data-type of the input. - //@@ - DataType data_type = 1; - - //@@ .. cpp:var:: int64 dims (repeated) - //@@ - //@@ The shape of the input tensor, not including the batch dimension. - //@@ - repeated int64 dims = 2; - - //@@ .. cpp:var:: oneof input_data_type - //@@ - //@@ Specify how the input data is generated. If the input has STRING - //@@ data type and 'random_data' is set, the data generation will fall - //@@ back to 'zero_data'. - //@@ - oneof input_data_type - { - //@@ - //@@ .. cpp:var:: bool zero_data - //@@ - //@@ The identifier for using zeros as input data. 
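The Step/input_map/output_map wiring above is roughly the structure the removed ensemble export code assembles; a hedged sketch of building one step programmatically, with hypothetical model and tensor names (input_map keys are model input names, values are ensemble tensor names; output_map maps model outputs to ensemble tensors).

    from merlin.systems.triton import model_config_pb2 as model_config  # removed by this patch

    ensemble = model_config.ModelConfig(name="ensemble_model", max_batch_size=64)
    step = ensemble.ensemble_scheduling.step.add(
        model_name="preprocess_workflow",
        model_version=-1,  # -1 selects the latest version
    )
    # ensemble tensor "user_id" feeds this step's model input "user_id"
    step.input_map["user_id"] = "user_id"
    # this step's model output "user_id_transformed" becomes ensemble tensor "user_id_out"
    step.output_map["user_id_transformed"] = "user_id_out"
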
Note that the - //@@ value of 'zero_data' will not be checked, instead, zero data - //@@ will be used as long as the field is set. - //@@ - bool zero_data = 3; - - //@@ - //@@ .. cpp:var:: bool random_data - //@@ - //@@ The identifier for using random data as input data. Note that - //@@ the value of 'random_data' will not be checked, instead, - //@@ random data will be used as long as the field is set. - //@@ - bool random_data = 4; - - //@@ .. cpp:var:: string input_data_file - //@@ - //@@ The file whose content will be used as raw input data in - //@@ row-major order. The file must be provided in a sub-directory - //@@ 'warmup' under the model directory. - //@@ - string input_data_file = 5; - } - } - - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the request sample. - //@@ - string name = 1; - - //@@ .. cpp:var:: uint32 batch_size - //@@ - //@@ The batch size of the inference request. This must be >= 1. For - //@@ models that don't support batching, batch_size must be 1. If - //@@ batch_size > 1, the 'inputs' specified below will be duplicated to - //@@ match the batch size requested. - //@@ - uint32 batch_size = 2; - - //@@ .. cpp:var:: map inputs - //@@ - //@@ The warmup meta data associated with every model input, including - //@@ control tensors. - //@@ - map inputs = 3; -} - -//@@ -//@@ .. cpp:var:: message ModelOperations -//@@ -//@@ The metadata of libraries providing custom operations for this model. -//@@ -message ModelOperations -{ - //@@ .. cpp:var:: string op_library_filename - //@@ - //@@ Optional paths of the libraries providing custom operations for - //@@ this model. Valid only for ONNX models. - //@@ - repeated string op_library_filename = 1; -} - -//@@ -//@@ .. cpp:var:: message ModelTransactionPolicy -//@@ -//@@ The specification that describes the nature of transactions -//@@ to be expected from the model. -//@@ -message ModelTransactionPolicy -{ - //@@ .. cpp:var:: bool decoupled - //@@ - //@@ Indicates whether responses generated by the model are decoupled with - //@@ the requests issued to it, which means the number of responses - //@@ generated by model may differ from number of requests issued, and - //@@ that the responses may be out of order relative to the order of - //@@ requests. The default is false, which means the model will generate - //@@ exactly one response for each request. - //@@ - bool decoupled = 1; -} - -//@@ -//@@.. cpp:var:: message ModelConfig -//@@ -//@@ A model configuration. -//@@ -message ModelConfig -{ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. - //@@ - string name = 1; - - //@@ .. cpp:var:: string platform - //@@ - //@@ The framework for the model. Possible values are - //@@ "tensorrt_plan", "tensorflow_graphdef", - //@@ "tensorflow_savedmodel", "onnxruntime_onnx", - //@@ "pytorch_libtorch" and "custom". - //@@ - string platform = 2; - - //@@ .. cpp:var:: string backend - //@@ - //@@ The backend used by the model. - //@@ - string backend = 17; - - //@@ .. cpp:var:: ModelVersionPolicy version_policy - //@@ - //@@ Policy indicating which version(s) of the model will be served. - //@@ - ModelVersionPolicy version_policy = 3; - - //@@ .. cpp:var:: int32 max_batch_size - //@@ - //@@ Maximum batch size allowed for inference. This can only decrease - //@@ what is allowed by the model itself. 
A max_batch_size value of 0 - //@@ indicates that batching is not allowed for the model and the - //@@ dimension/shape of the input and output tensors must exactly - //@@ match what is specified in the input and output configuration. A - //@@ max_batch_size value > 0 indicates that batching is allowed and - //@@ so the model expects the input tensors to have an additional - //@@ initial dimension for the batching that is not specified in the - //@@ input (for example, if the model supports batched inputs of - //@@ 2-dimensional tensors then the model configuration will specify - //@@ the input shape as [ X, Y ] but the model will expect the actual - //@@ input tensors to have shape [ N, X, Y ]). For max_batch_size > 0 - //@@ returned outputs will also have an additional initial dimension - //@@ for the batch. - //@@ - int32 max_batch_size = 4; - - //@@ .. cpp:var:: ModelInput input (repeated) - //@@ - //@@ The inputs request by the model. - //@@ - repeated ModelInput input = 5; - - //@@ .. cpp:var:: ModelOutput output (repeated) - //@@ - //@@ The outputs produced by the model. - //@@ - repeated ModelOutput output = 6; - - //@@ .. cpp:var:: BatchInput batch_input (repeated) - //@@ - //@@ The model input(s) that the server should use to communicate - //@@ batch related values to the model. - //@@ - repeated BatchInput batch_input = 20; - - //@@ .. cpp:var:: BatchOutput batch_output (repeated) - //@@ - //@@ The outputs produced by the model that requires special handling - //@@ by the model backend. - //@@ - repeated BatchOutput batch_output = 21; - - //@@ .. cpp:var:: ModelOptimizationPolicy optimization - //@@ - //@@ Optimization configuration for the model. If not specified - //@@ then default optimization policy is used. - //@@ - ModelOptimizationPolicy optimization = 12; - - //@@ .. cpp:var:: oneof scheduling_choice - //@@ - //@@ The scheduling policy for the model. If not specified the - //@@ default scheduling policy is used for the model. The default - //@@ policy is to execute each inference request independently. - //@@ - oneof scheduling_choice - { - //@@ .. cpp:var:: ModelDynamicBatching dynamic_batching - //@@ - //@@ If specified, enables the dynamic-batching scheduling - //@@ policy. With dynamic-batching the scheduler may group - //@@ together independent requests into a single batch to - //@@ improve inference throughput. - //@@ - ModelDynamicBatching dynamic_batching = 11; - - //@@ .. cpp:var:: ModelSequenceBatching sequence_batching - //@@ - //@@ If specified, enables the sequence-batching scheduling - //@@ policy. With sequence-batching, inference requests - //@@ with the same correlation ID are routed to the same - //@@ model instance. Multiple sequences of inference requests - //@@ may be batched together into a single batch to - //@@ improve inference throughput. - //@@ - ModelSequenceBatching sequence_batching = 13; - - //@@ .. cpp:var:: ModelEnsembling ensemble_scheduling - //@@ - //@@ If specified, enables the model-ensembling scheduling - //@@ policy. With model-ensembling, inference requests - //@@ will be processed according to the specification, such as an - //@@ execution sequence of models. The input specified in this model - //@@ config will be the input for the ensemble, and the output - //@@ specified will be the output of the ensemble. - //@@ - ModelEnsembling ensemble_scheduling = 15; - } - - //@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated) - //@@ - //@@ Instances of this model. 
If not specified, one instance - //@@ of the model will be instantiated on each available GPU. - //@@ - repeated ModelInstanceGroup instance_group = 7; - - //@@ .. cpp:var:: string default_model_filename - //@@ - //@@ Optional filename of the model file to use if a - //@@ compute-capability specific model is not specified in - //@@ :cpp:var:`cc_model_filenames`. If not specified the default name - //@@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or - //@@ 'model.pt' depending on the model type. - //@@ - string default_model_filename = 8; - - //@@ .. cpp:var:: map cc_model_filenames - //@@ - //@@ Optional map from CUDA compute capability to the filename of - //@@ the model that supports that compute capability. The filename - //@@ refers to a file within the model version directory. - //@@ - map cc_model_filenames = 9; - - //@@ .. cpp:var:: map metric_tags - //@@ - //@@ Optional metric tags. User-specific key-value pairs for metrics - //@@ reported for this model. These tags are applied to the metrics - //@@ reported on the HTTP metrics port. - //@@ - map metric_tags = 10; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional model parameters. User-specified parameter values that - //@@ are made available to custom backends. - //@@ - map parameters = 14; - - //@@ .. cpp:var:: ModelWarmup model_warmup (repeated) - //@@ - //@@ Warmup setting of this model. If specified, all instances - //@@ will be run with the request samples in sequence before - //@@ serving the model. - //@@ This field can only be specified if the model is not an ensemble - //@@ model. - //@@ - repeated ModelWarmup model_warmup = 16; - - //@@ .. cpp:var:: ModelOperations model_operations - //@@ - //@@ Optional metadata of the libraries providing custom operations for - //@@ this model. - //@@ - ModelOperations model_operations = 18; - - //@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy - //@@ - //@@ Optional specification that describes the nature of transactions - //@@ to be expected from the model. - //@@ - ModelTransactionPolicy model_transaction_policy = 19; -} diff --git a/merlin/systems/triton/model_config_pb2.py b/merlin/systems/triton/model_config_pb2.py deleted file mode 100644 index 362d46a6a61..00000000000 --- a/merlin/systems/triton/model_config_pb2.py +++ /dev/null @@ -1,4564 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! 
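With the proto definition above now fully quoted, here is a short, hedged example of how a complete ModelConfig is typically built and written out as config.pbtxt using the generated bindings whose removal follows; backend, paths, and tensor names are illustrative only, and dims exclude the leading batch dimension implied by max_batch_size > 0.

    from google.protobuf import text_format
    from merlin.systems.triton import model_config_pb2 as model_config  # removed by this patch

    config = model_config.ModelConfig(
        name="example_workflow",
        backend="python",
        max_batch_size=64,  # actual request tensors carry a leading batch dim: [N, ...]
    )
    config.input.add(name="user_id", data_type=model_config.TYPE_INT64, dims=[1])
    config.output.add(name="user_id_out", data_type=model_config.TYPE_INT64, dims=[1])
    config.instance_group.add(kind=model_config.ModelInstanceGroup.KIND_CPU, count=1)

    with open("config.pbtxt", "w") as f:
        f.write(text_format.MessageToString(config))
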
-# source: model_config.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) -from google.protobuf.internal import enum_type_wrapper -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="model_config.proto", - package="inference", - syntax="proto3", - serialized_options=None, - serialized_pb=_b( - '\n\x12model_config.proto\x12\tinference"\x96\x01\n\x10ModelRateLimiter\x12\x37\n\tresources\x18\x01 \x03(\x0b\x32$.inference.ModelRateLimiter.Resource\x12\x10\n\x08priority\x18\x02 \x01(\r\x1a\x37\n\x08Resource\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06global\x18\x02 \x01(\x08\x12\r\n\x05\x63ount\x18\x03 \x01(\r"\xf8\x01\n\x12ModelInstanceGroup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x30\n\x04kind\x18\x04 \x01(\x0e\x32".inference.ModelInstanceGroup.Kind\x12\r\n\x05\x63ount\x18\x02 \x01(\x05\x12\x31\n\x0crate_limiter\x18\x06 \x01(\x0b\x32\x1b.inference.ModelRateLimiter\x12\x0c\n\x04gpus\x18\x03 \x03(\x05\x12\x0f\n\x07profile\x18\x05 \x03(\t"A\n\x04Kind\x12\r\n\tKIND_AUTO\x10\x00\x12\x0c\n\x08KIND_GPU\x10\x01\x12\x0c\n\x08KIND_CPU\x10\x02\x12\x0e\n\nKIND_MODEL\x10\x03"#\n\x12ModelTensorReshape\x12\r\n\x05shape\x18\x01 \x03(\x03"\xa0\x02\n\nModelInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12,\n\x06\x66ormat\x18\x03 \x01(\x0e\x32\x1c.inference.ModelInput.Format\x12\x0c\n\x04\x64ims\x18\x04 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08\x12\x1a\n\x12\x61llow_ragged_batch\x18\x07 \x01(\x08";\n\x06\x46ormat\x12\x0f\n\x0b\x46ORMAT_NONE\x10\x00\x12\x0f\n\x0b\x46ORMAT_NHWC\x10\x01\x12\x0f\n\x0b\x46ORMAT_NCHW\x10\x02"\xb2\x01\n\x0bModelOutput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12&\n\tdata_type\x18\x02 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x03 \x03(\x03\x12.\n\x07reshape\x18\x05 \x01(\x0b\x32\x1d.inference.ModelTensorReshape\x12\x16\n\x0elabel_filename\x18\x04 \x01(\t\x12\x17\n\x0fis_shape_tensor\x18\x06 \x01(\x08"\xa5\x02\n\nBatchInput\x12(\n\x04kind\x18\x01 \x01(\x0e\x32\x1a.inference.BatchInput.Kind\x12\x13\n\x0btarget_name\x18\x02 \x03(\t\x12&\n\tdata_type\x18\x03 \x01(\x0e\x32\x13.inference.DataType\x12\x14\n\x0csource_input\x18\x04 \x03(\t"\x99\x01\n\x04Kind\x12\x17\n\x13\x42\x41TCH_ELEMENT_COUNT\x10\x00\x12#\n\x1f\x42\x41TCH_ACCUMULATED_ELEMENT_COUNT\x10\x01\x12-\n)BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO\x10\x02\x12$\n BATCH_MAX_ELEMENT_COUNT_AS_SHAPE\x10\x03"\x8f\x01\n\x0b\x42\x61tchOutput\x12\x13\n\x0btarget_name\x18\x01 \x03(\t\x12)\n\x04kind\x18\x02 \x01(\x0e\x32\x1b.inference.BatchOutput.Kind\x12\x14\n\x0csource_input\x18\x03 \x03(\t"*\n\x04Kind\x12"\n\x1e\x42\x41TCH_SCATTER_WITH_INPUT_SHAPE\x10\x00"\x90\x02\n\x12ModelVersionPolicy\x12\x36\n\x06latest\x18\x01 \x01(\x0b\x32$.inference.ModelVersionPolicy.LatestH\x00\x12\x30\n\x03\x61ll\x18\x02 \x01(\x0b\x32!.inference.ModelVersionPolicy.AllH\x00\x12:\n\x08specific\x18\x03 \x01(\x0b\x32&.inference.ModelVersionPolicy.SpecificH\x00\x1a\x1e\n\x06Latest\x12\x14\n\x0cnum_versions\x18\x01 \x01(\r\x1a\x05\n\x03\x41ll\x1a\x1c\n\x08Specific\x12\x10\n\x08versions\x18\x01 
\x03(\x03\x42\x0f\n\rpolicy_choice"\xa1\r\n\x17ModelOptimizationPolicy\x12\x37\n\x05graph\x18\x01 \x01(\x0b\x32(.inference.ModelOptimizationPolicy.Graph\x12\x42\n\x08priority\x18\x02 \x01(\x0e\x32\x30.inference.ModelOptimizationPolicy.ModelPriority\x12\x35\n\x04\x63uda\x18\x03 \x01(\x0b\x32\'.inference.ModelOptimizationPolicy.Cuda\x12X\n\x16\x65xecution_accelerators\x18\x04 \x01(\x0b\x32\x38.inference.ModelOptimizationPolicy.ExecutionAccelerators\x12R\n\x13input_pinned_memory\x18\x05 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x12S\n\x14output_pinned_memory\x18\x06 \x01(\x0b\x32\x35.inference.ModelOptimizationPolicy.PinnedMemoryBuffer\x1a\x16\n\x05Graph\x12\r\n\x05level\x18\x01 \x01(\x05\x1a\x9e\x05\n\x04\x43uda\x12\x0e\n\x06graphs\x18\x01 \x01(\x08\x12\x18\n\x10\x62usy_wait_events\x18\x02 \x01(\x08\x12\x45\n\ngraph_spec\x18\x03 \x03(\x0b\x32\x31.inference.ModelOptimizationPolicy.Cuda.GraphSpec\x1a\xa4\x04\n\tGraphSpec\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12K\n\x05input\x18\x02 \x03(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry\x12W\n\x11graph_lower_bound\x18\x03 \x01(\x0b\x32<.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound\x1a\x14\n\x05Shape\x12\x0b\n\x03\x64im\x18\x01 \x03(\x03\x1a\xdf\x01\n\nLowerBound\x12\x12\n\nbatch_size\x18\x01 \x01(\x05\x12V\n\x05input\x18\x02 \x03(\x0b\x32G.inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\x65\n\nInputEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x46\n\x05value\x18\x02 \x01(\x0b\x32\x37.inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape:\x02\x38\x01\x1a\xa4\x03\n\x15\x45xecutionAccelerators\x12g\n\x19gpu_execution_accelerator\x18\x01 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x12g\n\x19\x63pu_execution_accelerator\x18\x02 \x03(\x0b\x32\x44.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator\x1a\xb8\x01\n\x0b\x41\x63\x63\x65lerator\x12\x0c\n\x04name\x18\x01 \x01(\t\x12h\n\nparameters\x18\x02 \x03(\x0b\x32T.inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry\x1a\x31\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a$\n\x12PinnedMemoryBuffer\x12\x0e\n\x06\x65nable\x18\x01 \x01(\x08"I\n\rModelPriority\x12\x14\n\x10PRIORITY_DEFAULT\x10\x00\x12\x10\n\x0cPRIORITY_MAX\x10\x01\x12\x10\n\x0cPRIORITY_MIN\x10\x02"\xdb\x01\n\x10ModelQueuePolicy\x12\x41\n\x0etimeout_action\x18\x01 \x01(\x0e\x32).inference.ModelQueuePolicy.TimeoutAction\x12$\n\x1c\x64\x65\x66\x61ult_timeout_microseconds\x18\x02 \x01(\x04\x12\x1e\n\x16\x61llow_timeout_override\x18\x03 \x01(\x08\x12\x16\n\x0emax_queue_size\x18\x04 \x01(\r"&\n\rTimeoutAction\x12\n\n\x06REJECT\x10\x00\x12\t\n\x05\x44\x45LAY\x10\x01"\x9b\x03\n\x14ModelDynamicBatching\x12\x1c\n\x14preferred_batch_size\x18\x01 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x02 \x01(\x04\x12\x19\n\x11preserve_ordering\x18\x03 \x01(\x08\x12\x17\n\x0fpriority_levels\x18\x04 \x01(\r\x12\x1e\n\x16\x64\x65\x66\x61ult_priority_level\x18\x05 \x01(\r\x12\x39\n\x14\x64\x65\x66\x61ult_queue_policy\x18\x06 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy\x12W\n\x15priority_queue_policy\x18\x07 \x03(\x0b\x32\x38.inference.ModelDynamicBatching.PriorityQueuePolicyEntry\x1aW\n\x18PriorityQueuePolicyEntry\x12\x0b\n\x03key\x18\x01 
\x01(\r\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.inference.ModelQueuePolicy:\x02\x38\x01"\xe3\x06\n\x15ModelSequenceBatching\x12\x41\n\x06\x64irect\x18\x03 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyDirectH\x00\x12\x41\n\x06oldest\x18\x04 \x01(\x0b\x32/.inference.ModelSequenceBatching.StrategyOldestH\x00\x12&\n\x1emax_sequence_idle_microseconds\x18\x01 \x01(\x04\x12\x44\n\rcontrol_input\x18\x02 \x03(\x0b\x32-.inference.ModelSequenceBatching.ControlInput\x1a\x98\x02\n\x07\x43ontrol\x12;\n\x04kind\x18\x01 \x01(\x0e\x32-.inference.ModelSequenceBatching.Control.Kind\x12\x18\n\x10int32_false_true\x18\x02 \x03(\x05\x12\x17\n\x0f\x66p32_false_true\x18\x03 \x03(\x02\x12&\n\tdata_type\x18\x04 \x01(\x0e\x32\x13.inference.DataType"u\n\x04Kind\x12\x1a\n\x16\x43ONTROL_SEQUENCE_START\x10\x00\x12\x1a\n\x16\x43ONTROL_SEQUENCE_READY\x10\x01\x12\x18\n\x14\x43ONTROL_SEQUENCE_END\x10\x02\x12\x1b\n\x17\x43ONTROL_SEQUENCE_CORRID\x10\x03\x1aW\n\x0c\x43ontrolInput\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x39\n\x07\x63ontrol\x18\x02 \x03(\x0b\x32(.inference.ModelSequenceBatching.Control\x1aX\n\x0eStrategyDirect\x12$\n\x1cmax_queue_delay_microseconds\x18\x01 \x01(\x04\x12 \n\x18minimum_slot_utilization\x18\x02 \x01(\x02\x1au\n\x0eStrategyOldest\x12\x1f\n\x17max_candidate_sequences\x18\x01 \x01(\x05\x12\x1c\n\x14preferred_batch_size\x18\x02 \x03(\x05\x12$\n\x1cmax_queue_delay_microseconds\x18\x03 \x01(\x04\x42\x11\n\x0fstrategy_choice"\xdd\x02\n\x0fModelEnsembling\x12-\n\x04step\x18\x01 \x03(\x0b\x32\x1f.inference.ModelEnsembling.Step\x1a\x9a\x02\n\x04Step\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\x03\x12@\n\tinput_map\x18\x03 \x03(\x0b\x32-.inference.ModelEnsembling.Step.InputMapEntry\x12\x42\n\noutput_map\x18\x04 \x03(\x0b\x32..inference.ModelEnsembling.Step.OutputMapEntry\x1a/\n\rInputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x30\n\x0eOutputMapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"&\n\x0eModelParameter\x12\x14\n\x0cstring_value\x18\x01 \x01(\t"\xca\x02\n\x0bModelWarmup\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x32\n\x06inputs\x18\x03 \x03(\x0b\x32".inference.ModelWarmup.InputsEntry\x1a\x97\x01\n\x05Input\x12&\n\tdata_type\x18\x01 \x01(\x0e\x32\x13.inference.DataType\x12\x0c\n\x04\x64ims\x18\x02 \x03(\x03\x12\x13\n\tzero_data\x18\x03 \x01(\x08H\x00\x12\x15\n\x0brandom_data\x18\x04 \x01(\x08H\x00\x12\x19\n\x0finput_data_file\x18\x05 \x01(\tH\x00\x42\x11\n\x0finput_data_type\x1aK\n\x0bInputsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12+\n\x05value\x18\x02 \x01(\x0b\x32\x1c.inference.ModelWarmup.Input:\x02\x38\x01".\n\x0fModelOperations\x12\x1b\n\x13op_library_filename\x18\x01 \x03(\t"+\n\x16ModelTransactionPolicy\x12\x11\n\tdecoupled\x18\x01 \x01(\x08"\xb8\t\n\x0bModelConfig\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08platform\x18\x02 \x01(\t\x12\x0f\n\x07\x62\x61\x63kend\x18\x11 \x01(\t\x12\x35\n\x0eversion_policy\x18\x03 \x01(\x0b\x32\x1d.inference.ModelVersionPolicy\x12\x16\n\x0emax_batch_size\x18\x04 \x01(\x05\x12$\n\x05input\x18\x05 \x03(\x0b\x32\x15.inference.ModelInput\x12&\n\x06output\x18\x06 \x03(\x0b\x32\x16.inference.ModelOutput\x12*\n\x0b\x62\x61tch_input\x18\x14 \x03(\x0b\x32\x15.inference.BatchInput\x12,\n\x0c\x62\x61tch_output\x18\x15 \x03(\x0b\x32\x16.inference.BatchOutput\x12\x38\n\x0coptimization\x18\x0c \x01(\x0b\x32".inference.ModelOptimizationPolicy\x12;\n\x10\x64ynamic_batching\x18\x0b 
\x01(\x0b\x32\x1f.inference.ModelDynamicBatchingH\x00\x12=\n\x11sequence_batching\x18\r \x01(\x0b\x32 .inference.ModelSequenceBatchingH\x00\x12\x39\n\x13\x65nsemble_scheduling\x18\x0f \x01(\x0b\x32\x1a.inference.ModelEnsemblingH\x00\x12\x35\n\x0einstance_group\x18\x07 \x03(\x0b\x32\x1d.inference.ModelInstanceGroup\x12\x1e\n\x16\x64\x65\x66\x61ult_model_filename\x18\x08 \x01(\t\x12H\n\x12\x63\x63_model_filenames\x18\t \x03(\x0b\x32,.inference.ModelConfig.CcModelFilenamesEntry\x12;\n\x0bmetric_tags\x18\n \x03(\x0b\x32&.inference.ModelConfig.MetricTagsEntry\x12:\n\nparameters\x18\x0e \x03(\x0b\x32&.inference.ModelConfig.ParametersEntry\x12,\n\x0cmodel_warmup\x18\x10 \x03(\x0b\x32\x16.inference.ModelWarmup\x12\x34\n\x10model_operations\x18\x12 \x01(\x0b\x32\x1a.inference.ModelOperations\x12\x43\n\x18model_transaction_policy\x18\x13 \x01(\x0b\x32!.inference.ModelTransactionPolicy\x1a\x37\n\x15\x43\x63ModelFilenamesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1a\x31\n\x0fMetricTagsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.ModelParameter:\x02\x38\x01\x42\x13\n\x11scheduling_choice*\xeb\x01\n\x08\x44\x61taType\x12\x10\n\x0cTYPE_INVALID\x10\x00\x12\r\n\tTYPE_BOOL\x10\x01\x12\x0e\n\nTYPE_UINT8\x10\x02\x12\x0f\n\x0bTYPE_UINT16\x10\x03\x12\x0f\n\x0bTYPE_UINT32\x10\x04\x12\x0f\n\x0bTYPE_UINT64\x10\x05\x12\r\n\tTYPE_INT8\x10\x06\x12\x0e\n\nTYPE_INT16\x10\x07\x12\x0e\n\nTYPE_INT32\x10\x08\x12\x0e\n\nTYPE_INT64\x10\t\x12\r\n\tTYPE_FP16\x10\n\x12\r\n\tTYPE_FP32\x10\x0b\x12\r\n\tTYPE_FP64\x10\x0c\x12\x0f\n\x0bTYPE_STRING\x10\rb\x06proto3' - ), -) - -_DATATYPE = _descriptor.EnumDescriptor( - name="DataType", - full_name="inference.DataType", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="TYPE_INVALID", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_BOOL", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_UINT8", index=2, number=2, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_UINT16", index=3, number=3, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_UINT32", index=4, number=4, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_UINT64", index=5, number=5, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_INT8", index=6, number=6, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_INT16", index=7, number=7, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_INT32", index=8, number=8, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_INT64", index=9, number=9, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_FP16", index=10, number=10, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_FP32", index=11, number=11, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_FP64", index=12, number=12, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="TYPE_STRING", index=13, number=13, serialized_options=None, type=None - ), - ], - containing_type=None, - 
serialized_options=None, - serialized_start=6899, - serialized_end=7134, -) -_sym_db.RegisterEnumDescriptor(_DATATYPE) - -DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE) -TYPE_INVALID = 0 -TYPE_BOOL = 1 -TYPE_UINT8 = 2 -TYPE_UINT16 = 3 -TYPE_UINT32 = 4 -TYPE_UINT64 = 5 -TYPE_INT8 = 6 -TYPE_INT16 = 7 -TYPE_INT32 = 8 -TYPE_INT64 = 9 -TYPE_FP16 = 10 -TYPE_FP32 = 11 -TYPE_FP64 = 12 -TYPE_STRING = 13 - - -_MODELINSTANCEGROUP_KIND = _descriptor.EnumDescriptor( - name="Kind", - full_name="inference.ModelInstanceGroup.Kind", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="KIND_AUTO", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="KIND_GPU", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="KIND_CPU", index=2, number=2, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="KIND_MODEL", index=3, number=3, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=370, - serialized_end=435, -) -_sym_db.RegisterEnumDescriptor(_MODELINSTANCEGROUP_KIND) - -_MODELINPUT_FORMAT = _descriptor.EnumDescriptor( - name="Format", - full_name="inference.ModelInput.Format", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="FORMAT_NONE", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="FORMAT_NHWC", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="FORMAT_NCHW", index=2, number=2, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=704, - serialized_end=763, -) -_sym_db.RegisterEnumDescriptor(_MODELINPUT_FORMAT) - -_BATCHINPUT_KIND = _descriptor.EnumDescriptor( - name="Kind", - full_name="inference.BatchInput.Kind", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="BATCH_ELEMENT_COUNT", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="BATCH_ACCUMULATED_ELEMENT_COUNT", - index=1, - number=1, - serialized_options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name="BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO", - index=2, - number=2, - serialized_options=None, - type=None, - ), - _descriptor.EnumValueDescriptor( - name="BATCH_MAX_ELEMENT_COUNT_AS_SHAPE", - index=3, - number=3, - serialized_options=None, - type=None, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=1087, - serialized_end=1240, -) -_sym_db.RegisterEnumDescriptor(_BATCHINPUT_KIND) - -_BATCHOUTPUT_KIND = _descriptor.EnumDescriptor( - name="Kind", - full_name="inference.BatchOutput.Kind", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="BATCH_SCATTER_WITH_INPUT_SHAPE", - index=0, - number=0, - serialized_options=None, - type=None, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=1344, - serialized_end=1386, -) -_sym_db.RegisterEnumDescriptor(_BATCHOUTPUT_KIND) - -_MODELOPTIMIZATIONPOLICY_MODELPRIORITY = _descriptor.EnumDescriptor( - name="ModelPriority", - full_name="inference.ModelOptimizationPolicy.ModelPriority", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="PRIORITY_DEFAULT", index=0, number=0, serialized_options=None, type=None - ), - 
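As a small usage note on the generated enum plumbing above: DataType values are exposed both as module-level constants and through the EnumTypeWrapper, so they can be mapped back and forth by name; a hedged sketch, assuming the removed module is importable.

    from merlin.systems.triton import model_config_pb2 as model_config  # removed by this patch

    assert model_config.TYPE_FP32 == 11
    assert model_config.DataType.Name(model_config.TYPE_FP32) == "TYPE_FP32"
    assert model_config.DataType.Value("TYPE_INT64") == model_config.TYPE_INT64
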
_descriptor.EnumValueDescriptor( - name="PRIORITY_MAX", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="PRIORITY_MIN", index=2, number=2, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=3288, - serialized_end=3361, -) -_sym_db.RegisterEnumDescriptor(_MODELOPTIMIZATIONPOLICY_MODELPRIORITY) - -_MODELQUEUEPOLICY_TIMEOUTACTION = _descriptor.EnumDescriptor( - name="TimeoutAction", - full_name="inference.ModelQueuePolicy.TimeoutAction", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="REJECT", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="DELAY", index=1, number=1, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=3545, - serialized_end=3583, -) -_sym_db.RegisterEnumDescriptor(_MODELQUEUEPOLICY_TIMEOUTACTION) - -_MODELSEQUENCEBATCHING_CONTROL_KIND = _descriptor.EnumDescriptor( - name="Kind", - full_name="inference.ModelSequenceBatching.Control.Kind", - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name="CONTROL_SEQUENCE_START", index=0, number=0, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="CONTROL_SEQUENCE_READY", index=1, number=1, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="CONTROL_SEQUENCE_END", index=2, number=2, serialized_options=None, type=None - ), - _descriptor.EnumValueDescriptor( - name="CONTROL_SEQUENCE_CORRID", index=3, number=3, serialized_options=None, type=None - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=4433, - serialized_end=4550, -) -_sym_db.RegisterEnumDescriptor(_MODELSEQUENCEBATCHING_CONTROL_KIND) - - -_MODELRATELIMITER_RESOURCE = _descriptor.Descriptor( - name="Resource", - full_name="inference.ModelRateLimiter.Resource", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelRateLimiter.Resource.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="global", - full_name="inference.ModelRateLimiter.Resource.global", - index=1, - number=2, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="count", - full_name="inference.ModelRateLimiter.Resource.count", - index=2, - number=3, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=129, - serialized_end=184, -) - -_MODELRATELIMITER = _descriptor.Descriptor( - name="ModelRateLimiter", - full_name="inference.ModelRateLimiter", - filename=None, - 
file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="resources", - full_name="inference.ModelRateLimiter.resources", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="priority", - full_name="inference.ModelRateLimiter.priority", - index=1, - number=2, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELRATELIMITER_RESOURCE, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=34, - serialized_end=184, -) - - -_MODELINSTANCEGROUP = _descriptor.Descriptor( - name="ModelInstanceGroup", - full_name="inference.ModelInstanceGroup", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelInstanceGroup.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="kind", - full_name="inference.ModelInstanceGroup.kind", - index=1, - number=4, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="count", - full_name="inference.ModelInstanceGroup.count", - index=2, - number=2, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="rate_limiter", - full_name="inference.ModelInstanceGroup.rate_limiter", - index=3, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="gpus", - full_name="inference.ModelInstanceGroup.gpus", - index=4, - number=3, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="profile", - full_name="inference.ModelInstanceGroup.profile", - index=5, - number=5, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _MODELINSTANCEGROUP_KIND, - ], - serialized_options=None, - 
is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=187, - serialized_end=435, -) - - -_MODELTENSORRESHAPE = _descriptor.Descriptor( - name="ModelTensorReshape", - full_name="inference.ModelTensorReshape", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="shape", - full_name="inference.ModelTensorReshape.shape", - index=0, - number=1, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=437, - serialized_end=472, -) - - -_MODELINPUT = _descriptor.Descriptor( - name="ModelInput", - full_name="inference.ModelInput", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelInput.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="data_type", - full_name="inference.ModelInput.data_type", - index=1, - number=2, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="format", - full_name="inference.ModelInput.format", - index=2, - number=3, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="dims", - full_name="inference.ModelInput.dims", - index=3, - number=4, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="reshape", - full_name="inference.ModelInput.reshape", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="is_shape_tensor", - full_name="inference.ModelInput.is_shape_tensor", - index=5, - number=6, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="allow_ragged_batch", - full_name="inference.ModelInput.allow_ragged_batch", - index=6, - number=7, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, 
- extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _MODELINPUT_FORMAT, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=475, - serialized_end=763, -) - - -_MODELOUTPUT = _descriptor.Descriptor( - name="ModelOutput", - full_name="inference.ModelOutput", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelOutput.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="data_type", - full_name="inference.ModelOutput.data_type", - index=1, - number=2, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="dims", - full_name="inference.ModelOutput.dims", - index=2, - number=3, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="reshape", - full_name="inference.ModelOutput.reshape", - index=3, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="label_filename", - full_name="inference.ModelOutput.label_filename", - index=4, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="is_shape_tensor", - full_name="inference.ModelOutput.is_shape_tensor", - index=5, - number=6, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=766, - serialized_end=944, -) - - -_BATCHINPUT = _descriptor.Descriptor( - name="BatchInput", - full_name="inference.BatchInput", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="kind", - full_name="inference.BatchInput.kind", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="target_name", - full_name="inference.BatchInput.target_name", - 
index=1, - number=2, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="data_type", - full_name="inference.BatchInput.data_type", - index=2, - number=3, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="source_input", - full_name="inference.BatchInput.source_input", - index=3, - number=4, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _BATCHINPUT_KIND, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=947, - serialized_end=1240, -) - - -_BATCHOUTPUT = _descriptor.Descriptor( - name="BatchOutput", - full_name="inference.BatchOutput", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="target_name", - full_name="inference.BatchOutput.target_name", - index=0, - number=1, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="kind", - full_name="inference.BatchOutput.kind", - index=1, - number=2, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="source_input", - full_name="inference.BatchOutput.source_input", - index=2, - number=3, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _BATCHOUTPUT_KIND, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1243, - serialized_end=1386, -) - - -_MODELVERSIONPOLICY_LATEST = _descriptor.Descriptor( - name="Latest", - full_name="inference.ModelVersionPolicy.Latest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="num_versions", - full_name="inference.ModelVersionPolicy.Latest.num_versions", - index=0, - number=1, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1577, - serialized_end=1607, -) - -_MODELVERSIONPOLICY_ALL = 
_descriptor.Descriptor( - name="All", - full_name="inference.ModelVersionPolicy.All", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1609, - serialized_end=1614, -) - -_MODELVERSIONPOLICY_SPECIFIC = _descriptor.Descriptor( - name="Specific", - full_name="inference.ModelVersionPolicy.Specific", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="versions", - full_name="inference.ModelVersionPolicy.Specific.versions", - index=0, - number=1, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1616, - serialized_end=1644, -) - -_MODELVERSIONPOLICY = _descriptor.Descriptor( - name="ModelVersionPolicy", - full_name="inference.ModelVersionPolicy", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="latest", - full_name="inference.ModelVersionPolicy.latest", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="all", - full_name="inference.ModelVersionPolicy.all", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="specific", - full_name="inference.ModelVersionPolicy.specific", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELVERSIONPOLICY_LATEST, - _MODELVERSIONPOLICY_ALL, - _MODELVERSIONPOLICY_SPECIFIC, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="policy_choice", - full_name="inference.ModelVersionPolicy.policy_choice", - index=0, - containing_type=None, - fields=[], - ), - ], - serialized_start=1389, - serialized_end=1661, -) - - -_MODELOPTIMIZATIONPOLICY_GRAPH = _descriptor.Descriptor( - name="Graph", - full_name="inference.ModelOptimizationPolicy.Graph", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="level", - full_name="inference.ModelOptimizationPolicy.Graph.level", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - 
enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2130, - serialized_end=2152, -) - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE = _descriptor.Descriptor( - name="Shape", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="dim", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape.dim", - index=0, - number=1, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2476, - serialized_end=2496, -) - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY = _descriptor.Descriptor( - name="InputEntry", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2621, - serialized_end=2722, -) - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND = _descriptor.Descriptor( - name="LowerBound", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="batch_size", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.batch_size", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="input", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.input", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - 
syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2499, - serialized_end=2722, -) - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY = _descriptor.Descriptor( - name="InputEntry", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2621, - serialized_end=2722, -) - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC = _descriptor.Descriptor( - name="GraphSpec", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="batch_size", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.batch_size", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="input", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.input", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="graph_lower_bound", - full_name="inference.ModelOptimizationPolicy.Cuda.GraphSpec.graph_lower_bound", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE, - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND, - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2277, - serialized_end=2825, -) - -_MODELOPTIMIZATIONPOLICY_CUDA = _descriptor.Descriptor( - name="Cuda", - full_name="inference.ModelOptimizationPolicy.Cuda", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="graphs", - full_name="inference.ModelOptimizationPolicy.Cuda.graphs", - index=0, - number=1, - type=8, - cpp_type=7, - label=1, - has_default_value=False, 
- default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="busy_wait_events", - full_name="inference.ModelOptimizationPolicy.Cuda.busy_wait_events", - index=1, - number=2, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="graph_spec", - full_name="inference.ModelOptimizationPolicy.Cuda.graph_spec", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2155, - serialized_end=2825, -) - -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY = _descriptor.Descriptor( - name="ParametersEntry", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3199, - serialized_end=3248, -) - -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR = _descriptor.Descriptor( - name="Accelerator", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="parameters", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.parameters", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3064, - serialized_end=3248, -) - -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS = _descriptor.Descriptor( - name="ExecutionAccelerators", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="gpu_execution_accelerator", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.gpu_execution_accelerator", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="cpu_execution_accelerator", - full_name="inference.ModelOptimizationPolicy.ExecutionAccelerators.cpu_execution_accelerator", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2828, - serialized_end=3248, -) - -_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER = _descriptor.Descriptor( - name="PinnedMemoryBuffer", - full_name="inference.ModelOptimizationPolicy.PinnedMemoryBuffer", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="enable", - full_name="inference.ModelOptimizationPolicy.PinnedMemoryBuffer.enable", - index=0, - number=1, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3250, - serialized_end=3286, -) - -_MODELOPTIMIZATIONPOLICY = _descriptor.Descriptor( - name="ModelOptimizationPolicy", - full_name="inference.ModelOptimizationPolicy", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="graph", - full_name="inference.ModelOptimizationPolicy.graph", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="priority", - full_name="inference.ModelOptimizationPolicy.priority", - index=1, - number=2, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - 
is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="cuda", - full_name="inference.ModelOptimizationPolicy.cuda", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="execution_accelerators", - full_name="inference.ModelOptimizationPolicy.execution_accelerators", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="input_pinned_memory", - full_name="inference.ModelOptimizationPolicy.input_pinned_memory", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="output_pinned_memory", - full_name="inference.ModelOptimizationPolicy.output_pinned_memory", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELOPTIMIZATIONPOLICY_GRAPH, - _MODELOPTIMIZATIONPOLICY_CUDA, - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS, - _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER, - ], - enum_types=[ - _MODELOPTIMIZATIONPOLICY_MODELPRIORITY, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1664, - serialized_end=3361, -) - - -_MODELQUEUEPOLICY = _descriptor.Descriptor( - name="ModelQueuePolicy", - full_name="inference.ModelQueuePolicy", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="timeout_action", - full_name="inference.ModelQueuePolicy.timeout_action", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="default_timeout_microseconds", - full_name="inference.ModelQueuePolicy.default_timeout_microseconds", - index=1, - number=2, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="allow_timeout_override", - full_name="inference.ModelQueuePolicy.allow_timeout_override", - index=2, - number=3, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="max_queue_size", - 
full_name="inference.ModelQueuePolicy.max_queue_size", - index=3, - number=4, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _MODELQUEUEPOLICY_TIMEOUTACTION, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3364, - serialized_end=3583, -) - - -_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY = _descriptor.Descriptor( - name="PriorityQueuePolicyEntry", - full_name="inference.ModelDynamicBatching.PriorityQueuePolicyEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelDynamicBatching.PriorityQueuePolicyEntry.key", - index=0, - number=1, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelDynamicBatching.PriorityQueuePolicyEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3910, - serialized_end=3997, -) - -_MODELDYNAMICBATCHING = _descriptor.Descriptor( - name="ModelDynamicBatching", - full_name="inference.ModelDynamicBatching", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="preferred_batch_size", - full_name="inference.ModelDynamicBatching.preferred_batch_size", - index=0, - number=1, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="max_queue_delay_microseconds", - full_name="inference.ModelDynamicBatching.max_queue_delay_microseconds", - index=1, - number=2, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="preserve_ordering", - full_name="inference.ModelDynamicBatching.preserve_ordering", - index=2, - number=3, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="priority_levels", - full_name="inference.ModelDynamicBatching.priority_levels", - index=3, - number=4, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="default_priority_level", - full_name="inference.ModelDynamicBatching.default_priority_level", - index=4, - number=5, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="default_queue_policy", - full_name="inference.ModelDynamicBatching.default_queue_policy", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="priority_queue_policy", - full_name="inference.ModelDynamicBatching.priority_queue_policy", - index=6, - number=7, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3586, - serialized_end=3997, -) - - -_MODELSEQUENCEBATCHING_CONTROL = _descriptor.Descriptor( - name="Control", - full_name="inference.ModelSequenceBatching.Control", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="kind", - full_name="inference.ModelSequenceBatching.Control.kind", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="int32_false_true", - full_name="inference.ModelSequenceBatching.Control.int32_false_true", - index=1, - number=2, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="fp32_false_true", - full_name="inference.ModelSequenceBatching.Control.fp32_false_true", - index=2, - number=3, - type=2, - cpp_type=6, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="data_type", - full_name="inference.ModelSequenceBatching.Control.data_type", - index=3, - number=4, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[ - _MODELSEQUENCEBATCHING_CONTROL_KIND, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4270, - serialized_end=4550, -) - -_MODELSEQUENCEBATCHING_CONTROLINPUT = 
_descriptor.Descriptor( - name="ControlInput", - full_name="inference.ModelSequenceBatching.ControlInput", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelSequenceBatching.ControlInput.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="control", - full_name="inference.ModelSequenceBatching.ControlInput.control", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4552, - serialized_end=4639, -) - -_MODELSEQUENCEBATCHING_STRATEGYDIRECT = _descriptor.Descriptor( - name="StrategyDirect", - full_name="inference.ModelSequenceBatching.StrategyDirect", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="max_queue_delay_microseconds", - full_name="inference.ModelSequenceBatching.StrategyDirect.max_queue_delay_microseconds", - index=0, - number=1, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="minimum_slot_utilization", - full_name="inference.ModelSequenceBatching.StrategyDirect.minimum_slot_utilization", - index=1, - number=2, - type=2, - cpp_type=6, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4641, - serialized_end=4729, -) - -_MODELSEQUENCEBATCHING_STRATEGYOLDEST = _descriptor.Descriptor( - name="StrategyOldest", - full_name="inference.ModelSequenceBatching.StrategyOldest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="max_candidate_sequences", - full_name="inference.ModelSequenceBatching.StrategyOldest.max_candidate_sequences", - index=0, - number=1, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="preferred_batch_size", - full_name="inference.ModelSequenceBatching.StrategyOldest.preferred_batch_size", - index=1, - number=2, - type=5, - cpp_type=1, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - 
name="max_queue_delay_microseconds", - full_name="inference.ModelSequenceBatching.StrategyOldest.max_queue_delay_microseconds", - index=2, - number=3, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4731, - serialized_end=4848, -) - -_MODELSEQUENCEBATCHING = _descriptor.Descriptor( - name="ModelSequenceBatching", - full_name="inference.ModelSequenceBatching", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="direct", - full_name="inference.ModelSequenceBatching.direct", - index=0, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="oldest", - full_name="inference.ModelSequenceBatching.oldest", - index=1, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="max_sequence_idle_microseconds", - full_name="inference.ModelSequenceBatching.max_sequence_idle_microseconds", - index=2, - number=1, - type=4, - cpp_type=4, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="control_input", - full_name="inference.ModelSequenceBatching.control_input", - index=3, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELSEQUENCEBATCHING_CONTROL, - _MODELSEQUENCEBATCHING_CONTROLINPUT, - _MODELSEQUENCEBATCHING_STRATEGYDIRECT, - _MODELSEQUENCEBATCHING_STRATEGYOLDEST, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="strategy_choice", - full_name="inference.ModelSequenceBatching.strategy_choice", - index=0, - containing_type=None, - fields=[], - ), - ], - serialized_start=4000, - serialized_end=4867, -) - - -_MODELENSEMBLING_STEP_INPUTMAPENTRY = _descriptor.Descriptor( - name="InputMapEntry", - full_name="inference.ModelEnsembling.Step.InputMapEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelEnsembling.Step.InputMapEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - 
name="value", - full_name="inference.ModelEnsembling.Step.InputMapEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5122, - serialized_end=5169, -) - -_MODELENSEMBLING_STEP_OUTPUTMAPENTRY = _descriptor.Descriptor( - name="OutputMapEntry", - full_name="inference.ModelEnsembling.Step.OutputMapEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelEnsembling.Step.OutputMapEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelEnsembling.Step.OutputMapEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5171, - serialized_end=5219, -) - -_MODELENSEMBLING_STEP = _descriptor.Descriptor( - name="Step", - full_name="inference.ModelEnsembling.Step", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="model_name", - full_name="inference.ModelEnsembling.Step.model_name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="model_version", - full_name="inference.ModelEnsembling.Step.model_version", - index=1, - number=2, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="input_map", - full_name="inference.ModelEnsembling.Step.input_map", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="output_map", - full_name="inference.ModelEnsembling.Step.output_map", - index=3, - number=4, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - 
nested_types=[ - _MODELENSEMBLING_STEP_INPUTMAPENTRY, - _MODELENSEMBLING_STEP_OUTPUTMAPENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4937, - serialized_end=5219, -) - -_MODELENSEMBLING = _descriptor.Descriptor( - name="ModelEnsembling", - full_name="inference.ModelEnsembling", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="step", - full_name="inference.ModelEnsembling.step", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELENSEMBLING_STEP, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4870, - serialized_end=5219, -) - - -_MODELPARAMETER = _descriptor.Descriptor( - name="ModelParameter", - full_name="inference.ModelParameter", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="string_value", - full_name="inference.ModelParameter.string_value", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5221, - serialized_end=5259, -) - - -_MODELWARMUP_INPUT = _descriptor.Descriptor( - name="Input", - full_name="inference.ModelWarmup.Input", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="data_type", - full_name="inference.ModelWarmup.Input.data_type", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="dims", - full_name="inference.ModelWarmup.Input.dims", - index=1, - number=2, - type=3, - cpp_type=2, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="zero_data", - full_name="inference.ModelWarmup.Input.zero_data", - index=2, - number=3, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="random_data", - full_name="inference.ModelWarmup.Input.random_data", - index=3, - number=4, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - 
name="input_data_file", - full_name="inference.ModelWarmup.Input.input_data_file", - index=4, - number=5, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="input_data_type", - full_name="inference.ModelWarmup.Input.input_data_type", - index=0, - containing_type=None, - fields=[], - ), - ], - serialized_start=5364, - serialized_end=5515, -) - -_MODELWARMUP_INPUTSENTRY = _descriptor.Descriptor( - name="InputsEntry", - full_name="inference.ModelWarmup.InputsEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelWarmup.InputsEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelWarmup.InputsEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5517, - serialized_end=5592, -) - -_MODELWARMUP = _descriptor.Descriptor( - name="ModelWarmup", - full_name="inference.ModelWarmup", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelWarmup.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="batch_size", - full_name="inference.ModelWarmup.batch_size", - index=1, - number=2, - type=13, - cpp_type=3, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="inputs", - full_name="inference.ModelWarmup.inputs", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELWARMUP_INPUT, - _MODELWARMUP_INPUTSENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5262, - serialized_end=5592, -) - - -_MODELOPERATIONS = _descriptor.Descriptor( - name="ModelOperations", - 
full_name="inference.ModelOperations", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="op_library_filename", - full_name="inference.ModelOperations.op_library_filename", - index=0, - number=1, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5594, - serialized_end=5640, -) - - -_MODELTRANSACTIONPOLICY = _descriptor.Descriptor( - name="ModelTransactionPolicy", - full_name="inference.ModelTransactionPolicy", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="decoupled", - full_name="inference.ModelTransactionPolicy.decoupled", - index=0, - number=1, - type=8, - cpp_type=7, - label=1, - has_default_value=False, - default_value=False, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=5642, - serialized_end=5685, -) - - -_MODELCONFIG_CCMODELFILENAMESENTRY = _descriptor.Descriptor( - name="CcModelFilenamesEntry", - full_name="inference.ModelConfig.CcModelFilenamesEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelConfig.CcModelFilenamesEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelConfig.CcModelFilenamesEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6691, - serialized_end=6746, -) - -_MODELCONFIG_METRICTAGSENTRY = _descriptor.Descriptor( - name="MetricTagsEntry", - full_name="inference.ModelConfig.MetricTagsEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelConfig.MetricTagsEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelConfig.MetricTagsEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - 
default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6748, - serialized_end=6797, -) - -_MODELCONFIG_PARAMETERSENTRY = _descriptor.Descriptor( - name="ParametersEntry", - full_name="inference.ModelConfig.ParametersEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="inference.ModelConfig.ParametersEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="inference.ModelConfig.ParametersEntry.value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=_b("8\001"), - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6799, - serialized_end=6875, -) - -_MODELCONFIG = _descriptor.Descriptor( - name="ModelConfig", - full_name="inference.ModelConfig", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="inference.ModelConfig.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="platform", - full_name="inference.ModelConfig.platform", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="backend", - full_name="inference.ModelConfig.backend", - index=2, - number=17, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="version_policy", - full_name="inference.ModelConfig.version_policy", - index=3, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="max_batch_size", - full_name="inference.ModelConfig.max_batch_size", - index=4, - number=4, - type=5, - cpp_type=1, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - 
containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="input", - full_name="inference.ModelConfig.input", - index=5, - number=5, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="output", - full_name="inference.ModelConfig.output", - index=6, - number=6, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="batch_input", - full_name="inference.ModelConfig.batch_input", - index=7, - number=20, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="batch_output", - full_name="inference.ModelConfig.batch_output", - index=8, - number=21, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="optimization", - full_name="inference.ModelConfig.optimization", - index=9, - number=12, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="dynamic_batching", - full_name="inference.ModelConfig.dynamic_batching", - index=10, - number=11, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="sequence_batching", - full_name="inference.ModelConfig.sequence_batching", - index=11, - number=13, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="ensemble_scheduling", - full_name="inference.ModelConfig.ensemble_scheduling", - index=12, - number=15, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="instance_group", - full_name="inference.ModelConfig.instance_group", - index=13, - number=7, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="default_model_filename", - 
full_name="inference.ModelConfig.default_model_filename", - index=14, - number=8, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="cc_model_filenames", - full_name="inference.ModelConfig.cc_model_filenames", - index=15, - number=9, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="metric_tags", - full_name="inference.ModelConfig.metric_tags", - index=16, - number=10, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="parameters", - full_name="inference.ModelConfig.parameters", - index=17, - number=14, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="model_warmup", - full_name="inference.ModelConfig.model_warmup", - index=18, - number=16, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="model_operations", - full_name="inference.ModelConfig.model_operations", - index=19, - number=18, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - _descriptor.FieldDescriptor( - name="model_transaction_policy", - full_name="inference.ModelConfig.model_transaction_policy", - index=20, - number=19, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ), - ], - extensions=[], - nested_types=[ - _MODELCONFIG_CCMODELFILENAMESENTRY, - _MODELCONFIG_METRICTAGSENTRY, - _MODELCONFIG_PARAMETERSENTRY, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="scheduling_choice", - full_name="inference.ModelConfig.scheduling_choice", - index=0, - containing_type=None, - fields=[], - ), - ], - serialized_start=5688, - serialized_end=6896, -) - -_MODELRATELIMITER_RESOURCE.containing_type = _MODELRATELIMITER -_MODELRATELIMITER.fields_by_name["resources"].message_type = _MODELRATELIMITER_RESOURCE -_MODELINSTANCEGROUP.fields_by_name["kind"].enum_type = _MODELINSTANCEGROUP_KIND -_MODELINSTANCEGROUP.fields_by_name["rate_limiter"].message_type = _MODELRATELIMITER -_MODELINSTANCEGROUP_KIND.containing_type = _MODELINSTANCEGROUP -_MODELINPUT.fields_by_name["data_type"].enum_type = 
_DATATYPE -_MODELINPUT.fields_by_name["format"].enum_type = _MODELINPUT_FORMAT -_MODELINPUT.fields_by_name["reshape"].message_type = _MODELTENSORRESHAPE -_MODELINPUT_FORMAT.containing_type = _MODELINPUT -_MODELOUTPUT.fields_by_name["data_type"].enum_type = _DATATYPE -_MODELOUTPUT.fields_by_name["reshape"].message_type = _MODELTENSORRESHAPE -_BATCHINPUT.fields_by_name["kind"].enum_type = _BATCHINPUT_KIND -_BATCHINPUT.fields_by_name["data_type"].enum_type = _DATATYPE -_BATCHINPUT_KIND.containing_type = _BATCHINPUT -_BATCHOUTPUT.fields_by_name["kind"].enum_type = _BATCHOUTPUT_KIND -_BATCHOUTPUT_KIND.containing_type = _BATCHOUTPUT -_MODELVERSIONPOLICY_LATEST.containing_type = _MODELVERSIONPOLICY -_MODELVERSIONPOLICY_ALL.containing_type = _MODELVERSIONPOLICY -_MODELVERSIONPOLICY_SPECIFIC.containing_type = _MODELVERSIONPOLICY -_MODELVERSIONPOLICY.fields_by_name["latest"].message_type = _MODELVERSIONPOLICY_LATEST -_MODELVERSIONPOLICY.fields_by_name["all"].message_type = _MODELVERSIONPOLICY_ALL -_MODELVERSIONPOLICY.fields_by_name["specific"].message_type = _MODELVERSIONPOLICY_SPECIFIC -_MODELVERSIONPOLICY.oneofs_by_name["policy_choice"].fields.append( - _MODELVERSIONPOLICY.fields_by_name["latest"] -) -_MODELVERSIONPOLICY.fields_by_name["latest"].containing_oneof = _MODELVERSIONPOLICY.oneofs_by_name[ - "policy_choice" -] -_MODELVERSIONPOLICY.oneofs_by_name["policy_choice"].fields.append( - _MODELVERSIONPOLICY.fields_by_name["all"] -) -_MODELVERSIONPOLICY.fields_by_name["all"].containing_oneof = _MODELVERSIONPOLICY.oneofs_by_name[ - "policy_choice" -] -_MODELVERSIONPOLICY.oneofs_by_name["policy_choice"].fields.append( - _MODELVERSIONPOLICY.fields_by_name["specific"] -) -_MODELVERSIONPOLICY.fields_by_name[ - "specific" -].containing_oneof = _MODELVERSIONPOLICY.oneofs_by_name["policy_choice"] -_MODELOPTIMIZATIONPOLICY_GRAPH.containing_type = _MODELOPTIMIZATIONPOLICY -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE.containing_type = ( - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC -) -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY.fields_by_name[ - "value" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY.containing_type = ( - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND -) -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.fields_by_name[ - "input" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND.containing_type = ( - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC -) -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY.fields_by_name[ - "value" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY.containing_type = ( - _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC -) -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.fields_by_name[ - "input" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.fields_by_name[ - "graph_lower_bound" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC.containing_type = _MODELOPTIMIZATIONPOLICY_CUDA -_MODELOPTIMIZATIONPOLICY_CUDA.fields_by_name[ - "graph_spec" -].message_type = _MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC -_MODELOPTIMIZATIONPOLICY_CUDA.containing_type = _MODELOPTIMIZATIONPOLICY -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY.containing_type = ( - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR -) 
-_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.fields_by_name[ - "parameters" -].message_type = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR.containing_type = ( - _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS -) -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.fields_by_name[ - "gpu_execution_accelerator" -].message_type = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.fields_by_name[ - "cpu_execution_accelerator" -].message_type = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS.containing_type = _MODELOPTIMIZATIONPOLICY -_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER.containing_type = _MODELOPTIMIZATIONPOLICY -_MODELOPTIMIZATIONPOLICY.fields_by_name["graph"].message_type = _MODELOPTIMIZATIONPOLICY_GRAPH -_MODELOPTIMIZATIONPOLICY.fields_by_name[ - "priority" -].enum_type = _MODELOPTIMIZATIONPOLICY_MODELPRIORITY -_MODELOPTIMIZATIONPOLICY.fields_by_name["cuda"].message_type = _MODELOPTIMIZATIONPOLICY_CUDA -_MODELOPTIMIZATIONPOLICY.fields_by_name[ - "execution_accelerators" -].message_type = _MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS -_MODELOPTIMIZATIONPOLICY.fields_by_name[ - "input_pinned_memory" -].message_type = _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER -_MODELOPTIMIZATIONPOLICY.fields_by_name[ - "output_pinned_memory" -].message_type = _MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER -_MODELOPTIMIZATIONPOLICY_MODELPRIORITY.containing_type = _MODELOPTIMIZATIONPOLICY -_MODELQUEUEPOLICY.fields_by_name["timeout_action"].enum_type = _MODELQUEUEPOLICY_TIMEOUTACTION -_MODELQUEUEPOLICY_TIMEOUTACTION.containing_type = _MODELQUEUEPOLICY -_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY.fields_by_name[ - "value" -].message_type = _MODELQUEUEPOLICY -_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY.containing_type = _MODELDYNAMICBATCHING -_MODELDYNAMICBATCHING.fields_by_name["default_queue_policy"].message_type = _MODELQUEUEPOLICY -_MODELDYNAMICBATCHING.fields_by_name[ - "priority_queue_policy" -].message_type = _MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY -_MODELSEQUENCEBATCHING_CONTROL.fields_by_name[ - "kind" -].enum_type = _MODELSEQUENCEBATCHING_CONTROL_KIND -_MODELSEQUENCEBATCHING_CONTROL.fields_by_name["data_type"].enum_type = _DATATYPE -_MODELSEQUENCEBATCHING_CONTROL.containing_type = _MODELSEQUENCEBATCHING -_MODELSEQUENCEBATCHING_CONTROL_KIND.containing_type = _MODELSEQUENCEBATCHING_CONTROL -_MODELSEQUENCEBATCHING_CONTROLINPUT.fields_by_name[ - "control" -].message_type = _MODELSEQUENCEBATCHING_CONTROL -_MODELSEQUENCEBATCHING_CONTROLINPUT.containing_type = _MODELSEQUENCEBATCHING -_MODELSEQUENCEBATCHING_STRATEGYDIRECT.containing_type = _MODELSEQUENCEBATCHING -_MODELSEQUENCEBATCHING_STRATEGYOLDEST.containing_type = _MODELSEQUENCEBATCHING -_MODELSEQUENCEBATCHING.fields_by_name["direct"].message_type = _MODELSEQUENCEBATCHING_STRATEGYDIRECT -_MODELSEQUENCEBATCHING.fields_by_name["oldest"].message_type = _MODELSEQUENCEBATCHING_STRATEGYOLDEST -_MODELSEQUENCEBATCHING.fields_by_name[ - "control_input" -].message_type = _MODELSEQUENCEBATCHING_CONTROLINPUT -_MODELSEQUENCEBATCHING.oneofs_by_name["strategy_choice"].fields.append( - _MODELSEQUENCEBATCHING.fields_by_name["direct"] -) -_MODELSEQUENCEBATCHING.fields_by_name[ - "direct" -].containing_oneof = _MODELSEQUENCEBATCHING.oneofs_by_name["strategy_choice"] -_MODELSEQUENCEBATCHING.oneofs_by_name["strategy_choice"].fields.append( - 
_MODELSEQUENCEBATCHING.fields_by_name["oldest"] -) -_MODELSEQUENCEBATCHING.fields_by_name[ - "oldest" -].containing_oneof = _MODELSEQUENCEBATCHING.oneofs_by_name["strategy_choice"] -_MODELENSEMBLING_STEP_INPUTMAPENTRY.containing_type = _MODELENSEMBLING_STEP -_MODELENSEMBLING_STEP_OUTPUTMAPENTRY.containing_type = _MODELENSEMBLING_STEP -_MODELENSEMBLING_STEP.fields_by_name["input_map"].message_type = _MODELENSEMBLING_STEP_INPUTMAPENTRY -_MODELENSEMBLING_STEP.fields_by_name[ - "output_map" -].message_type = _MODELENSEMBLING_STEP_OUTPUTMAPENTRY -_MODELENSEMBLING_STEP.containing_type = _MODELENSEMBLING -_MODELENSEMBLING.fields_by_name["step"].message_type = _MODELENSEMBLING_STEP -_MODELWARMUP_INPUT.fields_by_name["data_type"].enum_type = _DATATYPE -_MODELWARMUP_INPUT.containing_type = _MODELWARMUP -_MODELWARMUP_INPUT.oneofs_by_name["input_data_type"].fields.append( - _MODELWARMUP_INPUT.fields_by_name["zero_data"] -) -_MODELWARMUP_INPUT.fields_by_name["zero_data"].containing_oneof = _MODELWARMUP_INPUT.oneofs_by_name[ - "input_data_type" -] -_MODELWARMUP_INPUT.oneofs_by_name["input_data_type"].fields.append( - _MODELWARMUP_INPUT.fields_by_name["random_data"] -) -_MODELWARMUP_INPUT.fields_by_name[ - "random_data" -].containing_oneof = _MODELWARMUP_INPUT.oneofs_by_name["input_data_type"] -_MODELWARMUP_INPUT.oneofs_by_name["input_data_type"].fields.append( - _MODELWARMUP_INPUT.fields_by_name["input_data_file"] -) -_MODELWARMUP_INPUT.fields_by_name[ - "input_data_file" -].containing_oneof = _MODELWARMUP_INPUT.oneofs_by_name["input_data_type"] -_MODELWARMUP_INPUTSENTRY.fields_by_name["value"].message_type = _MODELWARMUP_INPUT -_MODELWARMUP_INPUTSENTRY.containing_type = _MODELWARMUP -_MODELWARMUP.fields_by_name["inputs"].message_type = _MODELWARMUP_INPUTSENTRY -_MODELCONFIG_CCMODELFILENAMESENTRY.containing_type = _MODELCONFIG -_MODELCONFIG_METRICTAGSENTRY.containing_type = _MODELCONFIG -_MODELCONFIG_PARAMETERSENTRY.fields_by_name["value"].message_type = _MODELPARAMETER -_MODELCONFIG_PARAMETERSENTRY.containing_type = _MODELCONFIG -_MODELCONFIG.fields_by_name["version_policy"].message_type = _MODELVERSIONPOLICY -_MODELCONFIG.fields_by_name["input"].message_type = _MODELINPUT -_MODELCONFIG.fields_by_name["output"].message_type = _MODELOUTPUT -_MODELCONFIG.fields_by_name["batch_input"].message_type = _BATCHINPUT -_MODELCONFIG.fields_by_name["batch_output"].message_type = _BATCHOUTPUT -_MODELCONFIG.fields_by_name["optimization"].message_type = _MODELOPTIMIZATIONPOLICY -_MODELCONFIG.fields_by_name["dynamic_batching"].message_type = _MODELDYNAMICBATCHING -_MODELCONFIG.fields_by_name["sequence_batching"].message_type = _MODELSEQUENCEBATCHING -_MODELCONFIG.fields_by_name["ensemble_scheduling"].message_type = _MODELENSEMBLING -_MODELCONFIG.fields_by_name["instance_group"].message_type = _MODELINSTANCEGROUP -_MODELCONFIG.fields_by_name["cc_model_filenames"].message_type = _MODELCONFIG_CCMODELFILENAMESENTRY -_MODELCONFIG.fields_by_name["metric_tags"].message_type = _MODELCONFIG_METRICTAGSENTRY -_MODELCONFIG.fields_by_name["parameters"].message_type = _MODELCONFIG_PARAMETERSENTRY -_MODELCONFIG.fields_by_name["model_warmup"].message_type = _MODELWARMUP -_MODELCONFIG.fields_by_name["model_operations"].message_type = _MODELOPERATIONS -_MODELCONFIG.fields_by_name["model_transaction_policy"].message_type = _MODELTRANSACTIONPOLICY -_MODELCONFIG.oneofs_by_name["scheduling_choice"].fields.append( - _MODELCONFIG.fields_by_name["dynamic_batching"] -) -_MODELCONFIG.fields_by_name["dynamic_batching"].containing_oneof = 
_MODELCONFIG.oneofs_by_name[ - "scheduling_choice" -] -_MODELCONFIG.oneofs_by_name["scheduling_choice"].fields.append( - _MODELCONFIG.fields_by_name["sequence_batching"] -) -_MODELCONFIG.fields_by_name["sequence_batching"].containing_oneof = _MODELCONFIG.oneofs_by_name[ - "scheduling_choice" -] -_MODELCONFIG.oneofs_by_name["scheduling_choice"].fields.append( - _MODELCONFIG.fields_by_name["ensemble_scheduling"] -) -_MODELCONFIG.fields_by_name["ensemble_scheduling"].containing_oneof = _MODELCONFIG.oneofs_by_name[ - "scheduling_choice" -] -DESCRIPTOR.message_types_by_name["ModelRateLimiter"] = _MODELRATELIMITER -DESCRIPTOR.message_types_by_name["ModelInstanceGroup"] = _MODELINSTANCEGROUP -DESCRIPTOR.message_types_by_name["ModelTensorReshape"] = _MODELTENSORRESHAPE -DESCRIPTOR.message_types_by_name["ModelInput"] = _MODELINPUT -DESCRIPTOR.message_types_by_name["ModelOutput"] = _MODELOUTPUT -DESCRIPTOR.message_types_by_name["BatchInput"] = _BATCHINPUT -DESCRIPTOR.message_types_by_name["BatchOutput"] = _BATCHOUTPUT -DESCRIPTOR.message_types_by_name["ModelVersionPolicy"] = _MODELVERSIONPOLICY -DESCRIPTOR.message_types_by_name["ModelOptimizationPolicy"] = _MODELOPTIMIZATIONPOLICY -DESCRIPTOR.message_types_by_name["ModelQueuePolicy"] = _MODELQUEUEPOLICY -DESCRIPTOR.message_types_by_name["ModelDynamicBatching"] = _MODELDYNAMICBATCHING -DESCRIPTOR.message_types_by_name["ModelSequenceBatching"] = _MODELSEQUENCEBATCHING -DESCRIPTOR.message_types_by_name["ModelEnsembling"] = _MODELENSEMBLING -DESCRIPTOR.message_types_by_name["ModelParameter"] = _MODELPARAMETER -DESCRIPTOR.message_types_by_name["ModelWarmup"] = _MODELWARMUP -DESCRIPTOR.message_types_by_name["ModelOperations"] = _MODELOPERATIONS -DESCRIPTOR.message_types_by_name["ModelTransactionPolicy"] = _MODELTRANSACTIONPOLICY -DESCRIPTOR.message_types_by_name["ModelConfig"] = _MODELCONFIG -DESCRIPTOR.enum_types_by_name["DataType"] = _DATATYPE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ModelRateLimiter = _reflection.GeneratedProtocolMessageType( - "ModelRateLimiter", - (_message.Message,), - dict( - Resource=_reflection.GeneratedProtocolMessageType( - "Resource", - (_message.Message,), - dict( - DESCRIPTOR=_MODELRATELIMITER_RESOURCE, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter.Resource) - ), - ), - DESCRIPTOR=_MODELRATELIMITER, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelRateLimiter) - ), -) -_sym_db.RegisterMessage(ModelRateLimiter) -_sym_db.RegisterMessage(ModelRateLimiter.Resource) - -ModelInstanceGroup = _reflection.GeneratedProtocolMessageType( - "ModelInstanceGroup", - (_message.Message,), - dict( - DESCRIPTOR=_MODELINSTANCEGROUP, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelInstanceGroup) - ), -) -_sym_db.RegisterMessage(ModelInstanceGroup) - -ModelTensorReshape = _reflection.GeneratedProtocolMessageType( - "ModelTensorReshape", - (_message.Message,), - dict( - DESCRIPTOR=_MODELTENSORRESHAPE, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelTensorReshape) - ), -) -_sym_db.RegisterMessage(ModelTensorReshape) - -ModelInput = _reflection.GeneratedProtocolMessageType( - "ModelInput", - (_message.Message,), - dict( - DESCRIPTOR=_MODELINPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelInput) - ), -) -_sym_db.RegisterMessage(ModelInput) - -ModelOutput = _reflection.GeneratedProtocolMessageType( - "ModelOutput", - 
(_message.Message,), - dict( - DESCRIPTOR=_MODELOUTPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOutput) - ), -) -_sym_db.RegisterMessage(ModelOutput) - -BatchInput = _reflection.GeneratedProtocolMessageType( - "BatchInput", - (_message.Message,), - dict( - DESCRIPTOR=_BATCHINPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.BatchInput) - ), -) -_sym_db.RegisterMessage(BatchInput) - -BatchOutput = _reflection.GeneratedProtocolMessageType( - "BatchOutput", - (_message.Message,), - dict( - DESCRIPTOR=_BATCHOUTPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.BatchOutput) - ), -) -_sym_db.RegisterMessage(BatchOutput) - -ModelVersionPolicy = _reflection.GeneratedProtocolMessageType( - "ModelVersionPolicy", - (_message.Message,), - dict( - Latest=_reflection.GeneratedProtocolMessageType( - "Latest", - (_message.Message,), - dict( - DESCRIPTOR=_MODELVERSIONPOLICY_LATEST, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Latest) - ), - ), - All=_reflection.GeneratedProtocolMessageType( - "All", - (_message.Message,), - dict( - DESCRIPTOR=_MODELVERSIONPOLICY_ALL, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.All) - ), - ), - Specific=_reflection.GeneratedProtocolMessageType( - "Specific", - (_message.Message,), - dict( - DESCRIPTOR=_MODELVERSIONPOLICY_SPECIFIC, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy.Specific) - ), - ), - DESCRIPTOR=_MODELVERSIONPOLICY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelVersionPolicy) - ), -) -_sym_db.RegisterMessage(ModelVersionPolicy) -_sym_db.RegisterMessage(ModelVersionPolicy.Latest) -_sym_db.RegisterMessage(ModelVersionPolicy.All) -_sym_db.RegisterMessage(ModelVersionPolicy.Specific) - -ModelOptimizationPolicy = _reflection.GeneratedProtocolMessageType( - "ModelOptimizationPolicy", - (_message.Message,), - dict( - Graph=_reflection.GeneratedProtocolMessageType( - "Graph", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_GRAPH, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Graph) - ), - ), - Cuda=_reflection.GeneratedProtocolMessageType( - "Cuda", - (_message.Message,), - dict( - GraphSpec=_reflection.GeneratedProtocolMessageType( - "GraphSpec", - (_message.Message,), - dict( - Shape=_reflection.GeneratedProtocolMessageType( - "Shape", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_SHAPE, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.Shape) - ), - ), - LowerBound=_reflection.GeneratedProtocolMessageType( - "LowerBound", - (_message.Message,), - dict( - InputEntry=_reflection.GeneratedProtocolMessageType( - "InputEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) - ), - ), - InputEntry=_reflection.GeneratedProtocolMessageType( - 
"InputEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda.GraphSpec) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_CUDA, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.Cuda) - ), - ), - ExecutionAccelerators=_reflection.GeneratedProtocolMessageType( - "ExecutionAccelerators", - (_message.Message,), - dict( - Accelerator=_reflection.GeneratedProtocolMessageType( - "Accelerator", - (_message.Message,), - dict( - ParametersEntry=_reflection.GeneratedProtocolMessageType( - "ParametersEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.ExecutionAccelerators) - ), - ), - PinnedMemoryBuffer=_reflection.GeneratedProtocolMessageType( - "PinnedMemoryBuffer", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY_PINNEDMEMORYBUFFER, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy.PinnedMemoryBuffer) - ), - ), - DESCRIPTOR=_MODELOPTIMIZATIONPOLICY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOptimizationPolicy) - ), -) -_sym_db.RegisterMessage(ModelOptimizationPolicy) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Graph) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.Shape) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.LowerBound.InputEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.Cuda.GraphSpec.InputEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators) -_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators.Accelerator) -_sym_db.RegisterMessage(ModelOptimizationPolicy.ExecutionAccelerators.Accelerator.ParametersEntry) -_sym_db.RegisterMessage(ModelOptimizationPolicy.PinnedMemoryBuffer) - -ModelQueuePolicy = _reflection.GeneratedProtocolMessageType( - "ModelQueuePolicy", - (_message.Message,), - dict( - DESCRIPTOR=_MODELQUEUEPOLICY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelQueuePolicy) - ), -) -_sym_db.RegisterMessage(ModelQueuePolicy) - -ModelDynamicBatching = _reflection.GeneratedProtocolMessageType( - "ModelDynamicBatching", - (_message.Message,), - dict( - PriorityQueuePolicyEntry=_reflection.GeneratedProtocolMessageType( - "PriorityQueuePolicyEntry", - (_message.Message,), - dict( - 
DESCRIPTOR=_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching.PriorityQueuePolicyEntry) - ), - ), - DESCRIPTOR=_MODELDYNAMICBATCHING, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelDynamicBatching) - ), -) -_sym_db.RegisterMessage(ModelDynamicBatching) -_sym_db.RegisterMessage(ModelDynamicBatching.PriorityQueuePolicyEntry) - -ModelSequenceBatching = _reflection.GeneratedProtocolMessageType( - "ModelSequenceBatching", - (_message.Message,), - dict( - Control=_reflection.GeneratedProtocolMessageType( - "Control", - (_message.Message,), - dict( - DESCRIPTOR=_MODELSEQUENCEBATCHING_CONTROL, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.Control) - ), - ), - ControlInput=_reflection.GeneratedProtocolMessageType( - "ControlInput", - (_message.Message,), - dict( - DESCRIPTOR=_MODELSEQUENCEBATCHING_CONTROLINPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.ControlInput) - ), - ), - StrategyDirect=_reflection.GeneratedProtocolMessageType( - "StrategyDirect", - (_message.Message,), - dict( - DESCRIPTOR=_MODELSEQUENCEBATCHING_STRATEGYDIRECT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyDirect) - ), - ), - StrategyOldest=_reflection.GeneratedProtocolMessageType( - "StrategyOldest", - (_message.Message,), - dict( - DESCRIPTOR=_MODELSEQUENCEBATCHING_STRATEGYOLDEST, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching.StrategyOldest) - ), - ), - DESCRIPTOR=_MODELSEQUENCEBATCHING, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelSequenceBatching) - ), -) -_sym_db.RegisterMessage(ModelSequenceBatching) -_sym_db.RegisterMessage(ModelSequenceBatching.Control) -_sym_db.RegisterMessage(ModelSequenceBatching.ControlInput) -_sym_db.RegisterMessage(ModelSequenceBatching.StrategyDirect) -_sym_db.RegisterMessage(ModelSequenceBatching.StrategyOldest) - -ModelEnsembling = _reflection.GeneratedProtocolMessageType( - "ModelEnsembling", - (_message.Message,), - dict( - Step=_reflection.GeneratedProtocolMessageType( - "Step", - (_message.Message,), - dict( - InputMapEntry=_reflection.GeneratedProtocolMessageType( - "InputMapEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELENSEMBLING_STEP_INPUTMAPENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.InputMapEntry) - ), - ), - OutputMapEntry=_reflection.GeneratedProtocolMessageType( - "OutputMapEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELENSEMBLING_STEP_OUTPUTMAPENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step.OutputMapEntry) - ), - ), - DESCRIPTOR=_MODELENSEMBLING_STEP, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling.Step) - ), - ), - DESCRIPTOR=_MODELENSEMBLING, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelEnsembling) - ), -) -_sym_db.RegisterMessage(ModelEnsembling) -_sym_db.RegisterMessage(ModelEnsembling.Step) -_sym_db.RegisterMessage(ModelEnsembling.Step.InputMapEntry) -_sym_db.RegisterMessage(ModelEnsembling.Step.OutputMapEntry) - -ModelParameter = _reflection.GeneratedProtocolMessageType( - "ModelParameter", - 
(_message.Message,), - dict( - DESCRIPTOR=_MODELPARAMETER, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelParameter) - ), -) -_sym_db.RegisterMessage(ModelParameter) - -ModelWarmup = _reflection.GeneratedProtocolMessageType( - "ModelWarmup", - (_message.Message,), - dict( - Input=_reflection.GeneratedProtocolMessageType( - "Input", - (_message.Message,), - dict( - DESCRIPTOR=_MODELWARMUP_INPUT, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelWarmup.Input) - ), - ), - InputsEntry=_reflection.GeneratedProtocolMessageType( - "InputsEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELWARMUP_INPUTSENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelWarmup.InputsEntry) - ), - ), - DESCRIPTOR=_MODELWARMUP, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelWarmup) - ), -) -_sym_db.RegisterMessage(ModelWarmup) -_sym_db.RegisterMessage(ModelWarmup.Input) -_sym_db.RegisterMessage(ModelWarmup.InputsEntry) - -ModelOperations = _reflection.GeneratedProtocolMessageType( - "ModelOperations", - (_message.Message,), - dict( - DESCRIPTOR=_MODELOPERATIONS, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelOperations) - ), -) -_sym_db.RegisterMessage(ModelOperations) - -ModelTransactionPolicy = _reflection.GeneratedProtocolMessageType( - "ModelTransactionPolicy", - (_message.Message,), - dict( - DESCRIPTOR=_MODELTRANSACTIONPOLICY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelTransactionPolicy) - ), -) -_sym_db.RegisterMessage(ModelTransactionPolicy) - -ModelConfig = _reflection.GeneratedProtocolMessageType( - "ModelConfig", - (_message.Message,), - dict( - CcModelFilenamesEntry=_reflection.GeneratedProtocolMessageType( - "CcModelFilenamesEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELCONFIG_CCMODELFILENAMESENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelConfig.CcModelFilenamesEntry) - ), - ), - MetricTagsEntry=_reflection.GeneratedProtocolMessageType( - "MetricTagsEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELCONFIG_METRICTAGSENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelConfig.MetricTagsEntry) - ), - ), - ParametersEntry=_reflection.GeneratedProtocolMessageType( - "ParametersEntry", - (_message.Message,), - dict( - DESCRIPTOR=_MODELCONFIG_PARAMETERSENTRY, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelConfig.ParametersEntry) - ), - ), - DESCRIPTOR=_MODELCONFIG, - __module__="model_config_pb2" - # @@protoc_insertion_point(class_scope:inference.ModelConfig) - ), -) -_sym_db.RegisterMessage(ModelConfig) -_sym_db.RegisterMessage(ModelConfig.CcModelFilenamesEntry) -_sym_db.RegisterMessage(ModelConfig.MetricTagsEntry) -_sym_db.RegisterMessage(ModelConfig.ParametersEntry) - - -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_LOWERBOUND_INPUTENTRY._options = None -_MODELOPTIMIZATIONPOLICY_CUDA_GRAPHSPEC_INPUTENTRY._options = None -_MODELOPTIMIZATIONPOLICY_EXECUTIONACCELERATORS_ACCELERATOR_PARAMETERSENTRY._options = None -_MODELDYNAMICBATCHING_PRIORITYQUEUEPOLICYENTRY._options = None -_MODELENSEMBLING_STEP_INPUTMAPENTRY._options = None -_MODELENSEMBLING_STEP_OUTPUTMAPENTRY._options = None -_MODELWARMUP_INPUTSENTRY._options = None -_MODELCONFIG_CCMODELFILENAMESENTRY._options = None -_MODELCONFIG_METRICTAGSENTRY._options = None 
-_MODELCONFIG_PARAMETERSENTRY._options = None -# @@protoc_insertion_point(module_scope) diff --git a/merlin/systems/triton/oprunner_model.py b/merlin/systems/triton/oprunner_model.py deleted file mode 100644 index 1b5e36e27cb..00000000000 --- a/merlin/systems/triton/oprunner_model.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import json -import sys -import traceback -from typing import List - -import triton_python_backend_utils as pb_utils -from triton_python_backend_utils import ( - InferenceRequest, - InferenceResponse, - Tensor, - get_input_tensor_by_name, -) - -from merlin.systems.dag.op_runner import OperatorRunner -from merlin.systems.dag.ops.operator import InferenceDataFrame - - -class TritonPythonModel: - def initialize(self, args): - self.model_config = json.loads(args["model_config"]) - self.runner = OperatorRunner(self.model_config) - - def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]: - params = self.model_config["parameters"] - op_names = json.loads(params["operator_names"]["string_value"]) - first_operator_name = op_names[0] - operator_params = json.loads(params[first_operator_name]["string_value"]) - input_column_names = list(json.loads(operator_params["input_dict"]).keys()) - - responses = [] - - for request in requests: - try: - # transform the triton tensors to a dict of name:numpy tensor - input_tensors = { - name: get_input_tensor_by_name(request, name).as_numpy() - for name in input_column_names - } - - inf_df = InferenceDataFrame(input_tensors) - - raw_tensor_tuples = self.runner.execute(inf_df) - - tensors = { - name: (data.get() if hasattr(data, "get") else data) - for name, data in raw_tensor_tuples - } - - result = [Tensor(name, data) for name, data in tensors.items()] - - responses.append(InferenceResponse(result)) - - except Exception: # pylint: disable=broad-except - exc_type, exc_value, exc_traceback = sys.exc_info() - tb_string = repr(traceback.extract_tb(exc_traceback)) - responses.append( - pb_utils.InferenceResponse( - tensors=[], error=f"{exc_type}, {exc_value}, {tb_string}" - ) - ) - - return responses diff --git a/merlin/systems/triton/workflow_model.py b/merlin/systems/triton/workflow_model.py deleted file mode 100644 index a0de2514b91..00000000000 --- a/merlin/systems/triton/workflow_model.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import json -import os -from typing import List - -from triton_python_backend_utils import ( - InferenceRequest, - InferenceResponse, - Tensor, - get_input_tensor_by_name, - get_output_config_by_name, - triton_string_to_numpy, -) - -import nvtabular -from merlin.core.dispatch import is_list_dtype -from merlin.systems.triton import _convert_tensor -from merlin.systems.workflow.hugectr import HugeCTRWorkflowRunner -from merlin.systems.workflow.pytorch import PyTorchWorkflowRunner -from merlin.systems.workflow.tensorflow import TensorflowWorkflowRunner - - -class TritonPythonModel: - def initialize(self, args): - # Arg parsing - workflow_path = os.path.join( - args["model_repository"], str(args["model_version"]), "workflow" - ) - model_device = args["model_instance_kind"] - - # Workflow instantiation - self.workflow = nvtabular.Workflow.load(workflow_path) - - # Config loading and parsing - self.model_config = json.loads(args["model_config"]) - model_framework = self.model_config["parameters"]["output_model"]["string_value"] - - # Dtype parsing - input_dtypes = self.workflow.input_dtypes.items() - self.input_dtypes, self.input_multihots = _parse_input_dtypes(input_dtypes) - - self.output_dtypes = dict() - for col_name, col_schema in self.workflow.output_schema.column_schemas.items(): - if col_schema.is_list and col_schema.is_ragged: - self._set_output_dtype(col_name + "__nnzs") - self._set_output_dtype(col_name + "__values") - else: - self._set_output_dtype(col_name) - - if model_framework == "hugectr": - runner_class = HugeCTRWorkflowRunner - elif model_framework == "pytorch": - runner_class = PyTorchWorkflowRunner - else: - runner_class = TensorflowWorkflowRunner - - self.runner = runner_class( - self.workflow, self.output_dtypes, self.model_config, model_device - ) - - def _set_output_dtype(self, name): - conf = get_output_config_by_name(self.model_config, name) - self.output_dtypes[name] = triton_string_to_numpy(conf["data_type"]) - - def execute(self, requests: List[InferenceRequest]) -> List[InferenceResponse]: - """Transforms the input batches by running through a NVTabular workflow.transform - function. - """ - responses = [] - for request in requests: - # transform the triton tensors to a dict of name:numpy tensor - input_tensors = { - name: _convert_tensor(get_input_tensor_by_name(request, name)) - for name in self.input_dtypes - } - - # multihots are represented as a tuple of (values, offsets) - for name, dtype in self.input_multihots.items(): - values = _convert_tensor(get_input_tensor_by_name(request, name + "__values")) - offsets = _convert_tensor(get_input_tensor_by_name(request, name + "__nnzs")) - input_tensors[name] = (values, offsets) - - raw_tensor_tuples = self.runner.run_workflow(input_tensors) - - result = [Tensor(name, data) for name, data in raw_tensor_tuples] - - responses.append(InferenceResponse(result)) - - return responses - - -def _parse_input_dtypes(dtypes): - input_dtypes = {col: dtype for col, dtype in dtypes if not is_list_dtype(dtype)} - input_multihots = {col: dtype for col, dtype in dtypes if is_list_dtype(dtype)} - - return input_dtypes, input_multihots diff --git a/merlin/systems/workflow/__init__.py b/merlin/systems/workflow/__init__.py deleted file mode 100644 index c1bff8b30ac..00000000000 --- a/merlin/systems/workflow/__init__.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from merlin.schema import Tags - - -def get_embedding_sizes(source, output_dtypes=None): - """Returns a dictionary of embedding sizes from a workflow or workflow_node - - Parameters - ---------- - source : Workflow or ColumnSelector - Either a nvtabular Workflow or ColumnSelector object that we should use to find - embedding sizes - output_dtypes : dict, optional - Optional dictionary of column_name:dtype. If passing a workflow object dtypes - will be read from the workflow. This is used to figure out which columns - are multihot-categorical, which are split out by this function. If passed a workflow_node - and this parameter isn't set, you won't have multihot columns returned separately - """ - # TODO: do we need to distinguish multihot columns here? (if so why? ) - - # have to lazy import Workflow to avoid circular import errors - from nvtabular.workflow import Workflow - - output_node = source.output_node if isinstance(source, Workflow) else source - - if isinstance(source, Workflow): - output_dtypes = output_dtypes or source.output_dtypes - else: - # passed in a column group - output_dtypes = output_dtypes or {} - - output = {} - multihot_columns = set() - cats_schema = output_node.output_schema.select_by_tag(Tags.CATEGORICAL) - for col_name, col_schema in cats_schema.column_schemas.items(): - if col_schema.dtype and col_schema.is_list and col_schema.is_ragged: - # multi hot so remove from output and add to multihot - multihot_columns.add(col_name) - - embeddings_sizes = col_schema.properties.get("embedding_sizes", {}) - cardinality = embeddings_sizes["cardinality"] - dimensions = embeddings_sizes["dimension"] - output[col_name] = (cardinality, dimensions) - - # TODO: returning different return types like this (based off the presence - # of multihot features) is pretty janky. fix. - if not multihot_columns: - return output - - single_hots = {k: v for k, v in output.items() if k not in multihot_columns} - multi_hots = {k: v for k, v in output.items() if k in multihot_columns} - return single_hots, multi_hots diff --git a/merlin/systems/workflow/base.py b/merlin/systems/workflow/base.py deleted file mode 100644 index b19e576df62..00000000000 --- a/merlin/systems/workflow/base.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import functools -import json -import logging -from abc import ABC, abstractmethod - -import numpy as np - -from merlin.core.dispatch import concat_columns -from merlin.dag import ColumnSelector, Supports -from merlin.schema import Tags -from merlin.systems.triton.conversions import convert_format - -LOG = logging.getLogger("merlin-systems") - - -class WorkflowRunner(ABC): - def __init__(self, workflow, output_dtypes, model_config, model_device): - self.workflow = workflow - self.output_dtypes = output_dtypes - self.model_config = model_config - self.device = model_device - - output_schema = self.workflow.output_schema - - schema_cats = output_schema.apply(ColumnSelector(tags=[Tags.CATEGORICAL])).column_names - schema_conts = output_schema.apply(ColumnSelector(tags=[Tags.CONTINUOUS])).column_names - - mc_cats = json.loads(self._get_param(model_config, "cats", "string_value", default="[]")) - mc_conts = json.loads(self._get_param(model_config, "conts", "string_value", default="[]")) - - self.cats = mc_cats or schema_cats - self.conts = mc_conts or schema_conts - - workflow_outputs = set(workflow.output_schema.column_names) - requested_cols = set(self.cats + self.conts) - missing_cols = requested_cols - workflow_outputs - extra_cols = workflow_outputs - requested_cols - - if missing_cols: - raise ValueError( - f"The following columns were not found in the workflow's output: {missing_cols}" - ) - if extra_cols: - raise ValueError( - f"The following extra columns were found in the workflow's output: {extra_cols}" - ) - - # recurse over all column groups, initializing operators for inference pipeline - self._initialize_ops(self.workflow.output_node) - - def _initialize_ops(self, workflow_node, visited=None): - if visited is None: - visited = set() - - if workflow_node.op and hasattr(workflow_node.op, "inference_initialize"): - inference_op = workflow_node.op.inference_initialize( - workflow_node.selector, self.model_config - ) - if inference_op: - workflow_node.op = inference_op - - supported = workflow_node.op.supports - - # if we're running on the CPU only, mask off support for GPU data formats - if self.device == "CPU": - supported = functools.reduce( - lambda a, b: a | b, - (v for v in list(Supports) if v & supported and "CPU" in str(v)), - ) - # the 'supports' property is readonly, and we can't always attach a new property - # to some of the operators (C++ categorify etc). 
set on the workflow_node instead - workflow_node.inference_supports = supported - - for parent in workflow_node.parents_with_dependencies: - if parent not in visited: - visited.add(parent) - self._initialize_ops(parent, visited) - - def run_workflow(self, input_tensors): - # use our NVTabular workflow to transform the dataset - transformed, kind = self._transform_tensors(input_tensors, self.workflow.output_node) - - # if we don't have tensors in numpy format, convert back so that the we can return - # to triton - if kind != Supports.CPU_DICT_ARRAY: - transformed, kind = convert_format(transformed, kind, Supports.CPU_DICT_ARRAY) - - # convert to the format expected by the DL models - return self._transform_outputs(transformed) - - @abstractmethod - def _transform_outputs(self, tensors): - pass - - def _convert_to_np(self, columns, tensors, dtype, rows): - """converts outputs to a numpy input compatible with pytorch""" - d = np.empty((rows, len(columns)), dtype=dtype) - for i, name in enumerate(columns): - d[:, i] = tensors[name].astype(dtype) - return d - - def _transform_tensors(self, input_tensors, workflow_node): - upstream_inputs = [] - - # Gather inputs from the parents and dependency nodes - if workflow_node.parents_with_dependencies: - for parent in workflow_node.parents_with_dependencies: - upstream_tensors, upstream_kind = self._transform_tensors(input_tensors, parent) - if upstream_tensors is not None and upstream_kind: - upstream_inputs.append((upstream_tensors, upstream_kind)) - - # Gather additional input columns from the original input tensors - if workflow_node.selector: - selector_columns = workflow_node.selector.names - to_remove = [] - for upstream_tensors, upstream_kind in upstream_inputs: - for col in selector_columns: - if col in upstream_tensors: - to_remove.append(col) - for col in set(to_remove): - selector_columns.remove(col) - - if selector_columns: - selected_tensors = {c: input_tensors[c] for c in selector_columns} - selected_kinds = Supports.CPU_DICT_ARRAY - upstream_inputs.append((selected_tensors, selected_kinds)) - - # Standardize the formats - tensors, kind = None, None - for upstream_tensors, upstream_kind in upstream_inputs: - if tensors is None: - tensors, kind = upstream_tensors, upstream_kind - else: - if kind != upstream_kind: - # we have multiple different kinds of data here (dataframe/array on cpu/gpu) - # we need to convert to a common format here first before concatenating. 
- op = workflow_node.op - if op and hasattr(op, "inference_supports"): - target_kind = op.inference_supports - else: - target_kind = Supports.CPU_DICT_ARRAY - # note : the 2nd convert_format call needs to be stricter in what the kind is - # (exact match rather than a bitmask of values) - tensors, kind = convert_format(tensors, kind, target_kind) - upstream_tensors, _ = convert_format(upstream_tensors, upstream_kind, kind) - - tensors = self.concat_tensors([tensors, upstream_tensors], kind) - - # Run the transform - if tensors is not None and kind and workflow_node.op: - try: - # if the op doesn't support the current kind - we need to convert - if ( - hasattr(workflow_node, "inference_supports") - and not workflow_node.inference_supports & kind - ): - tensors, kind = convert_format(tensors, kind, workflow_node.inference_supports) - - tensors = workflow_node.op.transform( - workflow_node.input_columns, - tensors, - ) - - except Exception: - LOG.exception("Failed to transform operator %s", workflow_node.op) - raise - - return tensors, kind - - def concat_tensors(self, tensors, kind): - if kind & (Supports.GPU_DATAFRAME | Supports.CPU_DATAFRAME): - return concat_columns(tensors) - else: - output = tensors[0] - for tensor in tensors[1:]: - output.update(tensor) - return output - - def _get_param(self, config, *args, default=None): - config_element = config["parameters"] - for key in args: - config_element = config_element.get(key, {}) - return config_element or default diff --git a/merlin/systems/workflow/hugectr.py b/merlin/systems/workflow/hugectr.py deleted file mode 100644 index 5d10f79b23f..00000000000 --- a/merlin/systems/workflow/hugectr.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import numpy as np - -from merlin.systems.workflow import get_embedding_sizes -from merlin.systems.workflow.base import WorkflowRunner - - -class HugeCTRWorkflowRunner(WorkflowRunner): - def __init__(self, workflow, output_dtypes, model_config, model_device): - super().__init__(workflow, output_dtypes, model_config, model_device) - - if self.cats: - self.offsets = self.get_offsets(self.workflow, self.cats) - - def _transform_outputs(self, tensors): - output_tensors = [] - if self.conts: - output_tensors.append( - ( - "DES", - self._convert(self.conts, tensors, np.float32), - ) - ) - else: - output_tensors.append(("DES", np.array([[]], np.float32))) - - if self.cats: - for name in self.cats: - tensors[name] += self.offsets[name] - cats_np = self._convert(self.cats, tensors, np.int64) - output_tensors.append( - ( - "CATCOLUMN", - cats_np, - ) - ) - else: - output_tensors.append(("CATCOLUMN", np.array([[]], np.int64))) - - len_cats_np = cats_np.shape[1] - row_index = np.arange(len_cats_np + 1, dtype=np.int32).reshape(1, len_cats_np + 1) - output_tensors.append(("ROWINDEX", row_index)) - - return output_tensors - - def _convert(self, columns, tensors, dtype): - """converts outputs to a numpy input compatible with hugectr""" - rows = max(len(tensors[name]) for name in columns) - d = self._convert_to_np(columns, tensors, dtype, rows) - return d.reshape(1, len(columns) * rows) - - def get_offsets(self, workflow, categorical_cols): - embeddings = get_embedding_sizes(workflow) - if embeddings is None: - raise Exception("embeddings cannot be None") - else: - offsets = dict() - curr_offset = 0 - for name in categorical_cols: - offsets[name] = curr_offset - curr_offset += embeddings[name][0] - return offsets diff --git a/merlin/systems/workflow/pytorch.py b/merlin/systems/workflow/pytorch.py deleted file mode 100644 index 2475fce02b6..00000000000 --- a/merlin/systems/workflow/pytorch.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -from merlin.systems.workflow.base import WorkflowRunner - - -class PyTorchWorkflowRunner(WorkflowRunner): - def _transform_outputs(self, tensors): - output_tensors = [] - for col_name in self.cats + self.conts: - output_tensors.append( - ( - col_name, - self._convert([col_name], tensors, self.workflow.output_dtypes[col_name]), - ) - ) - - return output_tensors - - def _convert(self, columns, tensors, dtype): - """converts outputs to a numpy input compatible with pytorch""" - rows = max(len(tensors[name]) for name in columns) - return self._convert_to_np(columns, tensors, dtype, rows) diff --git a/merlin/systems/workflow/tensorflow.py b/merlin/systems/workflow/tensorflow.py deleted file mode 100644 index 05a7b2e7e94..00000000000 --- a/merlin/systems/workflow/tensorflow.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import json - -from merlin.systems.workflow.base import WorkflowRunner - - -class TensorflowWorkflowRunner(WorkflowRunner): - def __init__(self, workflow, output_dtypes, model_config, model_device): - super().__init__(workflow, output_dtypes, model_config, model_device) - - self.offsets = None - - def _transform_outputs(self, tensors): - # Load extra info needed for the Transformer4Rec (if exists) - sparse_feat = None - params = self.model_config["parameters"] - if "sparse_max" in params.keys(): - sparse_feat = json.loads(self.model_config["parameters"]["sparse_max"]["string_value"]) - # transforms outputs for both pytorch and tensorflow - output_tensors = [] - for name in self.cats + self.conts: - value = tensors[name] - if sparse_feat and name in sparse_feat.keys(): - # convert sparse tensors to dense representations - d = value[0].astype(self.output_dtypes[name]) - col_dim = sparse_feat[name] - row_dim = d.shape[0] // col_dim - d = d.reshape(row_dim, col_dim) - output_tensors.append((name, d)) - elif isinstance(value, tuple): - # convert list values to match TF dataloader - values = value[0].astype(self.output_dtypes[name + "__values"]) - values = values.reshape(len(values), 1) - output_tensors.append((name + "__values", values)) - - offsets = value[1].astype(self.output_dtypes[name + "__nnzs"]) - nnzs = offsets[1:] - offsets[:-1] - nnzs = nnzs.reshape(len(nnzs), 1) - output_tensors.append((name + "__nnzs", nnzs)) - else: - d = value.astype(self.output_dtypes[name]) - d = d.reshape(len(d), 1) - output_tensors.append((name, d)) - return output_tensors diff --git a/tests/unit/systems/__init__.py b/tests/unit/systems/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/unit/systems/inf_test_ops.py b/tests/unit/systems/inf_test_ops.py deleted file mode 100644 index e6bfeb61312..00000000000 --- a/tests/unit/systems/inf_test_ops.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -inf_op = pytest.importorskip("merlin.systems.dag.ops.operator") - - -class PlusTwoOp(inf_op.PipelineableInferenceOperator): - def transform(self, df: inf_op.InferenceDataFrame) -> inf_op.InferenceDataFrame: - focus_df = df - new_df = inf_op.InferenceDataFrame() - - for name, data in focus_df: - new_df.tensors[f"{name}_plus_2"] = data + 2 - - return new_df - - def column_mapping(self, col_selector): - column_mapping = {} - for col_name in col_selector.names: - column_mapping[f"{col_name}_plus_2"] = [col_name] - return column_mapping - - @classmethod - def from_config(cls, config): - return PlusTwoOp() diff --git a/tests/unit/systems/inference_utils.py b/tests/unit/systems/inference_utils.py deleted file mode 100644 index e0d4894c398..00000000000 --- a/tests/unit/systems/inference_utils.py +++ /dev/null @@ -1,81 +0,0 @@ -from distutils.spawn import find_executable - -import pytest - -torch = pytest.importorskip("torch") # noqa -loader_tf_utils = pytest.importorskip("nvtabular.loader.tf_utils") # noqa -loader_tf_utils.configure_tensorflow() - -import nvtabular.framework_utils.tensorflow.layers as layers # noqa -from nvtabular.framework_utils.torch.models import Model # noqa - -triton = pytest.importorskip("merlin.systems.triton") -data_conversions = pytest.importorskip("merlin.systems.triton.conversions") - -tritonclient = pytest.importorskip("tritonclient") -grpcclient = pytest.importorskip("tritonclient.grpc") - -TRITON_SERVER_PATH = find_executable("tritonserver") -from tests.unit.test_triton_inference import run_triton_server # noqa - -tf = pytest.importorskip("tensorflow") - - 
-def create_tf_model(cat_columns: list, cat_mh_columns: list, embed_tbl_shapes: dict): - inputs = {} # tf.keras.Input placeholders for each feature to be used - emb_layers = [] # output of all embedding layers, which will be concatenated - for col in cat_columns: - inputs[col] = tf.keras.Input(name=col, dtype=tf.int64, shape=(1,)) - # Note that we need two input tensors for multi-hot categorical features - for col in cat_mh_columns: - inputs[col] = ( - tf.keras.Input(name=f"{col}__values", dtype=tf.int64, shape=(1,)), - tf.keras.Input(name=f"{col}__nnzs", dtype=tf.int64, shape=(1,)), - ) - for col in cat_columns + cat_mh_columns: - emb_layers.append( - tf.feature_column.embedding_column( - tf.feature_column.categorical_column_with_identity( - col, embed_tbl_shapes[col][0] - ), # Input dimension (vocab size) - embed_tbl_shapes[col][1], # Embedding output dimension - ) - ) - emb_layer = layers.DenseFeatures(emb_layers) - x_emb_output = emb_layer(inputs) - x = tf.keras.layers.Dense(128, activation="relu")(x_emb_output) - x = tf.keras.layers.Dense(128, activation="relu")(x) - x = tf.keras.layers.Dense(128, activation="relu")(x) - x = tf.keras.layers.Dense(1, activation="sigmoid", name="output")(x) - - model = tf.keras.Model(inputs=inputs, outputs=x) - model.compile("sgd", "binary_crossentropy") - return model - - -def create_pytorch_model(cat_columns: list, cat_mh_columns: list, embed_tbl_shapes: dict): - single_hot = {k: v for k, v in embed_tbl_shapes.items() if k in cat_columns} - multi_hot = {k: v for k, v in embed_tbl_shapes.items() if k in cat_mh_columns} - model = Model( - embedding_table_shapes=(single_hot, multi_hot), - num_continuous=0, - emb_dropout=0.0, - layer_hidden_dims=[128, 128, 128], - layer_dropout_rates=[0.0, 0.0, 0.0], - ).to("cuda") - return model - - -def _run_ensemble_on_tritonserver( - tmpdir, - output_columns, - df, - model_name, -): - inputs = triton.convert_df_to_triton_input(df.columns, df) - outputs = [grpcclient.InferRequestedOutput(col) for col in output_columns] - response = None - with run_triton_server(tmpdir) as client: - response = client.infer(model_name, inputs, outputs=outputs) - - return response diff --git a/tests/unit/systems/test_ensemble.py b/tests/unit/systems/test_ensemble.py deleted file mode 100644 index 6a8d5cc7c6b..00000000000 --- a/tests/unit/systems/test_ensemble.py +++ /dev/null @@ -1,236 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-import os
-from distutils.spawn import find_executable
-
-import pytest
-
-os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
-
-from google.protobuf import text_format  # noqa
-
-from merlin.core.dispatch import make_df  # noqa
-from merlin.dag import ColumnSelector  # noqa
-from merlin.dag.node import postorder_iter_nodes  # noqa
-from merlin.dag.ops.concat_columns import ConcatColumns  # noqa
-from merlin.dag.ops.selection import SelectionOp  # noqa
-from merlin.schema import Tags  # noqa
-from nvtabular import Workflow  # noqa
-from nvtabular import ops as wf_ops  # noqa
-
-loader_tf_utils = pytest.importorskip("nvtabular.loader.tf_utils")
-
-# everything tensorflow related must be imported after this.
-loader_tf_utils.configure_tensorflow()
-tf = pytest.importorskip("tensorflow")
-
-triton = pytest.importorskip("merlin.systems.triton")
-export = pytest.importorskip("merlin.systems.dag.ensemble")
-
-from merlin.systems.dag.ensemble import Ensemble  # noqa
-from merlin.systems.dag.ops.tensorflow import PredictTensorflow  # noqa
-from merlin.systems.dag.ops.workflow import TransformWorkflow  # noqa
-from tests.unit.systems.inf_test_ops import PlusTwoOp  # noqa
-from tests.unit.systems.inference_utils import (  # noqa
-    _run_ensemble_on_tritonserver,
-    create_tf_model,
-)
-
-tritonclient = pytest.importorskip("tritonclient")
-import merlin.systems.triton.model_config_pb2 as model_config  # noqa
-
-grpcclient = pytest.importorskip("tritonclient.grpc")
-
-TRITON_SERVER_PATH = find_executable("tritonserver")
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_workflow_tf_e2e_config_verification(tmpdir, dataset, engine):
-    # Create a Workflow
-    schema = dataset.schema
-    for name in ["x", "y", "id"]:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-    selector = ColumnSelector(["x", "y", "id"])
-
-    workflow_ops = selector >> wf_ops.Rename(postfix="_nvt")
-    workflow = Workflow(workflow_ops["x_nvt"])
-    workflow.fit(dataset)
-
-    # Create Tensorflow Model
-    model = tf.keras.models.Sequential(
-        [
-            tf.keras.Input(name="x_nvt", dtype=tf.float64, shape=(1,)),
-            tf.keras.layers.Dense(16, activation="relu"),
-            tf.keras.layers.Dropout(0.2),
-            tf.keras.layers.Dense(1, name="output"),
-        ]
-    )
-    model.compile(
-        optimizer="adam",
-        loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
-        metrics=[tf.metrics.SparseCategoricalAccuracy()],
-    )
-
-    # Creating Triton Ensemble
-    triton_chain = (
-        selector >> TransformWorkflow(workflow, cats=["x_nvt"]) >> PredictTensorflow(model)
-    )
-    triton_ens = Ensemble(triton_chain, schema)
-
-    # Creating Triton Ensemble Config
-    ensemble_config, node_configs = triton_ens.export(str(tmpdir))
-
-    config_path = tmpdir / "ensemble_model" / "config.pbtxt"
-
-    # Checking Triton Ensemble Config
-    with open(config_path, "rb") as f:
-        config = model_config.ModelConfig()
-        raw_config = f.read()
-        parsed = text_format.Parse(raw_config, config)
-
-        # The config file contents are correct
-        assert parsed.name == "ensemble_model"
-        assert parsed.platform == "ensemble"
-        assert hasattr(parsed, "ensemble_scheduling")
-
-    df = make_df({"x": [1.0, 2.0, 3.0], "y": [4.0, 5.0, 6.0], "id": [7, 8, 9]})
-
-    output_columns = triton_ens.graph.output_schema.column_names
-    response = _run_ensemble_on_tritonserver(str(tmpdir), output_columns, df, triton_ens.name)
-    assert len(response.as_numpy("output")) == df.shape[0]
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_workflow_tf_e2e_multi_op_run(tmpdir, dataset, engine):
-    # Create a Workflow
-    schema = dataset.schema
-    for name in ["x", "y", "id"]:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-
-    workflow_ops = ["name-cat"] >> wf_ops.Categorify(cat_cache="host")
-    workflow = Workflow(workflow_ops)
-    workflow.fit(dataset)
-
-    embedding_shapes_1 = wf_ops.get_embedding_sizes(workflow)
-
-    cats = ["name-string"] >> wf_ops.Categorify(cat_cache="host")
-    workflow_2 = Workflow(cats)
-    workflow_2.fit(dataset)
-
-    embedding_shapes = wf_ops.get_embedding_sizes(workflow_2)
-    embedding_shapes_1.update(embedding_shapes)
-    # Create Tensorflow Model
-    model = create_tf_model(["name-cat", "name-string"], [], embedding_shapes_1)
-
-    # Creating Triton Ensemble
-    triton_chain_1 = ["name-cat"] >> TransformWorkflow(workflow)
-    triton_chain_2 = ["name-string"] >> TransformWorkflow(workflow_2)
-    triton_chain = (triton_chain_1 + triton_chain_2) >> PredictTensorflow(model)
-
-    triton_ens = Ensemble(triton_chain, schema)
-
-    # Creating Triton Ensemble Config
-    ensemble_config, nodes_config = triton_ens.export(str(tmpdir))
-    config_path = tmpdir / "ensemble_model" / "config.pbtxt"
-
-    # Checking Triton Ensemble Config
-    with open(config_path, "rb") as f:
-        config = model_config.ModelConfig()
-        raw_config = f.read()
-        parsed = text_format.Parse(raw_config, config)
-
-        # The config file contents are correct
-        assert parsed.name == "ensemble_model"
-        assert parsed.platform == "ensemble"
-        assert hasattr(parsed, "ensemble_scheduling")
-
-    df = dataset.to_ddf().compute()[["name-string", "name-cat"]].iloc[:3]
-
-    response = _run_ensemble_on_tritonserver(str(tmpdir), ["output"], df, triton_ens.name)
-    assert len(response.as_numpy("output")) == df.shape[0]
-
-
-def test_graph_traverse_algo():
-    chain_1 = ["name-cat"] >> TransformWorkflow(Workflow(["name-cat"] >> wf_ops.Categorify()))
-    chain_2 = ["name-string"] >> TransformWorkflow(Workflow(["name-string"] >> wf_ops.Categorify()))
-
-    triton_chain = chain_1 + chain_2
-
-    ordered_list = list(postorder_iter_nodes(triton_chain))
-    assert len(ordered_list) == 5
-    assert isinstance(ordered_list[0].op, SelectionOp)
-    assert isinstance(ordered_list[-1].op, ConcatColumns)
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_workflow_tf_e2e_multi_op_plus_2_run(tmpdir, dataset, engine):
-    # Create a Workflow
-    schema = dataset.schema
-    for name in ["x", "y", "id"]:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-
-    workflow_ops = ["name-cat"] >> wf_ops.Categorify(cat_cache="host")
-    workflow = Workflow(workflow_ops)
-    workflow.fit(dataset)
-
-    embedding_shapes_1 = wf_ops.get_embedding_sizes(workflow)
-
-    cats = ["name-string"] >> wf_ops.Categorify(cat_cache="host")
-    workflow_2 = Workflow(cats)
-    workflow_2.fit(dataset)
-
-    embedding_shapes = wf_ops.get_embedding_sizes(workflow_2)
-    embedding_shapes_1.update(embedding_shapes)
-    embedding_shapes_1["name-string_plus_2"] = embedding_shapes_1["name-string"]
-
-    # Create Tensorflow Model
-    model = create_tf_model(["name-cat", "name-string_plus_2"], [], embedding_shapes_1)
-
-    # Creating Triton Ensemble
-    triton_chain_1 = ["name-cat"] >> TransformWorkflow(workflow)
-    triton_chain_2 = ["name-string"] >> TransformWorkflow(workflow_2) >> PlusTwoOp()
-    triton_chain = (triton_chain_1 + triton_chain_2) >> PredictTensorflow(model)
-
-    triton_ens = Ensemble(triton_chain, schema)
-
-    # Creating Triton Ensemble Config
-    ensemble_config, nodes_config = triton_ens.export(str(tmpdir))
-    config_path = tmpdir / "ensemble_model" / "config.pbtxt"
-
-    # Checking Triton Ensemble Config
-    with open(config_path, "rb") as f:
-        config = model_config.ModelConfig()
-        raw_config = f.read()
-        parsed = text_format.Parse(raw_config, config)
-
-        # The config file contents are correct
-        assert parsed.name == "ensemble_model"
-        assert parsed.platform == "ensemble"
-        assert hasattr(parsed, "ensemble_scheduling")
-
-    df = dataset.to_ddf().compute()[["name-string", "name-cat"]].iloc[:3]
-
-    response = _run_ensemble_on_tritonserver(str(tmpdir), ["output"], df, triton_ens.name)
-    assert len(response.as_numpy("output")) == df.shape[0]
diff --git a/tests/unit/systems/test_ensemble_ops.py b/tests/unit/systems/test_ensemble_ops.py
deleted file mode 100644
index 528ac27caae..00000000000
--- a/tests/unit/systems/test_ensemble_ops.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#
-# Copyright (c) 2022, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-from distutils.spawn import find_executable
-
-import numpy as np
-import pytest
-
-from merlin.core.dispatch import make_df
-from merlin.schema import ColumnSchema, Schema
-from merlin.systems.dag.ensemble import Ensemble
-from merlin.systems.dag.ops.session_filter import FilterCandidates
-from merlin.systems.dag.ops.softmax_sampling import SoftmaxSampling
-from tests.unit.systems.inference_utils import _run_ensemble_on_tritonserver  # noqa
-
-TRITON_SERVER_PATH = find_executable("tritonserver")
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-def test_softmax_sampling(tmpdir):
-    request_schema = Schema(
-        [
-            ColumnSchema("movie_ids", dtype=np.int32),
-            ColumnSchema("output_1", dtype=np.float32),
-        ]
-    )
-
-    combined_features = {
-        "movie_ids": np.random.randint(0, 10000, 100).astype(np.int32),
-        "output_1": np.random.random(100).astype(np.float32),
-    }
-
-    request = make_df(combined_features)
-
-    ordering = ["movie_ids"] >> SoftmaxSampling(relevance_col="output_1", topk=10, temperature=20.0)
-
-    ensemble = Ensemble(ordering, request_schema)
-    ens_config, node_configs = ensemble.export(tmpdir)
-
-    response = _run_ensemble_on_tritonserver(
-        tmpdir, ensemble.graph.output_schema.column_names, request, "ensemble_model"
-    )
-    assert response is not None
-    assert len(response.as_numpy("ordered_ids")) == 10
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-def test_filter_candidates(tmpdir):
-    request_schema = Schema(
-        [
-            ColumnSchema("candidate_ids", dtype=np.int32),
-            ColumnSchema("movie_ids", dtype=np.int32),
-        ]
-    )
-
-    candidate_ids = np.random.randint(1, 100000, 100).astype(np.int32)
-    movie_ids_1 = np.zeros(100, dtype=np.int32)
-    movie_ids_1[:20] = np.unique(candidate_ids)[:20]
-
-    combined_features = {
-        "candidate_ids": candidate_ids,
-        "movie_ids": movie_ids_1,
-    }
-
-    request = make_df(combined_features)
-
-    filtering = ["candidate_ids"] >> FilterCandidates(filter_out=["movie_ids"])
-
-    ensemble = Ensemble(filtering, request_schema)
-    ens_config, node_configs = ensemble.export(tmpdir)
-
-    response = _run_ensemble_on_tritonserver(
-        tmpdir, ensemble.graph.output_schema.column_names, request, "ensemble_model"
-    )
-    assert response is not None
-    assert len(response.as_numpy("filtered_ids")) == 80
diff --git a/tests/unit/systems/test_export.py b/tests/unit/systems/test_export.py
deleted file mode 100644
index 6d70696f8a6..00000000000
--- a/tests/unit/systems/test_export.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from distutils.spawn import find_executable
-
-import pytest
-
-from merlin.io import Dataset
-from merlin.systems.workflow import get_embedding_sizes
-from nvtabular import Workflow, ops
-
-tf_utils = pytest.importorskip("nvtabular.loader.tf_utils")  # noqa
-
-triton = pytest.importorskip("merlin.systems.triton")
-data_conversions = pytest.importorskip("merlin.systems.triton.conversions")
-ensemble = pytest.importorskip("merlin.systems.triton.export")
-
-torch = pytest.importorskip("torch")  # noqa
-
-from merlin.systems.triton.export import export_pytorch_ensemble, export_tensorflow_ensemble  # noqa
-from tests.unit.systems.inference_utils import (  # noqa
-    _run_ensemble_on_tritonserver,
-    create_pytorch_model,
-    create_tf_model,
-)
-
-tritonclient = pytest.importorskip("tritonclient")
-grpcclient = pytest.importorskip("tritonclient.grpc")
-
-TRITON_SERVER_PATH = find_executable("tritonserver")
-tf_utils.configure_tensorflow()
-
-
-@pytest.mark.skipif(not TRITON_SERVER_PATH, reason="triton server not found")
-@pytest.mark.parametrize("engine", ["parquet"])
-@pytest.mark.parametrize("output_model", ["tensorflow"])
-def test_export_run_ensemble_triton(tmpdir, engine, output_model, df):
-    conts = ["x", "y", "id"] >> ops.FillMissing() >> ops.Normalize()
-    cats = ["name-cat", "name-string"] >> ops.Categorify(cat_cache="host")
-    workflow = Workflow(conts + cats)
-    dataset = Dataset(df)
-    workflow.fit(dataset)
-
-    embed_shapes = get_embedding_sizes(workflow)
-    cat_cols = list(embed_shapes.keys())
-
-    if output_model == "tensorflow":
-        tf_model = create_tf_model(cat_cols, [], embed_shapes)
-        export_tensorflow_ensemble(tf_model, workflow, "test_name", tmpdir, [])
-    elif output_model == "pytorch":
-        torch_model = create_pytorch_model(cat_cols, [], embed_shapes)
-        export_pytorch_ensemble(
-            torch_model,
-            workflow,
-            {},
-            "test_name",
-            tmpdir,
-            [],
-        )
-
-    # assert os.path.exists(os.path.join(repo, "config.pbtxt"))
-    tri_df = df.iloc[:10]
-    tri_df = tri_df[["x", "y", "id", "name-cat", "name-string"]]
-    response = _run_ensemble_on_tritonserver(str(tmpdir), ["output"], tri_df, "test_name")
-    assert response is not None
-    assert len(response.as_numpy("output")) == 10
diff --git a/tests/unit/systems/test_graph.py b/tests/unit/systems/test_graph.py
deleted file mode 100644
index c943620e033..00000000000
--- a/tests/unit/systems/test_graph.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import pytest
-
-from merlin.schema import Schema
-from nvtabular import Workflow
-from nvtabular import ops as wf_ops
-
-ensemble = pytest.importorskip("merlin.systems.dag.ensemble")
-workflow_op = pytest.importorskip("merlin.systems.dag.ops.workflow")
-
-
-def test_inference_schema_propagation():
-    input_columns = ["a", "b", "c"]
-    request_schema = Schema(input_columns)
-    expected_schema = Schema(["a_nvt", "b_nvt", "c_nvt"])
-
-    # NVT
-    workflow_ops = input_columns >> wf_ops.Rename(postfix="_nvt")
-    workflow = Workflow(workflow_ops)
-    workflow.fit_schema(request_schema)
-
-    assert workflow.graph.output_schema == expected_schema
-
-    # Triton
-    triton_ops = input_columns >> workflow_op.TransformWorkflow(workflow)
-    ensemble_out = ensemble.Ensemble(triton_ops, request_schema)
-
-    assert ensemble_out.graph.output_schema == expected_schema
diff --git a/tests/unit/systems/test_inference_ops.py b/tests/unit/systems/test_inference_ops.py
deleted file mode 100644
index 1023b6ad0bf..00000000000
--- a/tests/unit/systems/test_inference_ops.py
+++ /dev/null
@@ -1,82 +0,0 @@
-#
-# Copyright (c) 2021, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-import os
-import pathlib
-
-import pytest
-
-# this needs to be before any modules that import protobuf
-os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
-
-from google.protobuf import text_format  # noqa
-
-from merlin.schema import Schema  # noqa
-from nvtabular import Workflow  # noqa
-from nvtabular import ops as wf_ops  # noqa
-
-ensemble = pytest.importorskip("merlin.systems.dag.ensemble")
-model_config = pytest.importorskip("nvtabular.inference.triton.model_config_pb2")
-workflow_op = pytest.importorskip("merlin.systems.dag.ops.workflow")
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_workflow_op_validates_schemas(dataset, engine):
-    input_columns = ["x", "y", "id"]
-    request_schema = Schema(input_columns)
-
-    # NVT
-    workflow_ops = input_columns >> wf_ops.Rename(postfix="_nvt")
-    workflow = Workflow(workflow_ops)
-    workflow.fit(dataset)
-
-    # Triton
-    triton_ops = ["a", "b", "c"] >> workflow_op.TransformWorkflow(workflow)
-
-    with pytest.raises(ValueError) as exc_info:
-        ensemble.Ensemble(triton_ops, request_schema)
-    assert "Missing column" in str(exc_info.value)
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_workflow_op_exports_own_config(tmpdir, dataset, engine):
-    input_columns = ["x", "y", "id"]
-
-    # NVT
-    workflow_ops = input_columns >> wf_ops.Rename(postfix="_nvt")
-    workflow = Workflow(workflow_ops)
-    workflow.fit(dataset)
-
-    # Triton
-    triton_op = workflow_op.TransformWorkflow(workflow)
-    triton_op.export(tmpdir, None, None)
-
-    # Export creates directory
-    export_path = pathlib.Path(tmpdir) / triton_op.export_name
-    assert export_path.exists()
-
-    # Export creates the config file
-    config_path = export_path / "config.pbtxt"
-    assert config_path.exists()
-
-    # Read the config file back in from proto
-    with open(config_path, "rb") as f:
-        config = model_config.ModelConfig()
-        raw_config = f.read()
-        parsed = text_format.Parse(raw_config, config)
-
-        # The config file contents are correct
-        assert parsed.name == triton_op.export_name
-        assert parsed.backend == "nvtabular"
diff --git a/tests/unit/systems/test_op_runner.py b/tests/unit/systems/test_op_runner.py
deleted file mode 100644
index fad865a575f..00000000000
--- a/tests/unit/systems/test_op_runner.py
+++ /dev/null
@@ -1,163 +0,0 @@
-import json
-import os
-
-import numpy as np
-import pytest
-
-import nvtabular as nvt
-import nvtabular.ops as wf_ops
-from merlin.dag import Graph
-from merlin.schema import Tags
-from tests.unit.systems.inf_test_ops import PlusTwoOp
-
-op_runner = pytest.importorskip("merlin.systems.dag.op_runner")
-inf_op = pytest.importorskip("merlin.systems.dag.ops.operator")
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_op_runner_loads_config(tmpdir, dataset, engine):
-    input_columns = ["x", "y", "id"]
-
-    # NVT
-    workflow_ops = input_columns >> wf_ops.Rename(postfix="_nvt")
-    workflow = nvt.Workflow(workflow_ops)
-    workflow.fit(dataset)
-    workflow.save(str(tmpdir))
-
-    repository = "repository_path/"
-    version = 1
-    kind = ""
-    config = {
-        "parameters": {
-            "operator_names": {"string_value": json.dumps(["PlusTwoOp_1"])},
-            "PlusTwoOp_1": {
-                "string_value": json.dumps(
-                    {
-                        "module_name": PlusTwoOp.__module__,
-                        "class_name": "PlusTwoOp",
-                    }
-                )
-            },
-        }
-    }
-
-    runner = op_runner.OperatorRunner(config, repository, version, kind)
-
-    loaded_op = runner.operators[0]
-    assert isinstance(loaded_op, PlusTwoOp)
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_op_runner_loads_multiple_ops_same(tmpdir, dataset, engine):
-    # NVT
-    schema = dataset.schema
-    for name in schema.column_names:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-
-    repository = "repository_path/"
-    version = 1
-    kind = ""
-    config = {
-        "parameters": {
-            "operator_names": {"string_value": json.dumps(["PlusTwoOp_1", "PlusTwoOp_2"])},
-            "PlusTwoOp_1": {
-                "string_value": json.dumps(
-                    {
-                        "module_name": PlusTwoOp.__module__,
-                        "class_name": "PlusTwoOp",
-                    }
-                )
-            },
-            "PlusTwoOp_2": {
-                "string_value": json.dumps(
-                    {
-                        "module_name": PlusTwoOp.__module__,
-                        "class_name": "PlusTwoOp",
-                    }
-                )
-            },
-        }
-    }
-
-    runner = op_runner.OperatorRunner(config, repository, version, kind)
-
-    assert len(runner.operators) == 2
-
-    for idx, loaded_op in enumerate(runner.operators):
-        assert isinstance(loaded_op, PlusTwoOp)
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_op_runner_loads_multiple_ops_same_execute(tmpdir, dataset, engine):
-    # NVT
-    schema = dataset.schema
-    for name in schema.column_names:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-
-    repository = "repository_path/"
-    version = 1
-    kind = ""
-    config = {
-        "parameters": {
-            "operator_names": {"string_value": json.dumps(["PlusTwoOp_1", "PlusTwoOp_2"])},
-            "PlusTwoOp_1": {
-                "string_value": json.dumps(
-                    {
-                        "module_name": PlusTwoOp.__module__,
-                        "class_name": "PlusTwoOp",
-                    }
-                )
-            },
-            "PlusTwoOp_2": {
-                "string_value": json.dumps(
-                    {
-                        "module_name": PlusTwoOp.__module__,
-                        "class_name": "PlusTwoOp",
-                    }
-                )
-            },
-        }
-    }
-
-    runner = op_runner.OperatorRunner(config, repository, version, kind)
-
-    inputs = {}
-    for col_name in schema.column_names:
-        inputs[col_name] = np.random.randint(10)
-
-    outputs = runner.execute(inf_op.InferenceDataFrame(inputs))
-
-    assert outputs["x_plus_2_plus_2"] == inputs["x"] + 4
-
-
-@pytest.mark.parametrize("engine", ["parquet"])
-def test_op_runner_single_node_export(tmpdir, dataset, engine):
-    # assert against produced config
-    schema = dataset.schema
-    for name in schema.column_names:
-        dataset.schema.column_schemas[name] = dataset.schema.column_schemas[name].with_tags(
-            [Tags.USER]
-        )
-
-    inputs = ["x", "y"]
-
-    node = inputs >> PlusTwoOp()
-
-    graph = Graph(node)
-    graph.construct_schema(dataset.schema)
-
-    config = node.export(tmpdir)
-
-    file_path = os.path.join(str(tmpdir), node.export_name, "config.pbtxt")
-
-    assert os.path.exists(file_path)
-    config_file = open(file_path, "r").read()
-    assert config_file == str(config)
-    assert len(config.input) == len(inputs)
-    assert len(config.output) == len(inputs)
-    for idx, conf in enumerate(config.output):
-        assert conf.name == inputs[idx] + "_plus_2"
diff --git a/tests/unit/systems/test_tensorflow_inf_op.py b/tests/unit/systems/test_tensorflow_inf_op.py
deleted file mode 100644
index 2c76395bd49..00000000000
--- a/tests/unit/systems/test_tensorflow_inf_op.py
+++ /dev/null
@@ -1,113 +0,0 @@
-import os
-import pathlib
-from copy import deepcopy
-
-import pytest
-
-from merlin.dag import ColumnSelector, Graph
-from merlin.schema import Schema
-
-# this needs to be before any modules that import protobuf
-os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
-
-from google.protobuf import text_format  # noqa
-
-model_config = pytest.importorskip("nvtabular.inference.triton.model_config_pb2")
-tf_op = pytest.importorskip("merlin.systems.dag.ops.tensorflow")
-
-tf = pytest.importorskip("tensorflow")
-
-
-def test_tf_op_exports_own_config(tmpdir):
-    model = tf.keras.models.Sequential(
-        [
-            tf.keras.Input(name="input", dtype=tf.int32, shape=(784,)),
-            tf.keras.layers.Dense(512, activation="relu"),
-            tf.keras.layers.Dropout(0.2),
-            tf.keras.layers.Dense(10, name="output"),
-        ]
-    )
-
-    model.compile(
-        optimizer="adam",
-        loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
-        metrics=[tf.metrics.SparseCategoricalAccuracy()],
-    )
-
-    # Triton
-    triton_op = tf_op.PredictTensorflow(model)
-    triton_op.export(tmpdir, None, None)
-
-    # Export creates directory
-    export_path = pathlib.Path(tmpdir) / triton_op.export_name
-    assert export_path.exists()
-
-    # Export creates the config file
-    config_path = export_path / "config.pbtxt"
-    assert config_path.exists()
-
-    # Read the config file back in from proto
-    with open(config_path, "rb") as f:
-        config = model_config.ModelConfig()
-        raw_config = f.read()
-        parsed = text_format.Parse(raw_config, config)
-
-        # The config file contents are correct
-        assert parsed.name == triton_op.export_name
-        assert parsed.backend == "tensorflow"
-
-
-def test_tf_op_compute_schema():
-    model = tf.keras.models.Sequential(
-        [
-            tf.keras.Input(name="input", dtype=tf.int32, shape=(784,)),
-            tf.keras.layers.Dense(512, activation="relu"),
-            tf.keras.layers.Dropout(0.2),
-            tf.keras.layers.Dense(10, name="output"),
-        ]
-    )
-
-    model.compile(
-        optimizer="adam",
-        loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
-        metrics=[tf.metrics.SparseCategoricalAccuracy()],
-    )
-
-    # Triton
-    triton_op = tf_op.PredictTensorflow(model)
-
-    out_schema = triton_op.compute_output_schema(Schema(["input"]), ColumnSelector(["input"]), None)
-    assert out_schema.column_names == ["output"]
-
-
-def test_tf_schema_validation():
-    model = tf.keras.models.Sequential(
-        [
-            tf.keras.Input(name="input", dtype=tf.int32, shape=(784,)),
-            tf.keras.layers.Dense(512, activation="relu"),
-            tf.keras.layers.Dropout(0.2),
-            tf.keras.layers.Dense(10, name="output"),
-        ]
-    )
-
-    model.compile(
-        optimizer="adam",
-        loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
-        metrics=[tf.metrics.SparseCategoricalAccuracy()],
-    )
-
-    # Triton
-    tf_node = [] >> tf_op.PredictTensorflow(model)
-    tf_graph = Graph(tf_node)
-
-    with pytest.raises(ValueError) as exception_info:
-        deepcopy(tf_graph).construct_schema(Schema([]))
-    assert "Missing column 'input'" in str(exception_info.value)
-
-    with pytest.raises(ValueError) as exception_info:
-        deepcopy(tf_graph).construct_schema(Schema(["not_input"]))
-    assert "Missing column 'input'" in str(exception_info.value)
-
-    with pytest.raises(ValueError) as exception_info:
-        deepcopy(tf_graph).construct_schema(Schema(["input", "not_input"]))
-    assert "Mismatched dtypes for column 'input'" in str(exception_info.value)