Skip to content

Commit

Permalink
[xnnpack][lite-int][4/n] introduce serialization to delegate (pytorch…
Browse files Browse the repository at this point in the history
…#87908)

We introduce the serializer created in the previous diff into our XNNGraph builder, so that parts of the graph are serialized as the graph is built. At the end, we can finish and serialize the XNN graph into a std::string, which is forwarded to the on-device runtime.

The next diff will rebuild the XNN graph from the serialization introduced here, so testing of the graph serialization is deferred to that diff.

Differential Revision: [D39335580](https://our.internmc.facebook.com/intern/diff/D39335580/)

**NOTE FOR REVIEWERS**: This PR has internal Meta-specific changes or comments, please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D39335580/)!
Pull Request resolved: pytorch#87908
Approved by: https://github.com/digantdesai
  • Loading branch information
mcr229 authored and pytorchmergebot committed Nov 1, 2022
1 parent 8287c1d commit 3aa7a52
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 7 deletions.
25 changes: 20 additions & 5 deletions torch/csrc/jit/backends/xnnpack/xnnpack_backend_preprocess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ namespace delegate {
// }
// or
// {
// "forward" : {"inputs" : c10::List<at::Tensor>}
// "forward" : {
// "inputs" : c10::List<at::Tensor>,
// "outputs" : c10::List<at::Tensor>
// }
// }
// in which the value for "inputs" is the input shape to the module.
// The module fed to the xnnpack backend must first be traced in order
Expand Down Expand Up @@ -92,20 +95,32 @@ c10::IValue preprocess(

// grabbing the inputs from compile spec for testing

// gather sample inputs from compile spec
std::vector<at::Tensor> inputs;
auto input_list = inp.toList();

for (int i = 0; i < input_list.size(); i++) {
inputs.push_back(input_list.get(i).toTensor());
}
std::vector<at::Tensor> outputs;
outputs.push_back(out.toList().get(0).toTensor());
auto output_list = out.toList();
std::vector<c10::IntList> output_shapes;

// gather sample outputs from compile spec
for (int i = 0; i < output_list.size(); i++) {
auto sample_output = output_list.get(i).toTensor();
outputs.push_back(sample_output);
// also gather output shapes to forward along to device
output_shapes.push_back(sample_output.sizes());
}

// sample run on sample inputs
graph_builder.runGraphOnInputs(inputs, outputs);
c10::List<c10::IntList> shapes_list(output_shapes);

c10::List<at::Tensor> output_list(outputs);

compiled.insert("Answer", output_list);
compiled.insert("ser_model", graph_builder.serializedXNNGraph());
compiled.insert("outputs", shapes_list);
compiled.insert("Answer", outputs);

return compiled;
}
Expand Down
35 changes: 35 additions & 0 deletions torch/csrc/jit/backends/xnnpack/xnnpack_graph_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,32 @@ void XNNGraph::checkOpsToDelegate(std::shared_ptr<torch::jit::Graph>& graph) {
"the module contains the following unsupported ops:\n" + error.str());
}

std::string XNNGraph::serializedXNNGraph() {
  // Maps every graph input/output jit value to the xnnpack tensor-value id
  // recorded in _val_to_ids, then asks the serializer to finish and emit the
  // serialized graph payload as a string.
  std::vector<uint32_t> input_ids;
  std::vector<uint32_t> output_ids;
  input_ids.reserve(_inputs.size());
  output_ids.reserve(_outputs.size());

  for (const auto& val : _inputs) {
    // Use .at() rather than operator[]: a value with no recorded id is a
    // builder bug, and operator[] would silently insert id 0 and produce a
    // corrupt serialization instead of failing fast.
    input_ids.push_back(_val_to_ids.at(val));
  }

  for (const auto& val : _outputs) {
    output_ids.push_back(_val_to_ids.at(val));
  }

  return _serializer.finishAndSerialize(input_ids, output_ids);
}

std::vector<std::vector<long>> XNNGraph::getGraphOutputShapes() {
std::vector<std::vector<long>> output_shapes;
for (auto val : _outputs) {
auto tensor_ptr = val->type()->cast<TensorType>();
std::vector<long> sizes = tensor_ptr->sizes().concrete_sizes().value();
output_shapes.push_back(sizes);
}

return output_shapes;
}

void XNNGraph::defineAllNodes(std::shared_ptr<torch::jit::Graph>& graph) {
DepthFirstGraphNodeIterator it(graph);
Node* node = nullptr;
Expand All @@ -152,6 +178,7 @@ void XNNGraph::defineAllNodes(std::shared_ptr<torch::jit::Graph>& graph) {
input2_id,
output_id,
/*flags=*/0);
_serializer.serializeAddNode(input1_id, input2_id, output_id, 0);
TORCH_CHECK(status == xnn_status_success, "failed to create add node");
break;
}
Expand Down Expand Up @@ -213,6 +240,14 @@ void XNNGraph::defineAllTensorValues() {
/*external_id=*/ext_id,
/*flags=*/flags,
/*id_out=*/&id);
_serializer.serializeTensorValue(
xnn_datatype_fp32,
num_dims,
tensor_shape,
nullptr,
ext_id,
flags,
id);
TORCH_CHECK(
status == xnn_status_success,
"failed to define xnn_tensor_id for: " + val->debugName());
Expand Down
12 changes: 10 additions & 2 deletions torch/csrc/jit/backends/xnnpack/xnnpack_graph_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <unordered_set>
#include <vector>

#include <torch/csrc/jit/backends/xnnpack/serialization/serializer.h>

namespace torch {
namespace jit {
namespace xnnpack {
Expand All @@ -16,7 +18,9 @@ class XNNGraph {
const float output_min = -std::numeric_limits<float>::infinity();
const float output_max = std::numeric_limits<float>::infinity();

// xnn_subgraph
// serializer class
XNNSerializer _serializer;
// xnn subgraph
xnn_subgraph_t _subgraph_ptr;
// Set of all the tensor values throughout the jit graph
std::unordered_set<torch::jit::Value*> _intermediate_tensors;
Expand Down Expand Up @@ -58,7 +62,7 @@ class XNNGraph {
void checkOpsToDelegate(std::shared_ptr<torch::jit::Graph>& graph);

public:
XNNGraph() : _subgraph_ptr(nullptr) {
// Default-constructs the serializer, leaves the xnn subgraph unallocated
// (nullptr), and initializes the xnnpack library; construction fails if
// xnn_initialize does not succeed.
XNNGraph() : _serializer(), _subgraph_ptr(nullptr) {
xnn_status status = xnn_initialize(/*allocator =*/nullptr);
TORCH_CHECK(xnn_status_success == status, "Failed to initialize xnnpack");
}
Expand All @@ -77,6 +81,10 @@ class XNNGraph {
void runGraphOnInputs(
std::vector<at::Tensor> tensor_inputs,
std::vector<at::Tensor> tensor_outputs);

std::string serializedXNNGraph();

std::vector<std::vector<long>> getGraphOutputShapes();
};

} // namespace delegate
Expand Down

0 comments on commit 3aa7a52

Please sign in to comment.