Back out "Revert D16469619: Add Virtual Memory and CPU percentage com…
Browse files Browse the repository at this point in the history
…putation to AIBench"

Summary: Pull Request resolved: pytorch#23821

Reviewed By: hl475

Differential Revision: D16654854

fbshipit-source-id: f057023e890cbcbd9145ef2ecb449df2fbba592b
geof90 authored and facebook-github-bot committed Aug 7, 2019
1 parent e90adf5 commit e23e4cc
Showing 8 changed files with 213 additions and 54 deletions.
5 changes: 5 additions & 0 deletions binaries/benchmark_args.h
@@ -50,6 +50,11 @@ C10_DEFINE_string(
"Input type when specifying the input dimension."
"The supported types are float, uint8_t.");
C10_DEFINE_int(iter, 10, "The number of iterations to run.");
C10_DEFINE_bool(
measure_memory,
false,
"Whether to measure increase in allocated memory while "
"loading and running the net.");
C10_DEFINE_string(net, "", "The given net to benchmark.");
C10_DEFINE_string(
output,
75 changes: 59 additions & 16 deletions binaries/benchmark_helper.cc
@@ -35,6 +35,14 @@
#include <observers/observer_config.h>
#include <observers/perf_observer.h>

#if defined(TARGET_OS_MAC) || \
defined(TARGET_OS_IPHONE) || \
defined(TARGET_IPHONE_SIMULATOR)
#include <malloc/malloc.h>
#else
#include <malloc.h>
#endif

using std::map;
using std::shared_ptr;
using std::string;
@@ -235,7 +243,7 @@ void fillInputBlob(

void runNetwork(
shared_ptr<caffe2::Workspace> workspace,
- caffe2::NetDef& net_def,
+ caffe2::NetBase* net,
map<string, caffe2::TensorProtos>& tensor_protos_map,
const bool wipe_cache,
const bool run_individual,
@@ -250,13 +258,6 @@ void runNetwork(
const std::string& output,
const std::string& output_folder) {

- if (!net_def.has_name()) {
-   net_def.set_name("benchmark");
- }
-
- caffe2::NetBase* net = workspace->CreateNet(net_def);
- CHECK_NOTNULL(net);
-
LOG(INFO) << "Starting benchmark.";
caffe2::ObserverConfig::initSampleRate(1, 1, 1, run_individual, warmup);
LOG(INFO) << "Running warmup runs.";
@@ -376,6 +377,35 @@ void writeOutput(
}
}

void logBenchmarkResult(
const std::string& type,
const std::string& metric,
const std::string& unit,
const int value) {
LOG(INFO) << caffe2::NetObserverReporterPrint::IDENTIFIER << "{"
<< "\"type\": \"" << type << "\", "
<< "\"metric\": \"" << metric << "\", "
<< "\"unit\": \"" << unit << "\", "
<< "\"value\": " << c10::to_string(value) << "}\n";
}

long getVirtualMemoryIfOptionEnabled(bool FLAGS_measure_memory) {
if (FLAGS_measure_memory) {
#if defined(TARGET_OS_IPHONE) || \
defined(TARGET_OS_MAC) || \
defined(TARGET_IPHONE_SIMULATOR)
malloc_statistics_t stats = {0};
malloc_zone_statistics(nullptr, &stats);
return stats.size_allocated;
#else
struct mallinfo info = mallinfo();
return info.uordblks;
#endif
}

return 0;
}

int benchmark(
int argc,
char* argv[],
@@ -386,6 +416,7 @@ int benchmark(
const string& FLAGS_input_file,
const string& FLAGS_input_type,
int FLAGS_iter,
bool FLAGS_measure_memory,
const string& FLAGS_net,
const string& FLAGS_output,
const string& FLAGS_output_folder,
@@ -423,19 +454,15 @@

auto workspace = std::make_shared<caffe2::Workspace>(new caffe2::Workspace());
bool run_on_gpu = backendCudaSet(FLAGS_backend);
- // Run initialization network.
+ // Run initialization network, measure resources used.
+ long init_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory);
caffe2::NetDef init_net_def;
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_init_net, &init_net_def));
setOperatorEngine(&init_net_def, FLAGS_backend);
CAFFE_ENFORCE(workspace->RunNetOnce(init_net_def));

- // Run main network.
- caffe2::NetDef net_def;
- CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def));
- setOperatorEngine(&net_def, FLAGS_backend);
+ init_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory) - init_vmem;

map<string, caffe2::TensorProtos> tensor_protos_map;

int num_blobs = loadInput(
workspace,
run_on_gpu,
@@ -445,9 +472,19 @@
FLAGS_input_dims,
FLAGS_input_type);

// Run main network.
long predict_vmem = getVirtualMemoryIfOptionEnabled(FLAGS_measure_memory);
caffe2::NetDef net_def;
CAFFE_ENFORCE(ReadProtoFromFile(FLAGS_net, &net_def));
setOperatorEngine(&net_def, FLAGS_backend);
if (!net_def.has_name()) {
net_def.set_name("benchmark");
}
caffe2::NetBase* net = workspace->CreateNet(net_def);
CHECK_NOTNULL(net);
runNetwork(
workspace,
- net_def,
+ net,
tensor_protos_map,
FLAGS_wipe_cache,
FLAGS_run_individual,
@@ -461,6 +498,12 @@
FLAGS_sleep_between_net_and_operator,
FLAGS_output,
FLAGS_output_folder);
predict_vmem = getVirtualMemoryIfOptionEnabled(
FLAGS_measure_memory) - predict_vmem;
if (FLAGS_measure_memory) {
logBenchmarkResult(
"NET_", "memory", "kB", (init_vmem + predict_vmem) / 1024);
}

return 0;
}
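
For reference, the memory figure logged above is the sum of two allocation deltas: the allocator's live-byte count is sampled before and after loading and running the init net, then again around creating and running the main net, and the combined increase is reported in kB. Below is a minimal standalone sketch of that pattern, assuming a glibc target where mallinfo() is available (the Apple branch in the diff uses malloc_zone_statistics() instead); loadInitNet and runMainNet are placeholder names, not functions from this codebase.

#include <cstdio>
#include <malloc.h>

static void loadInitNet() { /* placeholder: run the initialization net */ }
static void runMainNet()  { /* placeholder: create and run the main net */ }

// Bytes currently allocated via malloc, per glibc's mallinfo().
static long allocatedBytes() {
  struct mallinfo info = mallinfo();
  return info.uordblks;
}

int main() {
  long init_vmem = allocatedBytes();
  loadInitNet();
  init_vmem = allocatedBytes() - init_vmem;

  long predict_vmem = allocatedBytes();
  runMainNet();
  predict_vmem = allocatedBytes() - predict_vmem;

  // Report the total increase in kB, mirroring
  // logBenchmarkResult("NET_", "memory", "kB", ...).
  std::printf("memory increase: %ld kB\n", (init_vmem + predict_vmem) / 1024);
  return 0;
}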
3 changes: 2 additions & 1 deletion binaries/benchmark_helper.h
@@ -125,7 +125,7 @@ void writeOutput(
const int num_blobs);
void runNetwork(
shared_ptr<caffe2::Workspace> workspace,
- caffe2::NetDef& net_def,
+ caffe2::NetBase* net,
map<string, caffe2::TensorProtos>& tensor_protos_map,
const bool wipe_cache,
const bool run_individual,
@@ -149,6 +149,7 @@ int benchmark(
const string& FLAGS_input_file,
const string& FLAGS_input_type,
int FLAGS_iter,
bool FLAGS_measure_memory,
const string& FLAGS_net,
const string& FLAGS_output,
const string& FLAGS_output_folder,
1 change: 1 addition & 0 deletions binaries/caffe2_benchmark.cc
@@ -22,6 +22,7 @@ int main(int argc, char** argv) {
FLAGS_input_file,
FLAGS_input_type,
FLAGS_iter,
FLAGS_measure_memory,
FLAGS_net,
FLAGS_output,
FLAGS_output_folder,
1 change: 1 addition & 0 deletions modules/observers/net_observer_reporter.h
@@ -19,6 +19,7 @@ struct PerformanceInformation {
std::string type = ""; // the type of the operator
// Measured
double latency = 0;
double cpuMilliseconds = 0;
};

class CAFFE2_OBSERVER_API NetObserverReporter {
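
The observer-side change that populates cpuMilliseconds is not among the files shown here. Purely as an illustration, and not necessarily how the perf observer actually does it, one plausible way to obtain such a figure is to sample the process's CPU time before and after a run and take the difference:

#include <sys/resource.h>

// Hypothetical helper: total user + system CPU time of the current process,
// in milliseconds. Sampling this around a net run and subtracting the two
// readings would yield a cpuMilliseconds value for that run.
static double processCpuMilliseconds() {
  struct rusage usage;
  getrusage(RUSAGE_SELF, &usage);
  double user_ms = usage.ru_utime.tv_sec * 1000.0 + usage.ru_utime.tv_usec / 1000.0;
  double system_ms = usage.ru_stime.tv_sec * 1000.0 + usage.ru_stime.tv_usec / 1000.0;
  return user_ms + system_ms;
}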
50 changes: 40 additions & 10 deletions modules/observers/net_observer_reporter_print.cc
@@ -1,5 +1,6 @@
#include "observers/net_observer_reporter_print.h"

#include <algorithm>
#include <sstream>
#include "caffe2/core/init.h"
#include "observers/observer_config.h"
@@ -9,6 +10,7 @@ namespace caffe2 {
const std::string NetObserverReporterPrint::IDENTIFIER = "Caffe2Observer ";
static std::string get_op_args(PerformanceInformation p);
static std::string get_tensor_shapes(PerformanceInformation p);
static std::string sanatize(std::string json_s);

void NetObserverReporterPrint::report(
NetBase* net,
@@ -23,29 +25,49 @@
{"value", c10::to_string(p.second.latency * 1000)},
{"unit", "us"},
{"metric", "latency"}});
caffe2_perf.push_back({{"type", "NET_"},
{
"value",
c10::to_string(
p.second.cpuMilliseconds /
p.second.latency *
100),
},
{"unit", "percent"},
{"metric", "cpu_percent"}});
} else if (p.first != "NET_DELAY") {
// for operator perf
std::string shape_str = get_tensor_shapes(p.second);
std::string args_str = get_op_args(p.second);

caffe2_perf.push_back({{"type", p.first},
std::string type = p.first;
caffe2_perf.push_back({{"type", type},
{"value", c10::to_string(p.second.latency * 1000)},
{"unit", "us"},
{"metric", "latency"}});
caffe2_perf.push_back({{"type", type},
{
"value",
c10::to_string(
p.second.cpuMilliseconds /
p.second.latency *
100),
},
{"unit", "percent"},
{"metric", "cpu_percent"}});
if (p.second.flops > 0) {
caffe2_perf.push_back({{"type", p.first},
caffe2_perf.push_back({{"type", type},
{"value", c10::to_string(p.second.flops)},
{"unit", "flop"},
{"metric", "flops"}});
}
if (shape_str != "") {
caffe2_perf.push_back({{"type", p.first},
caffe2_perf.push_back({{"type", type},
{"info_string", shape_str},
{"unit", ""},
{"metric", "tensor_shapes"}});
}
if (args_str != "") {
caffe2_perf.push_back({{"type", p.first},
caffe2_perf.push_back({{"type", type},
{"info_string", args_str},
{"unit", ""},
{"metric", "op_args"}});
@@ -57,13 +79,13 @@
std::stringstream buffer;
auto entry = *it;
buffer << IDENTIFIER << "{";
buffer << "\"type\": \"" << entry["type"] << "\","
<< "\"unit\": \"" << entry["unit"] << "\","
<< "\"metric\": \"" << entry["metric"] << "\",";
buffer << "\"type\": \"" << sanatize(entry["type"]) << "\","
<< "\"unit\": \"" << sanatize(entry["unit"]) << "\","
<< "\"metric\": \"" << sanatize(entry["metric"]) << "\",";
if (entry.find("value") != entry.end()) {
buffer << "\"value\": \"" << entry["value"] << "\"";
buffer << "\"value\": \"" << sanatize(entry["value"]) << "\"";
} else if (entry.find("info_string") != entry.end()) {
buffer << "\"info_string\": \"" << entry["info_string"] << "\"";
buffer << "\"info_string\": \"" << sanatize(entry["info_string"]) << "\"";
}
buffer << "}";
LOG(INFO) << buffer.str();
@@ -117,4 +139,12 @@ static std::string get_op_args(PerformanceInformation p) {
}
return args_str;
}

static std::string sanatize(std::string json_s) {
// Remove illegal characters from the name that would cause json string to
// become invalid
json_s.erase(std::remove(json_s.begin(), json_s.end(), '"'), json_s.end());
json_s.erase(std::remove(json_s.begin(), json_s.end(), '\\'), json_s.end());
return json_s;
}
}
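
Two notes on the reporter changes above. First, cpuMilliseconds and latency describe the same measurement window and are both in milliseconds, so the cpuMilliseconds / latency * 100 expression reports CPU utilization as a percentage of wall-clock time (values above 100 indicate work spread across multiple threads). A small illustration with made-up numbers:

// Illustrative only; the values are hypothetical.
double cpuMilliseconds = 8.0;  // CPU time consumed while the net ran
double latency = 10.0;         // wall-clock latency of the same run, in ms
double cpu_percent = cpuMilliseconds / latency * 100;  // 80.0

Second, sanatize() strips double quotes and backslashes from every field before it is embedded in the hand-built JSON line, so an operator type, value, or info string cannot break the output's quoting.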