From 8c2689877fb48bfb4a6a133b020cdb5ec7c9b066 Mon Sep 17 00:00:00 2001
From: Scott McKay
Date: Sun, 30 Jun 2024 05:19:51 +1000
Subject: [PATCH] CoreML: Disable 1D ML Program matmul due to bug in coreml (#21186)

### Description
Disable using CoreML ML Program for a matmul where one of the inputs is 1D, as the CoreML implementation appears
to be broken. See https://github.com/apple/coremltools/issues/2263

Add some debugging notes.

### Motivation and Context
Fix failing test on macos-14.
---
 .github/workflows/mac.yml                    |  3 +-
 .../core/providers/coreml/DebugMLProgram.md  | 85 +++++++++++++++++++
 .../coreml/builders/impl/gemm_op_builder.cc  | 33 ++++---
 .../coreml/builders/model_builder.cc         |  1 +
 .../providers/coreml/dump_mlprogram_model.py |  9 +-
 5 files changed, 114 insertions(+), 17 deletions(-)
 create mode 100644 onnxruntime/core/providers/coreml/DebugMLProgram.md

diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index 8aaec8adef97..3d94d30947c7 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -54,11 +54,10 @@ jobs:
           --test \
           --build_shared_lib \
           --build_objc \
+          --use_coreml \
           --use_xnnpack \
           --use_binskim_compliant_compile_flags

-      # TODO add --use_coreml once unit test failures are addressed
-
   Objective-C-StaticAnalysis:
     runs-on: macos-14

diff --git a/onnxruntime/core/providers/coreml/DebugMLProgram.md b/onnxruntime/core/providers/coreml/DebugMLProgram.md
new file mode 100644
index 000000000000..e41a51559430
--- /dev/null
+++ b/onnxruntime/core/providers/coreml/DebugMLProgram.md
@@ -0,0 +1,85 @@
+# Steps to debug an ML Program operator implementation
+
+Basic debugging of everything excluding model execution (e.g. partitioning, checking whether an operator is
+supported, adding the CoreML operator inputs/outputs) can be done anywhere, as the code is set up to build and
+create the protobuf-based CoreML model on all platforms.
+
+To debug model execution issues you will need a macOS machine.
+
+## Debugging invalid output
+
+If there is a crash during execution, or unexpected output, the best approach is to see what running the same
+model through coremltools directly produces.
+
+NOTE: this doesn't guarantee coremltools is correct, as there could be a bug in its implementation. It does,
+however, provide a data point on whether we are generating the same CoreML model as the coremltools Python API.
+
+### Comparing to coremltools output
+
+Create a small test script that replicates the inputs/outputs of the operator you are debugging.
+The script should use the coremltools library to run the operator and print the output, which can then be
+compared with the CoreML EP's output.
+
+https://apple.github.io/coremltools/docs-guides/source/model-intermediate-language.html#create-a-mil-program
+
+Usage is reasonably intuitive. The example below defines a model with two inputs and a matmul operator.
+The model is printed, then run with randomly generated inputs, and the resulting output is printed.
+
+```python
+import numpy as np
+import coremltools as ct
+from coremltools.converters.mil import Builder as mb
+
+target = ct.target.iOS15
+
+x_shape = (1, 4)
+y_shape = (10, 4, 3)
+
+@mb.program(input_specs=[mb.TensorSpec(shape=x_shape), mb.TensorSpec(shape=y_shape)],
+            opset_version=target)
+def prog(x, y):
+    # For reference, a constant can be added using `mb.const` by specifying the data in the `val` parameter.
+    # c_shape = (3, )
+    # c_data = np.random.random_sample(c_shape)
+    # c = mb.const(val=c_data)
+
+    # Call the operator you are debugging with the inputs/constants.
+    # See the spec for the operator names, inputs/outputs and supported data types.
+    # https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html
+    z = mb.matmul(x=x, y=y)
+
+    # You can have additional function calls here if there are multiple operators involved.
+    # Contrived example that uses a constant and the output from a previous operator:
+    # z = mb.add(x=z, y=c)
+
+    return z
+
+# Prints the MIL program in a reasonably concise manner.
+print(prog)
+
+# Convert to an ML Program model.
+m = ct.convert(prog, minimum_deployment_target=target)
+
+# If you want to dump the full protobuf of the model, uncomment this.
+# You can compare the values to what is being set by the ORT CoreML EP code if you suspect any issues there.
+# spec = m.get_spec()
+# print(spec)
+
+# Run the model to generate output for comparison with the CoreML EP output.
+x = np.random.rand(*x_shape)
+y = np.random.rand(*y_shape)
+
+print(m.predict({'x': x, 'y': y}))
+```
+
+## Dumping the ORT generated mlmodel
+
+You can also dump the mlmodel generated by the ORT CoreML EP, which can be handy with larger models.
+
+In a debug build, set the ORT_COREML_EP_MODEL_DIR environment variable to a directory where you want the ML Package
+containing the mlmodel to be saved. The model will remain after the CoreML EP exits, unlike the default behavior
+where it is written to a temporary directory that is automatically removed on application exit.
+
+Script to dump: [dump_mlprogram_model.py](dump_mlprogram_model.py)
+
+See [here](https://github.com/microsoft/onnxruntime/blob/3c0b407709fd3c71755ed046edd688b30a786d94/onnxruntime/core/providers/coreml/model/host_utils.h#L70-L75)
+for the environment variable setup and [usage](https://github.com/search?q=repo%3Amicrosoft%2Fonnxruntime%20kOverrideModelOutputDirectoryEnvVar%20&type=code).

diff --git a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc
index 8daf64dc4a45..7338fc18fe77 100644
--- a/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/impl/gemm_op_builder.cc
@@ -109,19 +109,11 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
   ORT_IGNORE_RETURN_VALUE(GetShape(b, b_shape, logger));

   int64_t b0 = -1, b1 = -1;

-  // ML Program MatMul supports N-D input
   if (model_builder.CreateMLProgram() && is_matmul) {
-    if (b_shape.size() == 1) {
-      // B is treated as {b_shape[0], 1} according to the numpy rules.
-      b0 = b_shape[0];
-      b1 = 1;
-    } else {
-      // last 2 dims are used
-      b0 = b_shape[b_shape.size() - 2];
-      b1 = b_shape[b_shape.size() - 1];
-    }
+    // ML Program MatMul supports N-D input, however we don't use the 'K' or 'N' values calculated below for it,
+    // so we don't need to update b0 or b1.
   } else {
-    // we only support 2D input
+    // we only support 2D input for all other combinations
     b0 = b_shape[0];
     b1 = b_shape[1];
   }
@@ -182,7 +174,6 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
       model_builder.AddOperation(std::move(gemm_op));
     } else {
       // CoreML implementation is the same as ONNX MatMul.
-      // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.linear.matmul
       auto matmul_op = model_builder.CreateOperation(node, "matmul");
       AddOperationInput(*matmul_op, "x", a.Name());
       AddOperationInput(*matmul_op, "y", b.Name());
@@ -268,14 +259,28 @@ bool GemmOpBuilder::IsOpSupportedImpl(const Node& node, const OpBuilderInputPara
   }

   if (is_matmul) {
+    const auto a_rank = a_shape.size();
+    const auto b_rank = b_shape.size();
+
     if (input_params.create_mlprogram) {
-      // ML Program matmul op has numpy semantics the same as the ONNX spec so we can use directly
+      // ML Program matmul op has numpy semantics the same as the ONNX spec, so we can use it directly.
+      // https://apple.github.io/coremltools/source/coremltools.converters.mil.mil.ops.defs.html#coremltools.converters.mil.mil.ops.defs.iOS15.linear.matmul
+      //
+      // There does appear to be a bug in handling one of the inputs being 1D, so for now skip these cases.
+      // See https://github.com/apple/coremltools/issues/2263
+      //
+      // If required for perf we could manually do the shape alterations the spec documents (convert the 1D input
+      // to 2D, and remove the extra dimension from the output), as 2D input is correctly handled by CoreML matmul.
+      if ((a_rank == 1 && b_rank > 1) || (a_rank > 1 && b_rank == 1)) {
+        LOGS(logger, VERBOSE) << "Skipping due to bug in CoreML ML Program when one of the inputs is 1D.";
+        return false;
+      }
     } else {
       // we could potentially support 1D and 3D if required. beyond 3D the dims that merge diverge.
       // https://github.com/apple/coremltools/blob/1931758aae383c83daddfc56f11a24a9d2bf4b87/coremltools/converters/onnx/_operators.py#L1607
       // https://github.com/apple/coremltools/blob/1931758aae383c83daddfc56f11a24a9d2bf4b87/coremltools/converters/mil/backend/nn/op_mapping.py#L1374
       // https://apple.github.io/coremltools/mlmodel/Format/NeuralNetwork.html#innerproductlayerparams
-      if (a_shape.size() != 2 || b_shape.size() != 2) {
+      if (a_rank != 2 || b_rank != 2) {
         LOGS(logger, VERBOSE) << "a and b inputs must be 2D. ";
         return false;
       }

diff --git a/onnxruntime/core/providers/coreml/builders/model_builder.cc b/onnxruntime/core/providers/coreml/builders/model_builder.cc
index 88b518ab2289..eec0fcce51db 100644
--- a/onnxruntime/core/providers/coreml/builders/model_builder.cc
+++ b/onnxruntime/core/providers/coreml/builders/model_builder.cc
@@ -906,6 +906,7 @@ Status ModelBuilder::SaveModel() {

 #if defined(COREML_ENABLE_MLPROGRAM)
   if (create_ml_program_) {
+    // we need to jump through some hoops to get the model path that the ML Program load expects.
     std::string tmp_model_path = model_output_path_ + "/tmp/model.mlmodel";
     CreateEmptyFile(tmp_model_path);

diff --git a/onnxruntime/core/providers/coreml/dump_mlprogram_model.py b/onnxruntime/core/providers/coreml/dump_mlprogram_model.py
index a3ceee70684d..dce98e5138d9 100644
--- a/onnxruntime/core/providers/coreml/dump_mlprogram_model.py
+++ b/onnxruntime/core/providers/coreml/dump_mlprogram_model.py
@@ -5,6 +5,11 @@
 if len(sys.argv) < 2:
     print(f"Usage: {sys.argv[0]} <path to model.mlmodel>")
     print("If generated by onnxruntime this will be <ML Package dir>/Data/com.microsoft.onnxruntime/model.mlmodel")
+    print(
+        "The ML Package created by the CoreML EP can be saved to a specific directory in a debug build of onnxruntime "
+        "by setting the environment variable ORT_COREML_EP_MODEL_DIR to the desired directory."
+    )
+
     sys.exit(-1)

 model_path = sys.argv[1]
@@ -13,7 +18,9 @@
 spec = m.get_spec()
 print(spec)

-# Example code if you want to filter output or do more advanced things
+# Example code if you want to filter the output or do more advanced things.
+# In the example below we print the value of an attribute of one specific node from a larger model.
+#
 # main = spec.mlProgram.functions["main"]
 # block = main.block_specializations[main.opset]
 # print(f"{len(block.operations)} operators")
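---

The following is not part of the patch. To confirm the coremltools behavior that motivated the new 1D check, you can exercise both the suspect case and the manual shape-alteration workaround mentioned in the gemm_op_builder.cc comment with a script in the style of the DebugMLProgram.md example above. This is a minimal sketch: the program names are illustrative, it assumes coremltools is installed on a macOS machine, and it assumes the iOS15 `matmul`, `expand_dims` and `squeeze` MIL ops behave as their spec documents.

```python
import numpy as np
import coremltools as ct
from coremltools.converters.mil import Builder as mb

target = ct.target.iOS15

x_shape = (4,)        # 1D input that appears to trigger https://github.com/apple/coremltools/issues/2263
y_shape = (10, 4, 3)

@mb.program(input_specs=[mb.TensorSpec(shape=x_shape), mb.TensorSpec(shape=y_shape)],
            opset_version=target)
def prog_1d(x, y):
    # numpy semantics: x is treated as (1, 4) and the prepended dim is removed from the output
    return mb.matmul(x=x, y=y)

@mb.program(input_specs=[mb.TensorSpec(shape=x_shape), mb.TensorSpec(shape=y_shape)],
            opset_version=target)
def prog_workaround(x, y):
    # Do the shape alterations manually so matmul only ever sees inputs of rank >= 2.
    x_2d = mb.expand_dims(x=x, axes=[0])  # (4,) -> (1, 4)
    z = mb.matmul(x=x_2d, y=y)            # (10, 1, 3)
    return mb.squeeze(x=z, axes=[1])      # (10, 1, 3) -> (10, 3)

x = np.random.rand(*x_shape)
y = np.random.rand(*y_shape)

print("numpy reference:", np.matmul(x, y))  # expected result, shape (10, 3)

for name, prog in (("1d matmul", prog_1d), ("workaround", prog_workaround)):
    m = ct.convert(prog, minimum_deployment_target=target)
    print(f"{name}:", m.predict({"x": x, "y": y}))
```

If the `1d matmul` output differs from the numpy reference while the `workaround` output matches it, that is consistent with the upstream issue and with the EP skipping only the 1D case.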