Skip to content

Commit

Permalink
ncnn-jni: add code skeleton for yolo-v10 (#206)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhouwg authored May 28, 2024
1 parent 2549c41 commit d92c83c
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4047,6 +4047,7 @@ public enum bench_type {
NCNN_BENCHMARK_ASR,
NCNN_BENCHMARK_TTS,
NCNN_BENCHARK_YOLOV5,
NCNN_BENCHARK_YOLOV10,
NCNN_BENCHMARK_MAX,
};

Expand All @@ -4061,6 +4062,7 @@ public enum bench_type {
// Realtime-inference types exposed to the Java layer.
// Order and implicit ordinal values must stay in sync with
// enum ncnn_jni_realtimeinference_type in core/ncnn/jni/ncnn-jni.h.
public enum ncnn_realtimeinference_type {
NCNN_REALTIMEINFERENCE_FACEDETECT,
NCNN_REALTIMEINFERENCE_NANODAT,
NCNN_REALTIMEINFERENCE_YOLOV10
};
//keep sync with ncnn-jni.h, ncnn backend
public static final int NCNN_BACKEND_CPU = 0;
Expand Down
2 changes: 2 additions & 0 deletions cdeosplayer/kantv/src/main/res/values/arrays.xml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
<item>ASR(ncnn)</item> <!-- ASR(https://github.com/k2-fsa/sherpa-ncnn, based on NCNN)benchmark, TBD -->
<item>TTS(ncnn)</item> <!-- TBD -->
<item>YoloV5(ncnn)</item>
<item>YoloV10(ncnn)</item>
</string-array>

<string-array name="threadCounts">
Expand Down Expand Up @@ -166,6 +167,7 @@
<!-- Network ids selectable for realtime inference.
     NOTE(review): item order appears to mirror ncnn_realtimeinference_type
     (FACEDETECT -> scrfd, NANODAT -> nanodat, YOLOV10 -> yolov10) - confirm
     against the enum before reordering. -->
<string-array name="ncnn_realtimeinference_netid">
<item>scrfd</item>
<item>nanodat</item>
<item>yolov10</item>
</string-array>

</resources>
71 changes: 70 additions & 1 deletion core/ncnn/jni/ncnn-jni-impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ static ncnn::Net squeezenet;
static ncnn::Net squeezenet_gpu;
static ncnn::Net mnist;
static ncnn::Net yolov5;

static ncnn::Net yolov10;

class MyNdkCamera : public NdkCameraWindow {
public:
Expand Down Expand Up @@ -905,6 +905,9 @@ Java_org_ncnn_ncnnjava_loadModel(JNIEnv *env, jobject thiz, jobject assetManager
}
}

} else if (netid == NCNN_BENCHARK_YOLOV10) {
LOGGD("[%s,%s,%d]load YoloV10 model", __FILE__, __FUNCTION__, __LINE__);
NCNN_JNI_NOTIFY("[%s,%s,%d]load YoloV10 model", __FILE__, __FUNCTION__, __LINE__);
} else {
LOGGW("netid %d not supported using ncnn with non-live inference", netid);
NCNN_JNI_NOTIFY("netid %d not supported using ncnn with non-live inference", netid);
Expand Down Expand Up @@ -1401,6 +1404,69 @@ static void detectYoloV5(JNIEnv *env, jobject bitmap, bool use_gpu) {
}


/**
 * Run YoloV10 object detection on an Android bitmap.
 *
 * Code skeleton: the actual NCNN inference is not implemented yet
 * (the objects vector stays empty), but input validation, timing and
 * result reporting are in place.
 *
 * @param env     JNI environment
 * @param bitmap  input image; must be in RGBA_8888 format
 * @param use_gpu true to request the Vulkan GPU backend
 */
static void detectYoloV10(JNIEnv *env, jobject bitmap, bool use_gpu) {
    if (use_gpu && ncnn::get_gpu_count() == 0) {
        LOGGW("gpu backend not supported");
        NCNN_JNI_NOTIFY("gpu backend not supported");
        return;
    }

    double start_time = ncnn::get_current_time();

    AndroidBitmapInfo info;
    // check the return value: on failure 'info' would be read uninitialized
    if (AndroidBitmap_getInfo(env, bitmap, &info) != ANDROID_BITMAP_RESULT_SUCCESS) {
        LOGGW("AndroidBitmap_getInfo failed");
        NCNN_JNI_NOTIFY("AndroidBitmap_getInfo failed");
        return;
    }
    // validate the format before consuming any field of 'info'
    if (info.format != ANDROID_BITMAP_FORMAT_RGBA_8888) {
        LOGGW("bitmap is not RGBA_8888");
        NCNN_JNI_NOTIFY("bitmap format is not RGBA_8888");
        return;
    }
    const int width = info.width;
    const int height = info.height;
    NCNN_JNI_NOTIFY("image w=%d,h=%d", width, height);

    // scale to 640 x 640, the YoloV10 input resolution
    const int target_size = 640;
    int w = target_size;
    int h = target_size;
    ncnn::Mat in = ncnn::Mat::from_android_bitmap_resize(env, bitmap, ncnn::Mat::PIXEL_RGB, w, h);
    NCNN_JNI_NOTIFY("mat w=%d h=%d", in.w, in.h);

    //reuse YoloV5Object for YoloV10
    std::vector<YoloV5Object> objects;
    {
        //do YoloV10 inference using NCNN here
    }

    // COCO 80-class label table, indexed by YoloV5Object::label
    static const char *class_names[] = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
        "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
        "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
        "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
        "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
        "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
        "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote",
        "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
        "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };
    const int num_classes = (int) (sizeof(class_names) / sizeof(class_names[0]));

    for (size_t i = 0; i < objects.size(); i++) {
        // guard the table lookup so a bogus label from a half-wired model
        // cannot index out of bounds
        const char *label = (objects[i].label >= 0 && objects[i].label < num_classes)
                            ? class_names[objects[i].label] : "unknown";
        LOGGD("x=%d, y=%d, w=%d, h=%d, label=%s, probability=%.2f", objects[i].x, objects[i].y, objects[i].w, objects[i].h, label, objects[i].prob);
        NCNN_JNI_NOTIFY("x=%d, y=%d, w=%d, h=%d, label=%s, probability=%.2f", objects[i].x, objects[i].y, objects[i].w, objects[i].h, label, objects[i].prob);
    }

    double elapsed = ncnn::get_current_time() - start_time;
    LOGGD("ncnn YoloV10 inference elapsed %.2f ms", elapsed);
    NCNN_JNI_NOTIFY("ncnn YoloV10 inference elapsed %.2f ms", elapsed);
}


/**
*
* @param sz_ncnnmodel_param param file of ncnn model
Expand Down Expand Up @@ -1439,6 +1505,9 @@ void ncnn_jni_bench(JNIEnv *env, const char *sz_ncnnmodel_param, const char *sz_
case NCNN_BENCHARK_YOLOV5:
detectYoloV5(env, bitmap, use_gpu);
break;
case NCNN_BENCHARK_YOLOV10:
detectYoloV10(env, bitmap, use_gpu);
break;
//=============================================================================================
//add new benchmark type for NCNN here
//=============================================================================================
Expand Down
4 changes: 3 additions & 1 deletion core/ncnn/jni/ncnn-jni.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,15 @@ enum ncnn_jni_bench_type {
NCNN_BENCHMARK_ASR,
NCNN_BENCHMARK_TTS,
NCNN_BENCHARK_YOLOV5,
NCNN_BENCHARK_YOLOV10,
NCNN_BENCHMARK_MAX
};

// available realtime inference type for ncnn-jni
// Realtime-inference types for ncnn-jni; keep in sync with
// ncnn_realtimeinference_type on the Java side.
// (The previous rendering contained a duplicated NCNN_REALTIMEINFERENCE_NANODAT
// enumerator — a leftover of the old line without the trailing comma — which
// made the enum ill-formed; it is removed here.)
enum ncnn_jni_realtimeinference_type {
    NCNN_REALTIMEINFERENCE_FACEDETECT = 0, //reserved for GPT-4o style multimodal poc(CV, NLP, LLM, TTS... with live camera on Android phone)
    NCNN_REALTIMEINFERENCE_NANODAT,
    NCNN_REALTIMEINFERENCE_YOLOV10
};

// available backend for ncnn-jni
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,4 @@

For the GGML inference framework, please refer to https://github.com/zhouwg/kantv/commit/204036648d5d6779f73fe3eafea59edd3cce3843



for NCNN inference framework, modify following files to add new inference bench type / new inference backend / new realtime inference type

<a href="https://github.com/zhouwg/kantv/blob/master/cdeosplayer/kantv/src/main/res/values/arrays.xml#L77">arrays.xml</a>


<a href="https://github.com/zhouwg/kantv/blob/master/cdeosplayer/cdeosplayer-lib/src/main/java/cdeos/media/player/CDEUtils.java#L4027">CDEUtils.java</a>


<a href="https://github.com/zhouwg/kantv/blob/master/cdeosplayer/kantv/src/main/java/com/cdeos/kantv/ui/fragment/AIResearchFragment.java">AIResearchFragment.java</a>


<a href="https://github.com/zhouwg/kantv/blob/master/cdeosplayer/kantv/src/main/java/com/cdeos/kantv/ui/fragment/AIAgentFragment.java">AIAgentFragment.java</a>


<a href="https://github.com/zhouwg/kantv/blob/master/core/ncnn/jni/ncnn-jni.h">ncnn-jni.h</a>


<a href="https://github.com/zhouwg/kantv/blob/master/core/ncnn/jni/ncnn-jni-impl.cpp">ncnn-jni-impl.cpp</a>
For the NCNN inference framework, please refer to https://github.com/zhouwg/kantv/pull/206/commits/6cb20cb34f25d59cfd7d2be953068a934631ce34

0 comments on commit d92c83c

Please sign in to comment.