ggml-jni: troubleshooting MiniCPM-V inference on xiaomi14 (#204)
zhouwg authored May 28, 2024
1 parent e8e8914 commit 67968bd
Showing 19 changed files with 543 additions and 10,074 deletions.
9 changes: 7 additions & 2 deletions cdeosplayer/kantv/build.gradle
@@ -14,8 +14,13 @@ android {
externalNativeBuild {
cmake {
//modify to -DCMAKE_BUILD_TYPE=Release before prepare release apk
arguments += "-DCMAKE_BUILD_TYPE=Debug"
cppFlags ""
arguments += "-DCMAKE_BUILD_TYPE=Release"
//weiguo:2024-05-28, added to fix the issue in this PR: https://github.com/zhouwg/kantv/pull/204
arguments += "-DCMAKE_ANDROID_STL_TYPE=c++_shared"
arguments += "-DANDROID_STL=c++_shared"
arguments += "-DANDROID_CPP_FEATURES=exceptions"
cppFlags "-fexceptions"
//end added
}
}
ndk {
@@ -161,6 +161,11 @@ public class AIResearchFragment extends BaseMvpFragment<AIResearchPresenter> imp
private String ggmlMNISTImageFile = "mnist-5.png";
private String ggmlMNISTModelFile = "mnist-ggml-model-f32.gguf";

//MiniCPM-V:A GPT-4V Level Multimodal LLM, https://github.com/OpenBMB/MiniCPM-V/
//for users in China, https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5-gguf/files
//for users outside of China, https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf/tree/main
private String ggmlMiniCPMVModelFile = "ggml-model-Q4_K_M.gguf";

// https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/tree/main
// https://huggingface.co/TheBloke/Llama-2-7B-GGUF
// https://huggingface.co/TheBloke/Llama-2-13B-GGUF
@@ -347,6 +352,7 @@ public void onItemSelected(AdapterView<?> parent, View view, int position, long
//05-25-2024, add for MiniCPM-V(A GPT-4V Level Multimodal LLM, https://github.com/OpenBMB/MiniCPM-V) or other GPT-4o style Multimodal LLM)
if (benchmarkIndex == CDEUtils.bench_type.GGML_BENCHMARK_LLM_V.ordinal()) {
spinnerModelName.setSelection(21); //TODO: hardcoded to the MiniCPM-V model to validate MiniCPM-V more easily on an Android phone
displayFileStatus(CDEUtils.getDataPath() + ggmlSampleFileName, CDEUtils.getDataPath() + "/models/" + ggmlMiniCPMVModelFile);
}

if ((previousBenchmakrIndex < CDEUtils.bench_type.GGML_BENCHMARK_MAX.ordinal()) && (benchmarkIndex < CDEUtils.bench_type.GGML_BENCHMARK_MAX.ordinal())) {
@@ -595,7 +601,7 @@ public void onNothingSelected(AdapterView<?> parent) {
//MiniCPM-V:A GPT-4V Level Multimodal LLM, https://github.com/OpenBMB/MiniCPM-V/
//for users in China, https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5-gguf/files
//for users outside of China, https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf/tree/main
selectModeFileName = "ggml-model-Q4_K_M.gguf";
selectModeFileName = ggmlMiniCPMVModelFile;
isLLMVModel = true;
} else if ((strModeName.startsWith("mnist")) || (benchmarkIndex == CDEUtils.bench_type.GGML_BENCHMARK_CV_MNIST.ordinal())) {
isMNISTModel = true;
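The Java-side change above wires the MiniCPM-V model file into the benchmark UI and displays its on-device status. Below is a minimal, hedged sketch of the pre-flight check this amounts to: both the main LLM gguf and the multimodal projector gguf must be present on the device before inference starts. The mmproj path is the one hardcoded later in this commit; placing the main model under the same /sdcard/kantv/models/ directory is an assumption, and the helper itself is illustrative, not the project's actual API.

```cpp
// Illustrative only: verify that the two MiniCPM-V files referenced in this commit
// exist on the device before kicking off inference. The mmproj path matches the
// hardcoded value in the diff; the main-model path is an assumption.
#include <sys/stat.h>
#include <cstdio>

static bool file_exists(const char * path) {
    struct stat st {};
    return ::stat(path, &st) == 0;
}

int main() {
    const char * model  = "/sdcard/kantv/models/ggml-model-Q4_K_M.gguf";  // main LLM weights
    const char * mmproj = "/sdcard/kantv/models/mmproj-model-f16.gguf";   // multimodal projector
    if (!file_exists(model) || !file_exists(mmproj)) {
        std::fprintf(stderr, "MiniCPM-V model files missing under /sdcard/kantv/models/\n");
        return 1;
    }
    std::printf("MiniCPM-V model files found\n");
    return 0;
}
```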
8 changes: 7 additions & 1 deletion core/ggml/CMakeLists.txt
@@ -74,7 +74,6 @@ set(SOURCE_FILES
${LLAMACPP_SRC_DIR}/ggml-alloc.c
${LLAMACPP_SRC_DIR}/ggml-backend.c
${LLAMACPP_SRC_DIR}/ggml-quants.c
${LLAMACPP_SRC_DIR}/ggml-qnn.cpp
${QNN_BACKEND_SRCS}
${LLAMACPP_SRC_DIR}/llama.cpp
${LLAMACPP_SRC_DIR}/unicode.cpp
@@ -132,6 +131,7 @@ endif()

add_definitions(-O3) #otherwise app's performance will be very bad on Xiaomi14 with debug build


if (TARGET_XIAOMI14)

#weiguo:2024-03-11
@@ -170,6 +170,12 @@ else()
"-Wno-unused-command-line-argument")
endif()

#weiguo:2024-05-28, fix issue in this PR:https://github.com/zhouwg/kantv/pull/204
add_definitions(-fexceptions)
add_definitions(-D_LIBCPP_EXCEPTIONS)
add_definitions(-D_LIBCXXABI_EXCEPTIONS)
add_link_options( "-lc++_shared" )

if (GGML_ENABLE_QNN)
file(GLOB allPrebuiltQNNLibs "${QNN_LIB_PATH}/libQnn*.so")

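The Gradle and CMake changes above serve the same purpose: build the native library against the shared C++ STL (c++_shared) with C++ exceptions enabled. The sketch below illustrates why that matters, assuming the llama.cpp/clip.cpp code paths report load failures by throwing; it is an illustration under that assumption, not code from the project.

```cpp
// Minimal illustration (not project code): code that throws, like the gguf loaders in
// llama.cpp/clip.cpp, needs -fexceptions and a C++ runtime with unwinding support
// (c++_shared). Without them the throw below terminates the process instead of being
// caught, which shows up as a native crash in the app.
#include <stdexcept>
#include <string>
#include <cstdio>

static void load_or_throw(const char * path) {
    std::FILE * f = std::fopen(path, "rb");
    if (f == nullptr) {
        throw std::runtime_error(std::string("failed to open model: ") + path);
    }
    std::fclose(f);
}

int main() {
    try {
        load_or_throw("/sdcard/kantv/models/ggml-model-Q4_K_M.gguf");
    } catch (const std::exception & e) {
        std::fprintf(stderr, "caught: %s\n", e.what()); // only reachable with exceptions enabled
        return 1;
    }
    return 0;
}
```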
330 changes: 212 additions & 118 deletions core/ggml/jni/clip.cpp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion core/ggml/jni/clip.h
@@ -1,4 +1,4 @@
//ref&author: https://github.com/OpenBMB/llama.cpp/blob/minicpm-v2.5/examples/minicpmv/clip.h
//ref&author: https://github.com/OpenBMB/llama.cpp/blob/minicpm-v2.5/examples/minicpmv
#ifndef CLIP_H
#define CLIP_H

10 changes: 5 additions & 5 deletions core/ggml/jni/ggml-jni-impl-external.cpp
@@ -8853,7 +8853,7 @@ int llama_inference_ng(const char * sz_model_path, const char * sz_user_data, i


//05-25-2024, add for MiniCPM-V(A GPT-4V Level Multimodal LLM, https://github.com/OpenBMB/MiniCPM-V) or other GPT-4o style Multimodal LLM)
extern int minicpmv_inference_main(int argc, char *argv[]);
extern int minicpmv_inference_main(int argc, char *argv[], int backend);
int minicpmv_inference(const char *sz_model_path, const char *sz_img_path, const char *sz_user_data,
int num_threads, int n_backend_type) {
int ret = 0;
@@ -8868,22 +8868,22 @@ int minicpmv_inference(const char *sz_model_path, const char *sz_img_path, const
//TODO: this is a lazy/dirty/quick method, just for fun with MiniCPM-V on Xiaomi 14
//./minicpmv-cli -m /home/weiguo/models/ggml-model-Q4_K_M.gguf --mmproj /home/weiguo/models/mmproj-model-f16.gguf
// -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image /home/weiguo/Downloads/airplane.jpeg -p "What is in the image?"
int argc = 21;
int argc = 11;
const char *argv[] = {"minicpmv-main",
"-m", sz_model_path,
"--mmproj", "/sdcard/kantv/models/mmproj-model-f16.gguf"/*hardcoded*/,
/*
"-c", "4096",
"--temp", "0.7",
"--top-p", "0.8",
"--top-k", "100",
"--repeat-penalty", "1.05",
*/
"--image", sz_img_path,
"-p", sz_user_data,
"-t", std::to_string(num_threads).c_str()
};
//TODO: crash on Xiaomi 14 but works fine on Ubuntu 20.04
//ret = minicpmv_inference_main(argc, const_cast<char **>(argv));
GGML_JNI_NOTIFY("MiniCPM-V inference not supported currently");
ret = minicpmv_inference_main(argc, const_cast<char **>(argv), n_backend_type);

return ret;
}
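For context, here is a hedged sketch of how a caller might drive the wrapper above from the native side. The function signature is the one declared in this commit; the image path and the backend id value are placeholders, not values from the repository.

```cpp
// Illustrative caller for the wrapper shown in the diff above. The declaration is real
// (added in this commit); the image path and the backend id are assumptions used only
// for this example.
extern int minicpmv_inference(const char * sz_model_path, const char * sz_img_path,
                              const char * sz_user_data, int num_threads, int n_backend_type);

int run_minicpmv_smoke_test() {
    const char * model   = "/sdcard/kantv/models/ggml-model-Q4_K_M.gguf";
    const char * image   = "/sdcard/kantv/test.jpg";  // hypothetical image path
    const char * prompt  = "What is in the image?";
    const int    threads = 4;
    const int    backend = 3; // assumed id of GGML_BACKEND_GGML (the original ggml backend)
    return minicpmv_inference(model, image, prompt, threads, backend);
}
```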
2 changes: 2 additions & 0 deletions core/ggml/jni/ggml-jni.h
@@ -24,8 +24,10 @@
#include <stdbool.h>

#include "libavutil/cde_log.h"
#ifdef ANDROID //for build MiniCPM-V command line application on Linux
#include "kantv-asr.h"
#include "kantv-media.h"
#endif

#include "ggml.h"

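The #ifdef ANDROID guard above is what lets the same sources build as a plain Linux command-line tool for troubleshooting. A possible sketch of the fallbacks a Linux build would need for the Android-only notification hooks follows; the macro and function shapes are assumptions based on how they are used elsewhere in this diff, not the project's actual definitions.

```cpp
// Hedged sketch: fallbacks for the Android-only hooks when building the MiniCPM-V
// CLI on Linux. GGML_JNI_NOTIFY and kantv_asr_notify_benchmark_c are assumed to be
// printf-style / string-taking, matching their uses in this diff; the real project
// may define them differently.
#ifndef ANDROID
#include <cstdio>
#define GGML_JNI_NOTIFY(...) std::fprintf(stderr, __VA_ARGS__)

static inline void kantv_asr_notify_benchmark_c(const char * text) {
    std::fputs(text, stdout); // on Linux, just echo what would go to the Java UI
}
#endif
```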
43 changes: 29 additions & 14 deletions core/ggml/jni/minicpmv-cli.cpp
@@ -1,4 +1,5 @@
//ref&author: https://github.com/OpenBMB/llama.cpp/blob/minicpm-v2.5/examples/minicpmv/clip.h
//ref&author: https://github.com/OpenBMB/llama.cpp/blob/minicpm-v2.5/examples/minicpmv
#include "ggml-jni.h"
#include "ggml.h"
#include "log.h"
#include "common.h"
@@ -7,8 +8,6 @@
#include "minicpmv_io.h"
#include "llama.h"

#include "ggml-jni.h"

#include <cstdio>
#include <cstdlib>
#include <vector>
@@ -80,18 +79,32 @@ const char * llama_loop(struct minicpmv_context * ctx_llava,struct llama_sampling
return tmp;
}


int minicpmv_inference_main(int argc, char *argv[]) {
#ifdef ANDROID
int minicpmv_inference_main(int argc, char ** argv, int backend) {
#else //for build and run MiniCPM-V command line application on Linux
//works fine on Ubuntu20.04
//./minicpmv-cli -m /home/weiguo/models/ggml-model-Q4_K_M.gguf --mmproj /home/weiguo/models/mmproj-model-f16.gguf --image /home/weiguo/Downloads/airplane.jpeg -t 4 -p "What is in the image?"
int main(int argc, char ** argv) {
#endif
ggml_time_init();

gpt_params params;

if (!gpt_params_parse(argc, argv, params)) {
LOGGD("gpt_params_parse failed");
GGML_JNI_NOTIFY("gpt_params_parse failed");
show_additional_info(argc, argv);
return 1;
}
if (backend != GGML_BACKEND_GGML) { // GGML_BACKEND_GGML is the original GGML, used to compare performance between QNN backend and original GGML
#ifdef GGML_USE_QNN
LOGGD("using QNN backend %d", backend);
params.main_gpu = backend;
params.n_gpu_layers = 1;
#else
LOGGW("QNN feature was disabled and backend is not ggml\n");
GGML_JNI_NOTIFY("QNN feature was disabled and backend is not ggml\n");
return 1;
#endif
}

#ifndef LOG_DISABLE_LOGS
log_set_target(log_filename_generator("llava", "log"));
@@ -101,13 +114,12 @@ int minicpmv_inference_main(int argc, char *argv[]) {
#endif // LOG_DISABLE_LOGS

if (params.mmproj.empty() || (params.image.empty())) {
//gpt_params_print_usage(argc, argv, params);
gpt_params_print_usage(argc, argv, params);
show_additional_info(argc, argv);
return 1;
}

for (auto & image : params.image)
{
for (auto & image : params.image) {
int n_past = 0;
auto ctx_llava = minicpmv_init(&params, image, n_past);

@@ -128,10 +140,11 @@ if (strstr(tmp, "###")) break; // Yi-VL behavior
if (strstr(tmp, "###")) break; // Yi-VL behavior
have_tmp = true;
printf("%s", tmp);
#ifdef TARGET_ANDROID
#ifdef ANDROID
kantv_asr_notify_benchmark_c(tmp);
#endif
if (strstr(response.c_str(), "<user>")) break; // minicpm-v

if (strstr(response.c_str(), "<user>")) break; // minicpm-v

fflush(stdout);
}
@@ -151,19 +164,21 @@ int minicpmv_inference_main(int argc, char *argv[]) {
if (strcmp(tmp, "</s>") == 0) break;
if (strstr(tmp, "###")) break; // Yi-VL behavior
printf("%s", tmp);// mistral llava-1.6
LOGGD("%s", tmp);
if (strstr(response.c_str(), "<user>")) break; // minicpm-v
fflush(stdout);
}
llama_sampling_free(ctx_sampling);
}
}
printf("\n");
#ifdef ANDROID
kantv_asr_notify_benchmark_c("\n[end of text]\n");
#endif
llama_print_timings(ctx_llava->ctx_llama);

ctx_llava->model = NULL;
llava_free(ctx_llava);
}

return 0;
}
}
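The new backend parameter in minicpmv_inference_main is what routes inference to a QNN backend through standard gpt_params fields. A small, self-contained reduction of that branch follows; the struct and the backend ids are stand-ins for illustration, since the real gpt_params and GGML_BACKEND_GGML come from llama.cpp and the project's QNN backend code.

```cpp
// Illustrative reduction of the backend branch added in this commit: any backend id
// other than the "original ggml" id is treated as a QNN device and mapped onto the
// main_gpu / n_gpu_layers fields. Types and ids here are stand-ins, not project code.
#include <cassert>

struct params_sketch {        // stand-in for llama.cpp's gpt_params
    int main_gpu     = 0;
    int n_gpu_layers = 0;
};

static void apply_backend(params_sketch & params, int backend, int backend_ggml_id) {
    if (backend != backend_ggml_id) {
        params.main_gpu     = backend; // QNN device id (CPU/GPU/NPU) -- assumption
        params.n_gpu_layers = 1;       // offload a single layer, as in the diff
    }
}

int main() {
    params_sketch p;
    apply_backend(p, /*backend=*/2, /*backend_ggml_id=*/3); // hypothetical ids
    assert(p.main_gpu == 2 && p.n_gpu_layers == 1);
    return 0;
}
```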
