ggml-jni: troubleshooting MiniCPM-V inference on xiaomi14 (#204)
zhouwg authored May 28, 2024
1 parent e8e8914 commit 67968bd
Showing 19 changed files with 543 additions and 10,074 deletions.
9 changes: 7 additions & 2 deletions cdeosplayer/kantv/build.gradle
@@ -14,8 +14,13 @@ android {
externalNativeBuild {
cmake {
//modify to -DCMAKE_BUILD_TYPE=Release before prepare release apk
arguments += "-DCMAKE_BUILD_TYPE=Debug"
cppFlags ""
arguments += "-DCMAKE_BUILD_TYPE=Release"
//weiguo:2024-05-28, added to fix the issue in this PR: https://github.com/zhouwg/kantv/pull/204
arguments += "-DCMAKE_ANDROID_STL_TYPE=c++_shared"
arguments += "-DANDROID_STL=c++_shared"
arguments += "-DANDROID_CPP_FEATURES=exceptions"
cppFlags "-fexceptions"
//end added
}
}
ndk {
@@ -161,6 +161,11 @@ public class AIResearchFragment extends BaseMvpFragment<AIResearchPresenter> imp
private String ggmlMNISTImageFile = "mnist-5.png";
private String ggmlMNISTModelFile = "mnist-ggml-model-f32.gguf";

//MiniCPM-V:A GPT-4V Level Multimodal LLM, https://github.com/OpenBMB/MiniCPM-V/
//for users in China, https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5-gguf/files
//for users outside of China, https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf/tree/main
private String ggmlMiniCPMVModelFile = "ggml-model-Q4_K_M.gguf";

// https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/tree/main
// https://huggingface.co/TheBloke/Llama-2-7B-GGUF
// https://huggingface.co/TheBloke/Llama-2-13B-GGUF
@@ -347,6 +352,7 @@ public void onItemSelected(AdapterView<?> parent, View view, int position, long
//05-25-2024, add for MiniCPM-V(A GPT-4V Level Multimodal LLM, https://github.com/OpenBMB/MiniCPM-V) or other GPT-4o style Multimodal LLM)
if (benchmarkIndex == CDEUtils.bench_type.GGML_BENCHMARK_LLM_V.ordinal()) {
spinnerModelName.setSelection(21); //TODO: hardcoded to the MiniCPM-V model to validate MiniCPM-V more easily on an Android phone
displayFileStatus(CDEUtils.getDataPath() + ggmlSampleFileName, CDEUtils.getDataPath() + "/models/" + ggmlMiniCPMVModelFile);
}

if ((previousBenchmakrIndex < CDEUtils.bench_type.GGML_BENCHMARK_MAX.ordinal()) && (benchmarkIndex < CDEUtils.bench_type.GGML_BENCHMARK_MAX.ordinal())) {
@@ -595,7 +601,7 @@ public void onNothingSelected(AdapterView<?> parent) {
//MiniCPM-V:A GPT-4V Level Multimodal LLM, https://github.com/OpenBMB/MiniCPM-V/
//for users in China, https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5-gguf/files
//for users outside of China, https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf/tree/main
selectModeFileName = "ggml-model-Q4_K_M.gguf";
selectModeFileName = ggmlMiniCPMVModelFile;
isLLMVModel = true;
} else if ((strModeName.startsWith("mnist")) || (benchmarkIndex == CDEUtils.bench_type.GGML_BENCHMARK_CV_MNIST.ordinal())) {
isMNISTModel = true;
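The Java-side change above wires the MiniCPM-V model file into the benchmark UI and displays its on-device status. Below is a minimal, hedged sketch of the pre-flight check this amounts to: both the main LLM gguf and the multimodal projector gguf must be present on the device before inference starts. The mmproj path is the one hardcoded later in this commit; placing the main model under the same /sdcard/kantv/models/ directory is an assumption, and the helper itself is illustrative, not the project's actual API.

```cpp
// Illustrative only: verify that the two MiniCPM-V files referenced in this commit
// exist on the device before kicking off inference. The mmproj path matches the
// hardcoded value in the diff; the main-model path is an assumption.
#include <sys/stat.h>
#include <cstdio>

static bool file_exists(const char * path) {
    struct stat st {};
    return ::stat(path, &st) == 0;
}

int main() {
    const char * model  = "/sdcard/kantv/models/ggml-model-Q4_K_M.gguf";  // main LLM weights
    const char * mmproj = "/sdcard/kantv/models/mmproj-model-f16.gguf";   // multimodal projector
    if (!file_exists(model) || !file_exists(mmproj)) {
        std::fprintf(stderr, "MiniCPM-V model files missing under /sdcard/kantv/models/\n");
        return 1;
    }
    std::printf("MiniCPM-V model files found\n");
    return 0;
}
```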
8 changes: 7 additions & 1 deletion core/ggml/CMakeLists.txt
@@ -74,7 +74,6 @@ set(SOURCE_FILES
${LLAMACPP_SRC_DIR}/ggml-alloc.c
${LLAMACPP_SRC_DIR}/ggml-backend.c
${LLAMACPP_SRC_DIR}/ggml-quants.c
${LLAMACPP_SRC_DIR}/ggml-qnn.cpp
${QNN_BACKEND_SRCS}
${LLAMACPP_SRC_DIR}/llama.cpp
${LLAMACPP_SRC_DIR}/unicode.cpp
@@ -132,6 +131,7 @@ endif()

add_definitions(-O3) #otherwise app's performance will be very bad on Xiaomi14 with debug build


if (TARGET_XIAOMI14)

#weiguo:2024-03-11
@@ -170,6 +170,12 @@ else()
"-Wno-unused-command-line-argument")
endif()

#weiguo:2024-05-28, fix issue in this PR:https://github.com/zhouwg/kantv/pull/204
add_definitions(-fexceptions)
add_definitions(-D_LIBCPP_EXCEPTIONS)
add_definitions(-D_LIBCXXABI_EXCEPTIONS)
add_link_options( "-lc++_shared" )

if (GGML_ENABLE_QNN)
file(GLOB allPrebuiltQNNLibs "${QNN_LIB_PATH}/libQnn*.so")

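The Gradle and CMake changes above serve the same purpose: build the native library against the shared C++ STL (c++_shared) with C++ exceptions enabled. The sketch below illustrates why that matters, assuming the llama.cpp/clip.cpp code paths report load failures by throwing; it is an illustration under that assumption, not code from the project.

```cpp
// Minimal illustration (not project code): code that throws, like the gguf loaders in
// llama.cpp/clip.cpp, needs -fexceptions and a C++ runtime with unwinding support
// (c++_shared). Without them the throw below terminates the process instead of being
// caught, which shows up as a native crash in the app.
#include <stdexcept>
#include <string>
#include <cstdio>

static void load_or_throw(const char * path) {
    std::FILE * f = std::fopen(path, "rb");
    if (f == nullptr) {
        throw std::runtime_error(std::string("failed to open model: ") + path);
    }
    std::fclose(f);
}

int main() {
    try {
        load_or_throw("/sdcard/kantv/models/ggml-model-Q4_K_M.gguf");
    } catch (const std::exception & e) {
        std::fprintf(stderr, "caught: %s\n", e.what()); // only reachable with exceptions enabled
        return 1;
    }
    return 0;
}
```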
330 changes: 212 additions & 118 deletions core/ggml/jni/clip.cpp

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion core/ggml/jni/clip.h
@@ -1,4 +1,4 @@
//ref&author: https://github.com/OpenBMB/llama.cpp/blob/minicpm-v2.5/examples/minicpmv/clip.h
//ref&author: https://github.com/OpenBMB/llama.cpp/blob/minicpm-v2.5/examples/minicpmv
#ifndef CLIP_H
#define CLIP_H

10 changes: 5 additions & 5 deletions core/ggml/jni/ggml-jni-impl-external.cpp
@@ -8853,7 +8853,7 @@ int llama_inference_ng(const char * sz_model_path, const char * sz_user_data, i


//05-25-2024, add for MiniCPM-V(A GPT-4V Level Multimodal LLM, https://github.com/OpenBMB/MiniCPM-V) or other GPT-4o style Multimodal LLM)
extern int minicpmv_inference_main(int argc, char *argv[]);
extern int minicpmv_inference_main(int argc, char *argv[], int backend);
int minicpmv_inference(const char *sz_model_path, const char *sz_img_path, const char *sz_user_data,
int num_threads, int n_backend_type) {
int ret = 0;
@@ -8868,22 +8868,22 @@ int minicpmv_inference(const char *sz_model_path, const char *sz_img_path, const
//TODO: this is a lazy/dirty/quick method, just for fun with MiniCPM-V on Xiaomi 14
//./minicpmv-cli -m /home/weiguo/models/ggml-model-Q4_K_M.gguf --mmproj /home/weiguo/models/mmproj-model-f16.gguf
// -c 4096 --temp 0.7 --top-p 0.8 --top-k 100 --repeat-penalty 1.05 --image /home/weiguo/Downloads/airplane.jpeg -p "What is in the image?"
int argc = 21;
int argc = 11;
const char *argv[] = {"minicpmv-main",
"-m", sz_model_path,
"--mmproj", "/sdcard/kantv/models/mmproj-model-f16.gguf"/*hardcoded*/,
/*
"-c", "4096",
"--temp", "0.7",
"--top-p", "0.8",
"--top-k", "100",
"--repeat-penalty", "1.05",
*/
"--image", sz_img_path,
"-p", sz_user_data,
"-t", std::to_string(num_threads).c_str()
};
//TODO: crash on Xiaomi 14 but works fine on Ubuntu 20.04
//ret = minicpmv_inference_main(argc, const_cast<char **>(argv));
GGML_JNI_NOTIFY("MiniCPM-V inference not supported currently");
ret = minicpmv_inference_main(argc, const_cast<char **>(argv), n_backend_type);

return ret;
}
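For context, here is a hedged sketch of how a caller might drive the wrapper above from the native side. The function signature is the one declared in this commit; the image path and the backend id value are placeholders, not values from the repository.

```cpp
// Illustrative caller for the wrapper shown in the diff above. The declaration is real
// (added in this commit); the image path and the backend id are assumptions used only
// for this example.
extern int minicpmv_inference(const char * sz_model_path, const char * sz_img_path,
                              const char * sz_user_data, int num_threads, int n_backend_type);

int run_minicpmv_smoke_test() {
    const char * model   = "/sdcard/kantv/models/ggml-model-Q4_K_M.gguf";
    const char * image   = "/sdcard/kantv/test.jpg";  // hypothetical image path
    const char * prompt  = "What is in the image?";
    const int    threads = 4;
    const int    backend = 3; // assumed id of GGML_BACKEND_GGML (the original ggml backend)
    return minicpmv_inference(model, image, prompt, threads, backend);
}
```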
2 changes: 2 additions & 0 deletions core/ggml/jni/ggml-jni.h
@@ -24,8 +24,10 @@
#include <stdbool.h>

#include "libavutil/cde_log.h"
#ifdef ANDROID //for build MiniCPM-V command line application on Linux
#include "kantv-asr.h"
#include "kantv-media.h"
#endif

#include "ggml.h"

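The #ifdef ANDROID guard above is what lets the same sources build as a plain Linux command-line tool for troubleshooting. A possible sketch of the fallbacks a Linux build would need for the Android-only notification hooks follows; the macro and function shapes are assumptions based on how they are used elsewhere in this diff, not the project's actual definitions.

```cpp
// Hedged sketch: fallbacks for the Android-only hooks when building the MiniCPM-V
// CLI on Linux. GGML_JNI_NOTIFY and kantv_asr_notify_benchmark_c are assumed to be
// printf-style / string-taking, matching their uses in this diff; the real project
// may define them differently.
#ifndef ANDROID
#include <cstdio>
#define GGML_JNI_NOTIFY(...) std::fprintf(stderr, __VA_ARGS__)

static inline void kantv_asr_notify_benchmark_c(const char * text) {
    std::fputs(text, stdout); // on Linux, just echo what would go to the Java UI
}
#endif
```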
43 changes: 29 additions & 14 deletions core/ggml/jni/minicpmv-cli.cpp
@@ -1,4 +1,5 @@
//ref&author: https://github.com/OpenBMB/llama.cpp/blob/minicpm-v2.5/examples/minicpmv/clip.h
//ref&author: https://github.com/OpenBMB/llama.cpp/blob/minicpm-v2.5/examples/minicpmv
#include "ggml-jni.h"
#include "ggml.h"
#include "log.h"
#include "common.h"
@@ -7,8 +8,6 @@
#include "minicpmv_io.h"
#include "llama.h"

#include "ggml-jni.h"

#include <cstdio>
#include <cstdlib>
#include <vector>
@@ -80,18 +79,32 @@ const char * llama_loop(struct minicpmv_context * ctx_llava,struct llama_sampling
return tmp;
}


int minicpmv_inference_main(int argc, char *argv[]) {
#ifdef ANDROID
int minicpmv_inference_main(int argc, char ** argv, int backend) {
#else //for build and run MiniCPM-V command line application on Linux
//works fine on Ubuntu20.04
//./minicpmv-cli -m /home/weiguo/models/ggml-model-Q4_K_M.gguf --mmproj /home/weiguo/models/mmproj-model-f16.gguf --image /home/weiguo/Downloads/airplane.jpeg -t 4 -p "What is in the image?"
int main(int argc, char ** argv) {
#endif
ggml_time_init();

gpt_params params;

if (!gpt_params_parse(argc, argv, params)) {
LOGGD("gpt_params_parse failed");
GGML_JNI_NOTIFY("gpt_params_parse failed");
show_additional_info(argc, argv);
return 1;
}
if (backend != GGML_BACKEND_GGML) { // GGML_BACKEND_GGML is the original GGML, used to compare performance between QNN backend and original GGML
#ifdef GGML_USE_QNN
LOGGD("using QNN backend %d", backend);
params.main_gpu = backend;
params.n_gpu_layers = 1;
#else
LOGGW("QNN feature was disabled and backend is not ggml\n");
GGML_JNI_NOTIFY("QNN feature was disabled and backend is not ggml\n");
return 1;
#endif
}

#ifndef LOG_DISABLE_LOGS
log_set_target(log_filename_generator("llava", "log"));
@@ -101,13 +114,12 @@ int minicpmv_inference_main(int argc, char *argv[]) {
#endif // LOG_DISABLE_LOGS

if (params.mmproj.empty() || (params.image.empty())) {
//gpt_params_print_usage(argc, argv, params);
gpt_params_print_usage(argc, argv, params);
show_additional_info(argc, argv);
return 1;
}

for (auto & image : params.image)
{
for (auto & image : params.image) {
int n_past = 0;
auto ctx_llava = minicpmv_init(&params, image, n_past);

@@ -128,10 +140,11 @@ if (strstr(tmp, "###")) break; // Yi-VL behavior
if (strstr(tmp, "###")) break; // Yi-VL behavior
have_tmp = true;
printf("%s", tmp);
#ifdef TARGET_ANDROID
#ifdef ANDROID
kantv_asr_notify_benchmark_c(tmp);
#endif
if (strstr(response.c_str(), "<user>")) break; // minicpm-v

if (strstr(response.c_str(), "<user>")) break; // minicpm-v

fflush(stdout);
}
@@ -151,19 +164,21 @@ int minicpmv_inference_main(int argc, char *argv[]) {
if (strcmp(tmp, "</s>") == 0) break;
if (strstr(tmp, "###")) break; // Yi-VL behavior
printf("%s", tmp);// mistral llava-1.6
LOGGD("%s", tmp);
if (strstr(response.c_str(), "<user>")) break; // minicpm-v
fflush(stdout);
}
llama_sampling_free(ctx_sampling);
}
}
printf("\n");
#ifdef ANDROID
kantv_asr_notify_benchmark_c("\n[end of text]\n");
#endif
llama_print_timings(ctx_llava->ctx_llama);

ctx_llava->model = NULL;
llava_free(ctx_llava);
}

return 0;
}
}
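The new backend parameter in minicpmv_inference_main is what routes inference to a QNN backend through standard gpt_params fields. A small, self-contained reduction of that branch follows; the struct and the backend ids are stand-ins for illustration, since the real gpt_params and GGML_BACKEND_GGML come from llama.cpp and the project's QNN backend code.

```cpp
// Illustrative reduction of the backend branch added in this commit: any backend id
// other than the "original ggml" id is treated as a QNN device and mapped onto the
// main_gpu / n_gpu_layers fields. Types and ids here are stand-ins, not project code.
#include <cassert>

struct params_sketch {        // stand-in for llama.cpp's gpt_params
    int main_gpu     = 0;
    int n_gpu_layers = 0;
};

static void apply_backend(params_sketch & params, int backend, int backend_ggml_id) {
    if (backend != backend_ggml_id) {
        params.main_gpu     = backend; // QNN device id (CPU/GPU/NPU) -- assumption
        params.n_gpu_layers = 1;       // offload a single layer, as in the diff
    }
}

int main() {
    params_sketch p;
    apply_backend(p, /*backend=*/2, /*backend_ggml_id=*/3); // hypothetical ids
    assert(p.main_gpu == 2 && p.n_gpu_layers == 1);
    return 0;
}
```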
