fix: bugs (withcatai#80)
* feat: adapt to the latest `llama.cpp` interface
* feat: print helpful information to help resolve a clone issue when it happens
* feat: print helpful information to help resolve build issues related to CUDA
* feat: make portable cmake on Windows more stable
* feat: update `CMakeLists.txt` to match `llama.cpp` better
* fix: do not download redundant node headers
* fix: improve cmake custom options handling
* fix: do not set `CMAKE_GENERATOR_TOOLSET` for CUDA
* fix: do not fetch information from GitHub when using a local git bundle
* fix: GBNF JSON schema string const formatting
* docs: document a solution to a compilation problem on macOS
* docs: document more CUDA build error solutions
* docs: explain about ES modules in the getting started guide
* chore: update `.commitlintrc.json`
* chore: remove redundant dependency
giladgd authored Oct 25, 2023
1 parent 1cba701 · commit ff1644d
Showing 19 changed files with 207 additions and 107 deletions.
5 changes: 4 additions & 1 deletion .commitlintrc.json
@@ -1,3 +1,6 @@
 {
-    "extends": ["@commitlint/config-conventional"]
+    "extends": ["@commitlint/config-conventional"],
+    "rules": {
+        "subject-case": [0, "never"]
+    }
 }
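For context: in a commitlint rule tuple, the first element is the severity, and `0` disables the rule entirely, so commit subjects that quote identifiers like `CMakeLists.txt` or `.commitlintrc.json` no longer fail the `subject-case` check. The same configuration expressed as a TypeScript config file (commitlint also accepts `commitlint.config.ts`; this is an illustrative sketch, not part of the commit):

```ts
import type {UserConfig} from "@commitlint/types";

// Equivalent of the JSON above: severity 0 turns the subject-case rule off,
// so a subject like "feat: update `CMakeLists.txt`" passes linting.
const config: UserConfig = {
    extends: ["@commitlint/config-conventional"],
    rules: {
        "subject-case": [0, "never"]
    }
};

export default config;
```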
3 changes: 3 additions & 0 deletions .github/workflows/build.yml
@@ -231,6 +231,9 @@ jobs:
       - name: Install modules
         run: npm ci
 
+      - name: Build binary
+        run: node ./dist/cli/cli.js build
+
       - name: Run standalone tests
         run: npm run test:standalone

33 changes: 27 additions & 6 deletions docs/guide/CUDA.md
@@ -23,18 +23,39 @@ To build `node-llama-cpp` with any of these options, set an environment variable
 ### Fix the `Failed to detect a default CUDA architecture` build error
 To fix this issue you have to set the `CUDACXX` environment variable to the path of the `nvcc` compiler.
 
-For example, if you installed CUDA Toolkit 12.2 on Windows, you have to run the following command:
-```bash
-set CUDACXX=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin\nvcc.exe
+For example, if you have installed CUDA Toolkit 12.2, you have to run a command like this:
+::: code-group
+```bash [Linux]
+export CUDACXX=/usr/local/cuda-12.2/bin/nvcc
 ```
 
-On Linux, it would be something like this:
-```bash
-export CUDACXX=/usr/local/cuda-12.2/bin/nvcc
+```bash [Windows]
+set CUDACXX=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin\nvcc.exe
 ```
+:::
 
 Then run the build command again to check whether setting the `CUDACXX` environment variable fixed the issue.
 
+### Fix the `The CUDA compiler identification is unknown` build error
+The solution to this error is the same as [the solution to the `Failed to detect a default CUDA architecture` error](#fix-the-failed-to-detect-a-default-cuda-architecture-build-error).
+
+### Fix the `A single input file is required for a non-link phase when an outputfile is specified` build error
+To fix this issue you have to set the `CMAKE_GENERATOR_TOOLSET` cmake option to the CUDA home directory, usually already set as the `CUDA_PATH` environment variable.
+
+To do this, set the `NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET` environment variable to the path of your CUDA home directory:
+
+::: code-group
+```bash [Linux]
+export NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET=$CUDA_PATH
+```
+
+```bash [Windows]
+set NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET=%CUDA_PATH%
+```
+:::
+
+Then run the build command again to check whether setting the `CMAKE_GENERATOR_TOOLSET` cmake option fixed the issue.
+
 ## Using `node-llama-cpp` with CUDA
 After you build `node-llama-cpp` with CUDA support, you can use it normally.

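The `NODE_LLAMA_CPP_CMAKE_OPTION_` prefix used above is the documented way to forward custom cmake options through environment variables (see the `fix: improve cmake custom options handling` bullet). A minimal sketch of how such a prefix mapping works, assuming only the documented naming convention (illustrative; not the project's actual implementation):

```ts
const cmakeOptionPrefix = "NODE_LLAMA_CPP_CMAKE_OPTION_";

// Collect every `NODE_LLAMA_CPP_CMAKE_OPTION_<NAME>=<value>` environment variable
// and turn it into a `-D<NAME>=<value>` cmake argument.
function getCustomCmakeArgs(env: NodeJS.ProcessEnv = process.env): string[] {
    return Object.entries(env)
        .filter(([key, value]) => key.startsWith(cmakeOptionPrefix) && value != null)
        .map(([key, value]) => `-D${key.slice(cmakeOptionPrefix.length)}=${value}`);
}

// Example: NODE_LLAMA_CPP_CMAKE_OPTION_CMAKE_GENERATOR_TOOLSET=/usr/local/cuda-12.2
// yields ["-DCMAKE_GENERATOR_TOOLSET=/usr/local/cuda-12.2"]
```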
2 changes: 2 additions & 0 deletions docs/guide/building-from-source.md
@@ -27,6 +27,8 @@ If `cmake` is not installed on your machine, `node-llama-cpp` will automatically
 
 If the build fails, make sure you have the required dependencies of `cmake` installed on your machine. More info is available [here](https://github.com/cmake-js/cmake-js#:~:text=projectRoot/build%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%5Bstring%5D-,Requirements%3A,-CMake) (you don't have to install `cmake` or `cmake-js`, just the dependencies).
 
+If the build fails on macOS with the error `"/usr/bin/cc" is not able to compile a simple test program`, try running `xcode-select --install` to install the Xcode command line tools.
+
 :::
 
 ## `download` and `build` commands
4 changes: 4 additions & 0 deletions docs/guide/cli/build.md
@@ -10,6 +10,10 @@ const commandDoc = docs.build;
 
 {{commandDoc.description}}
 
+::: info
+If the build fails on macOS with the error `"/usr/bin/cc" is not able to compile a simple test program`, try running `xcode-select --install` to install the Xcode command line tools.
+:::
+
 ## Usage
 ```shell-vue
 {{commandDoc.usage}}
4 changes: 4 additions & 0 deletions docs/guide/cli/download.md
@@ -20,6 +20,10 @@ This is useful for building from source on machines that aren't connected to the
 
 :::
 
+::: info
+If the build fails on macOS with the error `"/usr/bin/cc" is not able to compile a simple test program`, try running `xcode-select --install` to install the Xcode command line tools.
+:::
+
 ## Usage
 ```shell-vue
 {{commandDoc.usage}}
5 changes: 5 additions & 0 deletions docs/guide/index.md
@@ -14,6 +14,11 @@ npm install --save node-llama-cpp
 > If binaries are not available for your platform, it'll fallback to download a release of `llama.cpp` and build it from source with `cmake`.
 > To disable this behavior, set the environment variable `NODE_LLAMA_CPP_SKIP_DOWNLOAD` to `true`.
+## ESM usage
+`node-llama-cpp` is an [ES module](https://nodejs.org/api/esm.html#modules-ecmascript-modules), so you can only use `import` to load it and cannot use `require`.
+
+To make sure you can use it in your project, make sure your `package.json` file has `"type": "module"` in it.
+
 ## CUDA and Metal support
 **Metal:** Metal support is enabled by default on macOS. If you're using a Mac with an Intel chip, [you might want to disable it](./Metal.md).

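The ESM note added above implies a specific project setup. A minimal sketch of what that looks like in practice (the `LlamaModel`, `LlamaContext`, and `LlamaChatSession` exports are assumed from the library's documented API of this era, and the model path is a placeholder):

```ts
// package.json must contain: { "type": "module" }
// Loading with `require("node-llama-cpp")` would fail with ERR_REQUIRE_ESM.
import {LlamaModel, LlamaContext, LlamaChatSession} from "node-llama-cpp";

const model = new LlamaModel({modelPath: "./models/model.gguf"}); // placeholder path
const context = new LlamaContext({model});
const session = new LlamaChatSession({context});

// Top-level await is another ESM-only feature this setup enables
const answer = await session.prompt("Hi there!");
console.log(answer);
```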
4 changes: 2 additions & 2 deletions llama/CMakeLists.txt
@@ -1,6 +1,6 @@
-cmake_minimum_required(VERSION 3.12)
+cmake_minimum_required(VERSION 3.13)
 
-project ("llama-addon")
+project("llama-addon" C CXX)
 
 if (MSVC)
     # add_compile_options(/EHsc)
29 changes: 11 additions & 18 deletions llama/addon.cpp
@@ -208,13 +208,13 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
         return Napi::String::New(info.Env(), ss.str());
     }
     Napi::Value TokenBos(const Napi::CallbackInfo& info) {
-        return Napi::Number::From(info.Env(), llama_token_bos(ctx));
+        return Napi::Number::From(info.Env(), llama_token_bos(model->model)); // TODO: move this to the model
     }
     Napi::Value TokenEos(const Napi::CallbackInfo& info) {
-        return Napi::Number::From(info.Env(), llama_token_eos(ctx));
+        return Napi::Number::From(info.Env(), llama_token_eos(model->model)); // TODO: move this to the model
     }
     Napi::Value TokenNl(const Napi::CallbackInfo& info) {
-        return Napi::Number::From(info.Env(), llama_token_nl(ctx));
+        return Napi::Number::From(info.Env(), llama_token_nl(model->model)); // TODO: move this to the model
     }
     Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
         return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
@@ -223,7 +223,7 @@ class LLAMAContext : public Napi::ObjectWrap<LLAMAContext> {
         int token = info[0].As<Napi::Number>().Int32Value();
         std::stringstream ss;
 
-        const char* str = llama_token_get_text(ctx, token);
+        const char* str = llama_token_get_text(model->model, token); // TODO: move this to the model
         if (str == nullptr) {
             return info.Env().Undefined();
         }
@@ -336,18 +336,14 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
 
 protected:
     void Execute() {
-        llama_batch batch = llama_batch_init(tokens.size(), 0);
+        llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
 
-        batch.n_tokens = tokens.size();
-
-        for (int32_t i = 0; i < batch.n_tokens; i++) {
-            batch.token[i] = tokens[i];
-            batch.pos[i] = ctx->n_cur;
-            batch.seq_id[i] = 0;
-            batch.logits[i] = false;
+        for (size_t i = 0; i < tokens.size(); i++) {
+            llama_batch_add(batch, tokens[i], ctx->n_cur, { 0 }, false);
 
             ctx->n_cur++;
         }
+        GGML_ASSERT(batch.n_tokens == (int) tokens.size());
 
         batch.logits[batch.n_tokens - 1] = true;

@@ -381,14 +377,11 @@ class LLAMAContextEvalWorker : Napi::AsyncWorker, Napi::Promise::Deferred {
 
         llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
 
-        auto eos_token = llama_token_eos(ctx->ctx);
+        auto eos_token = llama_token_eos(ctx->model->model);
 
         if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
-            llama_sample_repetition_penalty(
-                ctx->ctx, &candidates_p, repeat_penalty_tokens.data(), repeat_penalty_tokens.size(), repeat_penalty
-            );
-            llama_sample_frequency_and_presence_penalties(
-                ctx->ctx, &candidates_p, repeat_penalty_tokens.data(), repeat_penalty_tokens.size(),
+            llama_sample_repetition_penalties(
+                ctx->ctx, &candidates_p, repeat_penalty_tokens.data(), repeat_penalty_tokens.size(), repeat_penalty,
                 repeat_penalty_frequency_penalty, repeat_penalty_presence_penalty
             );
         }
5 changes: 5 additions & 0 deletions llama/package.json
@@ -0,0 +1,5 @@
+{
+    "binary": {
+        "napi_versions": [7]
+    }
+}
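The new `llama/package.json` pins the addon build to N-API version 7 via the `binary.napi_versions` field that `cmake-js` reads. As a hedged illustration (not part of this commit), a runtime guard against Node.js builds with an older N-API could look like this:

```ts
// Hypothetical guard: check that the running Node.js provides at least the
// N-API version declared in llama/package.json before loading the addon.
const requiredNapiVersion = 7;
const availableNapiVersion = Number(process.versions.napi ?? 0);

if (availableNapiVersion < requiredNapiVersion)
    throw new Error(
        `This addon requires N-API version ${requiredNapiVersion}, ` +
        `but this Node.js build provides version ${availableNapiVersion}`
    );
```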
1 change: 0 additions & 1 deletion package.json
@@ -100,7 +100,6 @@
     "@commitlint/cli": "^17.7.1",
     "@commitlint/config-conventional": "^17.7.0",
     "@semantic-release/exec": "^6.0.3",
-    "@types/bytes": "^3.1.1",
     "@types/cli-progress": "^3.11.0",
     "@types/cross-spawn": "^6.0.2",
     "@types/fs-extra": "^11.0.1",
74 changes: 43 additions & 31 deletions src/cli/commands/DownloadCommand.ts
@@ -13,7 +13,10 @@ import {setBinariesGithubRelease} from "../../utils/binariesGithubRelease.js";
 import {downloadCmakeIfNeeded} from "../../utils/cmake.js";
 import withStatusLogs from "../../utils/withStatusLogs.js";
 import {getIsInDocumentationMode} from "../../state.js";
-import {unshallowAndSquashCurrentRepoAndSaveItAsReleaseBundle} from "../../utils/gitReleaseBundles.js";
+import {
+    getGitBundlePathForRelease,
+    unshallowAndSquashCurrentRepoAndSaveItAsReleaseBundle
+} from "../../utils/gitReleaseBundles.js";
 import {cloneLlamaCppRepo} from "../../utils/cloneLlamaCppRepo.js";
 
 type DownloadCommandArgs = {
@@ -91,6 +94,7 @@ export const DownloadCommand: CommandModule<object, DownloadCommandArgs> = {
 export async function DownloadLlamaCppCommand({
     repo, release, arch, nodeTarget, metal, cuda, skipBuild, noBundle, updateBinariesReleaseMetadataAndSaveGitBundle
 }: DownloadCommandArgs) {
+    const useBundle = noBundle != true;
     const octokit = new Octokit();
     const [githubOwner, githubRepo] = repo.split("/");

@@ -110,37 +114,45 @@ export async function DownloadLlamaCppCommand({
     type GithubReleaseType = Awaited<ReturnType<typeof octokit.rest.repos.getLatestRelease>> |
         Awaited<ReturnType<typeof octokit.rest.repos.getReleaseByTag>>;
 
-    let githubRelease: GithubReleaseType | null = null;
-    await withOra({
-        loading: chalk.blue("Fetching llama.cpp info"),
-        success: chalk.blue("Fetched llama.cpp info"),
-        fail: chalk.blue("Failed to fetch llama.cpp info")
-    }, async () => {
-        try {
-            if (release === "latest") {
-                githubRelease = await octokit.rest.repos.getLatestRelease({
-                    owner: githubOwner,
-                    repo: githubRepo
-                });
-            } else {
-                githubRelease = await octokit.rest.repos.getReleaseByTag({
-                    owner: githubOwner,
-                    repo: githubRepo,
-                    tag: release
-                });
-            }
-        } catch (err) {
-            console.error("Failed to fetch llama.cpp release info", err);
-        }
-
-        if (githubRelease == null) {
-            throw new Error(`Failed to find release "${release}" of "${repo}"`);
-        }
-
-        if (githubRelease!.data.tag_name == null) {
-            throw new Error(`Failed to find tag of release "${release}" of "${repo}"`);
-        }
-    });
+    let githubReleaseTag: string | null = (useBundle && (await getGitBundlePathForRelease(githubOwner, githubRepo, release)) != null)
+        ? release
+        : null;
+
+    if (githubReleaseTag == null)
+        await withOra({
+            loading: chalk.blue("Fetching llama.cpp info"),
+            success: chalk.blue("Fetched llama.cpp info"),
+            fail: chalk.blue("Failed to fetch llama.cpp info")
+        }, async () => {
+            let githubRelease: GithubReleaseType | null = null;
+
+            try {
+                if (release === "latest") {
+                    githubRelease = await octokit.rest.repos.getLatestRelease({
+                        owner: githubOwner,
+                        repo: githubRepo
+                    });
+                } else {
+                    githubRelease = await octokit.rest.repos.getReleaseByTag({
+                        owner: githubOwner,
+                        repo: githubRepo,
+                        tag: release
+                    });
+                }
+            } catch (err) {
+                console.error("Failed to fetch llama.cpp release info", err);
+            }
+
+            if (githubRelease == null) {
+                throw new Error(`Failed to find release "${release}" of "${repo}"`);
+            }
+
+            if (githubRelease.data.tag_name == null) {
+                throw new Error(`Failed to find tag of release "${release}" of "${repo}"`);
+            }
+
+            githubReleaseTag = githubRelease.data.tag_name;
+        });
 
     await clearTempFolder();

@@ -153,7 +165,7 @@
     });
 
     console.log(chalk.blue("Cloning llama.cpp"));
-    await cloneLlamaCppRepo(githubOwner, githubRepo, githubRelease!.data.tag_name, noBundle != true);
+    await cloneLlamaCppRepo(githubOwner, githubRepo, githubReleaseTag!, useBundle);
 
     if (!skipBuild) {
         await downloadCmakeIfNeeded(true);
@@ -174,7 +186,7 @@
     }
 
     if (isCI && updateBinariesReleaseMetadataAndSaveGitBundle) {
-        await setBinariesGithubRelease(githubRelease!.data.tag_name);
+        await setBinariesGithubRelease(githubReleaseTag!);
         await unshallowAndSquashCurrentRepoAndSaveItAsReleaseBundle();
     }

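The net effect of the changes above is that when a matching local git bundle exists, the requested release tag is used directly and the GitHub API is never contacted. A condensed sketch of that decision (the function boundaries and the `resolveTagFromGitHub` helper are hypothetical; the bundle check mirrors the diff):

```ts
import {getGitBundlePathForRelease} from "./utils/gitReleaseBundles.js";

// Bundle-first resolution: prefer a local git bundle over a GitHub round-trip.
async function resolveReleaseTag(
    githubOwner: string, githubRepo: string, release: string, noBundle: boolean
): Promise<string> {
    const useBundle = noBundle != true;

    // A local bundle for this release means the tag is already known
    if (useBundle && (await getGitBundlePathForRelease(githubOwner, githubRepo, release)) != null)
        return release;

    return await resolveTagFromGitHub(githubOwner, githubRepo, release);
}

// Hypothetical stand-in for the `withOra` + Octokit block in the diff above
async function resolveTagFromGitHub(owner: string, repo: string, release: string): Promise<string> {
    throw new Error(`GitHub lookup for ${owner}/${repo}@${release} omitted in this sketch`);
}
```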
5 changes: 5 additions & 0 deletions src/config.ts
@@ -57,3 +57,8 @@ export const defaultChatSystemPrompt = "You are a helpful, respectful and honest
     "If you don't know the answer to a question, please don't share false information.";
 export const cliBinName = "node-llama-cpp";
 export const npxRunPrefix = "npx --no ";
+
+const documentationUrl = "https://withcatai.github.io/node-llama-cpp";
+export const documentationPageUrls = {
+    CUDA: documentationUrl + "/guide/CUDA"
+} as const;
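These URLs back the new troubleshooting hints (see the `feat: print helpful information to help resolve build issues related to CUDA` bullet). A hedged sketch of how a build-failure handler can use them (illustrative; not necessarily the literal call site in this commit):

```ts
import chalk from "chalk";
import {documentationPageUrls} from "./config.js";

// Point users at the CUDA guide when a CUDA-enabled build fails.
export function printCudaBuildHelp() {
    console.info(
        chalk.yellow("To resolve CUDA build errors, see: ") + documentationPageUrls.CUDA
    );
}
```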
33 changes: 27 additions & 6 deletions src/utils/cloneLlamaCppRepo.ts
@@ -55,14 +55,35 @@ export async function cloneLlamaCppRepo(githubOwner: string, githubRepo: string,
         } catch (err) {
             await fs.remove(llamaCppDirectory);
             console.error("Failed to clone git bundle, cloning from GitHub instead", err);
+
+            printCloneErrorHelp(String(err));
         }
     }
 
-    await withGitCloneProgress("GitHub", async (gitWithCloneProgress) => {
-        await gitWithCloneProgress.clone(remoteGitUrl, llamaCppDirectory, {
-            "--depth": 1,
-            "--branch": tag,
-            "--quiet": null
+    try {
+        await withGitCloneProgress("GitHub", async (gitWithCloneProgress) => {
+            await gitWithCloneProgress.clone(remoteGitUrl, llamaCppDirectory, {
+                "--depth": 1,
+                "--branch": tag,
+                "--quiet": null
+            });
         });
-    });
+    } catch (err) {
+        printCloneErrorHelp(String(err));
+
+        throw err;
+    }
 }
+
+function printCloneErrorHelp(error: string) {
+    // This error happens with some docker images where the current user is different
+    // from the owner of the files due to mounting a volume.
+    // In such cases, print a helpful message to help the user resolve the issue.
+    if (error.toLowerCase().includes("detected dubious ownership in repository"))
+        console.info("\n" +
+            chalk.grey("[node-llama-cpp]") + chalk.yellow(" To fix this issue, try running this command to fix it for the current module directory:") + "\n" +
+            'git config --global --add safe.directory "' + llamaCppDirectory + '"\n\n' +
+            chalk.yellow("Or run this command to fix it everywhere:") + "\n" +
+            'git config --global --add safe.directory "*"'
+        );
+}
6 changes: 6 additions & 0 deletions src/utils/cmake.ts
@@ -35,6 +35,12 @@ export async function getCmakePath() {
 
     if (resolvedPath.toLowerCase().endsWith(".cmd"))
         resolvedPath = (await getBinFromWindowCmd(resolvedPath, "cmake.exe")) ?? "";
+    else if (resolvedPath.toLowerCase().endsWith(".ps1")) {
+        const cmdFilePath = resolvedPath.slice(0, -".ps1".length) + ".cmd";
+
+        if (await fs.pathExists(cmdFilePath))
+            resolvedPath = (await getBinFromWindowCmd(cmdFilePath, "cmake.exe")) ?? "";
+    }
 
     if (resolvedPath !== "")
         return resolvedPath;
(4 more changed files not shown)
