Add --interactive-prompt-postfix and update README.md a bit on this stuff.
Noeda committed Apr 2, 2023
1 parent 19e552e commit f5f8547
Showing 2 changed files with 35 additions and 7 deletions.
32 changes: 25 additions & 7 deletions README.md
@@ -83,7 +83,7 @@ You should now be ready to generate some text.
Run LLaMA-7B with some weights cast to 16-bit floats:

```shell
-rllama --tokenizer-model /path/to/tokenizer.model \
+rllama --tokenizer-path /path/to/tokenizer.model \
--model-path /path/to/LLaMA/7B \
--param-path /path/to/LLaMA/7B/params.json \
--f16 \
@@ -94,16 +94,34 @@ Use `rllama --help` to see all the options.

## Interactive mode

-There is a simple interactive mode to do back-and-forth discussion with the model.
+There is a simple experimental interactive mode to try to force a type of
+back-and-forth discussion with the model.

```shell
-rllama ... --start-interactive
+# Both flags below are optional, but you probably want to set --interactive-stop.
+rllama ... --start-interactive \
+    --interactive-prompt-postfix " AI:" \
+    --interactive-stop "Human: "
```

In this mode, you need to type your prompt before the AI starts doing its work.
-If the AI outputs token sequence `[EOF]` (you can set it with
-`--interactive-stop` switch) then you can type a new prompt that will be
-appended to the sequence.
+If the AI outputs the token sequence given in `--interactive-stop` (defaults
+to `[EOF]`), it will ask for another input. You probably want to set the stop
+to `"Human: "` or something similar.

`--interactive-prompt-postfix` is appended automatically to your answers. You
can use it to force the AI to follow a pattern. Here is a full example of an
interactive-mode command line:

```shell
rllama --f16 \
--param-path /LLaMA/7B/params.json \
--model-path /LLaMA/7B \
--tokenizer-path /stonks/LLaMA/tokenizer.model \
--prompt "This is an interactive session between human and AI assistant. AI: Hi! How can I help you? Human:" \
--start-interactive \
--interactive-stop "Human:" \
--interactive-prompt-postfix " AI:"
```
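
To make the interplay of the two flags concrete, here is roughly how the
running text evolves in a session started with the command above (the user
input and the model's reply are invented purely for illustration; the
mechanics follow the description in this section):

```
# The sequence starts as the --prompt and grows as the model generates.
# Whenever the model emits the --interactive-stop text "Human:", rllama
# pauses and asks you for input. Say you type:
What is the capital of France?

# The --interactive-prompt-postfix " AI:" is appended automatically, so
# the sequence continues as:
... Human: What is the capital of France? AI:

# and generation resumes from there, i.e. the model answers as "AI".
```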

## Inference server

@@ -207,7 +225,7 @@ RUSTFLAGS="-C target-feature=+sse2,+avx,+fma,+avx2" cargo install rllama --featu
```

```
-rllama --tokenizer-model /path/to/tokenizer.model \
+rllama --tokenizer-path /path/to/tokenizer.model \
--model-path /path/to/LLaMA/7B \
--param-path /path/to/LLaMA/7B/params.json \
--opencl-device 0 \
10 changes: 10 additions & 0 deletions src/rllama_main.rs
@@ -17,6 +17,7 @@ use std::io::{Read, Write};
use std::path::PathBuf;
use std::sync::{Arc, RwLock};

// Refer to README.md to see what all these options mean.
#[derive(Parser, Clone)]
#[command(author, version, about, long_about = None)]
struct Cli {
@@ -37,6 +38,8 @@ struct Cli {

#[arg(long)]
interactive_stop: Option<String>,
#[arg(long)]
interactive_prompt_postfix: Option<String>,
#[arg(long, action)]
start_interactive: bool,

@@ -100,6 +103,10 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
let tokenizer_path = cli.tokenizer_path.clone();
let param_path = cli.param_path.clone();
let interactive_stop = cli.interactive_stop.clone().unwrap_or("[EOF]".to_string());
let interactive_prompt_postfix = cli
.interactive_prompt_postfix
.clone()
.unwrap_or("".to_string());
let start_interactive = cli.start_interactive;
#[cfg(not(feature = "server"))]
if cli.inference_server {
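
A side note on the defaulting above: since the postfix falls back to the
empty string, `unwrap_or_default()` would express the same thing a little
more idiomatically. A sketch of the equivalent code, not what the commit
ships:

```rust
// Equivalent to .unwrap_or("".to_string()): Option<String> defaults to "".
let interactive_prompt_postfix = cli
    .interactive_prompt_postfix
    .clone()
    .unwrap_or_default();
```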
@@ -280,6 +287,7 @@ pub fn main() -> Result<(), Box<dyn std::error::Error>> {
tok.clone(),
prompt.clone(),
interactive_stop.clone(),
interactive_prompt_postfix.clone(),
start_interactive,
be_quiet,
max_seq_len,
@@ -690,6 +698,7 @@ fn command_line_inference(
tok: Arc<Tokenizer>,
prompt: String,
interactive_stop: String,
interactive_prompt_postfix: String,
start_interactive: bool,
be_quiet: bool,
max_seq_len: usize,
@@ -780,6 +789,7 @@ fn command_line_inference(
if newinput.ends_with('\n') {
let _ = newinput.pop();
}
newinput += &interactive_prompt_postfix;
user_token = tok.tokenize_to_ids(newinput.clone());

// removing [start token] as it is already in the prompt, and tokenize_to_ids adds it.
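
Taken together, the change threads the new flag from the CLI struct down to
the interactive loop. A condensed, self-contained sketch of that flow
follows; it is simplified from the diff above (no model or tokenizer; in the
real code the result goes through `tok.tokenize_to_ids` onto the running
token sequence):

```rust
use std::io::Write;

// Condensed sketch of the interactive loop after this commit. Generation
// until `interactive_stop` is omitted; only the postfix handling is shown.
fn interactive_loop(_interactive_stop: &str, interactive_prompt_postfix: &str) {
    loop {
        // ... generation would run here until the model emits `_interactive_stop` ...

        print!("> "); // illustrative input marker, not rllama's actual prompt
        std::io::stdout().flush().unwrap();

        let mut newinput = String::new();
        std::io::stdin().read_line(&mut newinput).unwrap();
        if newinput.ends_with('\n') {
            let _ = newinput.pop();
        }
        // Empty input ends this sketch's loop (illustrative only).
        if newinput.is_empty() {
            break;
        }

        // The new behavior: the postfix (e.g. " AI:") is appended to every
        // answer before tokenization, so the model keeps following the
        // Human:/AI: pattern.
        newinput += interactive_prompt_postfix;

        // In the real code: user_token = tok.tokenize_to_ids(newinput.clone());
        println!("(would tokenize and append: {:?})", newinput);
    }
}

fn main() {
    interactive_loop("Human:", " AI:");
}
```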
