
Update llm usecase with easy setting
cb-github-robot authored Feb 26, 2024
2 parents 9b3efeb + c80106b commit 0f4c76b
Showing 5 changed files with 137 additions and 58 deletions.
2 changes: 1 addition & 1 deletion assets/cloudspec.csv
@@ -509,7 +509,7 @@ AWS,aws-us-east-2,aws-us-east-2,t2.medium,0.0464,34.18,,,,,,,,,,default,default
AWS,aws-us-east-2,aws-us-east-2,t2.large,0.0928,34.18,,,,,,,,,,default,default
AWS,aws-us-east-2,aws-us-east-2,t2.xlarge,0.1856,34.18,,,,,,,,,,default,default
AWS,aws-us-east-2,aws-us-east-2,t2.2xlarge,0.3712,34.18,,,,,,,,,,default,default
AWS,aws-us-east-2,aws-us-east-2,g5.2xlarge,0.3712,34.18,,,,,,,,,,default,default
AWS,aws-us-east-2,aws-us-east-2,g5.2xlarge,1.212,34.18,,,,,,,,,,default,default
AWS,aws-us-west-1,aws-us-west-1,t2.micro,0.0138,36.52,,,,,,,,,,default,default
AWS,aws-us-west-1,aws-us-west-1,t2.small,0.0276,34.18,,,,,,,,,,default,default
AWS,aws-us-west-1,aws-us-west-1,t2.medium,0.0552,34.18,,,,,,,,,,default,default
63 changes: 63 additions & 0 deletions scripts/usecases/llm/llmServer.py
@@ -0,0 +1,63 @@
from typing import Union
from fastapi import BackgroundTasks, FastAPI, Request
from fastapi.responses import JSONResponse
import uvicorn
# VLLM wrapper provided by langchain_community
from langchain_community.llms import VLLM

app = FastAPI()
port = 5001

# Model name and global flags tracking the model loading status
model = "tiiuae/falcon-7b-instruct"
model_loaded = False
llm = None

async def load_model():
    global llm, model_loaded
    # Create and initialize the model instance
    llm = VLLM(model=model,
               trust_remote_code=True,  # Required for loading HF models
               max_new_tokens=50,
               temperature=0.6
               )
    model_loaded = True  # Update model loading status to True

@app.on_event("startup")
async def startup_event():
    await load_model()

@app.get("/status")
def get_status():
    # Endpoint to return the model loading status
    return {"model": model, "loaded": model_loaded}

# Common function to generate text based on the prompt
def generate_text_from_prompt(prompt: str) -> str:
    if not model_loaded:
        return "Model is not loaded yet."
    output = llm(prompt)  # Generate text based on the prompt
    return output.replace("\n", "")

@app.get("/query")
def query_get(prompt: str) -> JSONResponse:
    if not model_loaded:
        return JSONResponse(content={"error": "Model is not loaded yet."}, status_code=503)

    output = generate_text_from_prompt(prompt)
    return JSONResponse(content={"text": output})

@app.post("/query")
async def query_post(request: Request) -> JSONResponse:
    if not model_loaded:
        return JSONResponse(content={"error": "Model is not loaded yet."}, status_code=503)

    request_dict = await request.json()
    prompt = request_dict.get("prompt", "")

    output = generate_text_from_prompt(prompt)
    return JSONResponse(content={"text": output})

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=port)
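
For quick verification, here is a minimal client sketch in Python (not part of this commit) that exercises the endpoints above; it assumes the server is reachable at localhost:5001 and that the requests package is installed.

import requests

# Check whether the model has finished loading (see the /status endpoint above)
status = requests.get("http://localhost:5001/status").json()
print(status)  # e.g. {'model': 'tiiuae/falcon-7b-instruct', 'loaded': True}

# GET variant of /query
resp = requests.get("http://localhost:5001/query",
                    params={"prompt": "What is the Multi-Cloud?"})
print(resp.json())

# POST variant of /query
resp = requests.post("http://localhost:5001/query",
                     json={"prompt": "What is the Multi-Cloud?"})
print(resp.json())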

97 changes: 55 additions & 42 deletions scripts/usecases/llm/startServer.sh
@@ -1,62 +1,75 @@
#!/bin/bash

# Define script variables
SCRIPT_NAME=$(basename "$0")
LLM_PYTHON_FILE=~/runCloudLLM.py
LOG_FILE=~/llm_nohup.out

# Step 1: Check for root/sudo
if [[ $EUID -ne 0 ]]; then
echo "This script must be run as root or with sudo"
exit 1
SERVICE_NAME="llmServer"
SOURCE_FILE="$SERVICE_NAME".py
LOG_FILE="$SERVICE_NAME".log
VENV_PATH=venv_"$SERVICE_NAME" # virtual environment path

echo "Checking source file: $SOURCE_FILE"
if [ -f "$SOURCE_FILE" ]; then
echo "Loading [$SOURCE_FILE] file."
else
echo "Source [$SOURCE_FILE] does not exist. Exiting."
exit 1
fi

echo "[$SERVICE_NAME] Checking for virtual environment..."
if [ ! -d "$VENV_PATH" ]; then
echo "Creating virtual environment..."
python3 -m venv $VENV_PATH
else
echo "Virtual environment already exists."
fi

source $VENV_PATH/bin/activate

# Step 2: Update system and install Python3-pip
echo "[$SCRIPT_NAME] Updating system and installing Python3-pip..."
apt-get update
apt-get install -y python3-pip
echo "[$SERVICE_NAME] Updating system and installing Python3-pip..."
sudo apt-get update > /dev/null
sudo apt-get install -y python3-pip jq > /dev/null

# Step 3: Install required Python packages
echo "[$SCRIPT_NAME] Installing required Python packages..."
pip3 install openai==0.28.1 langchain -U langchain-community gptcache
echo "[$SERVICE_NAME] Installing required Python packages..."
pip install -U fastapi uvicorn
pip install -U langchain langchain-community
pip install -U gptcache vllm
pip install openai==0.28.1

# Check if the pip install was successful
if [ $? -ne 0 ]; then
echo "[$SCRIPT_NAME] Failed to install Python packages. Exiting."
echo "[$SERVICE_NAME] Failed to install Python packages. Exiting."
exit 1
fi

# Step 4: Create the Python script for LLM
echo "[$SCRIPT_NAME] Creating the Python script for LLM..."
cat <<EOF > $LLM_PYTHON_FILE
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import langchain
from langchain_community.llms import VLLM
import uvicorn
# Step 4: Run the Python script in the background using nohup and virtual environment's python
echo "[$SERVICE_NAME] Starting LLM service in the background..."
nohup $VENV_PATH/bin/python $SOURCE_FILE > $LOG_FILE 2>&1 &

app = FastAPI()
echo "[$SERVICE_NAME] Checking status of the LLM service..."

llm = VLLM(model="tiiuae/falcon-7b-instruct",
trust_remote_code=True,
max_new_tokens=50,
temperature=0.6)
# Check if the LLM service is running
PID=$(ps aux | grep "$SERVICE_NAME" | grep -v grep | awk '{print $2}')

@app.get("/")
def read_root():
return {"Hello": "World"}
if [ -z "$PID" ]; then
echo "[$SERVICE_NAME] LLM service is not running."
else
echo "[$SERVICE_NAME] LLM service is running. PID: $PID"
echo ""
echo "[$SERVICE_NAME] Showing the last 20 lines of the log file ($LOG_FILE):"
echo ""
tail -n 20 "$LOG_FILE"
fi

@app.post("/v1/generateText")
async def generateText(request: Request) -> JSONResponse:
request_dict = await request.json()
prompt = request_dict.get("prompt", "")
output = llm(prompt)
return JSONResponse(content={"text": output})
echo ""
echo "[Test: replace localhost with IP address of the server]"
echo "curl -X POST http://localhost:5001/query -H \"Content-Type: application/json\" -d '{\"prompt\": \"What is the Multi-Cloud?\"}'"
curl -s -X POST http://localhost:5001/query \
-H "Content-Type: application/json" \
-d '{"prompt": "What is the Multi-Cloud?"}' | jq .

EOF
echo "http://localhost:5001/query?prompt=What is the Multi-Cloud?"
curl -s "http://localhost:5001/query?prompt=What+is+the+Multi-Cloud?" | jq .

# Step 5: Run the Python script in the background
echo "[$SCRIPT_NAME] Starting LLM service in the background..."
nohup python3 $LLM_PYTHON_FILE > $LOG_FILE 2>&1 &

echo "[$SCRIPT_NAME] LLM service started. Logs are available at $LOG_FILE"
echo ""
17 changes: 10 additions & 7 deletions scripts/usecases/llm/statusServer.sh
@@ -1,19 +1,22 @@
#!/bin/bash

# Define script variables
SCRIPT_NAME=$(basename "$0")
SERVICE_NAME="runCloudLLM.py"
LOG_FILE=~/llm_nohup.out
SERVICE_NAME="llmServer"
SOURCE_FILE="$SERVICE_NAME".py
LOG_FILE="$SERVICE_NAME".log
VENV_PATH=venv_"$SERVICE_NAME"

echo "[$SCRIPT_NAME] Checking status of the LLM service..."
echo "[$SERVICE_NAME] Checking status of the LLM service..."

# Check if the LLM service is running
PID=$(ps aux | grep "$SERVICE_NAME" | grep -v grep | awk '{print $2}')

if [ -z "$PID" ]; then
echo "[$SCRIPT_NAME] LLM service is not running."
echo "[$SERVICE_NAME] LLM service is not running."
else
echo "[$SCRIPT_NAME] LLM service is running. PID: $PID"
echo "[$SCRIPT_NAME] Showing the last 20 lines of the log file ($LOG_FILE):"
echo "[$SERVICE_NAME] LLM service is running. PID: $PID"
echo ""
echo "[$SERVICE_NAME] Showing the last 20 lines of the log file ($LOG_FILE):"
echo ""
tail -n 20 "$LOG_FILE"
fi
16 changes: 8 additions & 8 deletions scripts/usecases/llm/stopServer.sh
@@ -1,24 +1,24 @@
#!/bin/bash
SERVICE_NAME="llmServer"
SOURCE_FILE="$SERVICE_NAME".py
LOG_FILE="$SERVICE_NAME".log
VENV_PATH=venv_"$SERVICE_NAME"

# Define script variables
SCRIPT_NAME=$(basename "$0")
SERVICE_NAME="runCloudLLM.py"

echo "[$SCRIPT_NAME] Attempting to stop the LLM service..."
echo "[$SERVICE_NAME] Attempting to stop the LLM service..."

# Find the PID of the LLM service
PIDS=$(ps aux | grep "$SERVICE_NAME" | grep -v grep | awk '{print $2}')

if [ -z "$PIDS" ]; then
echo "[$SCRIPT_NAME] No LLM service is currently running."
echo "[$SERVICE_NAME] No LLM service is currently running."
else
# Kill the LLM service processes
for PID in $PIDS; do
kill $PID
if [ $? -eq 0 ]; then
echo "[$SCRIPT_NAME] Successfully stopped the LLM service (PID: $PID)."
echo "[$SERVICE_NAME] Successfully stopped the LLM service (PID: $PID)."
else
echo "[$SCRIPT_NAME] Failed to stop the LLM service (PID: $PID)."
echo "[$SERVICE_NAME] Failed to stop the LLM service (PID: $PID)."
fi
done
fi
