Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add swagger dashboard for LLM server #1455

Merged
Merged 1 commit on
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 80 additions & 8 deletions scripts/usecases/llm/llmServer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Web framework plus request/JSON helpers used by the route handlers below.
from flask import Flask, request, jsonify
# Flasgger builds a Swagger UI dashboard from the route docstrings
# (served at /apidocs by default — flasgger's documented default; confirm).
from flasgger import Swagger
# NOTE(review): threading/argparse are not used in this visible chunk;
# presumably used further down the file (background model load / CLI flags) — confirm.
import threading
import argparse
# LangChain community binding for the vLLM inference engine.
from langchain_community.llms import VLLM

app = Flask(__name__)
# Instantiating Swagger(app) registers the dashboard routes on the app.
swagger = Swagger(app)

# Hugging Face model identifier served by this process.
model="tiiuae/falcon-7b-instruct"

Expand Down Expand Up @@ -33,23 +35,93 @@ def load_model():

@app.route("/status", methods=["GET"])
def get_status():
    """
    This endpoint returns the model loading status.
    ---
    responses:
      200:
        description: Model loading status
        schema:
          id: status_response
          properties:
            model:
              type: string
              description: The model identifier
            loaded:
              type: boolean
              description: Whether the model has been loaded
    """
    # Every response carries the model id and the load flag; a human-readable
    # message is attached only while the model is still loading.
    status = {"model": model, "loaded": model_loaded}
    if not model_loaded:
        status["message"] = "Model is not loaded yet."
    return jsonify(status)


@app.route("/prompt", methods=["GET", "POST"])
def prompt():
@app.route("/prompt", methods=["POST"])
def prompt_post():
    """
    This is the language model prompt API.
    ---
    parameters:
      - name: input
        in: body
        type: string
        required: true
        example: {"input": "What is the Multi-Cloud?"}
    responses:
      200:
        description: A successful response
        schema:
          id: output_response
          properties:
            input:
              type: string
              description: The input prompt
            output:
              type: string
              description: The generated text
            model:
              type: string
              description: The model used for generation
    """
    # Reject prompts with 503 until the background model load has finished.
    if not model_loaded:
        return jsonify({"error": "Model is not loaded yet."}), 503

    # This route only accepts POST, so the leftover GET-branch handling and the
    # duplicated JSON parsing from the merge were removed.
    # get_json(silent=True) returns None (instead of raising a 400/415 error)
    # when the body is missing or not valid JSON; fall back to an empty dict.
    data = request.get_json(silent=True) or {}
    # Avoid shadowing the builtin `input`; the JSON key stays "input".
    prompt_text = data.get("input", "")
    output = llm(prompt_text)
    return jsonify({"input": prompt_text, "output": output, "model": model})

@app.route("/prompt", methods=["GET"])
def prompt_get():
    """
    This is the language model prompt API for GET requests.
    ---
    parameters:
      - name: input
        in: query
        type: string
        required: true
        example: "What is the Multi-Cloud?"
    responses:
      200:
        description: A successful response
        schema:
          id: output_response
          properties:
            input:
              type: string
              description: The input prompt
            output:
              type: string
              description: The generated text
            model:
              type: string
              description: The model used for generation
    """
    # Guard: prompts are only served once the model has loaded.
    if not model_loaded:
        return jsonify({"error": "Model is not loaded yet."}), 503

    # Read the prompt from the "input" query parameter (empty string if absent)
    # and run it through the loaded LLM.
    query_text = request.args.get("input", "")
    generated = llm(query_text)
    return jsonify({"input": query_text, "output": generated, "model": model})

Expand Down
1 change: 1 addition & 0 deletions scripts/usecases/llm/startServer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ pip install -U -q langchain langchain-community
# Response-caching layer for LLM calls.
pip install -U -q gptcache
# vLLM is installed explicitly into the venv's interpreter via sudo —
# presumably because the engine needs system-level build privileges; confirm.
sudo $VENV_PATH/bin/python -m pip install -U -q vllm
# Pinned pre-1.0 openai client — NOTE(review): assumes the scripts use the
# legacy (pre-1.0) openai API surface; verify before upgrading.
pip install -q openai==0.28.1
# Flasgger provides the Swagger dashboard added to llmServer.py in this change.
pip install -U -q flasgger

# Check if the pip install was successful
if [ $? -ne 0 ]; then
Expand Down