Fix the loading bug of local models on Windows
binary-sky committed Nov 11, 2023
1 parent e4409b9 commit f75e39d
Showing 8 changed files with 26 additions and 26 deletions.
3 changes: 1 addition & 2 deletions request_llms/bridge_chatgpt.py
@@ -7,8 +7,7 @@
    1. predict: used for normal conversation; full interactive features; must not be called from multiple threads
    Functions that can be called from multiple threads
-   2. predict_no_ui: called by advanced experimental feature modules; does not render in the UI in real time; simple parameters; can run in multiple threads in parallel, which makes complex feature logic easy to implement
-   3. predict_no_ui_long_connection: during experiments we found that calling predict_no_ui on long documents tends to drop the connection to openai; this function works around that by streaming, and it also supports multi-threading
+   2. predict_no_ui_long_connection: supports multi-threading
"""

import json
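A note on the simplified docstring above: `predict` drives the streaming UI and must not be called from multiple threads, while `predict_no_ui_long_connection` is a blocking call that is safe to run in parallel. Below is a minimal sketch of that parallel use; the exact signature and the required `llm_kwargs` fields are assumptions (a real call typically also needs the API key and model name), so check request_llms/bridge_chatgpt.py in your checkout.

```python
# Hedged sketch: parallel calls to predict_no_ui_long_connection.
# The signature and llm_kwargs fields below are assumptions, not the
# verbatim API of request_llms/bridge_chatgpt.py.
from concurrent.futures import ThreadPoolExecutor
from request_llms.bridge_chatgpt import predict_no_ui_long_connection

llm_kwargs = {'max_length': 4096, 'top_p': 1, 'temperature': 1}
questions = ["What is a proton?", "What is an electron?"]

def ask(question):
    # Blocking, no UI updates; several of these can run side by side.
    return predict_no_ui_long_connection(inputs=question, llm_kwargs=llm_kwargs,
                                         history=[], sys_prompt="")

with ThreadPoolExecutor(max_workers=2) as pool:
    answers = list(pool.map(ask, questions))
print(answers)
```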
3 changes: 1 addition & 2 deletions request_llms/bridge_chatgpt_website.py
@@ -7,8 +7,7 @@
    1. predict: used for normal conversation; full interactive features; must not be called from multiple threads
    Functions that can be called from multiple threads
-   2. predict_no_ui: called by advanced experimental feature modules; does not render in the UI in real time; simple parameters; can run in multiple threads in parallel, which makes complex feature logic easy to implement
-   3. predict_no_ui_long_connection: during experiments we found that calling predict_no_ui on long documents tends to drop the connection to openai; this function works around that by streaming, and it also supports multi-threading
+   2. predict_no_ui_long_connection: supports multi-threading
"""

import json
2 changes: 1 addition & 1 deletion request_llms/bridge_claude.py
@@ -7,7 +7,7 @@
    1. predict: used for normal conversation; full interactive features; must not be called from multiple threads
    Functions that can be called from multiple threads
-   2. predict_no_ui_long_connection: during experiments we found that calling predict_no_ui on long documents tends to drop the connection to openai; this function works around that by streaming, and it also supports multi-threading
+   2. predict_no_ui_long_connection: supports multi-threading
"""

import os
19 changes: 10 additions & 9 deletions request_llms/bridge_internlm.py
@@ -5,7 +5,7 @@
import time
import threading
import importlib
-from toolbox import update_ui, get_conf
+from toolbox import update_ui, get_conf, ProxyNetworkActivate
from multiprocessing import Process, Pipe
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns

@@ -52,14 +52,15 @@ def load_model_and_tokenizer(self):
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
        device = get_conf('LOCAL_MODEL_DEVICE')
-       if self._model is None:
-           tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
-           if device=='cpu':
-               model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
-           else:
-               model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()
-
-           model = model.eval()
+       with ProxyNetworkActivate('Download_LLM'):
+           if self._model is None:
+               tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
+               if device=='cpu':
+                   model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
+               else:
+                   model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()
+
+               model = model.eval()
        return model, tokenizer

def llm_stream_generator(self, **kwargs):
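The substance of the Windows fix is visible in this hunk: the tokenizer and model downloads are now wrapped in `with ProxyNetworkActivate('Download_LLM'):`, so the configured proxy is switched on only while `from_pretrained` fetches the weights and is restored afterwards. The real context manager lives in toolbox.py; the sketch below only illustrates the idea of such a scoped proxy switch and is not the project's implementation — the environment-variable mechanism and the default proxy URL are assumptions.

```python
# Illustrative sketch of a scoped proxy switch (NOT toolbox.ProxyNetworkActivate).
# Assumption: the proxy is applied via the standard HTTP(S)_PROXY env vars.
import os

class ScopedProxy:
    """Enable a proxy only inside the `with` block (e.g. while downloading
    LLM weights), then restore the previous environment on exit."""

    def __init__(self, task, proxy="http://127.0.0.1:7890"):
        self.task = task          # e.g. 'Download_LLM'; a real version could gate on config
        self.proxy = proxy
        self._saved = {}

    def __enter__(self):
        for key in ("HTTP_PROXY", "HTTPS_PROXY", "http_proxy", "https_proxy"):
            self._saved[key] = os.environ.get(key)
            os.environ[key] = self.proxy
        return self

    def __exit__(self, exc_type, exc, tb):
        # Restore the previous values even if the download raised.
        for key, old in self._saved.items():
            if old is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = old
        return False

# Usage mirrors the change above:
# with ScopedProxy('Download_LLM'):
#     model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
```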
15 changes: 8 additions & 7 deletions request_llms/bridge_qwen.py
@@ -6,7 +6,7 @@
import time
import threading
import importlib
-from toolbox import update_ui, get_conf
+from toolbox import update_ui, get_conf, ProxyNetworkActivate
from multiprocessing import Process, Pipe
from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns

@@ -29,12 +29,13 @@ def load_model_and_tokenizer(self):
        import platform
        from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

-       model_id = 'qwen/Qwen-7B-Chat'
-       self._tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True, resume_download=True)
-       # use fp16
-       model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval()
-       model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # generation length, top_p and other hyperparameters can be customized here
-       self._model = model
+       with ProxyNetworkActivate('Download_LLM'):
+           model_id = 'qwen/Qwen-7B-Chat'
+           self._tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True, resume_download=True)
+           # use fp16
+           model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True, fp16=True).eval()
+           model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # generation length, top_p and other hyperparameters can be customized here
+           self._model = model

        return self._model, self._tokenizer

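Both the InternLM and Qwen bridges follow the same pattern: a `LocalLLMHandle` subclass implements `load_model_and_tokenizer` (now inside the proxy scope) and `llm_stream_generator`, and `get_local_llm_predict_fns` turns that handle into the two predict functions. The sketch below is schematic; the class name, the Qwen-specific details, and the assumption that the helper returns `(predict_no_ui_long_connection, predict)` are illustrative rather than copied from the repository.

```python
# Schematic sketch of wiring a local-model bridge; details are assumptions.
from request_llms.local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
from toolbox import ProxyNetworkActivate

model_name = "Qwen-7B-Chat"  # illustrative

class MyLocalModelHandle(LocalLLMHandle):
    def load_model_and_tokenizer(self):
        # Runs in the model subprocess; keep all network access inside the
        # proxy scope so the first download also works behind a proxy on Windows.
        from modelscope import AutoModelForCausalLM, AutoTokenizer
        with ProxyNetworkActivate('Download_LLM'):
            tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen-7B-Chat', trust_remote_code=True)
            model = AutoModelForCausalLM.from_pretrained('qwen/Qwen-7B-Chat', device_map="auto",
                                                         trust_remote_code=True).eval()
        return model, tokenizer

    def llm_stream_generator(self, **kwargs):
        # Yield partial responses token by token; omitted in this sketch.
        raise NotImplementedError

# Assumed convention: the helper returns the two module-level predict functions.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(MyLocalModelHandle, model_name)
```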
2 changes: 1 addition & 1 deletion request_llms/local_llm_class.py
@@ -201,7 +201,7 @@ def stream_chat(self, **kwargs):
            if res.startswith(self.std_tag):
                new_output = res[len(self.std_tag):]
                std_out = std_out[:std_out_clip_len]
-               # print(new_output, end='')
+               print(new_output, end='')
                std_out = new_output + std_out
                yield self.std_tag + '\n```\n' + std_out + '\n```\n'
            elif res == '[Finish]':
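The change in `stream_chat` uncomments `print(new_output, end='')`, so stdout captured in the model subprocess (messages prefixed with `std_tag`) is now echoed to the parent console in addition to being yielded as a fenced block. A self-contained sketch of that relay pattern follows; the `std_tag` value is illustrative, not the constant defined in local_llm_class.py.

```python
# Hedged sketch of the std-out relay in stream_chat. The std_tag value is
# illustrative, not the constant used in local_llm_class.py.
std_tag = '[Local Message] '
fence = chr(96) * 3  # a markdown code fence, built indirectly to keep this block tidy

def relay(messages):
    """Turn raw messages from the model subprocess into displayable chunks."""
    std_out = ""
    for res in messages:
        if res == '[Finish]':
            break
        if res.startswith(std_tag):
            new_output = res[len(std_tag):]
            print(new_output, end='')   # mirror child stdout to the parent console
            std_out = new_output + std_out
            yield std_tag + '\n' + fence + '\n' + std_out + '\n' + fence + '\n'
        else:
            yield res                   # ordinary model output

# Example: list(relay([std_tag + 'loading checkpoint shards...', 'Hello!', '[Finish]']))
```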
4 changes: 2 additions & 2 deletions tests/test_llms.py
@@ -15,11 +15,11 @@ def validate_path():
# from request_llms.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
# from request_llms.bridge_jittorllms_llama import predict_no_ui_long_connection
# from request_llms.bridge_claude import predict_no_ui_long_connection
-# from request_llms.bridge_internlm import predict_no_ui_long_connection
+from request_llms.bridge_internlm import predict_no_ui_long_connection
# from request_llms.bridge_qwen import predict_no_ui_long_connection
# from request_llms.bridge_spark import predict_no_ui_long_connection
# from request_llms.bridge_zhipu import predict_no_ui_long_connection
-from request_llms.bridge_chatglm3 import predict_no_ui_long_connection
+# from request_llms.bridge_chatglm3 import predict_no_ui_long_connection

llm_kwargs = {
'max_length': 4096,
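The test now exercises the InternLM bridge (the chatglm3 import is commented out instead). The `llm_kwargs` dict above is cut off by the diff; a minimal smoke test along the same lines might look like the following, where the extra keys and the exact call are assumptions rather than the file's verbatim contents.

```python
# Hedged sketch of a local-model smoke test; the keys beyond max_length and the
# call signature are assumptions, not the verbatim contents of tests/test_llms.py.
from request_llms.bridge_internlm import predict_no_ui_long_connection

llm_kwargs = {
    'max_length': 4096,
    'top_p': 1,
    'temperature': 1,
}

if __name__ == '__main__':
    result = predict_no_ui_long_connection(inputs="What is a proton?",
                                           llm_kwargs=llm_kwargs,
                                           history=[],
                                           sys_prompt="")
    print(result)
```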
4 changes: 2 additions & 2 deletions version
@@ -1,5 +1,5 @@
{
"version": 3.57,
"version": 3.58,
"show_feature": true,
"new_feature": "支持文心一言v4和星火v3 <-> 支持GLM3和智谱的API <-> 解决本地模型并发BUG <-> 支持动态追加基础功能按钮 <-> 新汇报PDF汇总页面 <-> 重新编译Gradio优化使用体验"
"new_feature": "修复本地模型在Windows下的加载BUG <-> 支持文心一言v4和星火v3 <-> 支持GLM3和智谱的API <-> 解决本地模型并发BUG <-> 支持动态追加基础功能按钮 <-> 新汇报PDF汇总页面 <-> 重新编译Gradio优化使用体验"
}
