138 lines
3.5 KiB
Markdown
138 lines
3.5 KiB
Markdown
|
|
---
|
|||
|
|
license: Apache License 2.0
|
|||
|
|
|
|||
|
|
#model-type:
|
|||
|
|
##如 gpt、phi、llama、chatglm、baichuan 等
|
|||
|
|
#- gpt
|
|||
|
|
|
|||
|
|
#domain:
|
|||
|
|
##如 nlp、cv、audio、multi-modal
|
|||
|
|
#- nlp
|
|||
|
|
|
|||
|
|
#language:
|
|||
|
|
##语言代码列表 https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa
|
|||
|
|
#- cn
|
|||
|
|
|
|||
|
|
#metrics:
|
|||
|
|
##如 CIDEr、BLEU、ROUGE 等
|
|||
|
|
#- CIDEr
|
|||
|
|
|
|||
|
|
#tags:
|
|||
|
|
##各种自定义,包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他
|
|||
|
|
#- pretrained
|
|||
|
|
|
|||
|
|
#tools:
|
|||
|
|
##如 vllm、fastchat、llamacpp、AdaSeq 等
|
|||
|
|
#- vllm
|
|||
|
|
language:
|
|||
|
|
- zh
|
|||
|
|
tasks:
|
|||
|
|
- text-generation
|
|||
|
|
frameworks: PyTorch
|
|||
|
|
base_model:
|
|||
|
|
- Qwen/Qwen2.5-1.5B-Instruct
|
|||
|
|
---
|
|||
|
|
### 本模型使用数据集 FoolBird/GB50016-2014 对 Qwen2.5-1.5B-Instruct 进行预训练,训练轮数为 250 轮。
|
|||
|
|
### 数据集地址:https://modelscope.cn/datasets/FoolBird/GB50016-2014
|
|||
|
|
### 本模型仅供学习使用
|
|||
|
|
#### 您可以通过如下git clone命令,或者ModelScope SDK来下载模型
|
|||
|
|
|
|||
|
|
SDK下载
|
|||
|
|
```bash
|
|||
|
|
#安装ModelScope
|
|||
|
|
pip install modelscope
|
|||
|
|
```
|
|||
|
|
```python
|
|||
|
|
#SDK模型下载
|
|||
|
|
from modelscope import snapshot_download
|
|||
|
|
model_dir = snapshot_download('FoolBird/Qwen-2.5-1.5b-instruct-JZFH')
|
|||
|
|
```
|
|||
|
|
Git下载
|
|||
|
|
```bash
|
|||
|
|
#Git模型下载
|
|||
|
|
git clone https://www.modelscope.cn/FoolBird/Qwen-2.5-1.5b-instruct-JZFH.git
|
|||
|
|
```
|
|||
|
|
使用本模型进行推理
|
|||
|
|
```python
|
|||
|
|
# 使用本模型进行推理
|
|||
|
|
from vllm import LLM, SamplingParams
|
|||
|
|
import os
|
|||
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|||
|
|
from modelscope import snapshot_download
|
|||
|
|
import torch
|
|||
|
|
import time
|
|||
|
|
|
|||
|
|
# ModelScope model used by this example.
model_id = 'FoolBird/Qwen-2.5-1.5b-instruct-JZFH'  # model ID on ModelScope

# Module-level state shared by the functions below: qwen_vllm() assigns
# `llm` and `tokenizer`; qwen2_5_inference() assigns `sampling_params`.
llm = None
tokenizer = None
sampling_params = None
|
|||
|
|
|
|||
|
|
# 下载 ModelScope 模型
|
|||
|
|
def download_modelscope_model(model_id):
    """Download a model from ModelScope and return its local path.

    Args:
        model_id: Model identifier on ModelScope, forwarded unchanged to
            ``snapshot_download``.

    Returns:
        The value returned by ``snapshot_download``; the caller uses it as
        the local model path.
    """
    return snapshot_download(model_id)
|
|||
|
|
|
|||
|
|
# 初始化模型和 tokenizer
|
|||
|
|
def qwen_vllm(model_path):
    """Load the vLLM engine and the tokenizer into the module-level globals.

    Args:
        model_path: Model location handed to both ``LLM`` and
            ``AutoTokenizer.from_pretrained``.

    Returns:
        The status string "qwen_vllm加载完毕", which is also printed.
    """
    # Restrict this process to GPU "0". NOTE(review): this only takes effect
    # if CUDA has not been initialised earlier in the process - confirm.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    global llm, tokenizer

    # One GPU is used, so the model is not sharded across devices.
    tensor_parallel_size = 1

    # With a single visible device, index 0 is the GPU selected above.
    torch.cuda.set_device(0)

    # Build the inference engine and the matching tokenizer.
    llm = LLM(
        model=model_path,
        tensor_parallel_size=tensor_parallel_size,
        dtype=torch.float16,
        enforce_eager=True,
        gpu_memory_utilization=0.8,
        max_model_len=1024,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    text = "qwen_vllm加载完毕"
    print(text)
    return text
|
|||
|
|
|
|||
|
|
# 使用 VLLM 进行推理
|
|||
|
|
def qwen2_5_inference(info):
    """Generate a reply to *info* and print it one character at a time.

    Args:
        info: Text inserted as the "user" message of the chat.

    Raises:
        RuntimeError: If ``llm`` or ``tokenizer`` is still ``None``, i.e.
            ``qwen_vllm`` has not been called yet.
    """
    global sampling_params

    # Fail with a clear message instead of an AttributeError on None.
    if llm is None or tokenizer is None:
        raise RuntimeError(
            "Model is not loaded; call qwen_vllm(model_path) first."
        )

    messages = [
        {"role": "system", "content": ' You are Qwen, created by Alibaba Cloud. You are a helpful assistant'},
        {"role": "user", "content": info},
    ]

    # Render the messages into a single prompt string via the chat template.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Release cached, unused GPU memory before generating.
    torch.cuda.empty_cache()

    sampling_params = SamplingParams(
        temperature=0.7,
        top_p=0.8,
        repetition_penalty=1.05,
        max_tokens=512,
    )
    outputs = llm.generate([text], sampling_params)

    # The complete outputs are already available here; printing them one
    # character at a time with a short pause only imitates streaming.
    for output in outputs:
        generated_text = output.outputs[0].text
        for char in generated_text:
            print(char, end='', flush=True)
            time.sleep(0.05)
|
|||
|
|
|
|||
|
|
if __name__ == '__main__':
    # Download the model, load it, then answer questions interactively.
    model_path = download_modelscope_model(model_id)
    qwen_vllm(model_path)

    while True:
        try:
            user_input = input("请输入您的问题: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt: leave the loop cleanly instead
            # of terminating with a traceback.
            print()
            break

        qwen2_5_inference(user_input)
        print(" ")

        # Release cached, unused GPU memory after each answer.
        # NOTE(review): the original indentation of this call was lost; it
        # is kept inside the loop - confirm this matches the intent.
        torch.cuda.empty_cache()
|
|||
|
|
```
|