b2a7ea98fce7b32d3bcafd6b8760e3ab560f289c
Model: FoolBird/Qwen-2.5-1.5b-instruct-JZFH Source: Original Platform
license, language, tasks, frameworks, base_model
| license | language | tasks | frameworks | base_model |
|---|---|---|---|---|
| Apache License 2.0 | | | PyTorch | |
本模型使用数据集 FoolBird/GB50016-2014 对 Qwen2.5-1.5B-Instruct 进行预训练，训练轮数为 250 轮。
数据集地址：https://modelscope.cn/datasets/FoolBird/GB50016-2014
本模型仅供学习使用
您可以通过如下git clone命令,或者ModelScope SDK来下载模型
SDK下载
# Install ModelScope
pip install modelscope
# Download the model with the ModelScope SDK
from modelscope import snapshot_download
model_dir = snapshot_download('FoolBird/Qwen-2.5-1.5b-instruct-JZFH')
Git下载
# Download the model with Git
git clone https://www.modelscope.cn/FoolBird/Qwen-2.5-1.5b-instruct-JZFH.git
使用本模型进行推理
# 使用本模型进行推理
from vllm import LLM, SamplingParams
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
from modelscope import snapshot_download
import torch
import time
# Identifier of the model on ModelScope that this script downloads and serves.
model_id = "FoolBird/Qwen-2.5-1.5b-instruct-JZFH"

# Module-level state shared by the functions in this script. All three start
# out as None: ``llm`` and ``tokenizer`` are assigned by ``qwen_vllm`` and
# ``sampling_params`` by ``qwen2_5_inference``.
llm = tokenizer = sampling_params = None
# Download a model from ModelScope
def download_modelscope_model(model_id):
    """Download the ModelScope model ``model_id`` and return its local path.

    Args:
        model_id: Model identifier handed unchanged to ``snapshot_download``.

    Returns:
        Whatever ``snapshot_download`` returns for the model (the local
        path of the downloaded snapshot).
    """
    return snapshot_download(model_id)
# Initialise the vLLM engine and the tokenizer
def qwen_vllm(model_path):
    """Load the vLLM engine and the tokenizer into the module-level globals.

    Populates the globals ``llm`` and ``tokenizer`` so that
    ``qwen2_5_inference`` can use them afterwards.

    Args:
        model_path: Model location passed to both ``LLM(model=...)`` and
            ``AutoTokenizer.from_pretrained``.

    Returns:
        The status message that is also printed once loading has finished.
    """
    global llm, tokenizer

    # Restrict this process to physical GPU "0".
    # NOTE(review): presumably this only takes effect if CUDA has not been
    # initialised in this process yet -- confirm against the import order.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Select device index 0 as the current CUDA device.
    torch.cuda.set_device(0)

    # Explicit name for the value passed as ``tensor_parallel_size`` (the
    # original used a local called ``sum``, which shadowed the builtin).
    tensor_parallel_size = 1

    # Load the model.
    llm = LLM(
        model=model_path,
        tensor_parallel_size=tensor_parallel_size,
        dtype=torch.float16,
        enforce_eager=True,
        gpu_memory_utilization=0.8,
        max_model_len=1024,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    text = "qwen_vllm加载完毕"
    print(text)
    return text
# Run inference with vLLM
def qwen2_5_inference(info):
    """Generate a reply to ``info`` and print it character by character.

    Uses the module-level ``llm`` and ``tokenizer`` set up by ``qwen_vllm``.
    The sampling parameters are built on first use and kept in the
    module-level ``sampling_params`` for subsequent calls.

    Args:
        info: The user's message; used as the content of the "user" turn.

    Returns:
        The generated text (concatenated over all returned outputs). The same
        text is printed to stdout without a trailing newline.

    Raises:
        RuntimeError: If the model or tokenizer has not been loaded yet.
    """
    global sampling_params

    # Fail with an actionable message instead of an AttributeError on None.
    if llm is None or tokenizer is None:
        raise RuntimeError(
            "Model is not loaded: call qwen_vllm(model_path) before "
            "qwen2_5_inference()."
        )

    messages = [
        {"role": "system", "content": ' You are Qwen, created by Alibaba Cloud. You are a helpful assistant'},
        {"role": "user", "content": info},
    ]

    # Render the conversation with the tokenizer's chat template, leaving the
    # result as text and appending the generation prompt.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Release unused cached GPU memory before generating.
    torch.cuda.empty_cache()

    # The parameters are constant, so build them once and reuse them.
    if sampling_params is None:
        sampling_params = SamplingParams(
            temperature=0.7,
            top_p=0.8,
            repetition_penalty=1.05,
            max_tokens=512,
        )

    outputs = llm.generate([text], sampling_params)

    # Generation has already completed here; the per-character print with a
    # short pause only produces a typewriter-style display.
    pieces = []
    for output in outputs:
        generated_text = output.outputs[0].text
        pieces.append(generated_text)
        for char in generated_text:
            print(char, end='', flush=True)
            time.sleep(0.05)

    return "".join(pieces)
if __name__ == '__main__':
    # Download the model, load it, then answer questions interactively.
    model_path = download_modelscope_model(model_id)
    qwen_vllm(model_path)
    try:
        while True:
            user_input = input("请输入您的问题: ")
            # Skip blank lines instead of sending an empty prompt to the model.
            if not user_input.strip():
                continue
            qwen2_5_inference(user_input)
            print(" ")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C ends the session without a traceback.
        print(" ")
    finally:
        # Release unused cached GPU memory on the way out; placing this in
        # ``finally`` makes the cleanup reachable despite the endless loop.
        torch.cuda.empty_cache()
Description
Languages
Python
100%