update readme
This commit is contained in:
65
README.md
65
README.md
@@ -1,2 +1,67 @@
|
||||
# enginex-mlu370-any2any
|
||||
|
||||
# 寒武纪 mlu370 统一多模态
|
||||
|
||||
该模型测试框架在寒武纪mlu370 (X8/X4)加速卡上,基于Transformers框架,适配了 Qwen/Qwen3-Omni-30B-A3B-Instruct 模型。
|
||||
|
||||
* 详见 https://modelscope.cn/models/Qwen/Qwen3-Omni-30B-A3B-Instruct
|
||||
|
||||
|
||||
## Quick Start
|
||||
1. 首先从modelscope上下载模型 `Qwen/Qwen3-Omni-30B-A3B-Instruct`
|
||||
```bash
|
||||
modelscope download --model Qwen/Qwen3-Omni-30B-A3B-Instruct --local_dir /models/Qwen3-Omni-30B-A3B-Instruct
|
||||
```
|
||||
2. 构建镜像
|
||||
```bash
|
||||
docker build -t qwen:omni .
|
||||
```
|
||||
|
||||
3. 启动docker
|
||||
```bash
|
||||
docker run -it --rm \
|
||||
-v /models/:/mnt/models \
|
||||
--device=/dev/cambricon_dev0:/dev/cambricon_dev0 \
|
||||
--device=/dev/cambricon_dev1:/dev/cambricon_dev1 \
|
||||
--device=/dev/cambricon_dev2:/dev/cambricon_dev2 \
|
||||
--device=/dev/cambricon_dev3:/dev/cambricon_dev3 \
|
||||
--device=/dev/cambricon_ctl:/dev/cambricon_ctl \
|
||||
-p 8080:80 \
|
||||
qwen:omni
|
||||
```
|
||||
注意:运行容器的本地机器需要配备寒武纪mlu370 芯片
|
||||
|
||||
4. 测试服务
|
||||
|
||||
4.1 测试视觉理解
|
||||
```bash
|
||||
python request.py
|
||||
```
|
||||
4.2 测试统一多模态
|
||||
|
||||
启动容器时指定入口点为 /bin/bash
|
||||
|
||||
```bash
|
||||
docker run -it --rm \
|
||||
-v /models/:/mnt/models \
|
||||
--device=/dev/cambricon_dev0:/dev/cambricon_dev0 \
|
||||
--device=/dev/cambricon_dev1:/dev/cambricon_dev1 \
|
||||
--device=/dev/cambricon_dev2:/dev/cambricon_dev2 \
|
||||
--device=/dev/cambricon_dev3:/dev/cambricon_dev3 \
|
||||
--device=/dev/cambricon_ctl:/dev/cambricon_ctl \
|
||||
--entrypoint /bin/bash \
|
||||
-p 8080:80 \
|
||||
qwen:omni
|
||||
```
|
||||
|
||||
将 test.py 拷贝到容器内
|
||||
```bash
|
||||
docker cp ./test.py <container_id>:/workspace/test.py
|
||||
```
|
||||
|
||||
进入容器执行测试脚本
|
||||
|
||||
```bash
|
||||
python test.py
|
||||
```
|
||||
|
||||
|
||||
58
request.py
Normal file
58
request.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import json
|
||||
import os
|
||||
import requests
|
||||
import base64
|
||||
import time
|
||||
|
||||
def model_infer(vlm_url, messages):
    """Send one chat request to the VLM service and time the round trip.

    Args:
        vlm_url: Full URL of the inference endpoint that receives the POST.
        messages: Chat messages sent under the "messages" key of the JSON
            payload.

    Returns:
        On success, a dict with "vlm_url", "data" (the content of the first
        choice's message) and "elapsed" (seconds spent in the POST call).
        On any failure, a dict with "error" (exception type and message)
        and "vlm_url". This function never raises; it is a best-effort
        benchmark probe.
    """
    try:
        payload = {
            "model": "vlm",
            "messages": messages,
        }
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        response = requests.post(vlm_url, json=payload, timeout=300)
        elapsed = time.perf_counter() - start_time

        # Report HTTP 4xx/5xx directly instead of failing later with an
        # unrelated JSON-decoding or missing-key error.
        response.raise_for_status()
        body = response.json()
        content = body["choices"][0]["message"]["content"]
    except Exception as e:
        # Include the exception type: str(KeyError("choices")) alone is just
        # "'choices'", which does not tell the reader what went wrong.
        return {"error": f"{type(e).__name__}: {e}", "vlm_url": vlm_url}

    return {
        "vlm_url": vlm_url,
        "data": content,
        "elapsed": elapsed,
    }
|
||||
|
||||
|
||||
def bench_one_image(
    image_path,
    vlm_url="http://localhost:8080/generate",
    question="图片有什么?一句话描述",
):
    """Ask the VLM service a question about one local image.

    The image is read from disk, base64-encoded and embedded as a data URL
    in a single user message, followed by the text question.

    Args:
        image_path: Path of the image file to send.
        vlm_url: Endpoint passed to model_infer; defaults to the local
            service address used by this script.
        question: Text prompt sent together with the image.

    Returns:
        The dict returned by model_infer (also printed to stdout).

    Raises:
        OSError: If the image file cannot be opened or read.
    """
    import mimetypes

    with open(image_path, "rb") as f:
        image_base64 = base64.b64encode(f.read()).decode()

    # Label the data URL with the file's real image type when it can be
    # guessed from the extension; otherwise keep the original JPEG default.
    guessed_type = mimetypes.guess_type(image_path)[0]
    if guessed_type and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        mime_type = "image/jpeg"

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
                },
                {"type": "text", "text": question},
            ],
        }
    ]

    result = model_infer(vlm_url, messages)

    print(result)

    return result
|
||||
|
||||
|
||||
def main(image_path=None):
    """Run the single-image benchmark.

    Args:
        image_path: Image to send. When omitted, the first command-line
            argument is used if present, otherwise './cars.jpg'.
    """
    if image_path is None:
        import sys

        # Plain `python request.py` keeps working with the bundled sample image.
        image_path = sys.argv[1] if len(sys.argv) > 1 else './cars.jpg'
    bench_one_image(image_path)


if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user