From c86b923667e30b4e8bf317a04171a961d6611c42 Mon Sep 17 00:00:00 2001 From: luopingyi Date: Thu, 9 Oct 2025 17:22:03 +0800 Subject: [PATCH] update readme --- README.md | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ request.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 request.py diff --git a/README.md b/README.md index daa60d4..946af7f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,67 @@ # enginex-mlu370-any2any +# 寒武纪 mlu370 统一多模态 + +该模型测试框架在寒武纪mlu370 (X8/X4)加速卡上,基于Transformer框架,适配了 Qwen/Qwen3-Omni-30B-A3B-Instruct 模型。 + +* 详见 https://modelscope.cn/models/Qwen/Qwen3-Omni-30B-A3B-Instruct + + +## Quick Start +1. 首先从modelscope上下载模型,如`Qwen/Qwen3-Omni-30B-A3B-Instruct` +```bash +modelscope download --model Qwen/Qwen3-Omni-30B-A3B-Instruct --local_dir /models/Qwen3-Omni-30B-A3B-Instruct +``` +2. 构建镜像 +```bash +docker build -t qwen:omni . +``` + +3. 启动docker +```bash +docker run -it --rm \ + -v /models/:/mnt/models \ + --device=/dev/cambricon_dev0:/dev/cambricon_dev0 \ + --device=/dev/cambricon_dev1:/dev/cambricon_dev1 \ + --device=/dev/cambricon_dev2:/dev/cambricon_dev2 \ + --device=/dev/cambricon_dev3:/dev/cambricon_dev3 \ + --device=/dev/cambricon_ctl:/dev/cambricon_ctl \ + -p 8080:80 \ + qwen:omni +``` +注意需要在本地使用寒武纪mlu370 芯片 + +4. 
测试服务
+
+4.1 测试视觉理解
+```bash
+python request.py
+```
+4.2 测试统一多模态
+
+启动容器时指定入口点为 /bin/bash
+
+```bash
+docker run -it --rm \
+  -v /models/:/mnt/models \
+  --device=/dev/cambricon_dev0:/dev/cambricon_dev0 \
+  --device=/dev/cambricon_dev1:/dev/cambricon_dev1 \
+  --device=/dev/cambricon_dev2:/dev/cambricon_dev2 \
+  --device=/dev/cambricon_dev3:/dev/cambricon_dev3 \
+  --device=/dev/cambricon_ctl:/dev/cambricon_ctl \
+  --entrypoint /bin/bash \
+  -p 8080:80 \
+  qwen:omni
+```
+
+将 test.py 拷贝到容器内
+```
+docker cp ./test.py <container_id>:/workspace/test.py
+```
+
+进入容器执行测试脚本
+
+```bash
+python test.py
+```
+
diff --git a/request.py b/request.py
new file mode 100644
index 0000000..9764aff
--- /dev/null
+++ b/request.py
@@ -0,0 +1,91 @@
+"""Smoke-test client: send one image and a question to the model service."""
+import base64
+import json
+import mimetypes
+import os
+import time
+
+import requests
+
+
+def model_infer(vlm_url, messages, timeout=300):
+    """POST a chat request and return the text of the first choice.
+
+    Args:
+        vlm_url: URL of the generation endpoint.
+        messages: Chat messages sent as the JSON body's ``messages`` field.
+        timeout: Timeout in seconds passed to ``requests.post``.
+
+    Returns:
+        On success, a dict with ``vlm_url``, ``data`` (the reply text) and
+        ``elapsed`` (seconds spent in the HTTP call). On a transport error,
+        an HTTP error status, an undecodable body or an unexpected response
+        shape, a dict with ``error`` (the exception message) and ``vlm_url``.
+    """
+    param_dict = {
+        "model": "vlm",
+        "messages": messages,
+    }
+    try:
+        # perf_counter is monotonic, so wall-clock adjustments cannot skew
+        # the measured latency.
+        start_time = time.perf_counter()
+        response = requests.post(vlm_url, json=param_dict, timeout=timeout)
+        elapsed = time.perf_counter() - start_time
+        # Surface 4xx/5xx as the HTTP error itself instead of a misleading
+        # JSON-decoding or missing-key failure further down.
+        response.raise_for_status()
+        payload = response.json()
+        response_content = payload["choices"][0]["message"]["content"]
+    except (requests.RequestException, ValueError, LookupError, TypeError) as e:
+        return {"error": str(e), "vlm_url": vlm_url}
+    return {
+        "vlm_url": vlm_url,
+        "data": response_content,
+        "elapsed": elapsed,
+    }
+
+
+def bench_one_image(image_path, vlm_url="http://localhost:8080/generate"):
+    """Ask the service for a one-sentence description of an image file.
+
+    Args:
+        image_path: Path of the image to send, embedded as a base64 data URL.
+        vlm_url: URL of the generation endpoint.
+
+    Returns:
+        The dict returned by ``model_infer``; it is also printed to stdout.
+    """
+    with open(image_path, "rb") as f:
+        image_base64 = base64.b64encode(f.read()).decode()
+
+    # Label the data URL with the type guessed from the file extension, and
+    # fall back to JPEG when it cannot be guessed.
+    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
+
+    question = "图片有什么?一句话描述"
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:{mime_type};base64,{image_base64}"},
+                },
+                {"type": "text", "text": question},
+            ],
+        }
+    ]
+
+    result = model_infer(vlm_url, messages)
+    print(result)
+    return result
+
+
+def main():
+    """Run the check against the sample image ``./cars.jpg``."""
+    bench_one_image('./cars.jpg')
+
+
+if __name__ == "__main__":
+    main()