From c86b923667e30b4e8bf317a04171a961d6611c42 Mon Sep 17 00:00:00 2001
From: luopingyi <luopingyi@4paradigm.com>
Date: Thu, 9 Oct 2025 17:22:03 +0800
Subject: [PATCH] update readme

---
 README.md  | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 request.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+)
 create mode 100644 request.py

diff --git a/README.md b/README.md
index daa60d4..946af7f 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,67 @@
 # enginex-mlu370-any2any
 
+# 寒武纪 mlu370 统一多模态
+
+该模型测试框架在寒武纪 mlu370（X8/X4）加速卡上，基于 Transformers 框架，适配了 Qwen/Qwen3-Omni-30B-A3B-Instruct 模型。
+
+* 详见 https://modelscope.cn/models/Qwen/Qwen3-Omni-30B-A3B-Instruct
+
+
+## Quick Start
+1. 首先从 modelscope 上下载模型 `Qwen/Qwen3-Omni-30B-A3B-Instruct`  
+```bash
+modelscope download --model Qwen/Qwen3-Omni-30B-A3B-Instruct --local_dir /models/Qwen3-Omni-30B-A3B-Instruct
+```
+2. 构建镜像
+```bash
+docker build -t qwen:omni .
+```
+
+3. 启动docker 
+```bash
+docker run -it --rm \
+  -v /models/:/mnt/models \
+  --device=/dev/cambricon_dev0:/dev/cambricon_dev0 \
+  --device=/dev/cambricon_dev1:/dev/cambricon_dev1 \
+  --device=/dev/cambricon_dev2:/dev/cambricon_dev2 \
+  --device=/dev/cambricon_dev3:/dev/cambricon_dev3 \
+  --device=/dev/cambricon_ctl:/dev/cambricon_ctl \
+  -p 8080:80 \
+  qwen:omni
+```  
+注意需要在本地使用寒武纪mlu370 芯片   
+
+4. 测试服务   
+
+4.1 测试视觉理解（在宿主机执行；需已安装 `requests`，且当前目录下有 `cars.jpg`）    
+```bash
+python request.py
+```
+4.2 测试统一多模态
+
+启动容器时指定入口点为 /bin/bash
+
+```bash
+docker run -it --rm \
+  -v /models/:/mnt/models \
+  --device=/dev/cambricon_dev0:/dev/cambricon_dev0 \
+  --device=/dev/cambricon_dev1:/dev/cambricon_dev1 \
+  --device=/dev/cambricon_dev2:/dev/cambricon_dev2 \
+  --device=/dev/cambricon_dev3:/dev/cambricon_dev3 \
+  --device=/dev/cambricon_ctl:/dev/cambricon_ctl \
+  --entrypoint /bin/bash \
+  -p 8080:80 \
+  qwen:omni
+```  
+
+将 test.py 拷贝到容器内
+```bash
+docker cp ./test.py <container_id>:/workspace/test.py
+```
+
+进入容器执行测试脚本
+
+```bash
+python test.py
+```
+
diff --git a/request.py b/request.py
new file mode 100644
index 0000000..9764aff
--- /dev/null
+++ b/request.py
@@ -0,0 +1,58 @@
+import json
+import os
+import requests
+import base64
+import time
+
+def model_infer(vlm_url, messages):
+    """POST chat ``messages`` to ``vlm_url`` and time the round trip.
+
+    Returns ``{"vlm_url", "data", "elapsed"}`` on success: ``data`` is the first
+    choice's message content, ``elapsed`` the seconds spent inside the POST.
+    Any failure is returned, not raised, as ``{"error", "vlm_url"}``.
+    """
+    param_dict = {"model": "vlm", "messages": messages}
+    try:
+        start_time = time.perf_counter()  # monotonic; wall-clock jumps can't skew it
+        response = requests.post(vlm_url, json=param_dict, timeout=300)
+        elapsed = time.perf_counter() - start_time
+        # Fail on HTTP 4xx/5xx here, so the error names the status, not a missing key.
+        response.raise_for_status()
+        content = response.json()["choices"][0]["message"]["content"]
+        return {"vlm_url": vlm_url, "data": content, "elapsed": elapsed}
+    except Exception as e:
+        # Best-effort probe: hand the failure back instead of crashing the run.
+        return {"error": str(e), "vlm_url": vlm_url}
+
+
+def bench_one_image(image_path, vlm_url="http://localhost:8080/generate"):
+    """Send one image to the service at ``vlm_url`` and print its description.
+
+    The file is sent inline as a base64 ``data:`` URL in an ``image_url`` content
+    part, followed by a fixed one-sentence-description question in Chinese.
+    Returns the dict produced by ``model_infer``.
+    """
+    import mimetypes  # stdlib; imported here because only this function needs it
+
+    with open(image_path, "rb") as f:
+        image_base64 = base64.b64encode(f.read()).decode()
+    # Label the payload with the file's own type (guessed from its extension)
+    # instead of always claiming JPEG; JPEG stays the fallback for unknown ones.
+    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
+    data_url = f"data:{mime_type};base64,{image_base64}"
+    question = "图片有什么？一句话描述"
+    content = [
+        {"type": "image_url", "image_url": {"url": data_url}},
+        {"type": "text", "text": question},
+    ]
+    result = model_infer(vlm_url, [{"role": "user", "content": content}])
+    print(result)
+    return result
+
+
+def main():
+    bench_one_image('./cars.jpg')  # relative path: resolved against the current working directory
+
+if __name__ == "__main__":  # run the benchmark only when executed as a script
+    main()
+