support metax c500

This commit is contained in:
aiyueqi
2025-09-19 14:46:59 +08:00
commit 03df3ffc3b
6 changed files with 174 additions and 0 deletions

BIN
1.PNG Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 MiB

13
Dockerfile Normal file
View File

@@ -0,0 +1,13 @@
# Base image: MetaX MACA C500 PyTorch runtime (torch 2.6, Python 3.10, Ubuntu 24.04 — per tag).
FROM git.modelhub.org.cn:9443/enginex-metax/maca-c500-pytorch:2.33.0.6-torch2.6-py310-ubuntu24.04-amd64
# Route Hugging Face downloads through the hf-mirror endpoint.
ENV HF_ENDPOINT=https://hf-mirror.com
# Make the conda-provided python/pip the default on PATH.
ENV PATH=/opt/conda/bin:${PATH}
# transformers pinned to 4.50.0; uvicorn[standard] + fastapi serve the HTTP API.
RUN pip install transformers==4.50.0 uvicorn\[standard\] fastapi
WORKDIR /app
COPY ./ /app
# server.py binds uvicorn to port 8000 (see server.py __main__).
EXPOSE 8000
CMD ["sh", "-c", "python3 server.py"]

35
README.md Normal file
View File

@@ -0,0 +1,35 @@
# enginex-metax-c500-translation
## translation-transformers
## Quickstart
```shell
#构建docker镜像
docker build . -t metax_c500_vl
#运行docker容器
docker run -it -p 10055:8000 --device=/dev/mxcd --device=/dev/dri -v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro --name metax_c500_vl_test metax_c500_vl
```
等待模型加载完成, 出现以下日志时代表服务启动成功
```shell
INFO: Application startup complete.
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
```
执行测试程序
```shell
python3 test.py
```
测试程序执行结果
```
Succeed!
Response: {'output_text': '这幅图片包含几个元素,共同营造出宁静的氛围。主要对象是一个坐在沙滩上的金毛寻回犬和一个穿着格子衬衫的人。狗似乎正与这个人互动,可能是在玩耍或训练,因为它的爪子和人的手在接触。狗戴着颜色鲜艳的项圈,表明它可能接受过训练或习惯于与人互动。这个人看起来很放松,微笑着,暗示着他们之间的亲密关系。背景是一片宁静的海滩,太阳低垂在地平线上,为场景投射出温暖的金色光线。这可能是一天中的早晨或傍晚,因为光线柔和而扩散。海滩上没有其他人,强调了两个人之间的个人时刻。这张图片唤起了和平、陪伴和简单之美的感觉。'}
```
停止docker容器
```
docker stop metax_c500_vl_test
```
## 模型支持
在Quickstart中运行容器时通过磁盘目录挂载的方式指定模型的类型和具体的模型名称
```
-v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro
```
目前支持MiniCPM模型, 参考 https://modelscope.cn/models/OpenBMB/MiniCPM-V-4

12
logger.py Normal file
View File

@@ -0,0 +1,12 @@
# -*- coding: utf-8 -*-
"""Shared logging setup.

Importing this module configures the root logger once; callers obtain a
named logger via get_logger().  The level is taken from the LOGLEVEL
environment variable (default: INFO).
"""
import logging
import os

_LOG_FORMAT = "%(asctime)s %(name)-12s %(levelname)-4s %(message)s"
_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

logging.basicConfig(
    format=_LOG_FORMAT,
    datefmt=_DATE_FORMAT,
    level=os.environ.get("LOGLEVEL", "INFO"),
)


def get_logger(file):
    """Return a logger named after *file* (callers typically pass __file__)."""
    return logging.getLogger(file)

84
server.py Normal file
View File

@@ -0,0 +1,84 @@
import base64
import gc
import io
import os
import time
import uvicorn
from typing import List, Optional, Dict, Any, Tuple
import torch
from PIL import Image
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel
from transformers import (AutoTokenizer, AutoConfig, AutoModelForCausalLM, AutoModelForVision2Seq, AutoModel)
import logger
log = logger.get_logger(__file__)
app = FastAPI()
# Module-level model state; populated by load_model() at application startup.
model_type = None
model = None
device = None
tokenizer = None
class GenParams(BaseModel):
    """Generation hyperparameters attached to an inference request."""
    # NOTE(review): only temperature and do_sample are actually forwarded to
    # model.chat() in handle_minicpmv; the rest are currently accepted but unused.
    max_new_tokens: int = 128
    temperature: float = 0.0
    top_p: float = 1.0
    do_sample: bool = False
class InferRequest(BaseModel):
    """Request body for POST /infer."""
    prompt: str
    generation: GenParams = GenParams()
    # NOTE(review): dtype, warmup_runs and measure_token_times are accepted but
    # not read anywhere in this file — presumably reserved for future use.
    dtype: str = "auto"  # "auto"|"float16"|"bfloat16"|"float32"
    warmup_runs: int = 1
    measure_token_times: bool = False
@app.on_event("startup")
def load_model():
    """Load the model mounted at /model into the module-level globals.

    Runs once at FastAPI startup: reads the model type from its config,
    builds the tokenizer and model (trust_remote_code — MiniCPM-V ships
    custom modeling code), moves the model to the GPU and sets eval mode.
    """
    log.info("loading model")
    global status, device, model_type, model, tokenizer
    model_path = "/model"
    cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    model_type = cfg.model_type
    log.info(f"model type: {model_type}")
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=True)
    model = AutoModel.from_pretrained(model_path, torch_dtype=torch.float32,
                                      device_map=None, trust_remote_code=True)
    # Record the target device in the module-level global (it was declared
    # global above but never assigned in the original, staying None).
    device = "cuda"
    model.to(device)
    model.eval()
    status = "success"
    log.info("model loaded successfully")
@app.post("/infer")
def infer(req: InferRequest):
    """Run one inference round and return {"output_text": ...}.

    NOTE(review): the image is hard-coded to the bundled 1.PNG; the request
    only controls the prompt (and, partially, generation parameters).

    Raises:
        HTTPException(400): when the loaded model type is not supported.
    """
    image = Image.open('1.PNG').convert('RGB')
    if model_type == "minicpmv":
        text = handle_minicpmv(image, req.prompt, req.generation)
    else:
        # Original code left `text` unbound on this path, so unsupported
        # models crashed with a NameError (HTTP 500). Fail explicitly instead.
        raise HTTPException(status_code=400,
                            detail=f"unsupported model type: {model_type}")
    log.info(f"text={text}")
    return {"output_text": text}
def handle_minicpmv(image: Image.Image, prompt: str, gen: GenParams):
    """Run a single-turn MiniCPM-V chat call and return its response text.

    Uses the module-level `model` and `tokenizer` loaded at startup; only
    the sampling flag and temperature from *gen* are forwarded.
    """
    # model.chat expects a chat-style message list; one user turn here.
    conversation = [{"role": "user", "content": prompt}]
    return model.chat(
        image=image,
        msgs=conversation,
        tokenizer=tokenizer,
        sampling=gen.do_sample,
        temperature=gen.temperature,
        stream=False,
    )
if __name__ == '__main__':
    # Single worker: the model lives in this process's (GPU) memory, so
    # multiple workers would each load their own copy.
    uvicorn.run("server:app", host="0.0.0.0", port=8000, workers=1, access_log=False)

30
test.py Normal file
View File

@@ -0,0 +1,30 @@
import requests
def model_infer(vlm_url: str, payload, timeout: float = 60):
    """POST *payload* to the server's /infer endpoint and print the outcome.

    Args:
        vlm_url: base URL of the inference server, without a trailing slash.
        payload: JSON-serializable request body (see InferRequest in server.py).
        timeout: seconds to wait for the response; without one, requests.post
            would block forever on a hung server.
    """
    try:
        response = requests.post(vlm_url + "/infer", json=payload, timeout=timeout)
        if response.status_code == 200:
            print("Succeed!")
            print("Response:", response.json())
        else:
            # Original message read "Failedcode:" — missing separator.
            print(f"Failed, code: {response.status_code}")
            print("Error detail:", response.text)
    except requests.exceptions.RequestException as e:
        print("request error:", str(e))
# Demo request: sampled generation with a Chinese prompt.
# NOTE(review): dtype/warmup_runs/measure_token_times are sent but the
# current server implementation does not read them.
payload = {
    "prompt": "图片有什么?详细描述",
    "generation": {
        "max_new_tokens": 64,
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True
    },
    "dtype": "auto",
    "warmup_runs": 0,
    "measure_token_times": False
}
# Host port 10055 maps to container port 8000 (see README docker run line).
url = "http://127.0.0.1:10055"
model_infer(url, payload)