commit b4ef4b9aaa39ae5fb762c24234ddd82bfa17ee1f
Author: aiyueqi
Date: Mon Sep 22 15:13:55 2025 +0800

    support kunlun r200

diff --git a/1.PNG b/1.PNG
new file mode 100644
index 0000000..2751fe5
Binary files /dev/null and b/1.PNG differ
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..314cf16
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+#FROM git.modelhub.org.cn:980/enginex-kunlunxin/xmlir/r200-8f_xmlir-ubuntu_2004_x86_64:v0.27
+
+FROM diffuser:r200-8f
+
+ENV HF_ENDPOINT=https://hf-mirror.com
+
+RUN /root/miniconda/envs/python38_torch201_cuda/bin/python3 -m pip install transformers==4.46.3 uvicorn\[standard\] fastapi -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+WORKDIR /app
+
+COPY ./ /app
+
+RUN sed -i 's/(inv_freq_expanded\.float() @ position_ids_expanded\.float())/(inv_freq_expanded.float().clone() @ position_ids_expanded.float().clone())/g' /root/miniconda/envs/python38_torch201_cuda/lib/python3.8/site-packages/transformers/models/llama/modeling_llama.py
+
+EXPOSE 8000
+CMD ["sh", "-c", "/root/miniconda/envs/python38_torch201_cuda/bin/python3 server.py"]
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c016bb6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,36 @@
+# enginex-kunlun-r200-translation
+# translation-transformers
+## Quickstart
+```shell
+#构建docker镜像
+docker build . -t kunlun_r200_vl
+
+#运行docker容器
+docker run -it -p 10055:8000 --device=/dev/xpu2:/dev/xpu0 --device=/dev/xpuctrl:/dev/xpuctrl -v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro --name kunlun_r200_vl_test kunlun_r200_vl
+```
+等待模型Load完成,出现以下日志时,代表服务启动成功, 且模型加载完成
+```shell
+INFO: Application startup complete.
+INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
+```
+执行测试程序
+```shell
+python3 test.py
+```
+测试程序执行结果
+```
+Succeed!
+Response: {'output_text': '图片中的物体是一只狗,具体来说是一只金毛寻回犬。这只狗穿着带有图案的项圈,表明它可能被驯养并经常在户外活动。它正与一个坐着的人互动,看起来像是在玩或进行某种形式的身体接触。地面上有一个物体,可能是狗玩具或零食,这表明狗和人在海滩上进行休闲活动。背景中的海和日落暗示着一个宁静而放松的环境,通常与宠物的陪伴相关联。'}
+```
+停止docker容器
+```
+docker stop kunlun_r200_vl_test
+```
+## 模型支持
+在Quickstart中运行容器时,通过磁盘目录挂载的方式,指定模型的类型和具体的模型名称,即:
+```
+-v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro
+```
+目前支持MiniCPM模型, 参考https://modelscope.cn/models/OpenBMB/MiniCPM-V-4
+
+
diff --git a/logger.py b/logger.py
new file mode 100644
index 0000000..d238e70
--- /dev/null
+++ b/logger.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+import logging
+import os
+
+logging.basicConfig(
+    format="%(asctime)s %(name)-12s %(levelname)-4s %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    level=os.environ.get("LOGLEVEL", "INFO"),
+)
+
+def get_logger(file):
+    return logging.getLogger(file)
+
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..7e31568
--- /dev/null
+++ b/server.py
@@ -0,0 +1,84 @@
+import base64
+import gc
+import io
+import os
+import time
+import uvicorn
+from typing import List, Optional, Dict, Any, Tuple
+
+import torch
+
+from PIL import Image
+from fastapi import FastAPI, HTTPException, Query
+from pydantic import BaseModel
+from transformers import (AutoTokenizer, AutoConfig, AutoModelForCausalLM, AutoModelForVision2Seq, AutoModel)
+
+import logger
+log = logger.get_logger(__file__)
+
+app = FastAPI()
+
+model_type = None
+model = None
+device = None
+tokenizer = None
+
+class GenParams(BaseModel):
+    max_new_tokens: int = 128
+    temperature: float = 0.0
+    top_p: float = 1.0
+    do_sample: bool = False
+
+class InferRequest(BaseModel):
+    prompt: str
+    generation: GenParams = GenParams()
+    dtype: str = "auto"  # "auto"|"float16"|"bfloat16"|"float32"
+    warmup_runs: int = 1
+    measure_token_times: bool = False
+
+@app.on_event("startup")
+def load_model():
+    log.info("loading model")
+    global status, device, model_type, model, tokenizer
+
+    model_path = "/model"
+    cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
+    model_type = cfg.model_type
+    log.info(f"model type: {model_type}")
+
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=True)
+
+    model = AutoModel.from_pretrained(model_path, torch_dtype=torch.float32,
+                                      device_map=None, trust_remote_code=True)
+    model.to("cuda")  # NOTE(review): assumes the Kunlun XPU stack exposes a CUDA-compatible device — confirm
+    model.eval()
+
+    status = "success"
+    log.info("model loaded successfully")
+
+@app.post("/infer")
+def infer(req: InferRequest):
+    if model_type != "minicpmv":  # fail fast: `text` would otherwise be unbound below (NameError -> 500)
+        raise HTTPException(status_code=400, detail=f"unsupported model type: {model_type}")
+    image = Image.open('1.PNG').convert('RGB')
+    text = handle_minicpmv(image, req.prompt, req.generation)
+    log.info(f"text={text}")
+
+    result = dict()
+    result["output_text"] = text
+
+    return result
+
+def handle_minicpmv(image: Image.Image, prompt: str, gen: GenParams):
+    # Prepare msgs in the format expected by model.chat
+    msgs = [{"role": "user", "content": prompt}]
+
+    # Call the model's built-in chat method
+    response = model.chat(image=image, msgs=msgs, tokenizer=tokenizer,
+                          sampling=gen.do_sample, temperature=gen.temperature, stream=False)
+
+    return response
+
+if __name__ == '__main__':
+    uvicorn.run("server:app", host="0.0.0.0", port=8000, workers=1, access_log=False)
+
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..1549252
--- /dev/null
+++ b/test.py
@@ -0,0 +1,31 @@
+import requests
+
+def model_infer(vlm_url: str, payload):
+    try:
+        response = requests.post(vlm_url + "/infer", json=payload)
+        if response.status_code == 200:
+            print("Succeed!")
+            print("Response:", response.json())
+        else:
+            print(f"Failed,code: {response.status_code}")
+            print("Error detail:", response.text)
+
+    except requests.exceptions.RequestException as e:
+        print("request error:", str(e))
+
+payload = {
+    "prompt": "图片有什么?详细描述",
+    "generation": {
+        "max_new_tokens": 64,
+        "temperature": 0.7,
+        "top_p": 0.9,
+        "do_sample": True
+    },
+    "dtype": "auto",
+    "warmup_runs": 0,
+    "measure_token_times": False
+}
+
+url = "http://127.0.0.1:10055"
+model_infer(url, payload)