Compare commits
1 Commits
main
...
deepseek-o
| Author | SHA1 | Date | |
|---|---|---|---|
| faded11807 |
16
Dockerfile
16
Dockerfile
@@ -1,13 +1,11 @@
|
|||||||
FROM maca-c500-pytorch:2.33.0.6-torch2.6-py310-ubuntu24.04-amd64
|
FROM git.modelhub.org.cn:9443/enginex-metax/maca-c500-pytorch:2.33.0.6-torch2.6-py310-ubuntu24.04-amd64
|
||||||
|
|
||||||
ENV HF_ENDPOINT=https://hf-mirror.com
|
|
||||||
ENV PATH=/opt/conda/bin:${PATH}
|
|
||||||
|
|
||||||
RUN pip install transformers==4.50.0 uvicorn\[standard\] fastapi
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY ./ /app
|
RUN /opt/conda/bin/pip install transformers==4.46.3 einops addict easydict modelscope uvicorn fastapi
|
||||||
|
|
||||||
EXPOSE 8000
|
COPY app.py .
|
||||||
CMD ["sh", "-c", "python3 server.py"]
|
|
||||||
|
ENTRYPOINT []
|
||||||
|
|
||||||
|
CMD ["/opt/conda/bin/python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
|
||||||
|
|||||||
69
README.md
69
README.md
@@ -1,35 +1,38 @@
|
|||||||
# enginex-metax-c500-translation
|
# enginex-metax-c500-transformer-deepseekOCR
|
||||||
# translation-transformers
|
|
||||||
## Quickstart
|
|
||||||
```shell
|
|
||||||
#构建docker镜像
|
|
||||||
docker build . -t metax_c500_vl
|
|
||||||
|
|
||||||
#运行docker容器
|
运行于【沐曦曦云C】系列算力卡的【视觉多模态】引擎,基于 transformer 引擎进行架构特别适配优化,支持 DeepSeek-OCR最新开源模型
|
||||||
docker run -it -p 10055:8000 --device=/dev/mxcd --device=/dev/dri -v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro --name metax_c500_vl_test metax_c500_vl
|
|
||||||
```
|
## QuickStart
|
||||||
等待模型Load完成,出现以下日志时,代表服务启动成功, 且模型加载完成
|
|
||||||
```shell
|
1、从 modelscope 上下载 DeepSeek-OCR 模型
|
||||||
INFO: Application startup complete.
|
```python
|
||||||
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
|
modelscope download --model deepseek-ai/DeepSeek-OCR README.md --local_dir ./model
|
||||||
```
|
```
|
||||||
执行测试程序
|
将仓库里的 deepencoder.py 复制到模型目录覆盖原本的文件
|
||||||
```shell
|
|
||||||
python3 test.py
|
2、使用Dockerfile生成镜像
|
||||||
```
|
从仓库的【软件包】栏目下载基础镜像 git.modelhub.org.cn:9443/enginex-metax/maca-c500-pytorch:2.33.0.6-torch2.6-py310-ubuntu24.04-amd64
|
||||||
测试程序执行结果
|
|
||||||
```
|
使用 Dockerfile 生成 镜像
|
||||||
Succeed!
|
```python
|
||||||
Response: {'output_text': '这幅图片包含几个元素,共同营造出宁静的氛围。主要对象是一个坐在沙滩上的金毛寻回犬和一个穿着格子衬衫的人。狗似乎正与这个人互动,可能是在玩耍或训练,因为它的爪子和人的手在接触。狗戴着颜色鲜艳的项圈,表明它可能接受过训练或习惯于与人互动。这个人看起来很放松,微笑着,暗示着他们之间的亲密关系。背景是一片宁静的海滩,太阳低垂在地平线上,为场景投射出温暖的金色光线。这可能是一天中的早晨或傍晚,因为光线柔和而扩散。海滩上没有其他人,强调了两个人之间的个人时刻。这张图片唤起了和平、陪伴和简单之美的感觉。'}
|
docker build -f Dockerfile -t metax:deepseek_ocr .
|
||||||
```
|
```
|
||||||
停止docker容器
|
|
||||||
```
|
3、启动docker
|
||||||
docker stop metax_c500_translation_test
|
```python
|
||||||
```
|
metax-docker run -it --rm \
|
||||||
## 模型支持
|
--gpus=[0] \
|
||||||
在Quickstart中运行容器时,通过磁盘目录挂载的方式,指定模型的类型和具体的模型名称,即:
|
-v ./model:/model \
|
||||||
```
|
-p 10086:80 \
|
||||||
-v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro
|
metax:deepseek_ocr
|
||||||
```
|
```
|
||||||
目前支持MiniCPM模型, 参考https://modelscope.cn/models/OpenBMB/MiniCPM-V-4
|
|
||||||
|
4、测试服务
|
||||||
|
```python
|
||||||
|
curl -X POST http://localhost:10086/generate \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"messages": [{"role": "user", "content": "你好"}],
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
|||||||
223
app.py
Normal file
223
app.py
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
import os
|
||||||
|
import io
|
||||||
|
import time
|
||||||
|
import base64
|
||||||
|
import shutil
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from starlette.responses import JSONResponse
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from modelscope import AutoModel, AutoTokenizer
|
||||||
|
|
||||||
|
# -------- Configuration --------
# Directory holding the DeepSeek-OCR weights; the Dockerfile mounts it at /model.
# NOTE(review): the env var name "DEESEEK_MODEL_DIR" looks like a typo for
# "DEEPSEEK_MODEL_DIR" — confirm with deployments before renaming it.
MODEL_DIR = os.environ.get("DEESEEK_MODEL_DIR", "/model")
# Preferred dtype name; currently informational only — the load code below
# casts directly to bfloat16 regardless of this value.
MODEL_PREFERRED_DTYPE = os.environ.get("DEESEEK_DTYPE", "bfloat16")  # or float16/float32

# -------- FastAPI app --------
app = FastAPI(title="DeepSeek-OCR vllm-format wrapper")
|
||||||
|
|
||||||
|
class GenerateRequest(BaseModel):
    """Request body for POST /generate, mirroring the vLLM chat-messages shape."""

    # vllm-style chat messages; images arrive as "image_url" content parts.
    messages: List[Dict[str, Any]]
    # optional params mapping to your OCR infer options
    # (forwarded verbatim to model.infer)
    base_size: Optional[int] = 1024
    image_size: Optional[int] = 640
    crop_mode: Optional[bool] = True
    save_results: Optional[bool] = True
    test_compress: Optional[bool] = True
|
||||||
|
|
||||||
|
def _decode_data_uri_image(data_uri: str) -> Image.Image:
    """Decode a ``data:image/...;base64,...`` URI into an RGB PIL image.

    Raises ValueError when *data_uri* is not a data URI.
    """
    if not data_uri.startswith("data:"):
        raise ValueError("Not a data URI")
    _header, payload = data_uri.split(",", 1)
    raw_bytes = base64.b64decode(payload)
    buffer = io.BytesIO(raw_bytes)
    return Image.open(buffer).convert("RGB")
|
||||||
|
|
||||||
|
# Load tokenizer + model
# Runs at import time so the model is ready before the first request;
# tokenizer/model failures are re-raised and abort startup.
print("Loading tokenizer and model...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, trust_remote_code=True)
except Exception as e:
    print(f"Failed to load tokenizer from {MODEL_DIR}: {e}")
    raise

try:
    model = AutoModel.from_pretrained(MODEL_DIR, trust_remote_code=True, use_safetensors=True)
except Exception as e:
    print(f"Failed to load model from {MODEL_DIR}: {e}")
    raise

# move to device and set dtype if possible
# Best-effort: if CUDA or bfloat16 is unavailable the model is left as
# loaded and only a warning is printed (startup continues).
try:
    model = model.eval().cuda().to(torch.bfloat16)
except Exception as e:
    print(f"Warning while preparing model device/dtype: {e}")

print("Model loaded and prepared.")
|
||||||
|
|
||||||
|
# -------- Routes --------
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
def health_check():
|
||||||
|
return JSONResponse(status_code=200, content={"status": "ok"})
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/generate")
|
||||||
|
def generate(req: GenerateRequest):
|
||||||
|
messages = req.messages
|
||||||
|
if not messages or not isinstance(messages, list):
|
||||||
|
raise HTTPException(status_code=400, detail="messages must be a non-empty list")
|
||||||
|
|
||||||
|
# Convert vllm-style messages -> conversation format
|
||||||
|
conversation = []
|
||||||
|
for m in messages:
|
||||||
|
role = m.get("role", "user")
|
||||||
|
raw_content = m.get("content", [])
|
||||||
|
content_list = []
|
||||||
|
for c in raw_content:
|
||||||
|
ctype = c.get("type")
|
||||||
|
if ctype == "image_url":
|
||||||
|
url = None
|
||||||
|
if isinstance(c.get("image_url"), dict):
|
||||||
|
url = c["image_url"].get("url")
|
||||||
|
else:
|
||||||
|
url = c.get("image_url")
|
||||||
|
content_list.append({"type": "image", "image": url})
|
||||||
|
elif ctype == "text":
|
||||||
|
content_list.append({"type": "text", "text": c.get("text", "")})
|
||||||
|
else:
|
||||||
|
content_list.append(c)
|
||||||
|
conversation.append({"role": role, "content": content_list})
|
||||||
|
|
||||||
|
# collect images (data URIs will be decoded into temporary files)
|
||||||
|
images_for_infer = []
|
||||||
|
temp_files = []
|
||||||
|
try:
|
||||||
|
for msg in conversation:
|
||||||
|
for c in msg["content"]:
|
||||||
|
if c.get("type") == "image":
|
||||||
|
img_ref = c.get("image")
|
||||||
|
if isinstance(img_ref, str) and img_ref.startswith("data:"):
|
||||||
|
try:
|
||||||
|
pil = _decode_data_uri_image(img_ref)
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=400, detail=f"failed to decode data URI image: {e}")
|
||||||
|
# save to temp file so model.infer can read path if it expects a path
|
||||||
|
tpath = os.path.join("/tmp", f"deepproc_{int(time.time()*1000)}.png")
|
||||||
|
pil.save(tpath)
|
||||||
|
temp_files.append(tpath)
|
||||||
|
images_for_infer.append(tpath)
|
||||||
|
else:
|
||||||
|
# assume it's a path or URL acceptable to model.infer
|
||||||
|
images_for_infer.append(img_ref)
|
||||||
|
|
||||||
|
# Prepare prompt: for DeepSeek-OCR we typically pass something like '<image>\nFree OCR.' as in your example.
|
||||||
|
# Allow overriding by looking for a text content in the messages.
|
||||||
|
# prompt_text = None
|
||||||
|
# for msg in conversation:
|
||||||
|
# for c in msg["content"]:
|
||||||
|
# if c.get("type") == "text" and c.get("text"):
|
||||||
|
# prompt_text = c.get("text")
|
||||||
|
# break
|
||||||
|
# if prompt_text:
|
||||||
|
# break
|
||||||
|
# if not prompt_text:
|
||||||
|
prompt_text = "<image>\nFree OCR." # default prompt
|
||||||
|
|
||||||
|
# call model.infer; support single image or batch (here we will pass the first image if multiple)
|
||||||
|
if len(images_for_infer) == 0:
|
||||||
|
raise HTTPException(status_code=400, detail="no images provided")
|
||||||
|
|
||||||
|
# Use the first image by default; you can extend to batch inference.
|
||||||
|
image_input = images_for_infer[0]
|
||||||
|
|
||||||
|
output_path = "./output/" if not hasattr(req, 'output_path') else getattr(req, 'output_path')
|
||||||
|
os.makedirs(output_path, exist_ok=True)
|
||||||
|
|
||||||
|
# start_time = time.time()
|
||||||
|
# The example uses: model.infer(tokenizer, prompt, image_file=image_file, output_path=..., base_size=..., ...)
|
||||||
|
try:
|
||||||
|
res = model.infer(
|
||||||
|
tokenizer,
|
||||||
|
prompt=prompt_text,
|
||||||
|
image_file=image_input,
|
||||||
|
output_path="./output/", #if not req.save_results else os.path.join(MODEL_DIR, "infer_out"),
|
||||||
|
base_size=req.base_size,
|
||||||
|
image_size=req.image_size,
|
||||||
|
crop_mode=req.crop_mode,
|
||||||
|
save_results=req.save_results,
|
||||||
|
test_compress=req.test_compress,
|
||||||
|
)
|
||||||
|
except TypeError:
|
||||||
|
# fallback: try without named args if certain impls expect positional
|
||||||
|
res = model.infer(tokenizer, prompt_text, image_input)
|
||||||
|
|
||||||
|
# end_time = time.time()
|
||||||
|
# elapsed = end_time - start_time
|
||||||
|
|
||||||
|
print ("res:\n", res)
|
||||||
|
# print (elapsed)
|
||||||
|
|
||||||
|
result_mmd_path = os.path.join(output_path, "result.mmd")
|
||||||
|
|
||||||
|
try:
|
||||||
|
if os.path.isfile(result_mmd_path):
|
||||||
|
with open(result_mmd_path, "r", encoding="utf-8") as f:
|
||||||
|
file_content = f.read().strip()
|
||||||
|
if file_content:
|
||||||
|
ocr_text = file_content
|
||||||
|
except Exception as e:
|
||||||
|
# log but don't fail; we'll fall back to parsing the model response
|
||||||
|
try:
|
||||||
|
logger.warning(f"Failed to read {result_mmd_path}: {e}")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# prepare response content; `res` may be a dict or string depending on model impl
|
||||||
|
# ocr_text = None
|
||||||
|
# if isinstance(res, dict):
|
||||||
|
# # try common keys
|
||||||
|
# ocr_text = res.get("text") or res.get("result") or res.get("ocr_text")
|
||||||
|
# elif isinstance(res, (list, tuple)):
|
||||||
|
# # try first element
|
||||||
|
# ocr_text = res[0] if len(res) > 0 else None
|
||||||
|
# else:
|
||||||
|
# ocr_text = str(res)
|
||||||
|
|
||||||
|
# if ocr_text is None:
|
||||||
|
# ocr_text = str(res)
|
||||||
|
|
||||||
|
response = {
|
||||||
|
"id": "chatcmpl-deepseek",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"created": int(time.time()),
|
||||||
|
"model": os.path.basename(MODEL_DIR),
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"message": {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": ocr_text,
|
||||||
|
},
|
||||||
|
"finish_reason": "stop",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
return JSONResponse(response)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# cleanup temp files we created
|
||||||
|
for t in temp_files:
|
||||||
|
try:
|
||||||
|
os.remove(t)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import uvicorn
|
||||||
|
uvicorn.run(app, host="0.0.0.0", port=80)
|
||||||
1058
deepencoder.py
Normal file
1058
deepencoder.py
Normal file
File diff suppressed because it is too large
Load Diff
12
logger.py
12
logger.py
@@ -1,12 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
|
|
||||||
# Configure the root logger once at import time; the LOGLEVEL env var
# overrides the default INFO level.
logging.basicConfig(
    format="%(asctime)s %(name)-12s %(levelname)-4s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=os.environ.get("LOGLEVEL", "INFO"),
)
|
|
||||||
|
|
||||||
def get_logger(file):
    """Return the logging.Logger registered under the name *file*."""
    named_logger = logging.getLogger(file)
    return named_logger
|
|
||||||
84
server.py
84
server.py
@@ -1,84 +0,0 @@
|
|||||||
import base64
|
|
||||||
import gc
|
|
||||||
import io
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
import uvicorn
|
|
||||||
from typing import List, Optional, Dict, Any, Tuple
|
|
||||||
|
|
||||||
import torch
|
|
||||||
|
|
||||||
from PIL import Image
|
|
||||||
from fastapi import FastAPI, HTTPException, Query
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from transformers import (AutoTokenizer, AutoConfig, AutoModelForCausalLM, AutoModelForVision2Seq, AutoModel)
|
|
||||||
|
|
||||||
import logger
|
|
||||||
log = logger.get_logger(__file__)
|
|
||||||
|
|
||||||
app = FastAPI()

# Populated by load_model() at startup.
# NOTE(review): `device` is declared in load_model's global list but never
# assigned anywhere visible — confirm it is actually used.
model_type = None
model = None
device = None
tokenizer = None
|
|
||||||
|
|
||||||
class GenParams(BaseModel):
    """Generation options forwarded to the model's chat call."""

    max_new_tokens: int = 128
    temperature: float = 0.0
    top_p: float = 1.0
    do_sample: bool = False
|
|
||||||
|
|
||||||
class InferRequest(BaseModel):
    """Request body for POST /infer."""

    prompt: str
    generation: GenParams = GenParams()
    dtype: str = "auto"  # "auto"|"float16"|"bfloat16"|"float32"
    warmup_runs: int = 1
    measure_token_times: bool = False
|
|
||||||
|
|
||||||
@app.on_event("startup")
|
|
||||||
def load_model():
|
|
||||||
log.info("loading model")
|
|
||||||
global status, device, model_type, model, tokenizer
|
|
||||||
|
|
||||||
model_path = "/model"
|
|
||||||
cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
|
|
||||||
model_type = cfg.model_type
|
|
||||||
log.info(f"model type: {model_type}")
|
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=True)
|
|
||||||
|
|
||||||
model = AutoModel.from_pretrained(model_path, torch_dtype=torch.float32,
|
|
||||||
device_map=None, trust_remote_code=True)
|
|
||||||
model.to("cuda")
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
status = "success"
|
|
||||||
log.info(f"model loaded successfully")
|
|
||||||
|
|
||||||
@app.post("/infer")
|
|
||||||
def infer(req: InferRequest):
|
|
||||||
image = Image.open('1.PNG').convert('RGB')
|
|
||||||
|
|
||||||
if model_type == "minicpmv":
|
|
||||||
text = handle_minicpmv(image, req.prompt, req.generation)
|
|
||||||
log.info(f"text={text}")
|
|
||||||
|
|
||||||
result = dict()
|
|
||||||
result["output_text"] = text
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def handle_minicpmv(image: Image.Image, prompt: str, gen: GenParams):
    """Run MiniCPM-V chat inference for a single-turn user prompt."""
    # model.chat expects the conversation as a list of role/content dicts.
    conversation = [{"role": "user", "content": prompt}]

    # Delegate to the model's built-in chat method (non-streaming).
    reply = model.chat(
        image=image,
        msgs=conversation,
        tokenizer=tokenizer,
        sampling=gen.do_sample,
        temperature=gen.temperature,
        stream=False,
    )
    return reply
|
|
||||||
|
|
||||||
# Script entry point. NOTE(review): workers=1 — presumably because the model
# lives in module-level globals; confirm before raising the worker count.
if __name__ == '__main__':
    uvicorn.run("server:app", host="0.0.0.0", port=8000, workers=1, access_log=False)
|
|
||||||
|
|
||||||
30
test.py
30
test.py
@@ -1,30 +0,0 @@
|
|||||||
import requests
|
|
||||||
|
|
||||||
def model_infer(vlm_url: str, payload):
    """POST *payload* to the engine's /infer endpoint and print the outcome."""
    try:
        resp = requests.post(vlm_url + "/infer", json=payload)
        if resp.status_code == 200:
            print("Succeed!")
            print("Response:", resp.json())
        else:
            print(f"Failed,code: {resp.status_code}")
            print("Error detail:", resp.text)
    except requests.exceptions.RequestException as e:
        print("request error:", str(e))
|
|
||||||
|
|
||||||
# Smoke-test payload: short sampled generation against the local container
# published on port 10055 (see the README's docker run example).
payload = {
    "prompt": "图片有什么?详细描述",
    "generation": {
        "max_new_tokens": 64,
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True
    },
    "dtype": "auto",
    "warmup_runs": 0,
    "measure_token_times": False
}

url = "http://127.0.0.1:10055"
# Fires the request at import time — this module is a script, not a library.
model_infer(url, payload)
|
|
||||||
Reference in New Issue
Block a user