init

2025-11-01 12:06:09 +08:00
8 changed files with 1324 additions and 168 deletions
--- a/1.PNG
+++ b/1.PNG
--- a/16
+++ b/16
@@ -1,13 +1,11 @@
-FROM maca-c500-pytorch:2.33.0.6-torch2.6-py310-ubuntu24.04-amd64
-
-ENV HF_ENDPOINT=https://hf-mirror.com
-ENV PATH=/opt/conda/bin:${PATH}
-
-RUN pip install transformers==4.50.0 uvicorn\[standard\] fastapi
+FROM git.modelhub.org.cn:9443/enginex-metax/maca-c500-pytorch:2.33.0.6-torch2.6-py310-ubuntu24.04-amd64

 WORKDIR /app

-COPY ./ /app
+RUN /opt/conda/bin/pip install transformers==4.46.3 einops addict easydict modelscope uvicorn fastapi

-EXPOSE 8000
-CMD ["sh", "-c", "python3 server.py"]
+COPY app.py .
+
+ENTRYPOINT []
+
+CMD ["/opt/conda/bin/python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
--- a/README.md
+++ b/README.md
@@ -1,35 +1,38 @@
-# enginex-metax-c500-translation
-# translation-transformers
-## Quickstart
-```shell
-#构建docker镜像
-docker build . -t metax_c500_vl
+# enginex-metax-c500-transformer-deepseekOCR

-#运行docker容器
-docker run -it -p 10055:8000 --device=/dev/mxcd --device=/dev/dri -v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro --name metax_c500_vl_test metax_c500_vl
-```
-等待模型Load完成，出现以下日志时，代表服务启动成功, 且模型加载完成
-```shell
-INFO:     Application startup complete.
-INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
-```
-执行测试程序
-```shell
-python3 test.py
-```
-测试程序执行结果
-```
-Succeed!
-Response: {'output_text': '这幅图片包含几个元素，共同营造出宁静的氛围。主要对象是一个坐在沙滩上的金毛寻回犬和一个穿着格子衬衫的人。狗似乎正与这个人互动，可能是在玩耍或训练，因为它的爪子和人的手在接触。狗戴着颜色鲜艳的项圈，表明它可能接受过训练或习惯于与人互动。这个人看起来很放松，微笑着，暗示着他们之间的亲密关系。背景是一片宁静的海滩，太阳低垂在地平线上，为场景投射出温暖的金色光线。这可能是一天中的早晨或傍晚，因为光线柔和而扩散。海滩上没有其他人，强调了两个人之间的个人时刻。这张图片唤起了和平、陪伴和简单之美的感觉。'}
-```
-停止docker容器
-```
-docker stop metax_c500_translation_test
-```
-## 模型支持
-在Quickstart中运行容器时，通过磁盘目录挂载的方式，指定模型的类型和具体的模型名称，即：
-```
-v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro
-```
-目前支持MiniCPM模型, 参考https://modelscope.cn/models/OpenBMB/MiniCPM-V-4 
+运行于【沐曦曦云C】系列算力卡的【视觉多模态】引擎，基于 transformer 引擎进行架构特别适配优化，支持 DeepSeek-OCR最新开源模型
+
+## QuickStart
+
+1、从 modelscope 上下载 DeepSeek-OCR 模型
+```shell
+modelscope download --model deepseek-ai/DeepSeek-OCR --local_dir ./model
+```
+将仓库里的 deepencoder.py 复制到模型目录覆盖原本的文件
+
+2、使用Dockerfile生成镜像
+从仓库的【软件包】栏目下载基础镜像 git.modelhub.org.cn:9443/enginex-metax/maca-c500-pytorch:2.33.0.6-torch2.6-py310-ubuntu24.04-amd64
+
+使用 Dockerfile 生成镜像
+```shell
+docker build -f Dockerfile -t metax:deepseek_ocr .
+```
+
+3、启动docker
+```shell
+metax-docker run -it --rm \
+  --gpus=[0] \
+  -v ./model:/model \
+  -p 10086:80 \
+  metax:deepseek_ocr
+```
+
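+4、测试服务（请求中必须至少包含一张图片，可使用 base64 data URI）
+```shell
+curl -X POST http://localhost:10086/generate \
+  -H "Content-Type: application/json" \
+  -d '{
+    "messages": [{"role": "user", "content": [{"type": "image_url", "image_url": {"url": "data:image/png;base64,<图片的base64编码>"}}]}]
+  }'
+```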

--- a/app.py
+++ b/app.py
@@ -0,0 +1,223 @@
+import os
+import io
+import time
+import base64
+import shutil
+from typing import Any, Dict, List, Optional
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from starlette.responses import JSONResponse
+from PIL import Image
+
+import torch
+from modelscope import AutoModel, AutoTokenizer
+
+# -------- Configuration --------
+MODEL_DIR = os.environ.get("DEESEEK_MODEL_DIR", "/model")
+MODEL_PREFERRED_DTYPE = os.environ.get("DEESEEK_DTYPE", "bfloat16")  # or float16/float32
+
+# -------- FastAPI app --------
+app = FastAPI(title="DeepSeek-OCR vllm-format wrapper")
+
+class GenerateRequest(BaseModel):
+    """Request body accepted by ``POST /generate``.
+
+    ``messages`` is a chat-style list of dicts; the handler reads each entry's
+    ``"role"`` and ``"content"`` keys. Every other field is forwarded
+    unchanged as a keyword argument of the same name to ``model.infer``.
+    """
+
+    # Chat messages; image parts are looked up inside each message's "content".
+    messages: List[Dict[str, Any]]
+    # optional params mapping to your OCR infer options
+    # NOTE(review): the exact meaning of the sizes/flags below is defined by
+    # the model's own infer() implementation, not by this file — confirm there.
+    base_size: Optional[int] = 1024
+    image_size: Optional[int] = 640
+    crop_mode: Optional[bool] = True
+    # The handler reads "result.mmd" from the output directory after inference;
+    # presumably that file is only written when this flag is true — TODO confirm.
+    save_results: Optional[bool] = True
+    test_compress: Optional[bool] = True
+
+def _decode_data_uri_image(data_uri: str) -> Image.Image:
+    """Decode a data:image/...;base64,xxxx URI into PIL.Image."""
+    if not data_uri.startswith("data:"):
+        raise ValueError("Not a data URI")
+    header, b64 = data_uri.split(",", 1)
+    decoded = base64.b64decode(b64)
+    return Image.open(io.BytesIO(decoded)).convert("RGB")
+
+# Load tokenizer + model
+print("Loading tokenizer and model...")
+try:
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, trust_remote_code=True)
+except Exception as e:
+    print(f"Failed to load tokenizer from {MODEL_DIR}: {e}")
+    raise
+
+try:
+    model = AutoModel.from_pretrained(MODEL_DIR, trust_remote_code=True, use_safetensors=True)
+except Exception as e:
+    print(f"Failed to load model from {MODEL_DIR}: {e}")
+    raise
+
+# move to device and set dtype if possible
+try:
+    model = model.eval().cuda().to(torch.bfloat16)
+except Exception as e:
+    print(f"Warning while preparing model device/dtype: {e}")
+
+print("Model loaded and prepared.")
+
+# -------- Routes --------
+
+@app.get("/health")
+def health_check():
+    return JSONResponse(status_code=200, content={"status": "ok"})
+
+
+@app.post("/generate")
+def generate(req: GenerateRequest):
+    messages = req.messages
+    if not messages or not isinstance(messages, list):
+        raise HTTPException(status_code=400, detail="messages must be a non-empty list")
+
+    # Convert vllm-style messages -> conversation format
+    conversation = []
+    for m in messages:
+        role = m.get("role", "user")
+        raw_content = m.get("content", [])
+        content_list = []
+        for c in raw_content:
+            ctype = c.get("type")
+            if ctype == "image_url":
+                url = None
+                if isinstance(c.get("image_url"), dict):
+                    url = c["image_url"].get("url")
+                else:
+                    url = c.get("image_url")
+                content_list.append({"type": "image", "image": url})
+            elif ctype == "text":
+                content_list.append({"type": "text", "text": c.get("text", "")})
+            else:
+                content_list.append(c)
+        conversation.append({"role": role, "content": content_list})
+
+    # collect images (data URIs will be decoded into temporary files)
+    images_for_infer = []
+    temp_files = []
+    try:
+        for msg in conversation:
+            for c in msg["content"]:
+                if c.get("type") == "image":
+                    img_ref = c.get("image")
+                    if isinstance(img_ref, str) and img_ref.startswith("data:"):
+                        try:
+                            pil = _decode_data_uri_image(img_ref)
+                        except Exception as e:
+                            raise HTTPException(status_code=400, detail=f"failed to decode data URI image: {e}")
+                        # save to temp file so model.infer can read path if it expects a path
+                        tpath = os.path.join("/tmp", f"deepproc_{int(time.time()*1000)}.png")
+                        pil.save(tpath)
+                        temp_files.append(tpath)
+                        images_for_infer.append(tpath)
+                    else:
+                        # assume it's a path or URL acceptable to model.infer
+                        images_for_infer.append(img_ref)
+
+        # Prepare prompt: for DeepSeek-OCR we typically pass something like '<image>\nFree OCR.' as in your example.
+        # Allow overriding by looking for a text content in the messages.
+        # prompt_text = None
+        # for msg in conversation:
+        #     for c in msg["content"]:
+        #         if c.get("type") == "text" and c.get("text"):
+        #             prompt_text = c.get("text")
+        #             break
+        #     if prompt_text:
+        #         break
+        # if not prompt_text:
+        prompt_text = "<image>\nFree OCR."  # default prompt
+
+        # call model.infer; support single image or batch (here we will pass the first image if multiple)
+        if len(images_for_infer) == 0:
+            raise HTTPException(status_code=400, detail="no images provided")
+
+        # Use the first image by default; you can extend to batch inference.
+        image_input = images_for_infer[0]
+
+        output_path = "./output/" if not hasattr(req, 'output_path') else getattr(req, 'output_path')
+        os.makedirs(output_path, exist_ok=True)
+
+        # start_time = time.time()
+        # The example uses: model.infer(tokenizer, prompt, image_file=image_file, output_path=..., base_size=..., ...)
+        try:
+            res = model.infer(
+                tokenizer,
+                prompt=prompt_text,
+                image_file=image_input,
+                output_path="./output/", #if not req.save_results else os.path.join(MODEL_DIR, "infer_out"),
+                base_size=req.base_size,
+                image_size=req.image_size,
+                crop_mode=req.crop_mode,
+                save_results=req.save_results,
+                test_compress=req.test_compress,
+            )
+        except TypeError:
+            # fallback: try without named args if certain impls expect positional
+            res = model.infer(tokenizer, prompt_text, image_input)
+
+        # end_time = time.time()
+        # elapsed = end_time - start_time
+
+        print ("res:\n", res)
+        # print (elapsed)
+
+        result_mmd_path = os.path.join(output_path, "result.mmd")
+
+        try:
+            if os.path.isfile(result_mmd_path):
+                with open(result_mmd_path, "r", encoding="utf-8") as f:
+                    file_content = f.read().strip()
+                    if file_content:
+                        ocr_text = file_content
+        except Exception as e:
+        # log but don't fail; we'll fall back to parsing the model response
+            try:
+                logger.warning(f"Failed to read {result_mmd_path}: {e}")
+            except Exception:
+                pass
+
+        # prepare response content; `res` may be a dict or string depending on model impl
+        # ocr_text = None
+        # if isinstance(res, dict):
+        #     # try common keys
+        #     ocr_text = res.get("text") or res.get("result") or res.get("ocr_text")
+        # elif isinstance(res, (list, tuple)):
+        #     # try first element
+        #     ocr_text = res[0] if len(res) > 0 else None
+        # else:
+        #     ocr_text = str(res)
+
+        # if ocr_text is None:
+        #     ocr_text = str(res)
+
+        response = {
+            "id": "chatcmpl-deepseek",
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": os.path.basename(MODEL_DIR),
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": ocr_text,
+                    },
+                    "finish_reason": "stop",
+                }
+            ]
+        }
+
+        return JSONResponse(response)
+
+    finally:
+        # cleanup temp files we created
+        for t in temp_files:
+            try:
+                os.remove(t)
+            except Exception:
+                pass
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=80)
--- a/deepencoder.py
+++ b/deepencoder.py
--- a/logger.py
+++ b/logger.py
@@ -1,12 +0,0 @@
-# -*- coding: utf-8 -*-
-import logging
-import os
-
-logging.basicConfig(
-    format="%(asctime)s %(name)-12s %(levelname)-4s %(message)s",
-    datefmt="%Y-%m-%d %H:%M:%S",
-    level=os.environ.get("LOGLEVEL", "INFO"),
-)
-
-def get_logger(file):
-    return logging.getLogger(file)
--- a/server.py
+++ b/server.py
@@ -1,84 +0,0 @@
-import base64
-import gc
-import io
-import os
-import time
-import uvicorn
-from typing import List, Optional, Dict, Any, Tuple
-
-import torch
-
-from PIL import Image
-from fastapi import FastAPI, HTTPException, Query
-from pydantic import BaseModel
-from transformers import (AutoTokenizer, AutoConfig, AutoModelForCausalLM, AutoModelForVision2Seq, AutoModel)
-
-import logger
-log = logger.get_logger(__file__)
-
-app = FastAPI()
-
-model_type = None
-model = None
-device = None
-tokenizer = None
-
-class GenParams(BaseModel):
-    max_new_tokens: int = 128
-    temperature: float = 0.0
-    top_p: float = 1.0
-    do_sample: bool = False
-
-class InferRequest(BaseModel):
-    prompt: str
-    generation: GenParams = GenParams()
-    dtype: str = "auto"  # "auto"|"float16"|"bfloat16"|"float32"
-    warmup_runs: int = 1
-    measure_token_times: bool = False
-
-@app.on_event("startup")
-def load_model():
-    log.info("loading model")
-    global status, device, model_type, model, tokenizer
-
-    model_path = "/model"
-    cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
-    model_type = cfg.model_type
-    log.info(f"model type: {model_type}")
-
-    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=True)
-
-    model = AutoModel.from_pretrained(model_path, torch_dtype=torch.float32,
-                device_map=None, trust_remote_code=True)
-    model.to("cuda")
-    model.eval()
-
-    status = "success"
-    log.info(f"model loaded successfully")
-
-@app.post("/infer")
-def infer(req: InferRequest):
-    image = Image.open('1.PNG').convert('RGB')
-    
-    if model_type == "minicpmv":
-        text = handle_minicpmv(image, req.prompt, req.generation)
-        log.info(f"text={text}")
-
-    result = dict()
-    result["output_text"] = text
-
-    return result
-
-def handle_minicpmv(image: Image.Image, prompt: str, gen: GenParams):
-    # Prepare msgs in the format expected by model.chat
-    msgs = [{"role": "user", "content": prompt}]
-
-    # Call the model's built-in chat method
-    response = model.chat(image=image, msgs=msgs, tokenizer=tokenizer,
-        sampling=gen.do_sample, temperature=gen.temperature, stream=False)
-
-    return response
-
-if __name__ == '__main__':
-    uvicorn.run("server:app", host="0.0.0.0", port=8000, workers=1, access_log=False)
-
--- a/test.py
+++ b/test.py
@@ -1,30 +0,0 @@
-import requests
-
-def model_infer(vlm_url: str, payload):
-    try:
-        response = requests.post(vlm_url + "/infer", json=payload)
-        if response.status_code == 200:
-            print("Succeed!")
-            print("Response:", response.json())
-        else:
-            print(f"Failed，code: {response.status_code}")
-            print("Error detail:", response.text)
-
-    except requests.exceptions.RequestException as e:
-        print("request error:", str(e))
-
-payload = { 
-    "prompt": "图片有什么？详细描述",
-    "generation": {
-        "max_new_tokens": 64, 
-        "temperature": 0.7,
-        "top_p": 0.9,
-        "do_sample": True
-    },  
-    "dtype": "auto",
-    "warmup_runs": 0,
-    "measure_token_times": False
-} 
-
-url = "http://127.0.0.1:10055"
-model_infer(url, payload)