Compare commits
1 Commits
main
...
deepseek-o
| Author | SHA1 | Date | |
|---|---|---|---|
| faded11807 |
16
Dockerfile
16
Dockerfile
@@ -1,13 +1,11 @@
|
|||||||
FROM maca-c500-pytorch:2.33.0.6-torch2.6-py310-ubuntu24.04-amd64
|
FROM git.modelhub.org.cn:9443/enginex-metax/maca-c500-pytorch:2.33.0.6-torch2.6-py310-ubuntu24.04-amd64
|
||||||
|
|
||||||
ENV HF_ENDPOINT=https://hf-mirror.com
|
|
||||||
ENV PATH=/opt/conda/bin:${PATH}
|
|
||||||
|
|
||||||
RUN pip install transformers==4.50.0 uvicorn\[standard\] fastapi
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
COPY ./ /app
|
RUN /opt/conda/bin/pip install transformers==4.46.3 einops addict easydict modelscope uvicorn fastapi
|
||||||
|
|
||||||
EXPOSE 8000
|
COPY app.py .
|
||||||
CMD ["sh", "-c", "python3 server.py"]
|
|
||||||
|
ENTRYPOINT []
|
||||||
|
|
||||||
|
CMD ["/opt/conda/bin/python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
|
||||||
|
|||||||
69
README.md
69
README.md
@@ -1,35 +1,38 @@
|
|||||||
# enginex-metax-c500-translation
|
# enginex-metax-c500-transformer-deepseekOCR
|
||||||
# translation-transformers
|
|
||||||
## Quickstart
|
|
||||||
```shell
|
|
||||||
#构建docker镜像
|
|
||||||
docker build . -t metax_c500_vl
|
|
||||||
|
|
||||||
#运行docker容器
|
运行于【沐曦曦云C】系列算力卡的【视觉多模态】引擎,基于 transformer 引擎进行架构特别适配优化,支持 DeepSeek-OCR最新开源模型
|
||||||
docker run -it -p 10055:8000 --device=/dev/mxcd --device=/dev/dri -v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro --name metax_c500_vl_test metax_c500_vl
|
|
||||||
```
|
## QuickStart
|
||||||
等待模型Load完成,出现以下日志时,代表服务启动成功, 且模型加载完成
|
|
||||||
```shell
|
1、从 modelscope 上下载 DeepSeek-OCR 模型
|
||||||
INFO: Application startup complete.
|
```python
|
||||||
INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
|
modelscope download --model deepseek-ai/DeepSeek-OCR README.md --local_dir ./model
|
||||||
```
|
```
|
||||||
执行测试程序
|
将仓库里的 deepencoder.py 复制到模型目录覆盖原本的文件
|
||||||
```shell
|
|
||||||
python3 test.py
|
2、使用Dockerfile生成镜像
|
||||||
```
|
从仓库的【软件包】栏目下载基础镜像 git.modelhub.org.cn:9443/enginex-metax/maca-c500-pytorch:2.33.0.6-torch2.6-py310-ubuntu24.04-amd64
|
||||||
测试程序执行结果
|
|
||||||
```
|
使用 Dockerfile 生成 镜像
|
||||||
Succeed!
|
```python
|
||||||
Response: {'output_text': '这幅图片包含几个元素,共同营造出宁静的氛围。主要对象是一个坐在沙滩上的金毛寻回犬和一个穿着格子衬衫的人。狗似乎正与这个人互动,可能是在玩耍或训练,因为它的爪子和人的手在接触。狗戴着颜色鲜艳的项圈,表明它可能接受过训练或习惯于与人互动。这个人看起来很放松,微笑着,暗示着他们之间的亲密关系。背景是一片宁静的海滩,太阳低垂在地平线上,为场景投射出温暖的金色光线。这可能是一天中的早晨或傍晚,因为光线柔和而扩散。海滩上没有其他人,强调了两个人之间的个人时刻。这张图片唤起了和平、陪伴和简单之美的感觉。'}
|
docker build -f Dockerfile -t metax:deepseek_ocr .
|
||||||
```
|
```
|
||||||
停止docker容器
|
|
||||||
```
|
3、启动docker
|
||||||
docker stop metax_c500_translation_test
|
```python
|
||||||
```
|
metax-docker run -it --rm \
|
||||||
## 模型支持
|
--gpus=[0] \
|
||||||
在Quickstart中运行容器时,通过磁盘目录挂载的方式,指定模型的类型和具体的模型名称,即:
|
-v ./model:/model \
|
||||||
```
|
-p 10086:80 \
|
||||||
-v /home/aiyueqi/mnt/models/vlm/MiniCPM-V-4:/model:ro
|
metax:deepseek_ocr
|
||||||
```
|
```
|
||||||
目前支持MiniCPM模型, 参考https://modelscope.cn/models/OpenBMB/MiniCPM-V-4
|
|
||||||
|
4、测试服务
|
||||||
|
```python
|
||||||
|
curl -X POST http://localhost:10086/generate \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"messages": [{"role": "user", "content": "你好"}],
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
|||||||
223
app.py
Normal file
223
app.py
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
import os
|
||||||
|
import io
|
||||||
|
import time
|
||||||
|
import base64
|
||||||
|
import shutil
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
from fastapi import FastAPI, HTTPException
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from starlette.responses import JSONResponse
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from modelscope import AutoModel, AutoTokenizer
|
||||||
|
|
||||||
|
# -------- Configuration --------
# Directory holding the DeepSeek-OCR weights; the Dockerfile mounts it at /model.
# NOTE(review): the env var name "DEESEEK_MODEL_DIR" looks like a typo for
# "DEEPSEEK_MODEL_DIR" — confirm with deployments before renaming it.
MODEL_DIR = os.environ.get("DEESEEK_MODEL_DIR", "/model")
# Preferred dtype name; currently informational only — the load code below
# casts directly to bfloat16 regardless of this value.
MODEL_PREFERRED_DTYPE = os.environ.get("DEESEEK_DTYPE", "bfloat16")  # or float16/float32

# -------- FastAPI app --------
app = FastAPI(title="DeepSeek-OCR vllm-format wrapper")
|
||||||
|
|
||||||
|
class GenerateRequest(BaseModel):
    """Request body for POST /generate, mirroring the vLLM chat-messages shape."""

    # vllm-style chat messages; images arrive as "image_url" content parts.
    messages: List[Dict[str, Any]]
    # optional params mapping to your OCR infer options
    # (forwarded verbatim to model.infer)
    base_size: Optional[int] = 1024
    image_size: Optional[int] = 640
    crop_mode: Optional[bool] = True
    save_results: Optional[bool] = True
    test_compress: Optional[bool] = True
|
||||||
|
|
||||||
|
def _decode_data_uri_image(data_uri: str) -> Image.Image:
    """Decode a ``data:image/...;base64,...`` URI into an RGB PIL image.

    Raises ValueError when *data_uri* is not a data URI.
    """
    if not data_uri.startswith("data:"):
        raise ValueError("Not a data URI")
    _header, payload = data_uri.split(",", 1)
    raw_bytes = base64.b64decode(payload)
    buffer = io.BytesIO(raw_bytes)
    return Image.open(buffer).convert("RGB")
|
||||||
|
|
||||||
|
# Load tokenizer + model
# Runs at import time so the model is ready before the first request;
# tokenizer/model failures are re-raised and abort startup.
print("Loading tokenizer and model...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, trust_remote_code=True)
except Exception as e:
    print(f"Failed to load tokenizer from {MODEL_DIR}: {e}")
    raise

try:
    model = AutoModel.from_pretrained(MODEL_DIR, trust_remote_code=True, use_safetensors=True)
except Exception as e:
    print(f"Failed to load model from {MODEL_DIR}: {e}")
    raise

# move to device and set dtype if possible
# Best-effort: if CUDA or bfloat16 is unavailable the model is left as
# loaded and only a warning is printed (startup continues).
try:
    model = model.eval().cuda().to(torch.bfloat16)
except Exception as e:
    print(f"Warning while preparing model device/dtype: {e}")

print("Model loaded and prepared.")
|
||||||
|
|
||||||
|
# -------- Routes --------
|
||||||
|
|
||||||
|
@app.get("/health")
|
||||||
|
def health_check():
|
||||||
|
return JSONResponse(status_code=200, content={"status": "ok"})
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/generate")
|
||||||
|
def generate(req: GenerateRequest):
|
||||||
|
messages = req.messages
|
||||||
|
if not messages or not isinstance(messages, list):
|
||||||
|
raise HTTPException(status_code=400, detail="messages must be a non-empty list")
|
||||||
|
|
||||||
|
# Convert vllm-style messages -> conversation format
|
||||||
|
conversation = []
|
||||||
|
for m in messages:
|
||||||
|
role = m.get("role", "user")
|
||||||
|
raw_content = m.get("content", [])
|
||||||
|
content_list = []
|
||||||
|
for c in raw_content:
|
||||||
|
ctype = c.get("type")
|
||||||
|
if ctype == "image_url":
|
||||||
|
url = None
|
||||||
|
if isinstance(c.get("image_url"), dict):
|
||||||
|
url = c["image_url"].get("url")
|
||||||
|
else:
|
||||||
|
url = c.get("image_url")
|
||||||
|
content_list.append({"type": "image", "image": url})
|
||||||
|
elif ctype == "text":
|
||||||
|
content_list.append({"type": "text", "text": c.get("text", "")})
|
||||||
|
else:
|
||||||
|
content_list.append(c)
|
||||||
|
conversation.append({"role": role, "content": content_list})
|
||||||
|
|
||||||
|
# collect images (data URIs will be decoded into temporary files)
|
||||||
|
images_for_infer = []
|
||||||
|
temp_files = []
|
||||||
|
try:
|
||||||
|
for msg in conversation:
|
||||||
|
for c in msg["content"]:
|
||||||
|
if c.get("type") == "image":
|
||||||
|
img_ref = c.get("image")
|
||||||
|
if isinstance(img_ref, str) and img_ref.startswith("data:"):
|
||||||
|
try:
|
||||||
|
pil = _decode_data_uri_image(img_ref)
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=400, detail=f"failed to decode data URI image: {e}")
|
||||||
|
# save to temp file so model.infer can read path if it expects a path
|
||||||
|
tpath = os.path.join("/tmp", f"deepproc_{int(time.time()*1000)}.png")
|
||||||
|
pil.save(tpath)
|
||||||
|
temp_files.append(tpath)
|
||||||
|
images_for_infer.append(tpath)
|
||||||
|
else:
|
||||||
|
# assume it's a path or URL acceptable to model.infer
|
||||||
|
images_for_infer.append(img_ref)
|
||||||
|
|
||||||
|
# Prepare prompt: for DeepSeek-OCR we typically pass something like '<image>\nFree OCR.' as in your example.
|
||||||
|
# Allow overriding by looking for a text content in the messages.
|
||||||
|
# prompt_text = None
|
||||||
|
# for msg in conversation:
|
||||||
|
# for c in msg["content"]:
|
||||||
|
# if c.get("type") == "text" and c.get("text"):
|
||||||
|
# prompt_text = c.get("text")
|
||||||
|
# break
|
||||||
|
# if prompt_text:
|
||||||
|
# break
|
||||||
|
# if not prompt_text:
|
||||||
|
prompt_text = "<image>\nFree OCR." # default prompt
|
||||||
|
|
||||||
|
# call model.infer; support single image or batch (here we will pass the first image if multiple)
|
||||||
|
if len(images_for_infer) == 0:
|
||||||
|
raise HTTPException(status_code=400, detail="no images provided")
|
||||||
|
|
||||||
|
# Use the first image by default; you can extend to batch inference.
|
||||||
|
image_input = images_for_infer[0]
|
||||||
|
|
||||||
|
output_path = "./output/" if not hasattr(req, 'output_path') else getattr(req, 'output_path')
|
||||||
|
os.makedirs(output_path, exist_ok=True)
|
||||||
|
|
||||||
|
# start_time = time.time()
|
||||||
|
# The example uses: model.infer(tokenizer, prompt, image_file=image_file, output_path=..., base_size=..., ...)
|
||||||
|
try:
|
||||||
|
res = model.infer(
|
||||||
|
tokenizer,
|
||||||
|
prompt=prompt_text,
|
||||||
|
image_file=image_input,
|
||||||
|
output_path="./output/", #if not req.save_results else os.path.join(MODEL_DIR, "infer_out"),
|
||||||
|
base_size=req.base_size,
|
||||||
|
image_size=req.image_size,
|
||||||
|
crop_mode=req.crop_mode,
|
||||||
|
save_results=req.save_results,
|
||||||
|
test_compress=req.test_compress,
|
||||||
|
)
|
||||||
|
except TypeError:
|
||||||
|
# fallback: try without named args if certain impls expect positional
|
||||||
|
res = model.infer(tokenizer, prompt_text, image_input)
|
||||||
|
|
||||||
|
# end_time = time.time()
|
||||||
|
# elapsed = end_time - start_time
|
||||||
|
|
||||||
|
print ("res:\n", res)
|
||||||
|
# print (elapsed)
|
||||||
|
|
||||||
|
result_mmd_path = os.path.join(output_path, "result.mmd")
|
||||||
|
|
||||||
|
try:
|
||||||
|
if os.path.isfile(result_mmd_path):
|
||||||
|
with open(result_mmd_path, "r", encoding="utf-8") as f:
|
||||||
|
file_content = f.read().strip()
|
||||||
|
if file_content:
|
||||||
|
ocr_text = file_content
|
||||||
|
except Exception as e:
|
||||||
|
# log but don't fail; we'll fall back to parsing the model response
|
||||||
|
try:
|
||||||
|
logger.warning(f"Failed to read {result_mmd_path}: {e}")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# prepare response content; `res` may be a dict or string depending on model impl
|
||||||
|
# ocr_text = None
|
||||||
|
# if isinstance(res, dict):
|
||||||
|
# # try common keys
|
||||||
|
# ocr_text = res.get("text") or res.get("result") or res.get("ocr_text")
|
||||||
|
# elif isinstance(res, (list, tuple)):
|
||||||
|
# # try first element
|
||||||
|
# ocr_text = res[0] if len(res) > 0 else None
|
||||||
|
# else:
|
||||||
|
# ocr_text = str(res)
|
||||||
|
|
||||||
|
# if ocr_text is None:
|
||||||
|
# ocr_text = str(res)
|
||||||
|
|
||||||
|
response = {
|
||||||
|
"id": "chatcmpl-deepseek",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"created": int(time.time()),
|
||||||
|
"model": os.path.basename(MODEL_DIR),
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"message": {
|
||||||
|
"role": "assistant",
|
||||||
|
"content": ocr_text,
|
||||||
|
},
|
||||||
|
"finish_reason": "stop",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
return JSONResponse(response)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# cleanup temp files we created
|
||||||
|
for t in temp_files:
|
||||||
|
try:
|
||||||
|
os.remove(t)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import uvicorn
|
||||||
|
uvicorn.run(app, host="0.0.0.0", port=80)
|
||||||
1058
deepencoder.py
Normal file
1058
deepencoder.py
Normal file
File diff suppressed because it is too large
Load Diff
12
logger.py
12
logger.py
@@ -1,12 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
|
|
||||||
# Configure the root logger once at import time; the LOGLEVEL env var
# overrides the default INFO level.
logging.basicConfig(
    format="%(asctime)s %(name)-12s %(levelname)-4s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=os.environ.get("LOGLEVEL", "INFO"),
)
|
|
||||||
|
|
||||||
def get_logger(file):
    """Return the logging.Logger registered under the name *file*."""
    named_logger = logging.getLogger(file)
    return named_logger
|
|
||||||
84
server.py
84
server.py
@@ -1,84 +0,0 @@
|
|||||||
import base64
|
|
||||||
import gc
|
|
||||||
import io
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
import uvicorn
|
|
||||||
from typing import List, Optional, Dict, Any, Tuple
|
|
||||||
|
|
||||||
import torch
|
|
||||||
|
|
||||||
from PIL import Image
|
|
||||||
from fastapi import FastAPI, HTTPException, Query
|
|
||||||
from pydantic import BaseModel
|
|
||||||
from transformers import (AutoTokenizer, AutoConfig, AutoModelForCausalLM, AutoModelForVision2Seq, AutoModel)
|
|
||||||
|
|
||||||
import logger
|
|
||||||
log = logger.get_logger(__file__)
|
|
||||||
|
|
||||||
app = FastAPI()

# Populated by load_model() at startup.
# NOTE(review): `device` is declared in load_model's global list but never
# assigned anywhere visible — confirm it is actually used.
model_type = None
model = None
device = None
tokenizer = None
|
|
||||||
|
|
||||||
class GenParams(BaseModel):
    """Generation options forwarded to the model's chat call."""

    max_new_tokens: int = 128
    temperature: float = 0.0
    top_p: float = 1.0
    do_sample: bool = False
|
|
||||||
|
|
||||||
class InferRequest(BaseModel):
    """Request body for POST /infer."""

    prompt: str
    generation: GenParams = GenParams()
    dtype: str = "auto"  # "auto"|"float16"|"bfloat16"|"float32"
    warmup_runs: int = 1
    measure_token_times: bool = False
|
|
||||||
|
|
||||||
@app.on_event("startup")
|
|
||||||
def load_model():
|
|
||||||
log.info("loading model")
|
|
||||||
global status, device, model_type, model, tokenizer
|
|
||||||
|
|
||||||
model_path = "/model"
|
|
||||||
cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
|
|
||||||
model_type = cfg.model_type
|
|
||||||
log.info(f"model type: {model_type}")
|
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=True)
|
|
||||||
|
|
||||||
model = AutoModel.from_pretrained(model_path, torch_dtype=torch.float32,
|
|
||||||
device_map=None, trust_remote_code=True)
|
|
||||||
model.to("cuda")
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
status = "success"
|
|
||||||
log.info(f"model loaded successfully")
|
|
||||||
|
|
||||||
@app.post("/infer")
|
|
||||||
def infer(req: InferRequest):
|
|
||||||
image = Image.open('1.PNG').convert('RGB')
|
|
||||||
|
|
||||||
if model_type == "minicpmv":
|
|
||||||
text = handle_minicpmv(image, req.prompt, req.generation)
|
|
||||||
log.info(f"text={text}")
|
|
||||||
|
|
||||||
result = dict()
|
|
||||||
result["output_text"] = text
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def handle_minicpmv(image: Image.Image, prompt: str, gen: GenParams):
    """Run MiniCPM-V chat inference for a single-turn user prompt."""
    # model.chat expects the conversation as a list of role/content dicts.
    conversation = [{"role": "user", "content": prompt}]

    # Delegate to the model's built-in chat method (non-streaming).
    reply = model.chat(
        image=image,
        msgs=conversation,
        tokenizer=tokenizer,
        sampling=gen.do_sample,
        temperature=gen.temperature,
        stream=False,
    )
    return reply
|
|
||||||
|
|
||||||
# Script entry point. NOTE(review): workers=1 — presumably because the model
# lives in module-level globals; confirm before raising the worker count.
if __name__ == '__main__':
    uvicorn.run("server:app", host="0.0.0.0", port=8000, workers=1, access_log=False)
|
|
||||||
|
|
||||||
30
test.py
30
test.py
@@ -1,30 +0,0 @@
|
|||||||
import requests
|
|
||||||
|
|
||||||
def model_infer(vlm_url: str, payload):
    """POST *payload* to the engine's /infer endpoint and print the outcome."""
    try:
        resp = requests.post(vlm_url + "/infer", json=payload)
        if resp.status_code == 200:
            print("Succeed!")
            print("Response:", resp.json())
        else:
            print(f"Failed,code: {resp.status_code}")
            print("Error detail:", resp.text)
    except requests.exceptions.RequestException as e:
        print("request error:", str(e))
|
|
||||||
|
|
||||||
# Smoke-test payload: short sampled generation against the local container
# published on port 10055 (see the README's docker run example).
payload = {
    "prompt": "图片有什么?详细描述",
    "generation": {
        "max_new_tokens": 64,
        "temperature": 0.7,
        "top_p": 0.9,
        "do_sample": True
    },
    "dtype": "auto",
    "warmup_runs": 0,
    "measure_token_times": False
}

url = "http://127.0.0.1:10055"
# Fires the request at import time — this module is a script, not a library.
model_infer(url, payload)
|
|
||||||
Reference in New Issue
Block a user