feature: add ASR and vLLM serving Dockerfiles; guard optional VLM imports in server.py

This commit is contained in:
2025-08-25 10:39:03 +08:00
parent fddd7715d5
commit 2187c32882
3 changed files with 28 additions and 4 deletions

View File

@@ -0,0 +1,7 @@
# Serving image for the unified VLM/ASR FastAPI server, built on a private
# sherpa-onnx offline-ASR base (tag suggests an Iluvatar CoreX build — TODO confirm).
FROM harbor-contest.4pd.io/luxinlong02/sherpa-onnx-offline-asr:1.12.5-mr100-corex-4.3.0-zh-en
# Route Hugging Face downloads through the hf-mirror.com mirror
# (presumably for restricted-network environments; verify reachability).
ENV HF_ENDPOINT=https://hf-mirror.com
# Pin transformers to the exact version the server code was written against.
RUN pip install transformers==4.50.0
WORKDIR /app
# The application is a single-file FastAPI app.
COPY server.py /app/server.py
EXPOSE 8000
# Run the ASGI app object `app` from server.py on all interfaces, port 8000
# (matches the EXPOSE above).
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -0,0 +1,7 @@
# Serving image for the unified VLM FastAPI server, built on a private mirror
# of the vLLM OpenAI-compatible image (v0.8.5.post1).
FROM harbor.4pd.io/hardcore-tech/vllm/vllm-openai:v0.8.5.post1
# Route Hugging Face downloads through the hf-mirror.com mirror
# (presumably for restricted-network environments; verify reachability).
ENV HF_ENDPOINT=https://hf-mirror.com
# Pin transformers to the exact version the server code was written against;
# NOTE(review): this overrides whatever version the vLLM base image ships — confirm compatibility.
RUN pip install transformers==4.50.0
WORKDIR /app
# The application is a single-file FastAPI app.
COPY server.py /app/server.py
EXPOSE 8000
# Run the ASGI app object `app` from server.py on all interfaces, port 8000
# (matches the EXPOSE above).
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -14,9 +14,16 @@ from transformers import (
AutoTokenizer,
AutoConfig,
AutoModelForCausalLM,
AutoModelForVision2Seq, AutoModel, Qwen2VLForConditionalGeneration, Gemma3ForConditionalGeneration
AutoModelForVision2Seq, AutoModel
)
try:
from transformers import (Qwen2VLForConditionalGeneration, Gemma3ForConditionalGeneration)
except ImportError:
pass
app = FastAPI(title="Unified VLM API (Transformers)")
@@ -214,10 +221,11 @@ def resolve_model(model_path: str, dtype_str: str) -> LoadedModel:
_loaded[model_path] = lm
return lm
elif model_type in ("internlmxcomposer2"):
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dt, trust_remote_code=True)
dt = torch.float16
print(f"dt change to {dt}")
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model.to(dev)
model.eval()
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dt, trust_remote_code=True, device_map='auto')
model = model.eval()
lm = LoadedModel(model_type, model_path, model, None, tokenizer, dev, dt)
_loaded[model_path] = lm
return lm
@@ -377,6 +385,7 @@ def info():
@app.post("/load_model")
def load_model(req: LoadModelRequest):
lm = resolve_model(req.model_path, req.dtype)
print(f"model with path {req.model_path} loaded!")
return {
"loaded": lm.model_path,
"device": str(lm.device),
@@ -592,3 +601,4 @@ def infer(req: InferRequest):
# Entry
# Run: uvicorn server:app --host 0.0.0.0 --port 8000