diff --git a/docker/iluvatar-bi100.dockerfile b/docker/iluvatar-bi100.dockerfile
new file mode 100644
index 0000000..3979cc4
--- /dev/null
+++ b/docker/iluvatar-bi100.dockerfile
@@ -0,0 +1,7 @@
+FROM harbor-contest.4pd.io/luxinlong02/sherpa-onnx-offline-asr:1.12.5-mr100-corex-4.3.0-zh-en
+ENV HF_ENDPOINT=https://hf-mirror.com
+RUN pip install transformers==4.50.0
+WORKDIR /app
+COPY server.py /app/server.py
+EXPOSE 8000
+CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/docker/nvidia-a100.dockerfile b/docker/nvidia-a100.dockerfile
new file mode 100644
index 0000000..9ca4842
--- /dev/null
+++ b/docker/nvidia-a100.dockerfile
@@ -0,0 +1,7 @@
+FROM harbor.4pd.io/hardcore-tech/vllm/vllm-openai:v0.8.5.post1
+ENV HF_ENDPOINT=https://hf-mirror.com
+RUN pip install transformers==4.50.0
+WORKDIR /app
+COPY server.py /app/server.py
+EXPOSE 8000
+CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/server.py b/docker/server.py
similarity index 98%
rename from server.py
rename to docker/server.py
index 0981c5c..d85a50f 100644
--- a/server.py
+++ b/docker/server.py
@@ -14,9 +14,16 @@ from transformers import (
     AutoTokenizer,
     AutoConfig,
     AutoModelForCausalLM,
-    AutoModelForVision2Seq, AutoModel, Qwen2VLForConditionalGeneration, Gemma3ForConditionalGeneration
+    AutoModelForVision2Seq, AutoModel
 )
+
+try:
+    from transformers import (Qwen2VLForConditionalGeneration, Gemma3ForConditionalGeneration)
+except ImportError:
+    pass
+
+
 
 
 app = FastAPI(title="Unified VLM API (Transformers)")
 
@@ -214,10 +221,11 @@ def resolve_model(model_path: str, dtype_str: str) -> LoadedModel:
         _loaded[model_path] = lm
         return lm
     elif model_type in ("internlmxcomposer2"):
-        model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dt, trust_remote_code=True)
+        dt = torch.float16
+        print(f"dt changed to {dt}")
         tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-        model.to(dev)
-        model.eval()
+        model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dt, trust_remote_code=True, device_map='auto')
+        model = model.eval()
         lm = LoadedModel(model_type, model_path, model, None, tokenizer, dev, dt)
         _loaded[model_path] = lm
         return lm
@@ -377,6 +385,7 @@ def info():
 @app.post("/load_model")
 def load_model(req: LoadModelRequest):
     lm = resolve_model(req.model_path, req.dtype)
+    print(f"model with path {req.model_path} loaded!")
     return {
         "loaded": lm.model_path,
         "device": str(lm.device),
@@ -592,3 +601,4 @@ def infer(req: InferRequest):
 
 # Entry
 # Run: uvicorn server:app --host 0.0.0.0 --port 8000
+
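
Note on the import guard: older transformers builds (such as the one baked into the BI100 base image before the pinned 4.50.0 install) do not export Qwen2VLForConditionalGeneration or Gemma3ForConditionalGeneration, so importing them unconditionally would crash the server at startup. The patch wraps them in try/except; a minimal sketch of the same pattern with the fallback made explicit (the _HAS_QWEN2VL flag and pick_qwen2vl_class helper are illustrative names of mine, not part of this patch):

# Optional-class import guard: keep the module importable on old transformers.
try:
    from transformers import Qwen2VLForConditionalGeneration
    _HAS_QWEN2VL = True
except ImportError:
    Qwen2VLForConditionalGeneration = None
    _HAS_QWEN2VL = False

def pick_qwen2vl_class():
    # Fall back to generic AutoModel loading when the dedicated class
    # is unavailable in this transformers build.
    if _HAS_QWEN2VL:
        return Qwen2VLForConditionalGeneration
    from transformers import AutoModel
    return AutoModel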
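The internlmxcomposer2 branch now forces float16 (overriding the requested dtype, presumably for compatibility on the Iluvatar backend) and lets accelerate place weights via device_map='auto' instead of a manual model.to(dev). A sketch of that loading call in isolation, assuming accelerate is installed in the image; the checkpoint path is hypothetical:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "/models/internlm-xcomposer2-vl-7b"  # hypothetical local path
dt = torch.float16  # forced fp16, as in the patch above

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=dt,
    trust_remote_code=True,
    device_map="auto",  # accelerate places weights; no manual .to(dev)
).eval()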
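For an end-to-end check of either image: both containers serve uvicorn on port 8000 (see EXPOSE/CMD above), and the /load_model handler reads model_path and dtype from the request and returns "loaded" and "device". A minimal smoke-test sketch against a running container; the host, port mapping, model path, and the dtype string value are assumptions, not defined by this patch:

import requests

BASE = "http://localhost:8000"  # assumes `docker run -p 8000:8000 ...`

resp = requests.post(
    f"{BASE}/load_model",
    json={
        "model_path": "/models/Qwen2-VL-7B-Instruct",  # hypothetical path
        "dtype": "float16",  # guessed value; resolve_model parses this string
    },
)
resp.raise_for_status()
info = resp.json()
print(info["loaded"], info["device"])  # keys returned by the handler above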