feature: complete
This commit is contained in:
7
docker/iluvatar-bi100.dockerfile
Normal file
7
docker/iluvatar-bi100.dockerfile
Normal file
@@ -0,0 +1,7 @@
|
||||
# Serving image for the Iluvatar BI100 target, layered on the sherpa-onnx
# CoreX base image.
FROM harbor-contest.4pd.io/luxinlong02/sherpa-onnx-offline-asr:1.12.5-mr100-corex-4.3.0-zh-en

# Route Hugging Face downloads through the hf-mirror.com endpoint.
ENV HF_ENDPOINT=https://hf-mirror.com

# Pin transformers to the exact release; --no-cache-dir keeps pip's download
# cache out of the resulting image layer.
RUN pip install --no-cache-dir transformers==4.50.0

# NOTE(review): uvicorn is not installed in this file, so it is presumably
# provided by the base image -- confirm before relying on this image.
WORKDIR /app
COPY server.py /app/server.py

# The server listens on port 8000 (see the --port argument below).
EXPOSE 8000
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
7
docker/nvidia-a100.dockerfile
Normal file
7
docker/nvidia-a100.dockerfile
Normal file
@@ -0,0 +1,7 @@
|
||||
# Serving image for the NVIDIA A100 target, layered on the vllm-openai base
# image.
FROM harbor.4pd.io/hardcore-tech/vllm/vllm-openai:v0.8.5.post1

# Route Hugging Face downloads through the hf-mirror.com endpoint.
ENV HF_ENDPOINT=https://hf-mirror.com

# Pin transformers to the exact release; --no-cache-dir keeps pip's download
# cache out of the resulting image layer.
RUN pip install --no-cache-dir transformers==4.50.0

# NOTE(review): uvicorn is not installed in this file, so it is presumably
# provided by the base image -- confirm before relying on this image.
WORKDIR /app
COPY server.py /app/server.py

# The server listens on port 8000 (see the --port argument below).
EXPOSE 8000
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
@@ -14,9 +14,16 @@ from transformers import (
|
||||
AutoTokenizer,
|
||||
AutoConfig,
|
||||
AutoModelForCausalLM,
|
||||
AutoModelForVision2Seq, AutoModel, Qwen2VLForConditionalGeneration, Gemma3ForConditionalGeneration
|
||||
AutoModelForVision2Seq, AutoModel
|
||||
)
|
||||
|
||||
|
||||
# Optional model classes: not every installed transformers build provides
# them, so each one is imported on its own. Importing them together would let
# one missing class prevent the other from being bound, and swallowing the
# error would leave the name undefined (a NameError at first use). On failure
# the name is bound to None so the module attribute always exists.
try:
    from transformers import Qwen2VLForConditionalGeneration
except ImportError:
    Qwen2VLForConditionalGeneration = None

try:
    from transformers import Gemma3ForConditionalGeneration
except ImportError:
    Gemma3ForConditionalGeneration = None
|
||||
|
||||
|
||||
# ASGI application instance; the run note at the end of this file launches it
# with uvicorn as `server:app`.
app = FastAPI(title="Unified VLM API (Transformers)")
|
||||
|
||||
|
||||
@@ -214,10 +221,11 @@ def resolve_model(model_path: str, dtype_str: str) -> LoadedModel:
|
||||
_loaded[model_path] = lm
|
||||
return lm
|
||||
elif model_type in ("internlmxcomposer2"):
|
||||
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dt, trust_remote_code=True)
|
||||
dt = torch.float16
|
||||
print(f"dt change to {dt}")
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
||||
model.to(dev)
|
||||
model.eval()
|
||||
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=dt, trust_remote_code=True, device_map='auto')
|
||||
model = model.eval()
|
||||
lm = LoadedModel(model_type, model_path, model, None, tokenizer, dev, dt)
|
||||
_loaded[model_path] = lm
|
||||
return lm
|
||||
@@ -377,6 +385,7 @@ def info():
|
||||
@app.post("/load_model")
|
||||
def load_model(req: LoadModelRequest):
|
||||
lm = resolve_model(req.model_path, req.dtype)
|
||||
print(f"model with path {req.model_path} loaded!")
|
||||
return {
|
||||
"loaded": lm.model_path,
|
||||
"device": str(lm.device),
|
||||
@@ -592,3 +601,4 @@ def infer(req: InferRequest):
|
||||
|
||||
# Entry
|
||||
# Run: uvicorn server:app --host 0.0.0.0 --port 8000
|
||||
|
||||
Reference in New Issue
Block a user