42 lines
1.1 KiB
Markdown
42 lines
1.1 KiB
Markdown
|
|
# 天数智芯 天垓100 文本生成引擎(基于 vLLM 优化,适配 Qwen3.6-27B)
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
# 本地构建
|
|||
|
|
docker build -t enginex-iluvatar-vllm:bi100-qwen3.6 -f Dockerfile .
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
|
|||
|
|
启动容器镜像
|
|||
|
|
|
|||
|
|
下载 Qwen3.6-27B 模型,并将模型 config.json 文件中的 architectures 字段修改为:
|
|||
|
|
```json
|
|||
|
|
"architectures": [
|
|||
|
|
"Qwen3_5ForCausalLM"
|
|||
|
|
]
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
```bash
|
|||
|
|
docker run -dit --network=host --ipc=host \
|
|||
|
|
-v /usr/src:/usr/src -v /lib/modules:/lib/modules -v /dev:/dev --privileged \
|
|||
|
|
--name vllm-iluvatar \
|
|||
|
|
-v /mnt/models/Qwen3.6-27B:/model:ro --entrypoint=python3 \
|
|||
|
|
enginex-iluvatar-vllm:bi100-qwen3.6 \
|
|||
|
|
-m vllm.entrypoints.openai.api_server \
|
|||
|
|
--model /model --port 1111 --served-model-name llm \
|
|||
|
|
--max-model-len 10000 --enforce-eager --trust-remote-code -tp 4 --gpu-memory-utilization 0.95
|
|||
|
|
```
|
|||
|
|
|
|||
|
|
服务启动后,发送请求示例:
|
|||
|
|
```bash
|
|||
|
|
curl http://localhost:1111/v1/chat/completions \
|
|||
|
|
-H "Content-Type: application/json" \
|
|||
|
|
-d '{
|
|||
|
|
"model": "llm",
|
|||
|
|
"messages": [
|
|||
|
|
{"role": "system", "content": "You are a helpful assistant."},
|
|||
|
|
{"role": "user", "content": "Can you tell me the story of Snow White?"}
|
|||
|
|
],
|
|||
|
|
"max_tokens": 200,
|
|||
|
|
"temperature": 0.7
|
|||
|
|
}'
|
|||
|
|
```
|