[DOC] MiniMax-M2.5 model intro (#7296)
### What this PR does / why we need it?
1. Add a nightly test for MiniMax-M2.5 using the A3 deployment method
2. Add a MiniMax-M2.5 deployment introduction to the vllm-ascend docs
- vLLM version: v0.17.0
- vLLM main: 4034c3d32e
---------
Signed-off-by: limuyuan <limuyuan3@huawei.com>
Signed-off-by: SparrowMu <52023119+SparrowMu@users.noreply.github.com>
Co-authored-by: limuyuan <limuyuan3@huawei.com>
The commit adds the following nightly test configuration:
```yaml
# ==========================================
# ACTUAL TEST CASES
# ==========================================

test_cases:
  - name: "MiniMax-M2.5-TP16-Reasoning-Tool"
    model: "MiniMax/MiniMax-M2.5"
    envs:
      HCCL_BUFFSIZE: "1024"
      OMP_PROC_BIND: "false"
      HCCL_OP_EXPANSION_MODE: "AIV"
      PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
      VLLM_ASCEND_ENABLE_FLASHCOMM1: "1"
      SERVER_PORT: "DEFAULT_PORT"
    prompts:
      - "Hello. Please introduce yourself briefly."
    api_keyword_args:
      max_tokens: 128
      temperature: 0
    test_content:
      - chat_completion
    server_cmd:
      - "--tensor-parallel-size"
      - "16"
      - "--port"
      - "$SERVER_PORT"
      - "--trust-remote-code"
      - "--dtype"
      - "bfloat16"
      - "--enable-expert-parallel"
      - "--max-num-seqs"
      - "32"
      - "--max-num-batched-tokens"
      - "32768"
      # Prefer a smaller max length for nightly stability. For full context,
      # omit this flag and rely on the model config (196608).
      - "--max-model-len"
      - "32768"
      - "--compilation-config"
      - '{"cudagraph_mode":"FULL_DECODE_ONLY"}'
      - "--enable-auto-tool-choice"
      - "--tool-call-parser"
      - "minimax_m2"
      - "--reasoning-parser"
      - "minimax_m2_append_think"
```
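For reference, the harness presumably exports the `envs` block and passes `server_cmd` to a `vllm serve` invocation along these lines. This is a minimal sketch, not the harness's actual launch wrapper; the port value substituted for `DEFAULT_PORT` is an assumption.

```bash
# Sketch of an equivalent manual launch; the harness's exact wrapper is assumed.
# Environment taken verbatim from the `envs` block above:
export HCCL_BUFFSIZE=1024
export OMP_PROC_BIND=false
export HCCL_OP_EXPANSION_MODE=AIV
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export VLLM_ASCEND_ENABLE_FLASHCOMM1=1
export SERVER_PORT=8000  # placeholder; the harness substitutes DEFAULT_PORT

# Flags taken verbatim from `server_cmd` above:
vllm serve MiniMax/MiniMax-M2.5 \
  --tensor-parallel-size 16 \
  --port "$SERVER_PORT" \
  --trust-remote-code \
  --dtype bfloat16 \
  --enable-expert-parallel \
  --max-num-seqs 32 \
  --max-num-batched-tokens 32768 \
  --max-model-len 32768 \
  --compilation-config '{"cudagraph_mode":"FULL_DECODE_ONLY"}' \
  --enable-auto-tool-choice \
  --tool-call-parser minimax_m2 \
  --reasoning-parser minimax_m2_append_think
```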
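The `chat_completion` entry under `test_content` then corresponds to a request against vLLM's standard OpenAI-compatible endpoint, built from the `prompts` and `api_keyword_args` values above. Again a sketch: port 8000 stands in for the harness-assigned port.

```bash
# Issue the test prompt with the configured sampling parameters.
curl -s "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d '{
        "model": "MiniMax/MiniMax-M2.5",
        "messages": [{"role": "user", "content": "Hello. Please introduce yourself briefly."}],
        "max_tokens": 128,
        "temperature": 0
      }'
```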