Add support for Qwen3 MoE+GPTQ

2025-11-15 20:14:45 +08:00
parent b296c44ae0
commit 8152e24cb2
35 changed files with 6468 additions and 574 deletions
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -522,7 +522,6 @@ class EngineArgs:
            help="Disable async output processing. This may result in "
            "lower performance.")
        model_group.add_argument("--config-format",
-                                 choices=[f.value for f in ConfigFormat],
                                 **model_kwargs["config_format"])
        # This one is a special case because it can bool
        # or str. TODO: Handle this in get_kwargs