Add support for Qwen3 MoE+GPTQ

2025-11-15 20:14:45 +08:00
parent b296c44ae0
commit 8152e24cb2
35 changed files with 6468 additions and 574 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -358,7 +358,7 @@ class ModelConfig:
    for multimodal models."""
    use_async_output_proc: bool = True
    """Whether to use async output processor."""
-    config_format: Union[str, ConfigFormat] = ConfigFormat.AUTO.value
+    config_format: Union[str, ConfigFormat] = "auto"
    """The format of the model config to load:\n
    - "auto" will try to load the config in hf format if available else it
    will try to load in mistral format.\n
@@ -522,8 +522,8 @@ class ModelConfig:
            raise ValueError(
                "Sleep mode is not supported on current platform.")

-        if isinstance(self.config_format, str):
-            self.config_format = ConfigFormat(self.config_format)
+        # if isinstance(self.config_format, str):
+        #     self.config_format = ConfigFormat(self.config_format)

        hf_config = get_config(self.hf_config_path or self.model,
                               self.trust_remote_code, self.revision,