Clean up server_args.py to have a dedicated function for model-specific adjustments (#8983)

This commit is contained in:
Lianmin Zheng
2025-08-08 19:56:50 -07:00
committed by GitHub
parent 23f2afb2ce
commit 706bd69cc5
24 changed files with 201 additions and 340 deletions

View File

@@ -64,13 +64,12 @@ class ModelConfig:
hybrid_kvcache_ratio: Optional[float] = None,
model_impl: Union[str, ModelImpl] = ModelImpl.AUTO,
) -> None:
# Parse args
self.model_path = model_path
self.revision = revision
self.quantization = quantization
self.model_impl = model_impl
# Parse args
self.maybe_pull_model_tokenizer_from_remote()
self.model_override_args = json.loads(model_override_args)
kwargs = {}
@@ -139,6 +138,7 @@ class ModelConfig:
and self.hf_config.architectures[0] == "Ernie4_5_MoeForCausalLM"
):
self.hf_config.architectures[0] = "Ernie4_5_MoeForCausalLMMTP"
# Check model type
self.is_generation = is_generation_model(
self.hf_config.architectures, is_embedding
@@ -282,12 +282,10 @@ class ModelConfig:
# Cache attributes
self.hf_eos_token_id = self.get_hf_eos_token_id()
config = self.hf_config
# multimodal
self.image_token_id = getattr(config, "image_token_id", None) or getattr(
config, "image_token_index", None
)
self.image_token_id = getattr(
self.hf_config, "image_token_id", None
) or getattr(self.hf_config, "image_token_index", None)
@staticmethod
def from_server_args(server_args: ServerArgs, model_path: str = None, **kwargs):