Clean up server_args.py to have a dedicated function for model specific adjustments (#8983)
This commit is contained in:
@@ -64,13 +64,12 @@ class ModelConfig:
hybrid_kvcache_ratio: Optional[float] = None,
model_impl: Union[str, ModelImpl] = ModelImpl.AUTO,
) -> None:

# Parse args
self.model_path = model_path
self.revision = revision
self.quantization = quantization
self.model_impl = model_impl

# Parse args
self.maybe_pull_model_tokenizer_from_remote()
self.model_override_args = json.loads(model_override_args)
kwargs = {}

@@ -139,6 +138,7 @@ class ModelConfig:
and self.hf_config.architectures[0] == "Ernie4_5_MoeForCausalLM"
):
self.hf_config.architectures[0] = "Ernie4_5_MoeForCausalLMMTP"

# Check model type
self.is_generation = is_generation_model(
self.hf_config.architectures, is_embedding

@@ -282,12 +282,10 @@ class ModelConfig:
# Cache attributes
self.hf_eos_token_id = self.get_hf_eos_token_id()

config = self.hf_config

# multimodal
self.image_token_id = getattr(config, "image_token_id", None) or getattr(
config, "image_token_index", None
)
self.image_token_id = getattr(
self.hf_config, "image_token_id", None
) or getattr(self.hf_config, "image_token_index", None)

@staticmethod
def from_server_args(server_args: ServerArgs, model_path: str = None, **kwargs):

Reference in New Issue
Block a user