17 lines
859 B
Python
17 lines
859 B
Python
# Validate the speculative-decoding model config, then record the speculative
# method (or "default") in the vllm_vacc config manager.
def check_spec_model(vllm_config):
    """Validate speculative-decoding settings and record the model tag.

    The tag passed to the config manager is ``speculative_config.method``
    when that attribute exists, otherwise the string ``"default"``.

    Args:
        vllm_config: Config object. Its ``speculative_config`` attribute,
            and that object's ``num_speculative_tokens`` and ``method``
            attributes, are all optional.

    Raises:
        ValueError: If ``speculative_config.num_speculative_tokens`` is
            present and not equal to 1.
    """
    # A missing ``speculative_config`` and an explicit ``None`` are handled
    # alike: neither carries the attributes looked up below, so the defaults
    # apply.
    spec_config = getattr(vllm_config, 'speculative_config', None)

    # Defaulting to 1 lets an absent attribute pass the check, while any
    # present value other than 1 (including None) is rejected.
    num_tokens = getattr(spec_config, 'num_speculative_tokens', 1)
    if num_tokens != 1:
        raise ValueError(f'run_mp_engine: only support num_speculative_tokens == 1, but get {num_tokens}')

    # add spec tag
    model_infos = getattr(spec_config, 'method', "default")

    # Function-scope import: vllm_vacc is only needed once validation passes.
    from vllm_vacc.vllm.config_manager import vllm_vacc_config_manager
    vllm_vacc_config_manager().update_model_infos(model_infos)