move apply_torchao_config_ to model_runner (#2342)

This commit is contained in:
Jerry Zhang
2024-12-04 17:26:42 -08:00
committed by GitHub
parent d693ec0427
commit 9cc733b38c
8 changed files with 25 additions and 71 deletions

View File

@@ -38,6 +38,7 @@ from sglang.srt.layers.attention.torch_native_backend import TorchNativeAttnBack
from sglang.srt.layers.attention.triton_backend import TritonAttnBackend
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
from sglang.srt.layers.sampler import Sampler
from sglang.srt.layers.torchao_utils import apply_torchao_config_to_model_
from sglang.srt.lora.lora_manager import LoRAManager
from sglang.srt.managers.schedule_batch import global_server_args_dict
from sglang.srt.mem_cache.memory_pool import (
@@ -159,6 +160,13 @@ class ModelRunner:
else:
self.torch_tp_applied = False
def filter_fn(module, fqn):
    """Return True when *fqn* (the module's fully-qualified name) identifies
    a projection layer, i.e. the name contains the substring "proj".

    The *module* argument is accepted for the torchao filter-callback
    signature but is not inspected here.
    """
    is_projection_layer = "proj" in fqn
    return is_projection_layer
apply_torchao_config_to_model_(
self.model, global_server_args_dict["torchao_config"], filter_fn
)
# Init memory pool and attention backends
if server_args.lora_paths is not None:
self.init_lora_manager()