move apply_torchao_config_ to model_runner (#2342)
This commit is contained in:
@@ -38,6 +38,7 @@ from sglang.srt.layers.attention.torch_native_backend import TorchNativeAttnBack
|
||||
from sglang.srt.layers.attention.triton_backend import TritonAttnBackend
|
||||
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
|
||||
from sglang.srt.layers.sampler import Sampler
|
||||
from sglang.srt.layers.torchao_utils import apply_torchao_config_to_model_
|
||||
from sglang.srt.lora.lora_manager import LoRAManager
|
||||
from sglang.srt.managers.schedule_batch import global_server_args_dict
|
||||
from sglang.srt.mem_cache.memory_pool import (
|
||||
@@ -159,6 +160,13 @@ class ModelRunner:
|
||||
else:
|
||||
self.torch_tp_applied = False
|
||||
|
||||
# Filter callback handed to apply_torchao_config_to_model_ in the call that
# follows: returns True only when the substring "proj" occurs in `fqn`; the
# `module` argument is accepted but not inspected.
# NOTE(review): `fqn` is presumably a submodule's fully-qualified name -- confirm
# against apply_torchao_config_to_model_.
def filter_fn(module, fqn):
|
||||
return "proj" in fqn
|
||||
|
||||
apply_torchao_config_to_model_(
|
||||
self.model, global_server_args_dict["torchao_config"], filter_fn
|
||||
)
|
||||
|
||||
# Init memory pool and attention backends
|
||||
if server_args.lora_paths is not None:
|
||||
self.init_lora_manager()
|
||||
|
||||
Reference in New Issue
Block a user