[Minor] move triton attention kernels into a separate folder (#1379)

2024-09-10 15:15:08 -07:00
parent fbb4754cb8
commit 3a6e8b6d78
13 changed files with 24 additions and 15 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -18,7 +18,6 @@ limitations under the License.
 import gc
 import importlib
 import importlib.resources
-import json
 import logging
 import pkgutil
 from functools import lru_cache
@@ -45,6 +44,7 @@ from vllm.model_executor.model_loader import get_model
 from vllm.model_executor.models import ModelRegistry

 from sglang.global_config import global_config
+from sglang.srt.configs.model_config import AttentionArch, ModelConfig
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
 from sglang.srt.layers.sampler import SampleOutput
 from sglang.srt.managers.schedule_batch import ScheduleBatch, global_server_args_dict
@@ -53,7 +53,6 @@ from sglang.srt.mem_cache.memory_pool import (
    MLATokenToKVPool,
    ReqToTokenPool,
 )
-from sglang.srt.model_config import AttentionArch, ModelConfig
 from sglang.srt.model_executor.forward_batch_info import ForwardMode, InputMetadata
 from sglang.srt.server_args import ServerArgs
 from sglang.srt.utils import (