[Minor] move triton attention kernels into a separate folder (#1379)

This commit is contained in:
Lianmin Zheng
2024-09-10 15:15:08 -07:00
committed by GitHub
parent fbb4754cb8
commit 3a6e8b6d78
13 changed files with 24 additions and 15 deletions

View File

@@ -18,7 +18,6 @@ limitations under the License.
import gc
import importlib
import importlib.resources
import json
import logging
import pkgutil
from functools import lru_cache
@@ -45,6 +44,7 @@ from vllm.model_executor.model_loader import get_model
from vllm.model_executor.models import ModelRegistry
from sglang.global_config import global_config
from sglang.srt.configs.model_config import AttentionArch, ModelConfig
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
from sglang.srt.layers.sampler import SampleOutput
from sglang.srt.managers.schedule_batch import ScheduleBatch, global_server_args_dict
@@ -53,7 +53,6 @@ from sglang.srt.mem_cache.memory_pool import (
MLATokenToKVPool,
ReqToTokenPool,
)
from sglang.srt.model_config import AttentionArch, ModelConfig
from sglang.srt.model_executor.forward_batch_info import ForwardMode, InputMetadata
from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import (