[gpt-oss] Add gpt-oss bf16 support

This commit is contained in:
2025-08-13 21:25:57 +08:00
parent 5d2e7edf78
commit 17ea2ec6aa
1232 changed files with 777 additions and 36 deletions

28
vllm/core/block/utils.py Normal file
View File

@@ -0,0 +1,28 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Block manager utils."""
from vllm.sequence import SequenceGroup
from vllm.utils import (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE,
STR_NOT_IMPL_ENC_DEC_SWA)
def check_no_caching_or_swa_for_blockmgr_encdec(
        block_mgr, seq_group: SequenceGroup) -> None:
    """Reject encoder/decoder sequence groups that need unsupported features.

    Prefix caching and sliding-window attention (SWA) are currently
    unsupported *specifically* for encoder/decoder models. Sequence groups
    that are not encoder/decoder pass through without any check.

    Args:
        block_mgr: BlockSpaceManager instance; its
            ``max_block_sliding_window`` and ``enable_caching`` attributes
            are inspected.
        seq_group: SequenceGroup passed to ``block_mgr``.

    Raises:
        NotImplementedError: If ``seq_group`` is encoder/decoder and
            ``block_mgr`` has a sliding window configured
            (``max_block_sliding_window`` is not ``None``) or has caching
            enabled. The sliding-window condition is checked first.
    """
    # The restrictions below only apply to encoder/decoder sequence groups.
    if not seq_group.is_encoder_decoder():
        return

    uses_sliding_window = block_mgr.max_block_sliding_window is not None
    if uses_sliding_window:
        raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_SWA)

    if block_mgr.enable_caching:
        raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE)