[gpt-oss] Add gpt-oss bf16 support

2025-08-13 21:25:57 +08:00
parent 5d2e7edf78
commit 17ea2ec6aa
1232 changed files with 777 additions and 36 deletions
--- a/vllm/core/block/utils.py
+++ b/vllm/core/block/utils.py
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Block manager utils."""
+from vllm.sequence import SequenceGroup
+from vllm.utils import (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE,
+                        STR_NOT_IMPL_ENC_DEC_SWA)
+
+
+def check_no_caching_or_swa_for_blockmgr_encdec(
+        block_mgr, seq_group: SequenceGroup) -> None:
+    '''
+    Enforce that prefix caching & sliding-window attention (SWA)
+    are currently unsupported *specifically* for encoder/decoder models.
+
+    Raises NotImplementedError if unsupported scenario is detected.
+
+    Arguments:
+
+    * block_mgr: BlockSpaceManager instance
+    * seq_group: SequenceGroup passed to block_mgr
+    '''
+
+    if seq_group.is_encoder_decoder():
+        if block_mgr.max_block_sliding_window is not None:
+            raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_SWA)
+
+        if block_mgr.enable_caching:
+            raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE)