[Feature] Support the KV NZ feature for DeepSeek decode nodes in the disagg-prefill scenario (#3072)
By converting the KV cache from ND to NZ format when the decode node
receives it, this PR ensures that the KV NZ feature works correctly
during the decoding phase in the disagg-prefill scenario.
- vLLM version: v0.11.0
- vLLM main: 83f478bb19
---------
Signed-off-by: Jade Zheng <zheng.shoujian@outlook.com>
Co-authored-by: ghphotoframe <854746559@qq.com>
Co-authored-by: alex101-ops <alex1015718386@gmail.com>
This commit is contained in:
@@ -13,18 +13,21 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from typing import Optional
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
from vllm.logger import logger
|
||||
from vllm.triton_utils import HAS_TRITON
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.config import VllmConfig
|
||||
|
||||
|
||||
class AscendConfig:
|
||||
"""
|
||||
Configuration Object for additional_config from vllm.configs.
|
||||
"""
|
||||
|
||||
def __init__(self, vllm_config):
|
||||
def __init__(self, vllm_config: "VllmConfig"):
|
||||
additional_config = vllm_config.additional_config if vllm_config.additional_config is not None else {}
|
||||
|
||||
xlite_graph_config = additional_config.get("xlite_graph_config", {})
|
||||
@@ -121,6 +124,19 @@ class AscendConfig:
|
||||
self.enable_async_exponential = bool(
|
||||
additional_config.get("enable_async_exponential", False))
|
||||
|
||||
self.enable_kv_nz = additional_config.get("enable_kv_nz", False)
|
||||
if self.enable_kv_nz:
|
||||
use_sparse = hasattr(vllm_config.model_config.hf_config,
|
||||
"index_topk")
|
||||
if not vllm_config.model_config.is_deepseek_mla or use_sparse:
|
||||
raise RuntimeError(
|
||||
"enable_kv_nz is only supported for mla currently.")
|
||||
if vllm_config.kv_transfer_config is None \
|
||||
or not vllm_config.kv_transfer_config.is_kv_consumer:
|
||||
raise NotImplementedError(
|
||||
"enable_kv_nz is only supported in pd scenario and can "
|
||||
"only be used in D node.")
|
||||
|
||||
|
||||
class FinegrainedTPConfig:
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user