[Feat]enable sfa cp for dsv3.2 (#4702)
### What this PR does / why we need it?
RFC: https://github.com/vllm-project/vllm/issues/30055
### How was this patch tested?
1. Enable FLASHCOMM1:
export VLLM_ASCEND_ENABLE_FLASHCOMM1=1
2. enable sfa-cp
--additional-config '{ "enable_sfa_cp": true }' \
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
Signed-off-by: AlvisGong <gwly0401@163.com>
Co-authored-by: clrs97 <524936896@qq.com>
Co-authored-by: zzhx1 <zzh_201018@outlook.com>
Co-authored-by: hwhaokun <haokun0405@163.com>
Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -1067,3 +1067,15 @@ def refresh_block_size(vllm_config):
|
||||
"Block size is set to 128 if prefix cache or chunked prefill is enabled."
|
||||
)
|
||||
cache_config.block_size = 128
|
||||
|
||||
|
||||
def dispose_layer(layer: Any):
    """Dispose every ``torch.Tensor`` attribute reachable on *layer*.

    Walks ``dir(layer)`` and passes each attribute that is a tensor to
    ``dispose_tensor`` (defined elsewhere in this module — presumably it
    releases the tensor's backing storage; confirm against its definition).

    Args:
        layer: object (typically an ``nn.Module``) whose tensor
            attributes should be disposed.
    """
    for attr_name in dir(layer):
        # dir() also lists properties/descriptors; reading one may raise
        # (e.g. a property depending on state that is already torn down),
        # so skip attributes that fail to resolve instead of aborting the
        # whole disposal sweep.
        try:
            attr_value = getattr(layer, attr_name)
        except AttributeError:
            continue
        if isinstance(attr_value, torch.Tensor):
            dispose_tensor(attr_value)
|
||||
|
||||
|
||||
def replace_layer(original_layer: Any, new_layer: Any):
    """Morph *original_layer* in place into *new_layer*.

    Rebinds the instance's ``__class__`` and ``__dict__`` so that every
    existing reference to ``original_layer`` now observes ``new_layer``'s
    type and attribute state, without callers having to update pointers.
    """
    # Transplant identity first, then state; the two rebinds are independent.
    for slot in ("__class__", "__dict__"):
        setattr(original_layer, slot, getattr(new_layer, slot))
|
||||
|
||||
Reference in New Issue
Block a user