### What this PR does / why we need it?
**Scope of Changes**:
| File Path |
| :--- |
| `vllm_ascend/quantization/compressed_tensors/compressed_tensors.py` |
| `vllm_ascend/quantization/quant_config.py` |
| `vllm_ascend/quantization/utils.py` |
| `vllm_ascend/quantization/w4a16.py` |
| `vllm_ascend/quantization/w4a4_flatquant_dynamic.py` |
| `vllm_ascend/quantization/w4a8_dynamic.py` |
| `vllm_ascend/quantization/w8a16.py` |
| `vllm_ascend/quantization/w8a8.py` |
| `vllm_ascend/quantization/w8a8_dynamic.py` |
| `vllm_ascend/quantization/w8a8_pdmix.py` |
| `vllm_ascend/quantization/w8a8mxfp8.py` |
| `vllm_ascend/sample/rejection_sampler.py` |
| `vllm_ascend/sample/sampler.py` |
| `vllm_ascend/worker/block_table.py` |
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main: 2c24bc6996
Signed-off-by: MrZ20 <2609716663@qq.com>
This commit is contained in:
@@ -35,7 +35,6 @@ def random_sample(
|
||||
|
||||
|
||||
class AscendSampler(Sampler):
|
||||
|
||||
def __init__(self, logprobs_mode=DEFAULT_LOGPROBS_MODE):
|
||||
# TODO: support logprobs_mode in vllm-ascend
|
||||
super().__init__(logprobs_mode=logprobs_mode)
|
||||
@@ -62,7 +61,6 @@ class AscendSampler(Sampler):
|
||||
|
||||
|
||||
class AscendTopKTopPSampler(TopKTopPSampler):
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.apply_top_k_top_p = apply_top_k_top_p
|
||||
@@ -135,4 +133,9 @@ def _apply_top_k_top_p_ascendc(
|
||||
return logits
|
||||
return torch.ops._C_ascend.npu_apply_top_k_top_p(logits, k=k, p=p)
|
||||
|
||||
apply_top_k_top_p = _apply_top_k_top_p_ascendc if get_ascend_device_type() in [AscendDeviceType.A2, AscendDeviceType.A3] else _apply_top_k_top_p_pytorch
|
||||
|
||||
apply_top_k_top_p = (
|
||||
_apply_top_k_top_p_ascendc
|
||||
if get_ascend_device_type() in [AscendDeviceType.A2, AscendDeviceType.A3]
|
||||
else _apply_top_k_top_p_pytorch
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user