[CI] upgrade vllm to 0.8.5 (#715)
1. Upgrade vllm to 0.8.5.
2. Drop 0.8.4 support.
3. Keep the docs at 0.8.4rc2 until 0.8.5 is released.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -23,7 +23,6 @@ from vllm.worker.multi_step_model_runner import (ModelOutput,
|
||||
PythonizationCache,
|
||||
StatefulModelInput)
|
||||
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
from vllm_ascend.worker.model_runner import (
|
||||
ModelInputForNPUWithSamplingMetadata, NPUModelRunnerBase)
|
||||
|
||||
@@ -318,13 +317,7 @@ class MultiStepModelNPURunner(NPUModelRunnerBase[StatefulModelInputForNPU]):
|
||||
dtype=torch.long,
|
||||
device="cpu",
|
||||
pin_memory=True)
|
||||
|
||||
if vllm_version_is("0.8.4"):
|
||||
self._base_model_runner.model.sampler.include_gpu_probs_tensor = (
|
||||
True)
|
||||
else:
|
||||
assert self._base_model_runner.sampler is not None
|
||||
self._base_model_runner.sampler.include_gpu_probs_tensor = True
|
||||
self._base_model_runner.sampler.include_gpu_probs_tensor = True
|
||||
if frozen_model_input.sampling_metadata:
|
||||
frozen_model_input.sampling_metadata.skip_sampler_cpu_output = (
|
||||
True)
|
||||
|
||||
Reference in New Issue
Block a user