[BugFix][Fusion] Patch compile backend to make fusion available (#5308)
Currently, the vllm pr: https://github.com/vllm-project/vllm/pull/24252
is causing operator fusion to fail, which can be mitigated by patching
the backend. Once the problem is completely resolved, I will submit a
new pull request to remove the patch.
- vLLM version: release/v0.13.0
- vLLM main:
5fbfa8d9ef
---------
Signed-off-by: wxsIcey <1790571317@qq.com>
This commit is contained in:
@@ -45,6 +45,20 @@ def test_models_output_between_eager_and_aclgraph(
|
||||
"The capital of France is", "The future of AI is"
|
||||
]
|
||||
|
||||
vllm_aclgraph_qwen_answers = [
|
||||
" Lina. I'm a 22-year-old student from China. I'm interested in studying in the US. I want to know if there are any",
|
||||
' the same as the president of the United Nations. This is because the president of the United States is the same as the president of the United Nations. The president',
|
||||
' Paris. The capital of France is also the capital of the Republic of France. The capital of France is also the capital of the European Union. The capital of',
|
||||
' not just a technological frontier but a profound transformation of how we live, work, and interact with the world. As we stand at the intersection of artificial intelligence and'
|
||||
]
|
||||
|
||||
vllm_aclgraph_ds_answers = [
|
||||
'\nI am a 20 year old student from the UK. I am currently studying for a degree in English Literature and Creative Writing. I have a passion',
|
||||
' a man who has been in the public eye for decades. He has been a senator, a governor, and a businessman. He has also been married to the',
|
||||
' Paris, which is also the largest city in the country. The city is located on the River Seine and is known for its beautiful architecture, museums, and art',
|
||||
' here.\nThe future of AI is here.\nThe future of AI is here.\nThe future of AI is here.\nThe future of AI is'
|
||||
]
|
||||
|
||||
sampling_params = SamplingParams(max_tokens=max_tokens, temperature=0.0)
|
||||
if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8":
|
||||
with VllmRunner(
|
||||
@@ -54,15 +68,6 @@ def test_models_output_between_eager_and_aclgraph(
|
||||
) as runner:
|
||||
vllm_aclgraph_outputs = runner.model.generate(
|
||||
prompts, sampling_params)
|
||||
|
||||
with VllmRunner(
|
||||
model,
|
||||
max_model_len=1024,
|
||||
enforce_eager=True,
|
||||
quantization="ascend",
|
||||
) as runner:
|
||||
vllm_eager_outputs = runner.model.generate(prompts,
|
||||
sampling_params)
|
||||
else:
|
||||
with VllmRunner(
|
||||
model,
|
||||
@@ -70,23 +75,16 @@ def test_models_output_between_eager_and_aclgraph(
|
||||
) as runner:
|
||||
vllm_aclgraph_outputs = runner.model.generate(
|
||||
prompts, sampling_params)
|
||||
|
||||
with VllmRunner(
|
||||
model,
|
||||
max_model_len=1024,
|
||||
enforce_eager=True,
|
||||
) as runner:
|
||||
vllm_eager_outputs = runner.model.generate(prompts,
|
||||
sampling_params)
|
||||
vllm_aclgraph_outputs_list = []
|
||||
for output in vllm_aclgraph_outputs:
|
||||
vllm_aclgraph_outputs_list.append(
|
||||
(output.outputs[0].index, output.outputs[0].text))
|
||||
([output.outputs[0].index], output.outputs[0].text))
|
||||
|
||||
vllm_eager_outputs_list = []
|
||||
for output in vllm_eager_outputs:
|
||||
vllm_eager_outputs_list.append(
|
||||
(output.outputs[0].index, output.outputs[0].text))
|
||||
vllm_eager_outputs_list = ([
|
||||
([0], answer) for answer in vllm_aclgraph_ds_answers
|
||||
] if model == "vllm-ascend/DeepSeek-V2-Lite-W8A8" else [
|
||||
([0], answer) for answer in vllm_aclgraph_qwen_answers
|
||||
])
|
||||
|
||||
check_outputs_equal(
|
||||
outputs_0_lst=vllm_eager_outputs_list,
|
||||
@@ -134,7 +132,7 @@ def test_models_output_between_eager_and_full_decode_only(
|
||||
]
|
||||
vllm_aclgraph_qwen_answers = [
|
||||
' \n\nTo solve this problem, we need to use the Law of Sines and Law of Cosines. Let me start by drawing triangle $ABC$ with the',
|
||||
" \n\nTo solve this problem, we can use the fact that the expected value of the area of a triangle formed by two random points on a square's perimeter is",
|
||||
' \n\nTo solve this problem, we can use the following approach: Let $ABCD$ be a unit square with coordinates $A(0,0), B',
|
||||
' \n\nTo solve this problem, we can use the following approach: Let $ \\alpha $ be the common real root of the two equations. Then, we can'
|
||||
]
|
||||
|
||||
@@ -302,4 +300,4 @@ def test_aclgraph_enable():
|
||||
# after check_and_update_config, mode should be VLLM_COMPILE and piecewise cudagraph
|
||||
NPUPlatform.check_and_update_config(VllmConfig)
|
||||
assert VllmConfig.compilation_config.mode == CompilationMode.VLLM_COMPILE
|
||||
assert VllmConfig.compilation_config.cudagraph_mode == CUDAGraphMode.PIECEWISE
|
||||
assert VllmConfig.compilation_config.cudagraph_mode == CUDAGraphMode.PIECEWISE
|
||||
|
||||
@@ -63,7 +63,7 @@ def test_models_with_xlite_decode_only(
|
||||
vllm_xlite_answers = [
|
||||
"Hello, my name is Lina. I'm a 22-year-old student from China.",
|
||||
'The president of the United States is the same as the president of the United Nations. This is because the president',
|
||||
'The capital of France is Paris. The capital of Italy is Rome. The capital of Spain is Madrid',
|
||||
'The capital of France is Paris. The capital of France is also the capital of the French Republic.',
|
||||
'The future of AI is not just a technological challenge but a profound transformation of how we live, work'
|
||||
]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user