### What this PR does / why we need it? This patch adds support for the xlite graph wrapper to vllm_ascend. Xlite provides operator implementations of the transformer network on Ascend hardware. For details about xlite, please refer to the following link: https://gitee.com/openeuler/GVirt/blob/master/xlite/README.md The latest performance comparison data between xlite and the default aclgraph mode is as follows: ## Qwen3 32B TPS 910B3(A2) Online Inference Performance Comparison - aclgraph: main(c4a71fc6) - xlite-full: main(c4a71fc6) + xlite-full - xlite-decode-only: main(c4a71fc6) + xlite-decode-only - diff1: Performance comparison between xlite-full and aclgraph - diff2: Performance comparison between xlite-decode-only and aclgraph ### Does this PR introduce _any_ user-facing change? Enable the xlite graph mode by setting xlite_graph_config: --additional-config='{"xlite_graph_config": {"enabled": true}}' # Enabled for decode only --additional-config='{"xlite_graph_config": {"enabled": true, "full_mode": true}}' # Enabled for prefill and decode - vLLM version: v0.12.0 - vLLM main:ad32e3e19c--------- Signed-off-by: lulina <lina.lulina@huawei.com> Co-authored-by: wangxiyuan <wangxiyuan1007@gmail.com>
32 lines
605 B
INI
32 lines
605 B
INI
; mypy static type checker configuration.
; The [mypy] section holds global options; each [mypy-<package>.*] section
; applies only to modules matching that pattern.

[mypy]
; Currently disabled. Uncomment to have mypy warn when a function declared
; with a non-Any return type returns a value of type Any.
; warn_return_any = True
; Warn about per-module sections in this file that match no processed module.
warn_unused_configs = True

; Suppress all missing import errors from torch_npu for mypy.
[mypy-torch_npu.*]
ignore_missing_imports = True

[mypy-torchair.*]
ignore_missing_imports = True

[mypy-transformers.*]
ignore_missing_imports = True

[mypy-lm_eval.*]
ignore_missing_imports = True

[mypy-compressed_tensors.*]
ignore_missing_imports = True

[mypy-datasets.*]
ignore_missing_imports = True

[mypy-llmcompressor.*]
ignore_missing_imports = True

[mypy-msprobe.*]
ignore_missing_imports = True
; NOTE(review): allow_untyped_imports does not appear in mypy's documented
; option list; confirm mypy accepts it (follow_untyped_imports may have been
; intended). Left unchanged pending confirmation.
allow_untyped_imports = True

[mypy-xlite.*]
ignore_missing_imports = True