[MISC] fix format check error (#654)

This PR makes format.sh work as expected.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-04-29 11:14:19 +08:00
committed by GitHub
parent 1fce70a2fb
commit 0dae55a9a3
17 changed files with 45 additions and 44 deletions

View File

@@ -29,7 +29,7 @@ using vllm_ascend::AccType;
using vllm_ascend::local_mem_copy;
template <typename scalar_t, bool isNeox> class RotaryEmbedding {
// NOTE(ganyi): we use 512B as load stride for pipe, need to find another way to
// retrive this size from runtime for more Soc support
// retrieve this size from runtime for more Soc support
static int constexpr loadSize = 512;
using dst_t = scalar_t;
using acc_t = typename AccType<scalar_t>::type;
@@ -66,7 +66,7 @@ public:
pipe_->InitBuffer(inQue_, 1 /* buffer_num */, loadSize /* buffer_size */);
pipe_->InitBuffer(inQueSinCos_, 1 /* buffer_num */, rotDim_ * sizeof(scalar_t) /* buffer_size */);
pipe_->InitBuffer(outQue_, 1 /* buffer_num */, loadSize /* buffer_size */);
// 2 temperary calculation buffer
// 2 temporary calculation buffer
calcTmpBufferOffset_ = 0;
// 1 upcast buffer for bf16 (headSize)
upcastInputBufferOffset_ = calcTmpBufferOffset_ + sizeof(acc_t) * embedDim_ * 2;
@@ -75,10 +75,10 @@ public:
// 2 sin cos upcast buffer for bf16
cosSinUpcastBufferOffset_ = upcastTempBufferOffset_ + sizeof(acc_t) * 2 * embedDim_;
// 2. bf16 path: needs 2 cos sin upcast buffer size
// 3. fp16 path: needs 2 temperary calculation buffer size
// 3. fp16 path: needs 2 temporary calculation buffer size
tempBufferSize_ = cosSinUpcastBufferOffset_ + 2 * embedDim_ * sizeof(acc_t);
// need to consider upcast the bf16 to fp32, so we might need 4 buffer just in case
// 2 temperary buffer, 2 input buffer, 1 cos buffer, 1 sin buffer, 2 scale buffer (headSize), 2 zp
// 2 temporary buffer, 2 input buffer, 1 cos buffer, 1 sin buffer, 2 scale buffer (headSize), 2 zp
// buffer(headSize int8), 1 dst_temp buffer(headSize, int32)
pipe_->InitBuffer(calcBuf_, tempBufferSize_ /* buffer_size */);
if constexpr (!std::is_same_v<scalar_t, acc_t>) {

View File

@@ -89,7 +89,7 @@ Currently, w8a8 quantization is already supported by vllm-ascend originally on v
Currently, w8a8 DeepSeek is working in process: [support AscendW8A8 quantization](https://github.com/vllm-project/vllm-ascend/pull/511)
Please run DeepSeek with BF16 now, follwing the [Multi-Node DeepSeek inferencing tutorail](https://vllm-ascend.readthedocs.io/en/main/tutorials/multi_node.html)
Please run DeepSeek with BF16 now, following the [Multi-Node DeepSeek inferencing tutorail](https://vllm-ascend.readthedocs.io/en/main/tutorials/multi_node.html)
### 12. There is not output in log when loading models using vllm-ascend, How to solve it?

View File

@@ -127,7 +127,7 @@ apt update -y
apt install -y gcc g++ cmake libnuma-dev wget git
```
**[Optinal]** Config the extra-index of `pip` if you are working on a **x86** machine, so that the torch with cpu could be found:
**[Optional]** Config the extra-index of `pip` if you are working on a **x86** machine, so that the torch with cpu could be found:
```bash
pip config set global.extra-index-url https://download.pytorch.org/whl/cpu/

View File

@@ -84,7 +84,7 @@ def run_decode(prefill_done):
gpu_memory_utilization=0.8,
tensor_parallel_size=2)
# Wait for the producer to start the comsumer
# Wait for the producer to start the consumer
print("Waiting for prefill node to finish...")
prefill_done.wait()

View File

@@ -93,7 +93,7 @@ def main(args):
inputs = {"prompt": prompt, "multi_modal_data": mm_data}
if args.num_prompts > 1:
# Batch inference
inputs = [inputs] * args.num_prompts
inputs = [inputs] * args.num_prompts # type: ignore
outputs = llm.generate(inputs, sampling_params=sampling_params)

View File

@@ -116,6 +116,7 @@ format_all() {
yapf --in-place "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" .
}
echo 'vllm-ascend yapf:'
## This flag formats individual files. --files *must* be the first command line
## arg to use this option.
if [[ "$1" == '--files' ]]; then
@@ -128,12 +129,12 @@ else
# Format only the files that changed in last commit.
format_changed
fi
echo 'vLLM yapf: Done'
echo 'vllm-ascend yapf: Done'
# Run mypy
echo 'vLLM mypy:'
echo 'vllm-ascend mypy:'
tools/mypy.sh
echo 'vLLM mypy: Done'
echo 'vllm-ascend mypy: Done'
# If git diff returns a file that is in the skip list, the file may be checked anyway:
@@ -172,6 +173,7 @@ spell_check_changed() {
fi
}
echo 'vllm-ascend codespell:'
# Run Codespell
## This flag runs spell check of individual files. --files *must* be the first command line
## arg to use this option.
@@ -185,7 +187,7 @@ else
# Check spelling only of the files that changed in last commit.
spell_check_changed
fi
echo 'vLLM codespell: Done'
echo 'vllm-ascend codespell: Done'
# Lint specified files
@@ -211,6 +213,7 @@ lint_changed() {
}
echo 'vllm-ascend ruff:'
# Run Ruff
### This flag lints individual files. --files *must* be the first command line
### arg to use this option.
@@ -224,7 +227,7 @@ else
# Format only the files that changed in last commit.
lint_changed
fi
echo 'vLLM ruff: Done'
echo 'vllm-ascend ruff: Done'
# check spelling of specified files
isort_check() {
@@ -251,6 +254,7 @@ isort_check_changed() {
fi
}
echo 'vllm-ascend isort:'
# Run Isort
# This flag runs spell check of individual files. --files *must* be the first command line
# arg to use this option.
@@ -264,18 +268,13 @@ else
# Check spelling only of the files that changed in last commit.
isort_check_changed
fi
echo 'vLLM isort: Done'
echo 'vllm-ascend isort: Done'
# Clang-format section
# Exclude some files for formatting because they are vendored
# NOTE: Keep up to date with .github/workflows/clang-format.yml
CLANG_FORMAT_EXCLUDES=(
'csrc/moe/topk_softmax_kernels.cu'
'csrc/quantization/gguf/ggml-common.h'
'csrc/quantization/gguf/dequantize.cuh'
'csrc/quantization/gguf/vecdotq.cuh'
'csrc/quantization/gguf/mmq.cuh'
'csrc/quantization/gguf/mmvq.cuh'
'csrc/kernels/pos_encoding_kernels.cpp'
)
# Format specified files with clang-format
@@ -315,15 +314,15 @@ elif [[ "$1" == '--all' ]]; then
else
clang_format_changed
fi
echo 'vLLM clang-format: Done'
echo 'vllm-ascend clang-format: Done'
echo 'vLLM actionlint:'
echo 'vllm-ascend actionlint:'
tools/actionlint.sh -color
echo 'vLLM actionlint: Done'
echo 'vllm-ascend actionlint: Done'
echo 'vLLM shellcheck:'
echo 'vllm-ascend shellcheck:'
tools/shellcheck.sh
echo 'vLLM shellcheck: Done'
echo 'vllm-ascend shellcheck: Done'
echo 'excalidraw png check:'
tools/png-lint.sh

View File

@@ -236,7 +236,7 @@ def test_medusa_e2e_greedy_logprobs(vllm_runner, common_llm_kwargs,
# TODO: There is a problem with the preemptive scheduling in the current
# version, which makes this case fail. Please release this case after the
# preemptive scheduling preblem is solved.
# preemptive scheduling problem is solved.
# @pytest.mark.parametrize(
# "common_llm_kwargs",
# [{

View File

@@ -296,7 +296,7 @@ def test_mlp_e2e_seeded_correctness(vllm_runner, common_llm_kwargs,
# TODO: There is a problem with the preemptive scheduling in the current
# version, which makes this case fail. Please release this case after the
# preemptive scheduling preblem is solved.
# preemptive scheduling problem is solved.
# @pytest.mark.parametrize(
# "common_llm_kwargs",
# [{
@@ -352,7 +352,7 @@ def test_mlp_e2e_seeded_correctness(vllm_runner, common_llm_kwargs,
# TODO: There is a problem with the preemptive scheduling in the current
# version, which makes this case fail. Please release this case after the
# preemptive scheduling preblem is solved.
# preemptive scheduling problem is solved.
# @pytest.mark.parametrize(
# "common_llm_kwargs",
# [{

View File

@@ -175,7 +175,7 @@ def test_ngram_e2e_greedy_logprobs(vllm_runner, common_llm_kwargs,
# TODO: There is a problem with the preemptive scheduling in the current
# version, which makes this case fail. Please release this case after the
# preemptive scheduling preblem is solved.
# preemptive scheduling problem is solved.
# @pytest.mark.parametrize(
# "common_llm_kwargs",
# [{

View File

@@ -18,6 +18,7 @@
# This file is a part of the vllm-ascend project.
# Adapted from https://github.com/vllm-project/vllm/tree/main/tools
#
export SHELLCHECK_OPTS="--exclude=SC2046,SC2006"
if command -v actionlint &> /dev/null; then
actionlint .github/workflows/*.yml .github/workflows/*.yaml
@@ -29,4 +30,4 @@ fi
# download a binary to the current directory - v1.7.3
bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/aa0a7be8e566b096e64a5df8ff290ec24fa58fbc/scripts/download-actionlint.bash)
./actionlint .github/workflows/*.yml .github/workflows/*.yaml
./actionlint .github/workflows/*.yml .github/workflows/*.yaml

View File

@@ -28,11 +28,7 @@ fi
run_mypy() {
echo "Running mypy on $1"
if [ "$CI" -eq 1 ] && [ -z "$1" ]; then
mypy --python-version "${PYTHON_VERSION}" "$@"
return
fi
mypy --follow-imports skip --python-version "${PYTHON_VERSION}" "$@"
mypy --check-untyped-defs --follow-imports skip --python-version "${PYTHON_VERSION}" "$@"
}
run_mypy vllm_ascend

View File

@@ -1080,7 +1080,7 @@ class AscendMLAAttentionBackendImpl(MLAAttentionImpl):
if len(kv_cache) > 0 and kv_cache[0].numel(
) > 0 and attn_metadata.num_prefills > 0:
slots = attn_metadata.slot_mapping
# NOTE: Seperate the kv cache in advance to avoid OOM or other issues
# NOTE: Separate the kv cache in advance to avoid OOM or other issues
torch_npu._npu_reshape_and_cache(key=kv_c_normed.view(
num_tokens, self.num_kv_heads, -1),
value=k_pe,

View File

@@ -60,7 +60,7 @@ class AscendSchedulerConfig(SchedulerConfig):
)
if self.is_multimodal_model:
raise NotImplementedError(
"currently AscendScheduler only supports LLM modles.")
"currently AscendScheduler only supports LLM models.")
if self.num_scheduler_steps > 1:
raise NotImplementedError(
"currently AscendScheduler doesn't support multi-step.")

View File

@@ -57,8 +57,10 @@ def get_device_ips():
universal_newlines=True)
if npu_info.returncode != 0 or not os.path.exists(HCCN_TOOL_PATH):
raise RuntimeError("No npu-smi/hccn_tool tools provided for NPU.")
npu_start_idx = int(
re.match(r'.*\n\t([0-9]+).*', npu_info.stdout).group(1))
re_result = re.match(r'.*\n\t([0-9]+).*', npu_info.stdout)
if re_result is None:
raise RuntimeError("Can't find npu start index")
npu_start_idx = int(re_result.group(1))
device_ip_list = []
for ip_offset in range(world_size):
cmd = [
@@ -68,7 +70,10 @@ def get_device_ips():
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True)
device_ip = re.match(r'ipaddr:(.*)\n', device_ip_info.stdout).group(1)
re_result = re.match(r'ipaddr:(.*)\n', device_ip_info.stdout)
if re_result is None:
raise RuntimeError("Can't find npu ip")
device_ip = re_result.group(1)
device_ip_list.append(device_ip)
return device_ip_list

View File

@@ -7,7 +7,7 @@ from vllm.distributed.parallel_state import (GroupCoordinator, get_world_group,
# vllm-ascend will maintain its own EP GroupCoordinator and ETP GroupCoordinator for
# customize parallel solution
_EP: Optional[GroupCoordinator] = None
_ETP: Optional[list[GroupCoordinator]] = None
_ETP: Optional[GroupCoordinator] = None
def get_ep_group() -> GroupCoordinator:
@@ -69,4 +69,4 @@ def destory_ascend_model_parallel():
global _ETP
if _ETP:
_ETP.destroy()
_ETP = None
_ETP = None

View File

@@ -278,7 +278,7 @@ def fused_experts(
dtype=dtype)
# TODO: npu_grouped_matmul output random values at [num_valid_tokens:, ...]
# This created multiple NaN and index_add_ will mix them up which harms accracy
# This created multiple NaN and index_add_ will mix them up which harms accuracy
# remove this mask and filter after it being fixed
num_valid_tokens = mask.sum()
valid_token_mask = torch.arange(

View File

@@ -227,7 +227,7 @@ def _set_cos_sin_cache(self, seq_len, device, dtype):
persistent=False)
# TODO: Patch when aclnn ops avaiable
# TODO: Patch when aclnn ops available
RotaryEmbedding.forward_oot = rope_forward_oot
DeepseekScalingRotaryEmbedding.forward = native_rope_deepseek_forward
DeepseekScalingRotaryEmbedding._set_cos_sin_cache = _set_cos_sin_cache