From 0dae55a9a3deebdb4f2263011154d886c525fc13 Mon Sep 17 00:00:00 2001
From: wangxiyuan <wangxiyuan1007@gmail.com>
Date: Tue, 29 Apr 2025 11:14:19 +0800
Subject: [PATCH] [MISC] fix format check error (#654)

This PR makes format.sh work as expected.

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
---
 csrc/kernels/pos_encoding_kernels.cpp         |  8 ++---
 docs/source/faqs.md                           |  2 +-
 docs/source/installation.md                   |  2 +-
 examples/disaggregated_prefill_hccl.py        |  2 +-
 examples/offline_inference_audio_language.py  |  2 +-
 format.sh                                     | 33 +++++++++----------
 .../e2e/test_medusa_correctness.py            |  2 +-
 .../spec_decode/e2e/test_mlp_correctness.py   |  4 +--
 .../spec_decode/e2e/test_ngram_correctness.py |  2 +-
 tools/actionlint.sh                           |  3 +-
 tools/mypy.sh                                 |  6 +---
 vllm_ascend/attention/attention.py            |  2 +-
 vllm_ascend/core/schedule_config.py           |  2 +-
 .../distributed/llmdatadist_connector.py      | 11 +++++--
 vllm_ascend/distributed/parallel_state.py     |  4 +--
 vllm_ascend/ops/fused_moe.py                  |  2 +-
 vllm_ascend/ops/rotary_embedding.py           |  2 +-
 17 files changed, 45 insertions(+), 44 deletions(-)
diff --git a/csrc/kernels/pos_encoding_kernels.cpp b/csrc/kernels/pos_encoding_kernels.cpp
index 28ef503..0b77ce8 100644
--- a/csrc/kernels/pos_encoding_kernels.cpp
+++ b/csrc/kernels/pos_encoding_kernels.cpp
@@ -29,7 +29,7 @@ using vllm_ascend::AccType;
 using vllm_ascend::local_mem_copy;
 template <typename scalar_t, bool isNeox> class RotaryEmbedding {
     // NOTE(ganyi): we use 512B as load stride for pipe, need to find another way to
-    // retrive this size from runtime for more Soc support
+    // retrieve this size from runtime for more Soc support
     static int constexpr loadSize = 512;
     using dst_t = scalar_t;
     using acc_t = typename AccType<scalar_t>::type;
@@ -66,7 +66,7 @@ public:
         pipe_->InitBuffer(inQue_, 1 /* buffer_num */, loadSize /* buffer_size */);
         pipe_->InitBuffer(inQueSinCos_, 1 /* buffer_num */, rotDim_ * sizeof(scalar_t) /* buffer_size */);
         pipe_->InitBuffer(outQue_, 1 /* buffer_num */, loadSize /* buffer_size */);
-        // 2 temperary calculation buffer
+        // 2 temporary calculation buffer
         calcTmpBufferOffset_ = 0;
         // 1 upcast buffer for bf16 (headSize)
         upcastInputBufferOffset_ = calcTmpBufferOffset_ + sizeof(acc_t) * embedDim_ * 2;
@@ -75,10 +75,10 @@ public:
         // 2 sin cos upcast buffer for bf16
         cosSinUpcastBufferOffset_ = upcastTempBufferOffset_ + sizeof(acc_t) * 2 * embedDim_;
         // 2. bf16 path: needs 2 cos sin upcast buffer size
-        // 3. fp16 path: needs 2 temperary calculation buffer size
+        // 3. fp16 path: needs 2 temporary calculation buffer size
         tempBufferSize_ = cosSinUpcastBufferOffset_ + 2 * embedDim_ * sizeof(acc_t);
         // need to consider upcast the bf16 to fp32, so we might need 4 buffer just in case
-        // 2 temperary buffer, 2 input buffer, 1 cos buffer, 1 sin buffer, 2 scale buffer (headSize), 2 zp
+        // 2 temporary buffer, 2 input buffer, 1 cos buffer, 1 sin buffer, 2 scale buffer (headSize), 2 zp
         // buffer(headSize int8), 1 dst_temp buffer(headSize, int32)
         pipe_->InitBuffer(calcBuf_, tempBufferSize_ /* buffer_size */);
         if constexpr (!std::is_same_v<scalar_t, acc_t>) {
diff --git a/docs/source/faqs.md b/docs/source/faqs.md
index 288689b..f954098 100644
--- a/docs/source/faqs.md
+++ b/docs/source/faqs.md
@@ -89,7 +89,7 @@ Currently, w8a8 quantization is already supported by vllm-ascend originally on v
 
 Currently, w8a8 DeepSeek is working in process: [support AscendW8A8 quantization](https://github.com/vllm-project/vllm-ascend/pull/511)
 
-Please run DeepSeek with BF16 now, follwing the [Multi-Node DeepSeek inferencing tutorail](https://vllm-ascend.readthedocs.io/en/main/tutorials/multi_node.html)
+Please run DeepSeek with BF16 now, following the [Multi-Node DeepSeek inferencing tutorial](https://vllm-ascend.readthedocs.io/en/main/tutorials/multi_node.html)
 
 ### 12. There is not output in log when loading models using vllm-ascend, How to solve it?
 
diff --git a/docs/source/installation.md b/docs/source/installation.md
index 51d0475..cb63a4b 100644
--- a/docs/source/installation.md
+++ b/docs/source/installation.md
@@ -127,7 +127,7 @@ apt update  -y
 apt install -y gcc g++ cmake libnuma-dev wget git
 ```
 
-**[Optinal]** Config the extra-index of `pip` if you are working on a **x86** machine, so that the torch with cpu could be found:
+**[Optional]** Config the extra-index of `pip` if you are working on a **x86** machine, so that the torch with cpu could be found:
 
 ```bash
 pip config set global.extra-index-url https://download.pytorch.org/whl/cpu/
diff --git a/examples/disaggregated_prefill_hccl.py b/examples/disaggregated_prefill_hccl.py
index ab82abc..be317d2 100644
--- a/examples/disaggregated_prefill_hccl.py
+++ b/examples/disaggregated_prefill_hccl.py
@@ -84,7 +84,7 @@ def run_decode(prefill_done):
               gpu_memory_utilization=0.8,
               tensor_parallel_size=2)
 
-    # Wait for the producer to start the comsumer
+    # Wait for the producer to start the consumer
     print("Waiting for prefill node to finish...")
     prefill_done.wait()
 
diff --git a/examples/offline_inference_audio_language.py b/examples/offline_inference_audio_language.py
index 4f9d64f..deb8105 100644
--- a/examples/offline_inference_audio_language.py
+++ b/examples/offline_inference_audio_language.py
@@ -93,7 +93,7 @@ def main(args):
     inputs = {"prompt": prompt, "multi_modal_data": mm_data}
     if args.num_prompts > 1:
         # Batch inference
-        inputs = [inputs] * args.num_prompts
+        inputs = [inputs] * args.num_prompts  # type: ignore
 
     outputs = llm.generate(inputs, sampling_params=sampling_params)
 
diff --git a/format.sh b/format.sh
index bec7be4..595bf2f 100755
--- a/format.sh
+++ b/format.sh
@@ -116,6 +116,7 @@ format_all() {
     yapf --in-place "${YAPF_FLAGS[@]}" "${YAPF_EXCLUDES[@]}" .
 }
 
+echo 'vllm-ascend yapf:'
 ## This flag formats individual files. --files *must* be the first command line
 ## arg to use this option.
 if [[ "$1" == '--files' ]]; then
@@ -128,12 +129,12 @@ else
    # Format only the files that changed in last commit.
    format_changed
 fi
-echo 'vLLM yapf: Done'
+echo 'vllm-ascend yapf: Done'
 
 # Run mypy
-echo 'vLLM mypy:'
+echo 'vllm-ascend mypy:'
 tools/mypy.sh
-echo 'vLLM mypy: Done'
+echo 'vllm-ascend mypy: Done'
 
 
 # If git diff returns a file that is in the skip list, the file may be checked anyway:
@@ -172,6 +173,7 @@ spell_check_changed() {
     fi
 }
 
+echo 'vllm-ascend codespell:'
 # Run Codespell
 ## This flag runs spell check of individual files. --files *must* be the first command line
 ## arg to use this option.
@@ -185,7 +187,7 @@ else
    # Check spelling only of the files that changed in last commit.
    spell_check_changed
 fi
-echo 'vLLM codespell: Done'
+echo 'vllm-ascend codespell: Done'
 
 
 # Lint specified files
@@ -211,6 +213,7 @@ lint_changed() {
 
 }
 
+echo 'vllm-ascend ruff:'
 # Run Ruff
 ### This flag lints individual files. --files *must* be the first command line
 ### arg to use this option.
@@ -224,7 +227,7 @@ else
    # Format only the files that changed in last commit.
    lint_changed
 fi
-echo 'vLLM ruff: Done'
+echo 'vllm-ascend ruff: Done'
 
 # check spelling of specified files
 isort_check() {
@@ -251,6 +254,7 @@ isort_check_changed() {
     fi
 }
 
+echo 'vllm-ascend isort:'
 # Run Isort
 # This flag runs spell check of individual files. --files *must* be the first command line
 # arg to use this option.
@@ -264,18 +268,13 @@ else
    # Check spelling only of the files that changed in last commit.
    isort_check_changed
 fi
-echo 'vLLM isort: Done'
+echo 'vllm-ascend isort: Done'
 
 # Clang-format section
 # Exclude some files for formatting because they are vendored
 # NOTE: Keep up to date with .github/workflows/clang-format.yml
 CLANG_FORMAT_EXCLUDES=(
-    'csrc/moe/topk_softmax_kernels.cu'
-    'csrc/quantization/gguf/ggml-common.h'
-    'csrc/quantization/gguf/dequantize.cuh'
-    'csrc/quantization/gguf/vecdotq.cuh'
-    'csrc/quantization/gguf/mmq.cuh'
-    'csrc/quantization/gguf/mmvq.cuh'
+    'csrc/kernels/pos_encoding_kernels.cpp'
 )
 
 # Format specified files with clang-format
@@ -315,15 +314,15 @@ elif [[ "$1" == '--all' ]]; then
 else
    clang_format_changed
 fi
-echo 'vLLM clang-format: Done'
+echo 'vllm-ascend clang-format: Done'
 
-echo 'vLLM actionlint:'
+echo 'vllm-ascend actionlint:'
 tools/actionlint.sh -color
-echo 'vLLM actionlint: Done'
+echo 'vllm-ascend actionlint: Done'
 
-echo 'vLLM shellcheck:'
+echo 'vllm-ascend shellcheck:'
 tools/shellcheck.sh
-echo 'vLLM shellcheck: Done'
+echo 'vllm-ascend shellcheck: Done'
 
 echo 'excalidraw png check:'
 tools/png-lint.sh
diff --git a/tests/singlecard/spec_decode/e2e/test_medusa_correctness.py b/tests/singlecard/spec_decode/e2e/test_medusa_correctness.py
index 92d3ae4..26cd11b 100644
--- a/tests/singlecard/spec_decode/e2e/test_medusa_correctness.py
+++ b/tests/singlecard/spec_decode/e2e/test_medusa_correctness.py
@@ -236,7 +236,7 @@ def test_medusa_e2e_greedy_logprobs(vllm_runner, common_llm_kwargs,
 
 # TODO: There is a problem with the preemptive scheduling in the current
 # version, which makes this case fail. Please release this case after the
-# preemptive scheduling preblem is solved.
+# preemptive scheduling problem is solved.
 # @pytest.mark.parametrize(
 #     "common_llm_kwargs",
 #     [{
diff --git a/tests/singlecard/spec_decode/e2e/test_mlp_correctness.py b/tests/singlecard/spec_decode/e2e/test_mlp_correctness.py
index 675556f..e446c60 100644
--- a/tests/singlecard/spec_decode/e2e/test_mlp_correctness.py
+++ b/tests/singlecard/spec_decode/e2e/test_mlp_correctness.py
@@ -296,7 +296,7 @@ def test_mlp_e2e_seeded_correctness(vllm_runner, common_llm_kwargs,
 
 # TODO: There is a problem with the preemptive scheduling in the current
 # version, which makes this case fail. Please release this case after the
-# preemptive scheduling preblem is solved.
+# preemptive scheduling problem is solved.
 # @pytest.mark.parametrize(
 #     "common_llm_kwargs",
 #     [{
@@ -352,7 +352,7 @@ def test_mlp_e2e_seeded_correctness(vllm_runner, common_llm_kwargs,
 
 # TODO: There is a problem with the preemptive scheduling in the current
 # version, which makes this case fail. Please release this case after the
-# preemptive scheduling preblem is solved.
+# preemptive scheduling problem is solved.
 # @pytest.mark.parametrize(
 #     "common_llm_kwargs",
 #     [{
diff --git a/tests/singlecard/spec_decode/e2e/test_ngram_correctness.py b/tests/singlecard/spec_decode/e2e/test_ngram_correctness.py
index 14d97e9..18ed731 100644
--- a/tests/singlecard/spec_decode/e2e/test_ngram_correctness.py
+++ b/tests/singlecard/spec_decode/e2e/test_ngram_correctness.py
@@ -175,7 +175,7 @@ def test_ngram_e2e_greedy_logprobs(vllm_runner, common_llm_kwargs,
 
 # TODO: There is a problem with the preemptive scheduling in the current
 # version, which makes this case fail. Please release this case after the
-# preemptive scheduling preblem is solved.
+# preemptive scheduling problem is solved.
 # @pytest.mark.parametrize(
 #     "common_llm_kwargs",
 #     [{
diff --git a/tools/actionlint.sh b/tools/actionlint.sh
index 072335b..a050b56 100755
--- a/tools/actionlint.sh
+++ b/tools/actionlint.sh
@@ -18,6 +18,7 @@
 # This file is a part of the vllm-ascend project.
 # Adapted from https://github.com/vllm-project/vllm/tree/main/tools
 #
+export SHELLCHECK_OPTS="--exclude=SC2046,SC2006"
 
 if command -v actionlint &> /dev/null; then
     actionlint .github/workflows/*.yml .github/workflows/*.yaml
@@ -29,4 +30,4 @@ fi
 
 # download a binary to the current directory - v1.7.3
 bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/aa0a7be8e566b096e64a5df8ff290ec24fa58fbc/scripts/download-actionlint.bash)
-./actionlint  .github/workflows/*.yml .github/workflows/*.yaml
+./actionlint .github/workflows/*.yml .github/workflows/*.yaml
diff --git a/tools/mypy.sh b/tools/mypy.sh
index 7f7f644..57a3d27 100755
--- a/tools/mypy.sh
+++ b/tools/mypy.sh
@@ -28,11 +28,7 @@ fi
 
 run_mypy() {
     echo "Running mypy on $1"
-    if [ "$CI" -eq 1 ] && [ -z "$1" ]; then
-        mypy --python-version "${PYTHON_VERSION}" "$@"
-        return
-    fi
-    mypy --follow-imports skip --python-version "${PYTHON_VERSION}" "$@"
+    mypy --check-untyped-defs --follow-imports skip --python-version "${PYTHON_VERSION}" "$@"
 }
 
 run_mypy vllm_ascend
diff --git a/vllm_ascend/attention/attention.py b/vllm_ascend/attention/attention.py
index 3de391f..b179785 100644
--- a/vllm_ascend/attention/attention.py
+++ b/vllm_ascend/attention/attention.py
@@ -1080,7 +1080,7 @@ class AscendMLAAttentionBackendImpl(MLAAttentionImpl):
             if len(kv_cache) > 0 and kv_cache[0].numel(
             ) > 0 and attn_metadata.num_prefills > 0:
                 slots = attn_metadata.slot_mapping
-                # NOTE: Seperate the kv cache in advance to avoid OOM or other issues
+                # NOTE: Separate the kv cache in advance to avoid OOM or other issues
                 torch_npu._npu_reshape_and_cache(key=kv_c_normed.view(
                     num_tokens, self.num_kv_heads, -1),
                                                  value=k_pe,
diff --git a/vllm_ascend/core/schedule_config.py b/vllm_ascend/core/schedule_config.py
index 9194411..51e4960 100644
--- a/vllm_ascend/core/schedule_config.py
+++ b/vllm_ascend/core/schedule_config.py
@@ -60,7 +60,7 @@ class AscendSchedulerConfig(SchedulerConfig):
             )
         if self.is_multimodal_model:
             raise NotImplementedError(
-                "currently AscendScheduler only supports LLM modles.")
+                "currently AscendScheduler only supports LLM models.")
         if self.num_scheduler_steps > 1:
             raise NotImplementedError(
                 "currently AscendScheduler doesn't support multi-step.")
diff --git a/vllm_ascend/distributed/llmdatadist_connector.py b/vllm_ascend/distributed/llmdatadist_connector.py
index 69c8ce7..19a759a 100644
--- a/vllm_ascend/distributed/llmdatadist_connector.py
+++ b/vllm_ascend/distributed/llmdatadist_connector.py
@@ -57,8 +57,10 @@ def get_device_ips():
                               universal_newlines=True)
     if npu_info.returncode != 0 or not os.path.exists(HCCN_TOOL_PATH):
         raise RuntimeError("No npu-smi/hccn_tool tools provided for NPU.")
-    npu_start_idx = int(
-        re.match(r'.*\n\t([0-9]+).*', npu_info.stdout).group(1))
+    re_result = re.match(r'.*\n\t([0-9]+).*', npu_info.stdout)
+    if re_result is None:
+        raise RuntimeError("Can't find npu start index")
+    npu_start_idx = int(re_result.group(1))
     device_ip_list = []
     for ip_offset in range(world_size):
         cmd = [
@@ -68,7 +70,10 @@ def get_device_ips():
                                         stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE,
                                         universal_newlines=True)
-        device_ip = re.match(r'ipaddr:(.*)\n', device_ip_info.stdout).group(1)
+        re_result = re.match(r'ipaddr:(.*)\n', device_ip_info.stdout)
+        if re_result is None:
+            raise RuntimeError("Can't find npu ip")
+        device_ip = re_result.group(1)
         device_ip_list.append(device_ip)
     return device_ip_list
 
diff --git a/vllm_ascend/distributed/parallel_state.py b/vllm_ascend/distributed/parallel_state.py
index d7be8c3..895b7ff 100644
--- a/vllm_ascend/distributed/parallel_state.py
+++ b/vllm_ascend/distributed/parallel_state.py
@@ -7,7 +7,7 @@ from vllm.distributed.parallel_state import (GroupCoordinator, get_world_group,
 # vllm-ascend will maintain its own EP GroupCoordinator and ETP GroupCoordinator for
 # customize parallel solution
 _EP: Optional[GroupCoordinator] = None
-_ETP: Optional[list[GroupCoordinator]] = None
+_ETP: Optional[GroupCoordinator] = None
 
 
 def get_ep_group() -> GroupCoordinator:
@@ -69,4 +69,4 @@ def destory_ascend_model_parallel():
     global _ETP
     if _ETP:
         _ETP.destroy()
-    _ETP = None
\ No newline at end of file
+    _ETP = None
diff --git a/vllm_ascend/ops/fused_moe.py b/vllm_ascend/ops/fused_moe.py
index 7eebc7d..2c25e0c 100644
--- a/vllm_ascend/ops/fused_moe.py
+++ b/vllm_ascend/ops/fused_moe.py
@@ -278,7 +278,7 @@ def fused_experts(
                                           dtype=dtype)
 
         # TODO: npu_grouped_matmul output random values at [num_valid_tokens:, ...]
-        # This created multiple NaN and index_add_ will mix them up which harms accracy
+        # This created multiple NaN and index_add_ will mix them up which harms accuracy
         # remove this mask and filter after it being fixed
         num_valid_tokens = mask.sum()
         valid_token_mask = torch.arange(
diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py
index 1ddd08f..f830364 100644
--- a/vllm_ascend/ops/rotary_embedding.py
+++ b/vllm_ascend/ops/rotary_embedding.py
@@ -227,7 +227,7 @@ def _set_cos_sin_cache(self, seq_len, device, dtype):
                          persistent=False)
 
 
-# TODO: Patch when aclnn ops avaiable
+# TODO: Patch when aclnn ops available
 RotaryEmbedding.forward_oot = rope_forward_oot
 DeepseekScalingRotaryEmbedding.forward = native_rope_deepseek_forward
 DeepseekScalingRotaryEmbedding._set_cos_sin_cache = _set_cos_sin_cache