diff --git a/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml b/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml
index 5b5dc050..9c98249c 100644
--- a/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml
+++ b/tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml
@@ -6,7 +6,6 @@ tasks:
     metrics:
       - name: "acc,none"
         value: 0.58
-max_model_len: 8192
 tensor_parallel_size: 2
 gpu_memory_utilization: 0.7
 enable_expert_parallel: True
diff --git a/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml b/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml
index 8803a120..96581e54 100644
--- a/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml
+++ b/tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml
@@ -6,6 +6,5 @@ tasks:
     metrics:
       - name: "acc,none"
         value: 0.55
-max_model_len: 8192
 batch_size: 32
 gpu_memory_utilization: 0.7
diff --git a/tests/e2e/singlecard/test_vlm.py b/tests/e2e/singlecard/test_vlm.py
index c120ef2d..4cdfd7c2 100644
--- a/tests/e2e/singlecard/test_vlm.py
+++ b/tests/e2e/singlecard/test_vlm.py
@@ -39,7 +39,6 @@ def test_multimodal_vl(prompt_template):
     images = [image] * len(img_questions)
     prompts = prompt_template(img_questions)
     with VllmRunner("Qwen/Qwen3-VL-8B-Instruct",
-                    max_model_len=4096,
                     mm_processor_kwargs={
                         "min_pixels": 28 * 28,
                         "max_pixels": 1280 * 28 * 28,
diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py
index 3e1f3f59..f05ef69a 100644
--- a/vllm_ascend/worker/worker_v1.py
+++ b/vllm_ascend/worker/worker_v1.py
@@ -232,6 +232,7 @@ class NPUWorker(WorkerBase):
         # Init ModelRunner here, so that we have access to self.device.
         self.model_runner = NPUModelRunner(self.vllm_config, self.device)
 
+    @torch.inference_mode()
     def determine_available_memory(self) -> int:
         # Profile the memory usage of the model and get the maximum number of
         # cache blocks that can be allocated with the remaining free memory.
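
Context on the `worker_v1.py` hunk: `torch.inference_mode()` is a stock PyTorch decorator/context manager that disables gradient tracking and autograd version counting, so the profiling forward pass in `determine_available_memory` avoids autograd bookkeeping overhead. Below is a minimal, self-contained sketch of the decorator's effect; the `profile_forward` helper and the toy linear model are illustrative assumptions, not part of this change:

```python
# Standalone sketch (not part of this diff) of what @torch.inference_mode()
# does: no autograd graph is built inside the decorated function, and the
# tensors it produces are inference tensors.
import torch


@torch.inference_mode()
def profile_forward(model: torch.nn.Module, x: torch.Tensor) -> torch.Tensor:
    # Even though the model's parameters require grad, no graph is recorded
    # here, which trims memory and CPU overhead for profiling-style passes.
    return model(x)


if __name__ == "__main__":
    model = torch.nn.Linear(8, 8)
    out = profile_forward(model, torch.randn(2, 8))
    print(out.requires_grad)   # False
    print(out.is_inference())  # True
```

On the remaining hunks: dropping the explicit `max_model_len` from the two YAML configs and the `VllmRunner` call presumably lets the tests exercise vLLM's default path, where the maximum context length is derived from the model's own configuration rather than pinned in the test setup.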