diff --git a/.github/workflows/_e2e_nightly_multi_node.yaml b/.github/workflows/_e2e_nightly_multi_node.yaml index f9ecbadd..db1a6414 100644 --- a/.github/workflows/_e2e_nightly_multi_node.yaml +++ b/.github/workflows/_e2e_nightly_multi_node.yaml @@ -60,13 +60,13 @@ defaults: # only cancel in-progress runs of the same workflow # and ignore the lint / 8 cards test type concurrency: - group: ascend-nightly-${{ github.ref }}-${{ inputs.config_file_path }} + group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.config_file_path }} cancel-in-progress: true jobs: e2e: name: ${{ inputs.config_file_path }} - # This is a runner with no NPU for k8s controller + # This is the runner with no NPU for k8s controller runs-on: ${{ inputs.runner }} container: image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11 @@ -75,7 +75,7 @@ jobs: KUBECTL: /root/.cache/.kube/kubectl NAMESPACE: vllm-project LEADER_POD: vllm-0 - RESULT_FILE: /root/.cache/tests/ret/test_result.txt + RESULT_FILE: /root/.cache/tests/ret_${{ inputs.soc_version }} steps: - name: Install system denpendencies run: | @@ -84,7 +84,7 @@ jobs: pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple pip install jinja2-cli - apt-get update -y && apt-get install -y git curl + #apt-get update -y && apt-get install -y git curl - name: Install kubectl run: | @@ -117,8 +117,8 @@ jobs: run: | # pre clear the crd resources created by lws kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found - - name: Launch cluster + id: launcher run: | set -e @@ -130,6 +130,8 @@ jobs: vllm_ascend_ref="${{ inputs.vllm_ascend_ref }}" vllm_ascend_remote_url="${{ inputs.vllm_ascend_remote_url }}" result_file_path="$RESULT_FILE" + fail_tag=FAIL_TAG_"${{ inputs.config_file_path }}" + echo "FAIL_TAG=${fail_tag}" >> $GITHUB_ENV required_params=("size" "replicas" "image" "config_file_path") for param in "${required_params[@]}"; do @@ -155,8 +157,7 @@ jobs: -D 
vllm_ascend_ref="$vllm_ascend_ref" \ -D result_file_path="$result_file_path" \ -D npu_per_node="$npu_per_node" \ - -D controller_name="$HOSTNAME" \ - -D kb_secret=${{ secrets.KUBECONFIG_B64 }} \ + -D fail_tag="$fail_tag" \ --outfile lws.yaml kubectl apply -f ./lws.yaml @@ -180,7 +181,14 @@ jobs: - name: Stream logs run: | - kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" + set -euo pipefail + echo "Looking for logs containing: $FAIL_TAG" + kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" | while read -r line; do + echo "$line" + if echo "$line" | grep -q "$FAIL_TAG"; then + exit 1 # workflow step failed + fi + done - name: Post process if: always() diff --git a/.github/workflows/_e2e_nightly_single_node.yaml b/.github/workflows/_e2e_nightly_single_node.yaml index 8e3224e2..e77f5623 100644 --- a/.github/workflows/_e2e_nightly_single_node.yaml +++ b/.github/workflows/_e2e_nightly_single_node.yaml @@ -44,7 +44,7 @@ defaults: # only cancel in-progress runs of the same workflow # and ignore the lint / 1 card / 4 cards test type concurrency: - group: ascend-nightly-${{ github.ref }}-${{ inputs.tests }} + group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.tests }} cancel-in-progress: true jobs: diff --git a/.github/workflows/vllm_ascend_test_nightly_a2.yaml b/.github/workflows/vllm_ascend_test_nightly_a2.yaml index 83fbb4d6..0842e7e1 100644 --- a/.github/workflows/vllm_ascend_test_nightly_a2.yaml +++ b/.github/workflows/vllm_ascend_test_nightly_a2.yaml @@ -42,6 +42,7 @@ concurrency: jobs: single-node-tests: + name: single-node if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' strategy: fail-fast: false @@ -63,6 +64,7 @@ jobs: tests: ${{ matrix.test_config.tests }} multi-node-tests: + name: multi-node needs: single-node-tests if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') strategy: @@ -71,10 +73,10 @@ jobs: matrix: test_config: - name: multi-node-deepseek-dp - 
config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml + config_file_path: DeepSeek-R1-W8A8-A2.yaml size: 2 - name: multi-node-deepseek-dp-torchair - config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml + config_file_path: DeepSeek-R1-W8A8-A2-torchair.yaml size: 2 uses: ./.github/workflows/_e2e_nightly_multi_node.yaml with: diff --git a/.github/workflows/vllm_ascend_test_nightly_a3.yaml b/.github/workflows/vllm_ascend_test_nightly_a3.yaml index 7254f9cf..2cd6d817 100644 --- a/.github/workflows/vllm_ascend_test_nightly_a3.yaml +++ b/.github/workflows/vllm_ascend_test_nightly_a3.yaml @@ -42,6 +42,7 @@ concurrency: jobs: single-node-tests: + name: single-node if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' strategy: fail-fast: false @@ -85,6 +86,7 @@ jobs: tests: ${{ matrix.test_config.tests }} multi-node-tests: + name: multi-node needs: single-node-tests if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') strategy: @@ -93,19 +95,19 @@ jobs: matrix: test_config: - name: multi-node-deepseek-pd - config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml + config_file_path: DeepSeek-V3.yaml size: 2 - name: multi-node-qwen3-dp - config_file_path: tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml + config_file_path: Qwen3-235B-A3B.yaml size: 2 - name: multi-node-dpsk-4node-pd - config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml + config_file_path: DeepSeek-R1-W8A8.yaml size: 4 - name: multi-node-qwenw8a8-2node - config_file_path: tests/e2e/nightly/multi_node/config/models/Qwen3-235B-W8A8.yaml + config_file_path: Qwen3-235B-W8A8.yaml size: 2 - name: multi-node-glm-2node - config_file_path: tests/e2e/nightly/multi_node/config/models/GLM-4_5.yaml + config_file_path: GLM-4_5.yaml size: 2 uses: ./.github/workflows/_e2e_nightly_multi_node.yaml with: @@ -117,12 +119,3 @@ 
jobs: config_file_path: ${{ matrix.test_config.config_file_path }} secrets: KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }} - - clear_resources: - needs: multi-node-tests - if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') - uses: ./.github/workflows/_kill_lws_resources.yaml - with: - runner: linux-aarch64-a3-0 - secrets: - KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }} diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index 64ac88cc..dc68bd12 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -163,10 +163,11 @@ class RemoteOpenAIServer: self.proxy_port = proxy_port self._start_server(model, vllm_serve_args, env_dict) - max_wait_seconds = max_wait_seconds or 7200 + max_wait_seconds = max_wait_seconds or 1800 if self.disaggregated_prefill: assert proxy_port is not None, "for disaggregated_prefill, proxy port must be provided" - self._wait_for_server_pd(proxy_port=proxy_port) + self._wait_for_server_pd(proxy_port=proxy_port, + timeout=max_wait_seconds) else: self._wait_for_server(url=self.url_for("health"), timeout=max_wait_seconds) @@ -186,7 +187,7 @@ class RemoteOpenAIServer: """Subclasses override this method to customize process polling""" return self.proc.poll() - def hang_until_terminated(self) -> None: + def hang_until_terminated(self, url) -> None: """ Wait until the server process terminates. 
This is for headless mode, where the api server @@ -196,7 +197,7 @@ class RemoteOpenAIServer: try: while True: try: - resp = client.get(self.url_for("health"), timeout=5) + resp = client.get(url, timeout=5) if resp.status_code != 200: break time.sleep(5) @@ -206,7 +207,7 @@ class RemoteOpenAIServer: if isinstance(client, httpx.Client): client.close() - def _wait_for_server_pd(self, proxy_port: int): + def _wait_for_server_pd(self, proxy_port: int, timeout: float): # Wait for all api_server nodes ready assert self.nodes_info is not None, "cluster info must be provided" for node_info in self.nodes_info: @@ -214,12 +215,12 @@ class RemoteOpenAIServer: continue url_health = f"http://{node_info.ip}:{node_info.server_port}/health" - self._wait_for_server(url=url_health, timeout=7200) + self._wait_for_server(url=url_health, timeout=timeout) # Wait for proxy ready master_node = self.nodes_info[0] url_proxy = f"http://{master_node.ip}:{proxy_port}/healthcheck" - self._wait_for_server(url=url_proxy, timeout=7200) + self._wait_for_server(url=url_proxy, timeout=timeout) def _wait_for_server(self, *, url: str, timeout: float): # run health check diff --git a/tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2-torchair.yaml b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml similarity index 100% rename from tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2-torchair.yaml rename to tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml diff --git a/tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2.yaml b/tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml similarity index 100% rename from tests/e2e/nightly/multi_node/config/models/DeepSeep-R1-W8A8-A2.yaml rename to tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml diff --git a/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml b/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml index 
c5b34c9d..8c00803c 100644 --- a/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml +++ b/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml @@ -97,3 +97,12 @@ deployment: } }' benchmarks: + acc: + case_type: accuracy + dataset_path: vllm-ascend/gsm8k-lite + request_conf: vllm_api_general_chat + dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt + max_out_len: 4096 + batch_size: 512 + baseline: 95 + threshold: 5 diff --git a/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml b/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml index b72bb542..7fde3392 100644 --- a/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml +++ b/tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml @@ -47,3 +47,4 @@ deployment: --no-enable-prefix-caching --gpu-memory-utilization 0.9 benchmarks: + diff --git a/tests/e2e/nightly/multi_node/config/multi_node_config.py b/tests/e2e/nightly/multi_node/config/multi_node_config.py index 3d540d84..9bde4581 100644 --- a/tests/e2e/nightly/multi_node/config/multi_node_config.py +++ b/tests/e2e/nightly/multi_node/config/multi_node_config.py @@ -17,6 +17,7 @@ setup_logger() logger = logging.getLogger(__name__) DISAGGREGATED_PREFILL_PROXY_SCRIPT = "examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py" DISAGGEGATED_PREFILL_PORT = 5333 +CONFIG_BASE_PATH = "tests/e2e/nightly/multi_node/config/models/" @dataclass @@ -187,9 +188,8 @@ class MultiNodeConfig: @classmethod def from_yaml(cls, yaml_path: Optional[str] = None): if not yaml_path: - yaml_path = os.getenv( - "CONFIG_YAML_PATH", - "tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml") + yaml_path = os.getenv("CONFIG_YAML_PATH", "DeepSeek-V3.yaml") + yaml_path = os.path.join(CONFIG_BASE_PATH, yaml_path) with open(yaml_path, 'r') as file: config_data = yaml.safe_load(file) test_name = config_data.get("test_name", "default_test") @@ -255,6 +255,7 @@ class MultiNodeConfig: ranktable_path = 
self.disaggregated_prefill.get("ranktable_path") assert ranktable_gen_path is not None and ranktable_path is not None if os.path.exists(str(ranktable_path)): + logger.info("ranktable has already generated") return local_host = self.cur_ip @@ -286,6 +287,8 @@ class MultiNodeConfig: assert self.nic_name is not None env["GLOO_SOCKET_IFNAME"] = self.nic_name + logger.info( + f"Generating ranktable from command: {' '.join(map(str, cmd))}") subprocess.run(cmd, env=env, check=True) assert os.path.exists( str(ranktable_path)), "failed generate ranktable.json" diff --git a/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 b/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 index ba12baea..f619b597 100644 --- a/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 +++ b/tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 @@ -18,7 +18,7 @@ spec: image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }} env: - name: CONFIG_YAML_PATH - value: {{ config_file_path | default("tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml") }} + value: {{ config_file_path | default("DeepSeek-V3.yaml") }} - name: WORKSPACE value: "/root/workspace" # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here. 
@@ -29,11 +29,9 @@ spec: - name: VLLM_ASCEND_REMOTE_URL value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }} - name: RESULT_FILE_PATH - value: {{ result_file_path | default("/root/.cache/tests/ret/test_result.txt") }} - - name: CONTROLLER_NAME - value: {{ controller_name | default("placeholder") }} - - name: SECRET - value: {{ kb_secret | default("placeholder") }} + value: {{ result_file_path | default("/root/.cache/tests/ret") }} + - name: FAIL_TAG + value: {{ fail_tag | default("FAIL_TAG") }} command: - sh - -c @@ -80,7 +78,7 @@ spec: image: {{ image | default("m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11") }} env: - name: CONFIG_YAML_PATH - value: {{ config_file_path | default("tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml") }} + value: {{ config_file_path | default("DeepSeek-V3.yaml") }} - name: WORKSPACE value: "/root/workspace" # Set vLLM version and vLLM-Ascend version here, once there is a new release, update here. @@ -92,6 +90,8 @@ spec: value: {{ vllm_ascend_remote_url | default("https://github.com/vllm-project/vllm-ascend.git") }} - name: RESULT_FILE_PATH value: {{ result_file_path | default("/root/.cache/tests/ret/test_result.txt") }} + - name: FAIL_TAG + value: {{ fail_tag | default("FAIL_TAG") }} command: - sh - -c diff --git a/tests/e2e/nightly/multi_node/scripts/run.sh b/tests/e2e/nightly/multi_node/scripts/run.sh index c76bb20a..78d829cd 100644 --- a/tests/e2e/nightly/multi_node/scripts/run.sh +++ b/tests/e2e/nightly/multi_node/scripts/run.sh @@ -20,6 +20,11 @@ print_section() { echo -e "\n${BLUE}=== $1 ===${NC}" } +print_failure() { + echo -e "${RED}${FAIL_TAG} ✗ ERROR: $1${NC}" + exit 1 +} + # Function to print success messages print_success() { echo -e "${GREEN}✓ $1${NC}" @@ -161,32 +166,24 @@ kill_npu_processes() { sleep 4 } -run_tests() { +run_tests_with_log() { set +e kill_npu_processes - pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py - ret=$? 
+  BASENAME=$(basename "$CONFIG_YAML_PATH" .yaml)
+  # each worker should have log file
+  LOG_FILE="${RESULT_FILE_PATH}/${BASENAME}_worker_${LWS_WORKER_INDEX}.log"
+  mkdir -p ${RESULT_FILE_PATH}
+  pytest -sv tests/e2e/nightly/multi_node/test_multi_node.py 2>&1 | tee $LOG_FILE
+  ret=${PIPESTATUS[0]}
+  set -e
   if [ "$LWS_WORKER_INDEX" -eq 0 ]; then
     if [ $ret -eq 0 ]; then
       print_success "All tests passed!"
     else
-      print_error "Some tests failed!"
-      kubectl delete pod $CONTROLLER_NAME -n vllm-project
+      mv "$LOG_FILE" "${RESULT_FILE_PATH}/error_${BASENAME}_worker_${LWS_WORKER_INDEX}.log"
+      print_failure "Some tests failed!"
     fi
   fi
-  set -e
-}
-
-install_kubectl() {
-  arch=$(uname -m)
-  KUBECTL=/root/.cache/.kube/kubectl
-  if echo "$arch" | grep -qiE "arm|aarch64"; then
-    echo "Detected ARM architecture: $arch"
-    KUBECTL="$KUBECTL"_arm
-  fi
-  install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
-  echo "$SECRET" | base64 -d > /tmp/kubeconfig
-  export KUBECONFIG=/tmp/kubeconfig
 }
 
 main() {
@@ -194,7 +191,6 @@ main() {
   check_and_config
   checkout_src
   install_sys_dependencies
-  install_kubectl
   install_vllm
   install_ais_bench
   # to speed up mooncake build process, install Go here
@@ -203,7 +199,7 @@ .
$SRC_DIR/vllm-ascend/tests/e2e/nightly/multi_node/scripts/build_mooncake.sh \ pooling_async_memecpy_v1 9d96b2e1dd76cc601d76b1b4c5f6e04605cd81d3 cd "$WORKSPACE/source_code/vllm-ascend" - run_tests + run_tests_with_log } main "$@" diff --git a/tests/e2e/nightly/multi_node/test_multi_node.py b/tests/e2e/nightly/multi_node/test_multi_node.py index 19bdf64a..2b23e755 100644 --- a/tests/e2e/nightly/multi_node/test_multi_node.py +++ b/tests/e2e/nightly/multi_node/test_multi_node.py @@ -118,6 +118,11 @@ async def test_multi_node() -> None: port = proxy_port if disaggregated_prefill else server_port # aisbench test aisbench_cases = [acc_cmd, perf_cmd] - run_aisbench_cases(local_model_path, port, aisbench_cases) + run_aisbench_cases(local_model_path, + port, + aisbench_cases, + host_ip=config.cluster_ips[0]) else: - remote_server.hang_until_terminated() + # for the nodes except master, should hang until the task complete + master_url = f"http://{config.cluster_ips[0]}:{server_port}/health" + remote_server.hang_until_terminated(master_url) diff --git a/tools/aisbench.py b/tools/aisbench.py index 5fabc465..14f1468e 100644 --- a/tools/aisbench.py +++ b/tools/aisbench.py @@ -68,6 +68,7 @@ class AisbenchRunner: model: str, port: int, aisbench_config: dict, + host_ip: str = "localhost", verify=True): self.model = model self.dataset_path = maybe_download_from_modelscope( @@ -76,6 +77,7 @@ class AisbenchRunner: assert self.dataset_path is not None and self.model_path is not None, \ f"Failed to download dataset or model: dataset={self.dataset_path}, model={self.model_path}" self.port = port + self.host_ip = host_ip self.task_type = aisbench_config["case_type"] self.request_conf = aisbench_config["request_conf"] self.dataset_conf = aisbench_config.get("dataset_conf") @@ -131,6 +133,7 @@ class AisbenchRunner: content = f.read() content = re.sub(r'model=.*', f'model="{self.model}",', content) content = re.sub(r'host_port.*', f'host_port = {self.port},', content) + content = 
re.sub(r'host_ip.*', f'host_ip = "{self.host_ip}",', content) content = re.sub(r'max_out_len.*', f'max_out_len = {self.max_out_len},', content) content = re.sub(r'batch_size.*', f'batch_size = {self.batch_size},', @@ -238,14 +241,21 @@ class AisbenchRunner: assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, f"Accuracy verification failed. The accuracy of {self.dataset_path} is {acc_value}, which is not within {self.threshold} relative to baseline {self.baseline}." -def run_aisbench_cases(model, port, aisbench_cases, server_args=""): +def run_aisbench_cases(model, + port, + aisbench_cases, + server_args="", + host_ip="localhost"): aisbench_results = [] aisbench_errors = [] for aisbench_case in aisbench_cases: if not aisbench_case: continue try: - with AisbenchRunner(model, port, aisbench_case) as aisbench: + with AisbenchRunner(model=model, + port=port, + host_ip=host_ip, + aisbench_config=aisbench_case) as aisbench: aisbench_results.append(aisbench.result) except Exception as e: aisbench_results.append("")