diff --git a/README.md b/README.md index ded9f91..7370415 100644 --- a/README.md +++ b/README.md @@ -34,24 +34,23 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l ## Prerequisites - Hardware: Atlas 800I A2 Inference series, Atlas A2 Training series +- OS: Linux - Software: * Python >= 3.9 * CANN >= 8.0.0 * PyTorch >= 2.5.1, torch-npu >= 2.5.1.dev20250308 * vLLM (the same version as vllm-ascend) -Find more about how to setup your environment step by step in [here](docs/source/installation.md). - ## Getting Started Please refer to [QuickStart](https://vllm-ascend.readthedocs.io/en/latest/quick_start.html) and [Installation](https://vllm-ascend.readthedocs.io/en/latest/installation.html) for more details. ## Contributing -See [CONTRIBUTING](docs/source/developer_guide/contributing.md) for more details, which is a step-by-step guide to help you set up development environment, build and test. +See [CONTRIBUTING](https://vllm-ascend.readthedocs.io/en/main/developer_guide/contributing.html) for more details, which is a step-by-step guide to help you set up development environment, build and test. We welcome and value any contributions and collaborations: - Please let us know if you encounter a bug by [filing an issue](https://github.com/vllm-project/vllm-ascend/issues) -- Please use [User forum] for usage questions and help. +- Please use [User forum](https://discuss.vllm.ai/c/hardware-support/vllm-ascend-support) for usage questions and help. ## Branch @@ -68,7 +67,7 @@ Below is maintained branches: | v0.7.1-dev | Unmaintained | Only doc fixed is allowed | | v0.7.3-dev | Maintained | CI commitment for vLLM 0.7.3 version | -Please refer to [Versioning policy](docs/source/developer_guide/versioning_policy.md) for more details. +Please refer to [Versioning policy](https://vllm-ascend.readthedocs.io/en/main/developer_guide/versioning_policy.html) for more details. 
## License diff --git a/README.zh.md b/README.zh.md index 6ece548..e61a1d6 100644 --- a/README.zh.md +++ b/README.zh.md @@ -35,20 +35,25 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP ## 准备 - 硬件:Atlas 800I A2 Inference系列、Atlas A2 Training系列 +- 操作系统:Linux - 软件: * Python >= 3.9 * CANN >= 8.0.RC2 * PyTorch >= 2.5.1, torch-npu >= 2.5.1.dev20250308 * vLLM (与vllm-ascend版本一致) -在[此处](docs/source/installation.md),您可以了解如何逐步准备环境。 - ## 开始使用 请查看[快速开始](https://vllm-ascend.readthedocs.io/en/latest/quick_start.html)和[安装指南](https://vllm-ascend.readthedocs.io/en/latest/installation.html)了解更多. -## 分支 +## 贡献 +请参考 [CONTRIBUTING](https://vllm-ascend.readthedocs.io/en/main/developer_guide/contributing.html) 文档了解更多关于开发环境搭建、功能测试以及 PR 提交规范的信息。 +我们欢迎并重视任何形式的贡献与合作: +- 请通过[Issue](https://github.com/vllm-project/vllm-ascend/issues)来告知我们您遇到的任何Bug。 +- 请通过[用户论坛](https://discuss.vllm.ai/c/hardware-support/vllm-ascend-support)来交流使用问题和寻求帮助。 + +## 分支策略 vllm-ascend有主干分支和开发分支。 - **main**: 主干分支,与vLLM的主干分支对应,并通过昇腾CI持续进行质量看护。
specific field + 'vllm_ascend_version', # vllm ascend specific field 'env_vars', 'npu_info', # ascend specific field 'cann_info', # ascend specific field @@ -155,17 +154,27 @@ def get_cmake_version(run_lambda): r'cmake (.*)') +def _parse_version(version, version_tuple): + version_str = version_tuple[-1] + if version_str.startswith('g'): + if '.' in version_str: + git_sha = version_str.split('.')[0][1:] + date = version_str.split('.')[-1][1:] + return f"{version} (git sha: {git_sha}, date: {date})" + else: + git_sha = version_str[1:] # type: ignore + return f"{version} (git sha: {git_sha})" + return version + + def get_vllm_version(): from vllm import __version__, __version_tuple__ + return _parse_version(__version__, __version_tuple__) - if __version__ == "dev": - return "N/A (dev)" - if len(__version_tuple__) == 4: # dev build - git_sha = __version_tuple__[-1][1:] # type: ignore - return f"{__version__} (git sha: {git_sha}" - - return __version__ +def get_vllm_ascend_version(): + from vllm_ascend._version import __version__, __version_tuple__ + return _parse_version(__version__, __version_tuple__) def get_cpu_info(run_lambda): @@ -284,23 +293,6 @@ def get_pip_packages(run_lambda, patterns=None): return pip_version, out -def summarize_vllm_build_flags(): - # This could be a static method if the flags are constant, or dynamic if you need to check environment variables, etc. 
- return 'ROCm: {}; Neuron: {}'.format( - 'Enabled' if os.environ.get('ROCM_HOME') else 'Disabled', - 'Enabled' if os.environ.get('NEURON_CORES') else 'Disabled', - ) - - -def is_xnnpack_available(): - if TORCH_AVAILABLE: - import torch.backends.xnnpack - return str( - torch.backends.xnnpack.enabled) # type: ignore[attr-defined] - else: - return "N/A" - - def get_npu_info(run_lambda): return run_and_read_all(run_lambda, 'npu-smi info') @@ -317,7 +309,7 @@ def get_cann_info(run_lambda): def get_env_vars(): env_vars = '' secret_terms = ('secret', 'token', 'api', 'access', 'password') - report_prefix = ("TORCH", "PYTORCH", "ASCEND_") + report_prefix = ("TORCH", "PYTORCH", "ASCEND_", "ATB_") for k, v in os.environ.items(): if any(term in k.lower() for term in secret_terms): continue @@ -343,9 +335,6 @@ def get_env_info(): conda_packages = get_conda_packages(run_lambda) - vllm_version = get_vllm_version() - vllm_build_flags = summarize_vllm_build_flags() - return SystemEnv( torch_version=version_str, is_debug_build=debug_mode_str, @@ -361,10 +350,9 @@ def get_env_info(): gcc_version=get_gcc_version(run_lambda), clang_version=get_clang_version(run_lambda), cmake_version=get_cmake_version(run_lambda), - is_xnnpack_available=is_xnnpack_available(), cpu_info=get_cpu_info(run_lambda), - vllm_version=vllm_version, - vllm_build_flags=vllm_build_flags, + vllm_version=get_vllm_version(), + vllm_ascend_version=get_vllm_ascend_version(), env_vars=get_env_vars(), npu_info=get_npu_info(run_lambda), cann_info=get_cann_info(run_lambda), @@ -383,7 +371,6 @@ Libc version: {libc_version} Python version: {python_version} Python platform: {python_platform} -Is XNNPACK available: {is_xnnpack_available} CPU: {cpu_info} @@ -400,10 +387,11 @@ env_info_fmt += "\n" env_info_fmt += """ vLLM Version: {vllm_version} -vLLM Build Flags: -{vllm_build_flags} +vLLM Ascend Version: {vllm_ascend_version} +ENV Variables: {env_vars} + NPU: {npu_info} diff --git a/format.sh b/format.sh index 8dc0a31..1d0b940 
100755 --- a/format.sh +++ b/format.sh @@ -140,7 +140,7 @@ echo 'vLLM mypy: Done' # https://github.com/codespell-project/codespell/issues/1915 # Avoiding the "./" prefix and using "/**" globs for directories appears to solve the problem CODESPELL_EXCLUDES=( - '--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**' + '--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**' ) CODESPELL_IGNORE_WORDS=( diff --git a/test.py b/test.py deleted file mode 100644 index c6825fc..0000000 --- a/test.py +++ /dev/null @@ -1,31 +0,0 @@ -import os - -import torch -import torch_npu # noqa: F401 - -device_id = 0 - - -def _device_id_to_physical_device_id(device_id: int) -> int: - if "ASCEND_RT_VISIBLE_DEVICES" in os.environ: - device_ids = os.environ["ASCEND_RT_VISIBLE_DEVICES"].split(",") - if device_ids == [""]: - raise RuntimeError("ASCEND_RT_VISIBLE_DEVICES is set to empty" - "string, which means Ascend NPU support is" - "disabled.") - physical_device_id = device_ids[device_id] - return int(physical_device_id) - else: - return device_id - - -physical_device_id = _device_id_to_physical_device_id(device_id) -print("physical_device_id: " + str(physical_device_id)) - -# return torch.npu.get_device_name(physical_device_id) -torch.npu.get_device_name(device_id) - -for k, v in os.environ.items(): - if k == "ASCEND_RT_VISIBLE_DEVICES": - print(k) - print(v) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 14d38af..4648d76 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -52,7 +52,7 @@ from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend, AscendMetadata) if TYPE_CHECKING: - from vllm.v1.core.scheduler_output import SchedulerOutput + from vllm.v1.core.sched.output import SchedulerOutput logger = init_logger(__name__) diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py index 
127b107..dd840e9 100644 --- a/vllm_ascend/worker/worker_v1.py +++ b/vllm_ascend/worker/worker_v1.py @@ -33,7 +33,7 @@ from vllm.logger import init_logger from vllm.model_executor import set_random_seed from vllm.platforms import current_platform from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE -from vllm.v1.core.scheduler import SchedulerOutput +from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig, KVCacheSpec) from vllm.v1.outputs import ModelRunnerOutput