diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
index f5238c96..6d408f1c 100644
--- a/.github/workflows/_e2e_test.yaml
+++ b/.github/workflows/_e2e_test.yaml
@@ -115,6 +115,7 @@ jobs:
           pytest -sv --durations=0 tests/e2e/singlecard/test_sampler.py
           pytest -sv --durations=0 tests/e2e/singlecard/test_vlm.py
           pytest -sv --durations=0 tests/e2e/singlecard/test_xlite.py
+          pytest -sv --durations=0 tests/e2e/singlecard/test_models.py
           pytest -sv --durations=0 tests/e2e/singlecard/pooling/
           pytest -sv --durations=0 tests/e2e/singlecard/compile/test_norm_quant_fusion.py
           pytest -sv --durations=0 tests/e2e/singlecard/test_multistream_overlap_shared_expert.py
diff --git a/tests/e2e/singlecard/test_models.py b/tests/e2e/singlecard/test_models.py
new file mode 100644
index 00000000..20fe6f77
--- /dev/null
+++ b/tests/e2e/singlecard/test_models.py
@@ -0,0 +1,36 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+# Adapted from vllm/tests/entrypoints/llm/test_guided_generate.py
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+
+from tests.e2e.conftest import VllmRunner
+
+os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
+
+
+def test_minicpm_2b() -> None:
+    example_prompts = [
+        "Hello, my name is",
+    ]
+    max_tokens = 5
+
+    with VllmRunner("openbmb/MiniCPM-2B-sft-bf16",
+                    max_model_len=512,
+                    gpu_memory_utilization=0.7) as runner:
+        runner.generate_greedy(example_prompts, max_tokens)
diff --git a/vllm_ascend/ops/linear.py b/vllm_ascend/ops/linear.py
index 51dc011a..2dd9689b 100644
--- a/vllm_ascend/ops/linear.py
+++ b/vllm_ascend/ops/linear.py
@@ -36,7 +36,7 @@ from vllm.model_executor.layers.quantization.base_config import \
 from vllm.model_executor.utils import set_weight_attrs
 
 from vllm_ascend.ops.linear_op import get_parallel_op, get_replicated_op
-from vllm_ascend.utils import maybe_trans_nz
+from vllm_ascend.utils import enable_sp, maybe_trans_nz
 
 
 class AscendUnquantizedLinearMethod(UnquantizedLinearMethod):
@@ -219,6 +219,9 @@ class AscendRowParallelLinear(RowParallelLinear):
     and the original TP group in other modules.
     """
 
+    # NOTE: Globally unique prefix identifier used in SP scenarios
+    unique_prefix_idx = 0
+
     def __init__(
         self,
         input_size: int,
@@ -234,14 +237,15 @@ class AscendRowParallelLinear(RowParallelLinear):
         return_bias: bool = True,
         disable_tp: bool = False,
     ):
-        compilation_config = get_current_vllm_config().compilation_config
-        # TODO(shaopeng-666): Remove the visual check after the mm model reconstruction is complete.
-        # TODO(MengqingCao): Remove the empty string check, after specifying the prefix in linear layers of some models in the vLLM.
-        if prefix in compilation_config.static_forward_context and \
-            prefix != "" and \
-            "visual" not in prefix:
-            raise ValueError(f"Duplicate layer name: {prefix}")
-        compilation_config.static_forward_context[prefix] = self
+        # TODO(kunpengW-code): Specify the prefix in the linear layers of some models in vLLM.
+        if enable_sp():
+            compilation_config = get_current_vllm_config().compilation_config
+            unique_prefix = prefix
+            if prefix in compilation_config.static_forward_context:
+                unique_prefix = f"{prefix}.unique_prefix{AscendRowParallelLinear.unique_prefix_idx}"
+                AscendRowParallelLinear.unique_prefix_idx += 1
+            self.unique_prefix = unique_prefix
+            compilation_config.static_forward_context[unique_prefix] = self
 
         self.custom_op, self.tp_rank, self.tp_size = get_parallel_op(
             disable_tp, prefix, self, "row")
diff --git a/vllm_ascend/ops/linear_op.py b/vllm_ascend/ops/linear_op.py
index eec63dd3..980fd2a2 100644
--- a/vllm_ascend/ops/linear_op.py
+++ b/vllm_ascend/ops/linear_op.py
@@ -484,6 +484,10 @@ class SequenceColumnParallelOp(CustomColumnParallelOp):
 
 class SequenceRowParallelOp(CustomRowParallelOp):
 
+    def __init__(self, layer):
+        super().__init__(layer)
+        self.unique_prefix = None
+
     def apply_impl(
         self, input_: torch.Tensor
     ) -> Union[torch.Tensor, tuple[torch.Tensor, Optional[Parameter]]]:
@@ -509,7 +513,7 @@ class SequenceRowParallelOp(CustomRowParallelOp):
                 bias=bias_)
         else:
             output = torch.ops.vllm.matmul_and_reduce(input_parallel,
-                                                      self.prefix)
+                                                      self.unique_prefix)
 
         output_bias = self.bias if self.skip_bias_add else None
         return output, output_bias
@@ -602,6 +606,7 @@ class SequenceRowParallelOp(CustomRowParallelOp):
         super().update_attrs()
         self.input_is_parallel = self.layer.input_is_parallel
         self.reduce_results = self.layer.reduce_results
+        self.unique_prefix = self.layer.unique_prefix
 
 
 def _get_column_parallel_op(