diff --git a/vllm_ascend/attention.py b/vllm_ascend/attention.py index 2aa915c..5771a11 100644 --- a/vllm_ascend/attention.py +++ b/vllm_ascend/attention.py @@ -744,10 +744,19 @@ class AscendAttentionBackendImpl(AttentionImpl): block_tables = attn_metadata.decode_metadata.block_tables if attn_metadata.decode_metadata else None # Details of kv_cache arrangement in attention quantization # are implemented by quant_method. - layer.quant_method.apply(layer, query, key, value, self.key_cache, - self.value_cache, self.scale, - self.seq_lens_tensor_cpu, block_tables, - isPrefill, attn_metadata, output) + layer.quant_method.apply( + layer, + query, + key, + value, + self.key_cache, + self.value_cache, + self.scale, + block_tables, + isPrefill, + attn_metadata, + output, + seq_lens_tensor_cpu=self.seq_lens_tensor_cpu) else: if self.key_cache is not None: torch_npu._npu_reshape_and_cache(key=key, diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py index 3130142..51f201e 100644 --- a/vllm_ascend/quantization/quant_config.py +++ b/vllm_ascend/quantization/quant_config.py @@ -88,7 +88,8 @@ class AscendQuantConfig(QuantizationConfig): if self.is_layer_skipped_ascend(prefix, self.packed_modules_mapping): return UnquantizedLinearMethod() - return AscendLinearMethod(self, prefix) + return AscendLinearMethod(self, prefix, + self.packed_modules_mapping) if isinstance(layer, Attention) and \ 'fa_quant_type' in self.quant_description.keys(): return AscendKVCacheMethod(self, prefix) @@ -138,9 +139,10 @@ class AscendLinearMethod(LinearMethodBase): quant_config: The Ascend quantization config. 
""" - def __init__(self, quant_config: AscendQuantConfig, prefix: str) -> None: + def __init__(self, quant_config: AscendQuantConfig, prefix: str, + packed_modules_mapping: Dict[str, Any]) -> None: self.quantizer = AscendQuantizer.get_quantizer( - quant_config.quant_description, prefix) + quant_config.quant_description, prefix, packed_modules_mapping) self.quant_method = self.quantizer.build_linear_method() def create_weights( @@ -225,12 +227,29 @@ class AscendKVCacheMethod(BaseKVCacheMethod): if hasattr(self.quant_method, "process_weights_after_loading"): self.quant_method.process_weights_after_loading(layer) - def apply(self, layer: torch.nn.Module, query: torch.Tensor, - key: torch.Tensor, value: torch.Tensor, - kv_cache: List[torch.Tensor], scale: torch.Tensor, - seq_lens_tensor_cpu: int, block_tables: torch.Tensor, - isPrefill: bool, attn_metadata, output) -> torch.Tensor: - return self.quant_method.apply(layer, query, key, value, kv_cache, - scale, seq_lens_tensor_cpu, - block_tables, isPrefill, attn_metadata, - output) + def apply(self, + layer: torch.nn.Module, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + k_cache: List[torch.Tensor], + v_cache: List[torch.Tensor], + scale: torch.Tensor, + block_tables: torch.Tensor, + isPrefill: bool, + attn_metadata, + output, + seq_lens_tensor_cpu: Optional[int] = None) -> torch.Tensor: + return self.quant_method.apply(layer, + query, + key, + value, + k_cache, + v_cache, + scale, + block_tables, + isPrefill, + attn_metadata.attn_mask, + attn_metadata.slot_mapping, + output, + seq_lens_tensor_cpu=seq_lens_tensor_cpu) diff --git a/vllm_ascend/quantization/quantizer.py b/vllm_ascend/quantization/quantizer.py index b7c8fe9..eee5159 100644 --- a/vllm_ascend/quantization/quantizer.py +++ b/vllm_ascend/quantization/quantizer.py @@ -16,7 +16,7 @@ # import importlib -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional CUSTOMIZED_QUANTIZER_TYPE: List[str] = [] @@ -25,7 +25,11 @@ 
class AscendQuantizer: """An interface to different quantization implementations for ascend hardwares.""" @classmethod - def get_quantizer(cls, quant_config: Dict[str, Any], prefix: str): + def get_quantizer(cls, + quant_config: Dict[str, Any], + prefix: str, + packed_modules_mapping: Optional[Dict[str, + Any]] = None): # TODO: Need a param to choose quantization algorithms. quantization_algorithm = '' @@ -35,11 +39,12 @@ class AscendQuantizer: try: module = importlib.import_module("mindie_turbo") MindIETurboQuantizer = module.MindIETurboQuantizer - except Exception: + except ImportError: raise NotImplementedError( "There is no available ascend quantizer.") - return MindIETurboQuantizer.get_quantizer(quant_config, prefix) + return MindIETurboQuantizer.get_quantizer(quant_config, prefix, + packed_modules_mapping) def build_linear_method(self): raise NotImplementedError