初始化项目，由ModelHub XC社区提供模型

Model: AI-ModelScope/DeepSeek-Coder-V2-Lite-Instruct Source: Original Platform
2026-05-22 17:26:13 +08:00
commit b4b414c780
15 changed files with 207838 additions and 0 deletions
--- a/tokenization_deepseek_fast.py
+++ b/tokenization_deepseek_fast.py
@@ -0,0 +1,38 @@
+from typing import List, Optional, Union
+
+
+from transformers.models.llama import LlamaTokenizerFast
+
+
+class DeepseekTokenizerFast(LlamaTokenizerFast):
+
+    def convert_ids_to_tokens(
+        self, ids: Union[int, List[int]], skip_special_tokens: bool = False
+    ) -> Union[str, List[str]]:
+        """
+        Converts a single index or a sequence of indices in a token or a sequence of tokens, using the vocabulary and
+        added tokens.
+
+        Args:
+            ids (`int` or `List[int]`):
+                The token id (or token ids) to convert to tokens.
+            skip_special_tokens (`bool`, *optional*, defaults to `False`):
+                Whether or not to remove special tokens in the decoding.
+
+        Returns:
+            `str` or `List[str]`: The decoded token(s).
+        """
+        if isinstance(ids, int):
+            return self._convert_id_to_token(ids)
+        tokens = []
+        for index in ids:
+            index = int(index)
+            if skip_special_tokens and index in self.all_special_ids:
+                continue
+            token = self._tokenizer.id_to_token(index)
+            tokens.append(token if token is not None else "")
+        return tokens
+    
+    def _convert_id_to_token(self, index: int) -> Optional[str]:
+        token = self._tokenizer.id_to_token(int(index))
+        return token if token is not None else ""