初始化项目,由ModelHub XC社区提供模型
Model: AI-ModelScope/DeepSeek-Coder-V2-Lite-Instruct Source: Original Platform
This commit is contained in:
38
tokenization_deepseek_fast.py
Normal file
38
tokenization_deepseek_fast.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from typing import List, Optional, Union
|
||||
|
||||
|
||||
from transformers.models.llama import LlamaTokenizerFast
|
||||
|
||||
|
||||
class DeepseekTokenizerFast(LlamaTokenizerFast):
    """`LlamaTokenizerFast` subclass whose id-to-token lookups never yield `None`.

    Both overridden methods ask the underlying `self._tokenizer` object for
    the token of an id and substitute an empty string whenever that lookup
    returns `None`.
    """

    def convert_ids_to_tokens(
        self, ids: Union[int, List[int]], skip_special_tokens: bool = False
    ) -> Union[str, List[str]]:
        """
        Converts a single index or a sequence of indices in a token or a sequence of tokens, using the vocabulary and
        added tokens.

        Args:
            ids (`int` or `List[int]`):
                The token id (or token ids) to convert to tokens.
            skip_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not to remove special tokens in the decoding. Only applied when `ids` is a sequence; a
                single `int` is always converted.

        Returns:
            `str` or `List[str]`: The decoded token(s). An id for which the lookup returns `None` is rendered as an
            empty string.
        """
        if isinstance(ids, int):
            return self._convert_id_to_token(ids)

        # Read the special ids once (not once per id) and keep them in a set
        # so each membership test is constant-time. When nothing is skipped
        # the attribute is not touched at all.
        special_ids = frozenset(self.all_special_ids) if skip_special_tokens else frozenset()

        # Each element is coerced with int() before the membership test, so
        # integer-like items compare against the special ids as plain ints.
        return [
            self._convert_id_to_token(index)
            for index in map(int, ids)
            if index not in special_ids
        ]

    def _convert_id_to_token(self, index: int) -> Optional[str]:
        """Return the token for `index`, or an empty string if the lookup gives `None`.

        The `Optional[str]` annotation is kept for interface stability; this
        implementation itself never returns `None`.
        """
        token = self._tokenizer.id_to_token(int(index))
        return token if token is not None else ""
|
||||
Reference in New Issue
Block a user