Fix missing whitespace when using stream_chat with the fast tokenizer
This commit is contained in:
@@ -56,14 +56,14 @@ class InternLM2Converter(SpmConverter):
|
|||||||
return unk_id
|
return unk_id
|
||||||
|
|
||||||
def decoder(self, replacement, add_prefix_space):
|
def decoder(self, replacement, add_prefix_space):
|
||||||
return decoders.Sequence(
|
decoders_sequence = [
|
||||||
[
|
|
||||||
decoders.Replace("▁", " "),
|
decoders.Replace("▁", " "),
|
||||||
decoders.ByteFallback(),
|
decoders.ByteFallback(),
|
||||||
decoders.Fuse(),
|
decoders.Fuse(),
|
||||||
decoders.Strip(content=" ", left=1),
|
|
||||||
]
|
]
|
||||||
)
|
if self.proto.normalizer_spec.add_dummy_prefix:
|
||||||
|
decoders_sequence.append(decoders.Strip(content=" ", left=1))
|
||||||
|
return decoders.Sequence(decoders_sequence)
|
||||||
|
|
||||||
def tokenizer(self, proto):
|
def tokenizer(self, proto):
|
||||||
model_type = proto.trainer_spec.model_type
|
model_type = proto.trainer_spec.model_type
|
||||||
|
|||||||
Reference in New Issue
Block a user