Fix missing whitespace when using stream_chat with the fast tokenizer

This commit is contained in:
x54-729
2024-02-28 13:33:29 +08:00
parent ad112bb573
commit 5729cef948

View File

@@ -56,14 +56,14 @@ class InternLM2Converter(SpmConverter):
return unk_id return unk_id
def decoder(self, replacement, add_prefix_space): def decoder(self, replacement, add_prefix_space):
return decoders.Sequence( decoders_sequence = [
[
decoders.Replace("▁", " "), decoders.Replace("▁", " "),
decoders.ByteFallback(), decoders.ByteFallback(),
decoders.Fuse(), decoders.Fuse(),
decoders.Strip(content=" ", left=1),
] ]
) if self.proto.normalizer_spec.add_dummy_prefix:
decoders_sequence.append(decoders.Strip(content=" ", left=1))
return decoders.Sequence(decoders_sequence)
def tokenizer(self, proto): def tokenizer(self, proto):
model_type = proto.trainer_spec.model_type model_type = proto.trainer_spec.model_type