Add C++ runtime and Python API for NeMo Canary models (#2352)
This commit is contained in:
@@ -281,9 +281,14 @@ def export_decoder(canary_model):
|
||||
|
||||
|
||||
def export_tokens(canary_model):
    """Dump the tokenizer vocabulary of ``canary_model`` to ``./tokens.txt``.

    One line is written per id in ``range(canary_model.tokenizer.vocab_size)``
    using the format ``<token> <id>``. The token text is obtained from
    ``canary_model.tokenizer.ids_to_text([i])``; when it begins with a space,
    that space is replaced by "▁" (U+2581) before writing.

    Args:
      canary_model: Any object exposing ``tokenizer.vocab_size`` (an int) and
        ``tokenizer.ids_to_text(list_of_ids)`` returning a ``str``.
    """
    underline = "▁"
    tokenizer = canary_model.tokenizer

    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i in range(tokenizer.vocab_size):
            s = tokenizer.ids_to_text([i])

            # Use startswith() rather than s[0]: an id that decodes to an
            # empty string must not abort the export with an IndexError.
            if s.startswith(" "):
                s = underline + s[1:]

            f.write(f"{s} {i}\n")

    print("Saved to tokens.txt")
|
||||
|
||||
|
||||
@@ -289,7 +289,13 @@ def main():
|
||||
tokens.append(t)
|
||||
print("len(tokens)", len(tokens))
|
||||
print("tokens", tokens)
|
||||
|
||||
text = "".join([id2token[i] for i in tokens])
|
||||
|
||||
underline = "▁"
|
||||
# underline = b"\xe2\x96\x81".decode()
|
||||
|
||||
text = text.replace(underline, " ").strip()
|
||||
print("text:", text)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user