From 482263cf87d193903df86bab88b7edd384222637 Mon Sep 17 00:00:00 2001 From: Aleksey Korshuk Date: Wed, 5 Apr 2023 03:30:25 +0000 Subject: [PATCH] Upload tokenizer --- special_tokens_map.json | 6 ++++++ tokenizer.model | 3 +++ tokenizer_config.json | 9 +++++++++ 3 files changed, 18 insertions(+) create mode 100644 special_tokens_map.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..318f913 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + "unk_token": "" +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..44bf596 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,9 @@ +{ + "bos_token": "", + "eos_token": "", + "model_max_length": 2048, + "padding_side": "right", + "special_tokens_map_file": "models/decapoda-research_llama-7b-hf/special_tokens_map.json", + "tokenizer_class": "LlamaTokenizer", + "unk_token": "" +}