commit cfc3cca73a0be0568ccbe0d1ccfb9fdcbd554067 Author: ModelHub XC Date: Fri Jun 19 14:40:13 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: AI-ModelScope/MolGen-7b Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..ecfc343 --- /dev/null +++ b/README.md @@ -0,0 
+1,92 @@ +--- +license: apache-2.0 +pipeline_tag: text-generation +tags: +- chemistry +- biology +- text-generation-inference +--- + +## 💡 Model description +This repo contains a large molecular generative model built with the SELFIES molecular language. + +## 🔍 Intended uses +You can use the model to generate molecules from scratch (i.e., by inputting only the bos_token), or input a partial structure for the model to complete. + +## 🛠️ How to use +We have provided two types of examples. You can modify the input, generation parameters, etc., according to your needs. + +- De novo molecule generation example: +```python +from modelscope import AutoTokenizer, AutoModelForCausalLM +import torch + +tokenizer = AutoTokenizer.from_pretrained("AI-ModelScope/MolGen-7b") +model = AutoModelForCausalLM.from_pretrained( + "AI-ModelScope/MolGen-7b", + load_in_8bit=True, + torch_dtype=torch.float16, + device_map="auto", + ) +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +sf_input = tokenizer(tokenizer.bos_token, return_tensors="pt").to(device) + +molecules = model.generate(input_ids=sf_input["input_ids"], + attention_mask=sf_input["attention_mask"], + do_sample=True, + max_new_tokens=10, + top_p=0.75, + top_k=30, + return_dict_in_generate=False, + num_return_sequences=5, + ) +sf_output = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True).replace(" ","") for g in molecules] +['[C][C][=C][C][=C][Branch2][Ring1][=Branch2][C][=Branch1]', +'[C][N][C][C][C][Branch2][Ring2][Ring2][N][C]', +'[C][O][C][=C][C][=C][C][Branch2][Ring1][Branch1]', +'[C][N][C][C][C@H1][Branch2][Ring1][Branch2][N][Branch1]', +'[C][=C][C][Branch2][Ring1][#C][C][=Branch1][C][=O]'] +``` + +- Molecular completion example: +```python +from modelscope import AutoTokenizer, AutoModelForCausalLM +import torch + +tokenizer = AutoTokenizer.from_pretrained("AI-ModelScope/MolGen-7b") +model = AutoModelForCausalLM.from_pretrained( + "AI-ModelScope/MolGen-7b", + load_in_8bit=True, + 
torch_dtype=torch.float16, + device_map="auto", + ) +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +sf_input = tokenizer("[C][N][O]", return_tensors="pt").to(device) + +molecules = model.generate(input_ids=sf_input["input_ids"], + attention_mask=sf_input["attention_mask"], + do_sample=True, + max_new_tokens=10, + top_p=0.75, + top_k=30, + return_dict_in_generate=False, + num_return_sequences=5, + ) +sf_output = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True).replace(" ","") for g in molecules] +['[C][N][O][C][=Branch1][C][=O][/C][Ring1][=Branch1][=C][/C][=C]', +'[C][N][O][/C][=Branch1][#Branch1][=C][/N][Branch1][C][C][C][C]', +'[C][N][O][/C][=C][/C][=C][C][=Branch1][C][=O][C][=C]', +'[C][N][O][C][=Branch1][C][=O][N][Branch1][C][C][C][=Branch1]', +'[C][N][O][Ring1][Branch1][C][C][C][C][C][C][C][C]'] +``` + +## 📚 Citation +If you use our repository, please cite: +```bibtex +@article{fang2023molecular, + title={Molecular Language Model as Multi-task Generator}, + author={Fang, Yin and Zhang, Ningyu and Chen, Zhuo and Fan, Xiaohui and Chen, Huajun}, + journal={arXiv preprint arXiv:2301.11259}, + year={2023} +} +``` diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..65f3046 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,183 @@ +{ + "": 184, + "[#Branch1]": 12, + "[#Branch2]": 178, + "[#C-1]": 181, + "[#C]": 165, + "[#N+1]": 120, + "[#N]": 23, + "[#P]": 87, + "[#Ring1]": 128, + "[#S]": 40, + "[-/Ring1]": 129, + "[-/Ring2]": 156, + "[-\\Ring1]": 164, + "[-\\Ring2]": 34, + "[/123I]": 7, + "[/B]": 125, + "[/Br]": 149, + "[/C-1]": 68, + "[/C@@H1]": 13, + "[/C@@]": 14, + "[/C@H1]": 177, + "[/C@]": 31, + "[/CH0]": 126, + "[/CH1-1]": 88, + "[/CH1]": 10, + "[/C]": 118, + "[/Cl]": 155, + "[/F]": 6, + "[/I]": 83, + "[/N+1]": 152, + "[/NH0]": 55, + "[/NH1]": 24, + "[/N]": 67, + "[/O-1]": 171, + "[/OH0]": 174, + "[/O]": 69, + "[/P@@]": 16, + "[/P]": 79, + "[/S+1]": 47, + "[/S@@+1]": 21, + 
"[/S@@]": 160, + "[/S@]": 77, + "[/S]": 112, + "[/Si]": 38, + "[11CH3]": 107, + "[123I]": 124, + "[124I]": 136, + "[125I]": 39, + "[127I]": 36, + "[17F]": 143, + "[18F]": 97, + "[18OH1]": 76, + "[3H]": 137, + "[=B]": 25, + "[=Branch1]": 33, + "[=Branch2]": 100, + "[=CH0]": 78, + "[=C]": 35, + "[=N+1]": 144, + "[=N-1]": 161, + "[=NH0]": 27, + "[=N]": 105, + "[=O+1]": 96, + "[=O]": 30, + "[=P+1]": 98, + "[=P@@H1]": 32, + "[=P@@]": 53, + "[=P@H1]": 147, + "[=P@]": 157, + "[=PH1]": 132, + "[=P]": 86, + "[=Ring1]": 163, + "[=Ring2]": 52, + "[=S+1]": 133, + "[=S@+1]": 167, + "[=S@@+1]": 42, + "[=S@@H1]": 106, + "[=S@@]": 5, + "[=S@]": 44, + "[=SH1]": 117, + "[=S]": 73, + "[B-1]": 122, + "[B@-1]": 179, + "[B@@-1]": 17, + "[B@@H1-1]": 56, + "[B@H1-1]": 121, + "[BH1-1]": 54, + "[BH2-1]": 91, + "[BH3-1]": 75, + "[B]": 108, + "[Br+1]": 166, + "[Br]": 62, + "[Branch1]": 104, + "[Branch2]": 140, + "[C+1]": 65, + "[C-1]": 8, + "[C@@H1]": 15, + "[C@@]": 173, + "[C@H1]": 29, + "[C@]": 9, + "[CH0]": 28, + "[CH1+1]": 41, + "[CH1-1]": 4, + "[CH1]": 115, + "[CH2-1]": 110, + "[CH2]": 146, + "[C]": 139, + "[Cl]": 11, + "[F+1]": 50, + "[F]": 66, + "[I]": 63, + "[N+1]": 150, + "[N-1]": 84, + "[N@+1]": 111, + "[N@@+1]": 131, + "[N@@H1+1]": 162, + "[NH0]": 148, + "[NH1]": 130, + "[N]": 19, + "[O+1]": 85, + "[O-1]": 58, + "[OH0]": 71, + "[O]": 20, + "[P+1]": 82, + "[P@+1]": 158, + "[P@@+1]": 94, + "[P@@H1]": 170, + "[P@@]": 59, + "[P@H1]": 81, + "[P@]": 175, + "[PH1]": 134, + "[PH2]": 51, + "[P]": 80, + "[Ring1]": 70, + "[Ring2]": 90, + "[S+1]": 61, + "[S@+1]": 72, + "[S@@+1]": 57, + "[S@@H1]": 45, + "[S@@]": 154, + "[S@]": 93, + "[SH0]": 182, + "[SH1]": 123, + "[SH2]": 101, + "[SH3]": 102, + "[S]": 60, + "[Si]": 119, + "[Sn+2]": 183, + "[Sn+3]": 135, + "[SnH1]": 109, + "[SnH2]": 99, + "[Sn]": 114, + "[\\123I]": 89, + "[\\B-1]": 64, + "[\\B]": 159, + "[\\Br]": 142, + "[\\C-1]": 127, + "[\\C@@H1]": 22, + "[\\C@@]": 46, + "[\\C@H1]": 49, + "[\\C@]": 145, + "[\\CH0]": 176, + "[\\CH1-1]": 153, + 
"[\\C]": 43, + "[\\Cl]": 37, + "[\\F]": 26, + "[\\I]": 172, + "[\\N+1]": 92, + "[\\NH1]": 103, + "[\\N]": 138, + "[\\O-1]": 141, + "[\\O]": 18, + "[\\P@@]": 113, + "[\\P]": 74, + "[\\S+1]": 95, + "[\\S@@+1]": 169, + "[\\S@@]": 180, + "[\\S@]": 116, + "[\\SH1]": 151, + "[\\S]": 48, + "[\\Si]": 168 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000..d1c8a62 --- /dev/null +++ b/config.json @@ -0,0 +1,25 @@ +{ + "_name_or_path": "/newdisk3/data/fangyin/molgen-7b", + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 0, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 11008, + "max_position_embeddings": 2048, + "max_sequence_length": 2048, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "pad_token_id": 1, + "rms_norm_eps": 1e-06, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.28.1", + "use_cache": false, + "vocab_size": 185 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..4f89f14 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 2, + "pad_token_id": 1, + "transformers_version": "4.28.1", + "use_cache": false +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..5e7f1fd --- /dev/null +++ b/merges.txt @@ -0,0 +1 @@ +#version: 0.2 diff --git a/pytorch_model-00001-of-00003.bin b/pytorch_model-00001-of-00003.bin new file mode 100644 index 0000000..bf47c90 --- /dev/null +++ b/pytorch_model-00001-of-00003.bin @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:da504aaf2eb3f239d8bd426ed5f23d053edea155c3c530a94c25da35a844c77d +size 9985914313 diff --git a/pytorch_model-00002-of-00003.bin b/pytorch_model-00002-of-00003.bin new file mode 100644 index 0000000..71625a2 --- /dev/null +++ b/pytorch_model-00002-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8958a6681621d3944a1a96c5693c561445252440edb4c5d92f819d337b9f4546 +size 9894801074 diff --git a/pytorch_model-00003-of-00003.bin b/pytorch_model-00003-of-00003.bin new file mode 100644 index 0000000..3cc438b --- /dev/null +++ b/pytorch_model-00003-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93cd23ede03274657841b80382c8f690b670e10b000b5ea245a51e66857b27b1 +size 6030551951 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000..432cd68 --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,330 @@ +{ + "metadata": { + "total_size": 25911156736 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00003-of-00003.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + 
"model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.post_attention_layernorm.weight": 
"pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + 
"model.layers.13.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.up_proj.weight": 
"pytorch_model-00002-of-00003.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + 
"model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.gate_proj.weight": 
"pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + 
"model.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.mlp.down_proj.weight": 
"pytorch_model-00003-of-00003.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + 
"model.layers.27.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.3.input_layernorm.weight": 
"pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + 
"model.layers.31.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.v_proj.weight": 
"pytorch_model-00001-of-00003.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.post_attention_layernorm.weight": 
"pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.norm.weight": "pytorch_model-00003-of-00003.bin" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..d0fd5b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,33 @@ +{ + "bos_token": "", + "cls_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + 
"single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..fadd146 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,1714 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 1, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 3, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": true + }, + { + "id": 4, + "content": "[CH1-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 5, + "content": "[=S@@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 6, + "content": "[/F]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 7, + "content": "[/123I]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 8, + "content": "[C-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 9, + "content": "[C@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 10, + "content": "[/CH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 11, + "content": "[Cl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 12, + 
"content": "[#Branch1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 13, + "content": "[/C@@H1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 14, + "content": "[/C@@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 15, + "content": "[C@@H1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 16, + "content": "[/P@@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 17, + "content": "[B@@-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 18, + "content": "[\\O]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 19, + "content": "[N]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 20, + "content": "[O]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 21, + "content": "[/S@@+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 22, + "content": "[\\C@@H1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 23, + "content": "[#N]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 24, + "content": "[/NH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 25, + "content": "[=B]", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": true, + "special": false + }, + { + "id": 26, + "content": "[\\F]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 27, + "content": "[=NH0]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 28, + "content": "[CH0]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 29, + "content": "[C@H1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 30, + "content": "[=O]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 31, + "content": "[/C@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 32, + "content": "[=P@@H1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 33, + "content": "[=Branch1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 34, + "content": "[-\\Ring2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 35, + "content": "[=C]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 36, + "content": "[127I]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 37, + "content": "[\\Cl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 38, + "content": "[/Si]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 39, + "content": "[125I]", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 40, + "content": "[#S]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 41, + "content": "[CH1+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 42, + "content": "[=S@@+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 43, + "content": "[\\C]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 44, + "content": "[=S@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 45, + "content": "[S@@H1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 46, + "content": "[\\C@@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 47, + "content": "[/S+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 48, + "content": "[\\S]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 49, + "content": "[\\C@H1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 50, + "content": "[F+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 51, + "content": "[PH2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 52, + "content": "[=Ring2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + 
"special": false + }, + { + "id": 53, + "content": "[=P@@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 54, + "content": "[BH1-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 55, + "content": "[/NH0]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 56, + "content": "[B@@H1-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 57, + "content": "[S@@+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 58, + "content": "[O-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 59, + "content": "[P@@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 60, + "content": "[S]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 61, + "content": "[S+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 62, + "content": "[Br]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 63, + "content": "[I]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 64, + "content": "[\\B-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 65, + "content": "[C+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 66, + "content": "[F]", + "single_word": false, + "lstrip": 
false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 67, + "content": "[/N]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 68, + "content": "[/C-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 69, + "content": "[/O]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 70, + "content": "[Ring1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 71, + "content": "[OH0]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 72, + "content": "[S@+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 73, + "content": "[=S]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 74, + "content": "[\\P]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 75, + "content": "[BH3-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 76, + "content": "[18OH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 77, + "content": "[/S@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 78, + "content": "[=CH0]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 79, + "content": "[/P]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 80, + 
"content": "[P]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 81, + "content": "[P@H1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 82, + "content": "[P+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 83, + "content": "[/I]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 84, + "content": "[N-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 85, + "content": "[O+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 86, + "content": "[=P]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 87, + "content": "[#P]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 88, + "content": "[/CH1-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 89, + "content": "[\\123I]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 90, + "content": "[Ring2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 91, + "content": "[BH2-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 92, + "content": "[\\N+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 93, + "content": "[S@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": true, + "special": false + }, + { + "id": 94, + "content": "[P@@+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 95, + "content": "[\\S+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 96, + "content": "[=O+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 97, + "content": "[18F]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 98, + "content": "[=P+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 99, + "content": "[SnH2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 100, + "content": "[=Branch2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 101, + "content": "[SH2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 102, + "content": "[SH3]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 103, + "content": "[\\NH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 104, + "content": "[Branch1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 105, + "content": "[=N]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 106, + "content": "[=S@@H1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 107, + "content": 
"[11CH3]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 108, + "content": "[B]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 109, + "content": "[SnH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 110, + "content": "[CH2-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 111, + "content": "[N@+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 112, + "content": "[/S]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 113, + "content": "[\\P@@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 114, + "content": "[Sn]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 115, + "content": "[CH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 116, + "content": "[\\S@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 117, + "content": "[=SH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 118, + "content": "[/C]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 119, + "content": "[Si]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 120, + "content": "[#N+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": true, + "special": false + }, + { + "id": 121, + "content": "[B@H1-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 122, + "content": "[B-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 123, + "content": "[SH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 124, + "content": "[123I]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 125, + "content": "[/B]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 126, + "content": "[/CH0]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 127, + "content": "[\\C-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 128, + "content": "[#Ring1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 129, + "content": "[-/Ring1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 130, + "content": "[NH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 131, + "content": "[N@@+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 132, + "content": "[=PH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 133, + "content": "[=S+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 134, + 
"content": "[PH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 135, + "content": "[Sn+3]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 136, + "content": "[124I]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 137, + "content": "[3H]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 138, + "content": "[\\N]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 139, + "content": "[C]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 140, + "content": "[Branch2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 141, + "content": "[\\O-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 142, + "content": "[\\Br]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 143, + "content": "[17F]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 144, + "content": "[=N+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 145, + "content": "[\\C@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 146, + "content": "[CH2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 147, + "content": "[=P@H1]", + "single_word": false, + "lstrip": false, + "rstrip": 
false, + "normalized": true, + "special": false + }, + { + "id": 148, + "content": "[NH0]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 149, + "content": "[/Br]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 150, + "content": "[N+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 151, + "content": "[\\SH1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 152, + "content": "[/N+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 153, + "content": "[\\CH1-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 154, + "content": "[S@@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 155, + "content": "[/Cl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 156, + "content": "[-/Ring2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 157, + "content": "[=P@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 158, + "content": "[P@+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 159, + "content": "[\\B]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 160, + "content": "[/S@@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 161, + 
"content": "[=N-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 162, + "content": "[N@@H1+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 163, + "content": "[=Ring1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 164, + "content": "[-\\Ring1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 165, + "content": "[#C]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 166, + "content": "[Br+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 167, + "content": "[=S@+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 168, + "content": "[\\Si]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 169, + "content": "[\\S@@+1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 170, + "content": "[P@@H1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 171, + "content": "[/O-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 172, + "content": "[\\I]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 173, + "content": "[C@@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 174, + "content": "[/OH0]", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 175, + "content": "[P@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 176, + "content": "[\\CH0]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 177, + "content": "[/C@H1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 178, + "content": "[#Branch2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 179, + "content": "[B@-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 180, + "content": "[\\S@@]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 181, + "content": "[#C-1]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 182, + "content": "[SH0]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 183, + "content": "[Sn+2]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": true, + "special": false + }, + { + "id": 184, + "content": "", + "single_word": false, + "lstrip": true, + "rstrip": false, + "normalized": true, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": true + }, + "post_processor": { + "type": "RobertaProcessing", + "sep": [ + "", + 2 + ], + "cls": [ + "", + 0 + ], + "trim_offsets": true, + "add_prefix_space": false + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + 
"dropout": null, + "unk_token": null, + "continuing_subword_prefix": "", + "end_of_word_suffix": "", + "fuse_unk": false, + "byte_fallback": false, + "vocab": { + "": 0, + "": 1, + "": 2, + "": 3 + }, + "merges": [] + } +} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..400dc19 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,64 @@ +{ + "add_prefix_space": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": true, + "cls_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "errors": "replace", + "mask_token": { + "__type": "AddedToken", + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "model_max_length": 1024, + "pad_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "tokenizer_class": "BartTokenizer", + "trim_offsets": true, + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + } +} diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..ab89896 --- /dev/null +++ b/vocab.json @@ -0,0 +1 @@ +{"":0,"":1,"":2,"":3} \ No newline at end of file