Files
qwen3-8b-4bit-sft-only-400-…/tokenizer_config.json

249 lines
14 KiB
JSON
Raw Normal View History

{
"add_bos_token": false,
"add_prefix_space": false,
"added_tokens_decoder": {
"151643": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151644": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151645": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151646": {
"content": "<|object_ref_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151647": {
"content": "<|object_ref_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151648": {
"content": "<|box_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151649": {
"content": "<|box_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151650": {
"content": "<|quad_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151651": {
"content": "<|quad_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151652": {
"content": "<|vision_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151653": {
"content": "<|vision_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151654": {
"content": "<|vision_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151655": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151656": {
"content": "<|video_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151657": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151658": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151659": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151660": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151661": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151662": {
"content": "<|fim_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151663": {
"content": "<|repo_name|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151664": {
"content": "<|file_sep|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151665": {
"content": "<tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151666": {
"content": "</tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151667": {
"content": "<think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151668": {
"content": "</think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151669": {
"content": "<|PAD_TOKEN|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": {},
"model_max_length": 40960,
"pad_token": "<|PAD_TOKEN|>",
"padding_side": "left",
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"unk_token": null,
"chat_template": "{{ '\nYou are a helpful linguist with expertise in English etymology. Your task is to extract structured etymological information from a given text describing the origin and development of a word.\n\n**Input:** The user will provide a piece of text about the etymology of a word. If the input is not relevant to etymology (e.g., it discusses grammar, usage, or definitions without tracing origins), you should respond only with `CANNOT_RECOGNISE`.\n\n**Output Format:**\n\nYour output must be **strictly** in the following format:\n\n1. A `<structure>` section containing a directed graph representing the language(s) and their relationships. This section should be placed between {structure_start} and {structure_end}.\n2. A `<content>` section containing detailed linguistic data for each node in the structure. This section should be placed between {content_start} and {content_end}.\n\nUse the following rules:\n\n------\n\n### **Structure Rules:**\n\n- The first node is always **English** or **English 1** .\n\n- Each distinct language is a node. If there are multiple instances of the same language, label them as \"LanguageName Index\", starting from 1.\n\n- Nodes are connected using:\n\n - `\" - \"` for a \"develop from\" relationship.\n - `\" -relationship_name- \"` for other types of relationships (e.g., `\" -possibly_from- \"`).\n\n- Branches start on a new line at the splitting node.\n\n- If a node derives from two or more nodes simultaneously, use parentheses and\n\n `+` to indicate the subgraph:\n\n - e.g., `Lang 1 - (Lang 2 + Lang 3)` means *Lang 1* develops from both *Lang 2* and *Lang 3* .\n- If the input states that a word is \"cognate with\" a specific form in another language, each of those languages must appear as a distinct node.\n------\n\n### **Content Rules:**\n\n- Use `@NodeLabel:` to start each nodes content.\n- For each word form:\n - Use `{{word_form}}` to denote the form.\n - Use `[meaning]` to denote its meaning (if given).\n - Use `({time})` to denote time of attestation or origin (if given).\n- If a node contains multiple languages, use `<LangName>` before the word form to specify which language it belongs to.\n- Multiple `<LangName>` entries can be listed in parallel if they refer to different forms in the same node.\n- Meanings and times should be listed in parallel for clarity.\n- If a word has multiple meanings or senses, list them one after another.\n- Do not include any extra text outside of the `<structure>` and `<content>` tags.\n\n------\n\n### **Example:**\n\n**Input:**\n\nSummary\nOf multiple origins. Partly a borrowing from French. Partly a borrowing from Latin.\nEtymons: French espirit, spirit; Latin spīritus.\n< (i) Anglo-Norman esperite, espirite, espirith, (rare) spirit, Anglo-Norman and Old French, Middle French esperit, espirit (Anglo-Norman and Middle French, French esprit) animating or vital principle, wind, breath, air, action of breathing, divine inspiration, consciousness, emotion, the Holy Spirit, the third person of the Trinity (all early 12th cent.), intelligence (mid 12th cent.), imaginary being, fairy (mid 12th cent.), incorporeal or immaterial being, soul of a dead person, ghost, demon (all late 12th cent.), angel (13th cent.), mind as opposed to body (late 13th cent. in Anglo-Norman, late 14th cent. in continental French), volatile substance (early 14th cent. or earlier), one or other of four substances so named by medieval alchemists (1354), rarefied substance believed to be carried in the blood (1370), disposition of a person, intention, emotional state (late 14th cent.), deeper meaning of a text (late 14th cent.; rare before 1547),\nand its etymon (ii) classical Latin spīritus (u-stem) action of breathing, respiration, breath, (final) breath, (in grammar) aspiration, air, life, consciousness, soul, vital principle animating the world, divine inspiration, essential quality, nature, disposition, ardent disposition, enthusiasm, vigour, arrogance, pride, wind, breeze, wind in the stomach or bowels, scent, perfume, odour, in post-c
}