Files
dsa_llm/tokenizer_config.json

186 lines
20 KiB
JSON
Raw Normal View History

{
"added_tokens_decoder": {
"199998": {
"content": "<|startoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"199999": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200000": {
"content": "<|reserved_200000|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200001": {
"content": "<|reserved_200001|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200002": {
"content": "<|return|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200003": {
"content": "<|constrain|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200004": {
"content": "<|reserved_200004|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200005": {
"content": "<|channel|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200006": {
"content": "<|start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200007": {
"content": "<|end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200008": {
"content": "<|message|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200009": {
"content": "<|reserved_200009|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200010": {
"content": "<|reserved_200010|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200011": {
"content": "<|reserved_200011|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200012": {
"content": "<|call|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200013": {
"content": "<|reserved_200013|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200014": {
"content": "<|reserved_200014|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200015": {
"content": "<|reserved_200015|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200016": {
"content": "<|reserved_200016|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200017": {
"content": "<|reserved_200017|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"200018": {
"content": "<|endofprompt|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"bos_token": "<|startoftext|>",
"clean_up_tokenization_spaces": false,
"eos_token": "<|return|>",
"extra_special_tokens": {},
"model_input_names": [
"input_ids",
"attention_mask"
],
"model_max_length": 131072,
"pad_token": "<|reserved_200017|>",
"padding_side": "left",
"tokenizer_class": "PreTrainedTokenizerFast",
"unk_token": null,
"chat_template": "{# Copyright 2025-present Unsloth. Apache 2.0 License. Unsloth chat template fixes. Edited from ggml-org & OpenAI #}\n{#-\n In addition to the normal inputs of `messages` and `tools`, this template also accepts the\n following kwargs:\n - \"builtin_tools\": A list, can contain \"browser\" and/or \"python\".\n - \"model_identity\": A string that optionally describes the model identity.\n - \"reasoning_effort\": A string that describes the reasoning effort, defaults to \"medium\".\n #}\n\n{#- Tool Definition Rendering ============================================== #}\n{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%}\n {%- if param_spec.type == \"array\" -%}\n {%- if param_spec['items'] -%}\n {%- if param_spec['items']['type'] == \"string\" -%}\n {{- \"string[]\" }}\n {%- elif param_spec['items']['type'] == \"number\" -%}\n {{- \"number[]\" }}\n {%- elif param_spec['items']['type'] == \"integer\" -%}\n {{- \"number[]\" }}\n {%- elif param_spec['items']['type'] == \"boolean\" -%}\n {{- \"boolean[]\" }}\n {%- else -%}\n {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%}\n {%- if inner_type == \"object | object\" or inner_type|length > 50 -%}\n {{- \"any[]\" }}\n {%- else -%}\n {{- inner_type + \"[]\" }}\n {%- endif -%}\n {%- endif -%}\n {%- if param_spec.nullable -%}\n {{- \" | null\" }}\n {%- endif -%}\n {%- else -%}\n {{- \"any[]\" }}\n {%- if param_spec.nullable -%}\n {{- \" | null\" }}\n {%- endif -%}\n {%- endif -%}\n {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%}\n {#- Handle array of types like [\"object\", \"object\"] from Union[dict, list] #}\n {%- if param_spec.type | length > 1 -%}\n {{- param_spec.type | join(\" | \") }}\n {%- else -%}\n {{- param_spec.type[0] }}\n {%- endif -%}\n {%- elif param_spec.oneOf -%}\n {#- Handle oneOf schemas - check for complex unions and fallback to any #}\n {%- set has_object_variants = false -%}\n {%- for variant in param_spec.oneOf -%}\n {%- if variant.type == \"object\" -%}\n {%- set has_object_variants = true -%}\n {%- endif -%}\n {%- endfor -%}\n {%- if has_object_variants and param_spec.oneOf|length > 1 -%}\n {{- \"any\" }}\n {%- else -%}\n {%- for variant in param_spec.oneOf -%}\n {{- render_typescript_type(variant, required_params) -}}\n {%- if variant.description %}\n {{- \"// \" + variant.description }}\n {%- endif -%}\n {%- if variant.default is defined %}\n {{ \"// default: \" + variant.default|tojson }}\n {%- endif -%}\n {%- if not loop.last %}\n {{- \" | \" }}\n {% endif -%}\n {%- endfor -%}\n {%- endif -%}\n {%- elif param_spec.type == \"string\" -%}\n {%- if param_spec.enum -%}\n {{- '\"' + param_spec.enum|join('\" | \"') + '\"' -}}\n {%- else -%}\n {{- \"string\" }}\n {%- if param_spec.nullable %}\n {{- \" | null\" }}\n {%- endif -%}\n {%- endif -%}\n {%- elif param_spec.type == \"number\" -%}\n {{- \"number\" }}\n {%- elif param_spec.type == \"integer\" -%}\n {{- \"number\" }}\n {%- elif param_spec.type == \"boolean\" -%}\n {{- \"boolean\" }}\n\n {%- elif param_spec.type == \"object\" -%}\n {%- if param_spec.properties -%}\n {{- \"{\\n\" }}\n {%-
}