23 lines
695 B
JSON
23 lines
695 B
JSON
{
  "used_fix_mistral_regex": true,
  "base": "/mnt/irdc_afs/alothmanmushari/Qwen/Qwen3-8B-Base",
  "adapters": [
    "output/OALL_DATASETS_NEWSCRIPT_DETECTED_AND_CLEANED_2e5LR/sft/final"
  ],
  "tokenizer_source": "/mnt/irdc_afs/alothmanmushari/Qwen/Qwen3-8B-Base",
  "dtype": "bfloat16",
  "device_map": "cpu",
  "trust_remote_code": true,
  "tokenizer_class": "Qwen2Tokenizer",
  "model_class": "Qwen3ForCausalLM",
  "tokenizer_vocab_size": 151669,
  "model_embedding_rows": 151936,
  "bos_token": null,
  "bos_token_id": null,
  "eos_token": "<|endoftext|>",
  "eos_token_id": 151643,
  "pad_token": "<|endoftext|>",
  "pad_token_id": 151643,
  "im_start_id": 151644,
  "im_end_id": 151645
}