From 987520d4202c3a2d3243b5939b27ff2f9afe47c4 Mon Sep 17 00:00:00 2001 From: Parker Sytz Date: Sat, 21 Mar 2026 12:00:38 +0000 Subject: [PATCH] Remove training checkpoints for vLLM compatibility --- checkpoint-25/README.md | 210 ------------------------ checkpoint-25/adapter_config.json | 50 ------ checkpoint-25/adapter_model.safetensors | 3 - checkpoint-25/chat_template.jinja | 109 ------------ checkpoint-25/optimizer.pt | 3 - checkpoint-25/rng_state.pth | 3 - checkpoint-25/scheduler.pt | 3 - checkpoint-25/tokenizer.json | 3 - checkpoint-25/tokenizer_config.json | 18 -- checkpoint-25/trainer_state.json | 48 ------ checkpoint-25/training_args.bin | 3 - checkpoint-50/README.md | 210 ------------------------ checkpoint-50/adapter_config.json | 50 ------ checkpoint-50/adapter_model.safetensors | 3 - checkpoint-50/chat_template.jinja | 109 ------------ checkpoint-50/optimizer.pt | 3 - checkpoint-50/rng_state.pth | 3 - checkpoint-50/scheduler.pt | 3 - checkpoint-50/tokenizer.json | 3 - checkpoint-50/tokenizer_config.json | 18 -- checkpoint-50/trainer_state.json | 69 -------- checkpoint-50/training_args.bin | 3 - checkpoint-75/README.md | 210 ------------------------ checkpoint-75/adapter_config.json | 50 ------ checkpoint-75/adapter_model.safetensors | 3 - checkpoint-75/chat_template.jinja | 109 ------------ checkpoint-75/optimizer.pt | 3 - checkpoint-75/rng_state.pth | 3 - checkpoint-75/scheduler.pt | 3 - checkpoint-75/tokenizer.json | 3 - checkpoint-75/tokenizer_config.json | 18 -- checkpoint-75/trainer_state.json | 83 ---------- checkpoint-75/training_args.bin | 3 - training_meta.json | 11 -- 34 files changed, 1426 deletions(-) delete mode 100644 checkpoint-25/README.md delete mode 100644 checkpoint-25/adapter_config.json delete mode 100644 checkpoint-25/adapter_model.safetensors delete mode 100644 checkpoint-25/chat_template.jinja delete mode 100644 checkpoint-25/optimizer.pt delete mode 100644 checkpoint-25/rng_state.pth delete mode 100644 checkpoint-25/scheduler.pt delete mode 100644 checkpoint-25/tokenizer.json delete mode 100644 checkpoint-25/tokenizer_config.json delete mode 100644 checkpoint-25/trainer_state.json delete mode 100644 checkpoint-25/training_args.bin delete mode 100644 checkpoint-50/README.md delete mode 100644 checkpoint-50/adapter_config.json delete mode 100644 checkpoint-50/adapter_model.safetensors delete mode 100644 checkpoint-50/chat_template.jinja delete mode 100644 checkpoint-50/optimizer.pt delete mode 100644 checkpoint-50/rng_state.pth delete mode 100644 checkpoint-50/scheduler.pt delete mode 100644 checkpoint-50/tokenizer.json delete mode 100644 checkpoint-50/tokenizer_config.json delete mode 100644 checkpoint-50/trainer_state.json delete mode 100644 checkpoint-50/training_args.bin delete mode 100644 checkpoint-75/README.md delete mode 100644 checkpoint-75/adapter_config.json delete mode 100644 checkpoint-75/adapter_model.safetensors delete mode 100644 checkpoint-75/chat_template.jinja delete mode 100644 checkpoint-75/optimizer.pt delete mode 100644 checkpoint-75/rng_state.pth delete mode 100644 checkpoint-75/scheduler.pt delete mode 100644 checkpoint-75/tokenizer.json delete mode 100644 checkpoint-75/tokenizer_config.json delete mode 100644 checkpoint-75/trainer_state.json delete mode 100644 checkpoint-75/training_args.bin delete mode 100644 training_meta.json diff --git a/checkpoint-25/README.md b/checkpoint-25/README.md deleted file mode 100644 index 2304bfb..0000000 --- a/checkpoint-25/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/llama-3.1-8b-instruct-unsloth-bnb-4bit -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/llama-3.1-8b-instruct-unsloth-bnb-4bit -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-25/adapter_config.json b/checkpoint-25/adapter_config.json deleted file mode 100644 index 133c403..0000000 --- a/checkpoint-25/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/llama-3.1-8b-instruct-unsloth-bnb-4bit", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 128, - "lora_bias": false, - "lora_dropout": 0.05, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 64, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "gate_proj", - "up_proj", - "q_proj", - "v_proj", - "k_proj", - "down_proj", - "o_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/checkpoint-25/adapter_model.safetensors b/checkpoint-25/adapter_model.safetensors deleted file mode 100644 index f614e3d..0000000 --- a/checkpoint-25/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8d0fe569ce1029364ebca96d10d81086d9d80b61e63d147777736a24d53fcee1 -size 671149168 diff --git a/checkpoint-25/chat_template.jinja b/checkpoint-25/chat_template.jinja deleted file mode 100644 index 33089ac..0000000 --- a/checkpoint-25/chat_template.jinja +++ /dev/null @@ -1,109 +0,0 @@ -{{- bos_token }} -{%- if custom_tools is defined %} - {%- set tools = custom_tools %} -{%- endif %} -{%- if not tools_in_user_message is defined %} - {%- set tools_in_user_message = true %} -{%- endif %} -{%- if not date_string is defined %} - {%- set date_string = "26 Jul 2024" %} -{%- endif %} -{%- if not tools is defined %} - {%- set tools = none %} -{%- endif %} - -{#- This block extracts the system message, so we can slot it into the right place. #} -{%- if messages[0]['role'] == 'system' %} - {%- set system_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} -{%- else %} - {%- set system_message = "" %} -{%- endif %} - -{#- System message + builtin tools #} -{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} -{%- if builtin_tools is defined or tools is not none %} - {{- "Environment: ipython\n" }} -{%- endif %} -{%- if builtin_tools is defined %} - {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} -{%- endif %} -{{- "Cutting Knowledge Date: December 2023\n" }} -{{- "Today Date: " + date_string + "\n\n" }} -{%- if tools is not none and not tools_in_user_message %} - {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} -{%- endif %} -{{- system_message }} -{{- "<|eot_id|>" }} - -{#- Custom tools are passed in a user message with some extra guidance #} -{%- if tools_in_user_message and not tools is none %} - {#- Extract the first user message so we can plug it in here #} - {%- if messages | length != 0 %} - {%- set first_user_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} - {%- else %} - {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} -{%- endif %} - {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} - {{- "Given the following functions, please respond with a JSON for a function call " }} - {{- "with its proper arguments that best answers the given prompt.\n\n" }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} - {{- first_user_message + "<|eot_id|>"}} -{%- endif %} - -{%- for message in messages %} - {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} - {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} - {%- elif 'tool_calls' in message %} - {%- if not message.tool_calls|length == 1 %} - {{- raise_exception("This model only supports single tool-calls at once!") }} - {%- endif %} - {%- set tool_call = message.tool_calls[0].function %} - {%- if builtin_tools is defined and tool_call.name in builtin_tools %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- "<|python_tag|>" + tool_call.name + ".call(" }} - {%- for arg_name, arg_val in tool_call.arguments | items %} - {{- arg_name + '="' + arg_val + '"' }} - {%- if not loop.last %} - {{- ", " }} - {%- endif %} - {%- endfor %} - {{- ")" }} - {%- else %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- '{"name": "' + tool_call.name + '", ' }} - {{- '"parameters": ' }} - {{- tool_call.arguments | tojson }} - {{- "}" }} - {%- endif %} - {%- if builtin_tools is defined %} - {#- This means we're in ipython mode #} - {{- "<|eom_id|>" }} - {%- else %} - {{- "<|eot_id|>" }} - {%- endif %} - {%- elif message.role == "tool" or message.role == "ipython" %} - {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} - {%- if message.content is mapping or message.content is iterable %} - {{- message.content | tojson }} - {%- else %} - {{- message.content }} - {%- endif %} - {{- "<|eot_id|>" }} - {%- endif %} -{%- endfor %} -{%- if add_generation_prompt %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} -{%- endif %} diff --git a/checkpoint-25/optimizer.pt b/checkpoint-25/optimizer.pt deleted file mode 100644 index 34d4d90..0000000 --- a/checkpoint-25/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2b64a60838ae0a9171b19da0ff3086d63d1a503d0893c66465d8d843a5487d74 -size 341318740 diff --git a/checkpoint-25/rng_state.pth b/checkpoint-25/rng_state.pth deleted file mode 100644 index 00ff604..0000000 --- a/checkpoint-25/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3bf3dae1abff48569d9d049bbb7f9b89f97740b3fc62f200b19277a7791737a0 -size 14244 diff --git a/checkpoint-25/scheduler.pt b/checkpoint-25/scheduler.pt deleted file mode 100644 index 2794a61..0000000 --- a/checkpoint-25/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d105f31dedcf00768eb5bd7ea1e537f0c09b691bc5bcbc36185bec92da37c4f4 -size 1064 diff --git a/checkpoint-25/tokenizer.json b/checkpoint-25/tokenizer.json deleted file mode 100644 index 1c1d8d5..0000000 --- a/checkpoint-25/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/checkpoint-25/tokenizer_config.json b/checkpoint-25/tokenizer_config.json deleted file mode 100644 index b2a5914..0000000 --- a/checkpoint-25/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|eot_id|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/checkpoint-25/trainer_state.json b/checkpoint-25/trainer_state.json deleted file mode 100644 index a5098f6..0000000 --- a/checkpoint-25/trainer_state.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 1.0, - "eval_steps": 500, - "global_step": 25, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.40404040404040403, - "grad_norm": 0.8408341407775879, - "learning_rate": 0.0001975626331552507, - "loss": 2.052530860900879, - "step": 10 - }, - { - "epoch": 0.8080808080808081, - "grad_norm": 0.45152202248573303, - "learning_rate": 0.00017877079733177184, - "loss": 0.6234618663787842, - "step": 20 - } - ], - "logging_steps": 10, - "max_steps": 75, - "num_input_tokens_seen": 0, - "num_train_epochs": 3, - "save_steps": 500, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 1.303330320064512e+16, - "train_batch_size": 4, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-25/training_args.bin b/checkpoint-25/training_args.bin deleted file mode 100644 index 40b0805..0000000 --- a/checkpoint-25/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e490fc50836c737a6dc2fc7c021b5244e2a1cedc0e657b4d12b52ce5a40486cf -size 5176 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md deleted file mode 100644 index 2304bfb..0000000 --- a/checkpoint-50/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/llama-3.1-8b-instruct-unsloth-bnb-4bit -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/llama-3.1-8b-instruct-unsloth-bnb-4bit -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json deleted file mode 100644 index 133c403..0000000 --- a/checkpoint-50/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/llama-3.1-8b-instruct-unsloth-bnb-4bit", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 128, - "lora_bias": false, - "lora_dropout": 0.05, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 64, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "gate_proj", - "up_proj", - "q_proj", - "v_proj", - "k_proj", - "down_proj", - "o_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors deleted file mode 100644 index 17f24b9..0000000 --- a/checkpoint-50/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a8e3cb488aa92d2abcd2cb8948be4cda740bf15641cd1d659c328fa63884fc0c -size 671149168 diff --git a/checkpoint-50/chat_template.jinja b/checkpoint-50/chat_template.jinja deleted file mode 100644 index 33089ac..0000000 --- a/checkpoint-50/chat_template.jinja +++ /dev/null @@ -1,109 +0,0 @@ -{{- bos_token }} -{%- if custom_tools is defined %} - {%- set tools = custom_tools %} -{%- endif %} -{%- if not tools_in_user_message is defined %} - {%- set tools_in_user_message = true %} -{%- endif %} -{%- if not date_string is defined %} - {%- set date_string = "26 Jul 2024" %} -{%- endif %} -{%- if not tools is defined %} - {%- set tools = none %} -{%- endif %} - -{#- This block extracts the system message, so we can slot it into the right place. #} -{%- if messages[0]['role'] == 'system' %} - {%- set system_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} -{%- else %} - {%- set system_message = "" %} -{%- endif %} - -{#- System message + builtin tools #} -{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} -{%- if builtin_tools is defined or tools is not none %} - {{- "Environment: ipython\n" }} -{%- endif %} -{%- if builtin_tools is defined %} - {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} -{%- endif %} -{{- "Cutting Knowledge Date: December 2023\n" }} -{{- "Today Date: " + date_string + "\n\n" }} -{%- if tools is not none and not tools_in_user_message %} - {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} -{%- endif %} -{{- system_message }} -{{- "<|eot_id|>" }} - -{#- Custom tools are passed in a user message with some extra guidance #} -{%- if tools_in_user_message and not tools is none %} - {#- Extract the first user message so we can plug it in here #} - {%- if messages | length != 0 %} - {%- set first_user_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} - {%- else %} - {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} -{%- endif %} - {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} - {{- "Given the following functions, please respond with a JSON for a function call " }} - {{- "with its proper arguments that best answers the given prompt.\n\n" }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} - {{- first_user_message + "<|eot_id|>"}} -{%- endif %} - -{%- for message in messages %} - {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} - {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} - {%- elif 'tool_calls' in message %} - {%- if not message.tool_calls|length == 1 %} - {{- raise_exception("This model only supports single tool-calls at once!") }} - {%- endif %} - {%- set tool_call = message.tool_calls[0].function %} - {%- if builtin_tools is defined and tool_call.name in builtin_tools %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- "<|python_tag|>" + tool_call.name + ".call(" }} - {%- for arg_name, arg_val in tool_call.arguments | items %} - {{- arg_name + '="' + arg_val + '"' }} - {%- if not loop.last %} - {{- ", " }} - {%- endif %} - {%- endfor %} - {{- ")" }} - {%- else %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- '{"name": "' + tool_call.name + '", ' }} - {{- '"parameters": ' }} - {{- tool_call.arguments | tojson }} - {{- "}" }} - {%- endif %} - {%- if builtin_tools is defined %} - {#- This means we're in ipython mode #} - {{- "<|eom_id|>" }} - {%- else %} - {{- "<|eot_id|>" }} - {%- endif %} - {%- elif message.role == "tool" or message.role == "ipython" %} - {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} - {%- if message.content is mapping or message.content is iterable %} - {{- message.content | tojson }} - {%- else %} - {{- message.content }} - {%- endif %} - {{- "<|eot_id|>" }} - {%- endif %} -{%- endfor %} -{%- if add_generation_prompt %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} -{%- endif %} diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt deleted file mode 100644 index c82787d..0000000 --- a/checkpoint-50/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4ff5e7c632c85ca4ee4ee8cbbacd0c0cb8c9faabc2ecdd59c4a8302622c64b3b -size 341318740 diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth deleted file mode 100644 index f3989fc..0000000 --- a/checkpoint-50/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e39116dec32f30aadb0dc55cb4b43936aafcdba55921e458cef4991413304d67 -size 14244 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt deleted file mode 100644 index 1b0e04f..0000000 --- a/checkpoint-50/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f4ea451c7eaa84a8ed25da4595282dc6650d58ea3a6396ad8b74a951bad920ac -size 1064 diff --git a/checkpoint-50/tokenizer.json b/checkpoint-50/tokenizer.json deleted file mode 100644 index 1c1d8d5..0000000 --- a/checkpoint-50/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/checkpoint-50/tokenizer_config.json b/checkpoint-50/tokenizer_config.json deleted file mode 100644 index b2a5914..0000000 --- a/checkpoint-50/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|eot_id|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json deleted file mode 100644 index b4f7f77..0000000 --- a/checkpoint-50/trainer_state.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 2.0, - "eval_steps": 500, - "global_step": 50, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.40404040404040403, - "grad_norm": 0.8408341407775879, - "learning_rate": 0.0001975626331552507, - "loss": 2.052530860900879, - "step": 10 - }, - { - "epoch": 0.8080808080808081, - "grad_norm": 0.45152202248573303, - "learning_rate": 0.00017877079733177184, - "loss": 0.6234618663787842, - "step": 20 - }, - { - "epoch": 1.202020202020202, - "grad_norm": 0.4560008943080902, - "learning_rate": 0.00014480667839875786, - "loss": 0.4599455833435059, - "step": 30 - }, - { - "epoch": 1.606060606060606, - "grad_norm": 0.3496752977371216, - "learning_rate": 0.00010221220871531869, - "loss": 0.40735950469970705, - "step": 40 - }, - { - "epoch": 2.0, - "grad_norm": 0.3579687178134918, - "learning_rate": 5.91916387756535e-05, - "loss": 0.386263370513916, - "step": 50 - } - ], - "logging_steps": 10, - "max_steps": 75, - "num_input_tokens_seen": 0, - "num_train_epochs": 3, - "save_steps": 500, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 2.620333385883648e+16, - "train_batch_size": 4, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin deleted file mode 100644 index 40b0805..0000000 --- a/checkpoint-50/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e490fc50836c737a6dc2fc7c021b5244e2a1cedc0e657b4d12b52ce5a40486cf -size 5176 diff --git a/checkpoint-75/README.md b/checkpoint-75/README.md deleted file mode 100644 index 2304bfb..0000000 --- a/checkpoint-75/README.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -base_model: unsloth/llama-3.1-8b-instruct-unsloth-bnb-4bit -library_name: peft -pipeline_tag: text-generation -tags: -- base_model:adapter:unsloth/llama-3.1-8b-instruct-unsloth-bnb-4bit -- lora -- sft -- transformers -- trl -- unsloth ---- - -# Model Card for Model ID - - - - - -## Model Details - -### Model Description - - - - - -- **Developed by:** [More Information Needed] -- **Funded by [optional]:** [More Information Needed] -- **Shared by [optional]:** [More Information Needed] -- **Model type:** [More Information Needed] -- **Language(s) (NLP):** [More Information Needed] -- **License:** [More Information Needed] -- **Finetuned from model [optional]:** [More Information Needed] - -### Model Sources [optional] - - - -- **Repository:** [More Information Needed] -- **Paper [optional]:** [More Information Needed] -- **Demo [optional]:** [More Information Needed] - -## Uses - - - -### Direct Use - - - -[More Information Needed] - -### Downstream Use [optional] - - - -[More Information Needed] - -### Out-of-Scope Use - - - -[More Information Needed] - -## Bias, Risks, and Limitations - - - -[More Information Needed] - -### Recommendations - - - -Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. - -## How to Get Started with the Model - -Use the code below to get started with the model. - -[More Information Needed] - -## Training Details - -### Training Data - - - -[More Information Needed] - -### Training Procedure - - - -#### Preprocessing [optional] - -[More Information Needed] - - -#### Training Hyperparameters - -- **Training regime:** [More Information Needed] - -#### Speeds, Sizes, Times [optional] - - - -[More Information Needed] - -## Evaluation - - - -### Testing Data, Factors & Metrics - -#### Testing Data - - - -[More Information Needed] - -#### Factors - - - -[More Information Needed] - -#### Metrics - - - -[More Information Needed] - -### Results - -[More Information Needed] - -#### Summary - - - -## Model Examination [optional] - - - -[More Information Needed] - -## Environmental Impact - - - -Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). - -- **Hardware Type:** [More Information Needed] -- **Hours used:** [More Information Needed] -- **Cloud Provider:** [More Information Needed] -- **Compute Region:** [More Information Needed] -- **Carbon Emitted:** [More Information Needed] - -## Technical Specifications [optional] - -### Model Architecture and Objective - -[More Information Needed] - -### Compute Infrastructure - -[More Information Needed] - -#### Hardware - -[More Information Needed] - -#### Software - -[More Information Needed] - -## Citation [optional] - - - -**BibTeX:** - -[More Information Needed] - -**APA:** - -[More Information Needed] - -## Glossary [optional] - - - -[More Information Needed] - -## More Information [optional] - -[More Information Needed] - -## Model Card Authors [optional] - -[More Information Needed] - -## Model Card Contact - -[More Information Needed] -### Framework versions - -- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-75/adapter_config.json b/checkpoint-75/adapter_config.json deleted file mode 100644 index 133c403..0000000 --- a/checkpoint-75/adapter_config.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "alora_invocation_tokens": null, - "alpha_pattern": {}, - "arrow_config": null, - "auto_mapping": { - "base_model_class": "LlamaForCausalLM", - "parent_library": "transformers.models.llama.modeling_llama", - "unsloth_fixed": true - }, - "base_model_name_or_path": "unsloth/llama-3.1-8b-instruct-unsloth-bnb-4bit", - "bias": "none", - "corda_config": null, - "ensure_weight_tying": false, - "eva_config": null, - "exclude_modules": null, - "fan_in_fan_out": false, - "inference_mode": true, - "init_lora_weights": true, - "layer_replication": null, - "layers_pattern": null, - "layers_to_transform": null, - "loftq_config": {}, - "lora_alpha": 128, - "lora_bias": false, - "lora_dropout": 0.05, - "megatron_config": null, - "megatron_core": "megatron.core", - "modules_to_save": null, - "peft_type": "LORA", - "peft_version": "0.18.1", - "qalora_group_size": 16, - "r": 64, - "rank_pattern": {}, - "revision": null, - "target_modules": [ - "gate_proj", - "up_proj", - "q_proj", - "v_proj", - "k_proj", - "down_proj", - "o_proj" - ], - "target_parameters": null, - "task_type": "CAUSAL_LM", - "trainable_token_indices": null, - "use_dora": false, - "use_qalora": false, - "use_rslora": false -} \ No newline at end of file diff --git a/checkpoint-75/adapter_model.safetensors b/checkpoint-75/adapter_model.safetensors deleted file mode 100644 index d8f98d2..0000000 --- a/checkpoint-75/adapter_model.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c3e8f4f847a493915fcc01a23ec51d7cf016dd860fa4c67adb80338daa31f920 -size 671149168 diff --git a/checkpoint-75/chat_template.jinja b/checkpoint-75/chat_template.jinja deleted file mode 100644 index 33089ac..0000000 --- a/checkpoint-75/chat_template.jinja +++ /dev/null @@ -1,109 +0,0 @@ -{{- bos_token }} -{%- if custom_tools is defined %} - {%- set tools = custom_tools %} -{%- endif %} -{%- if not tools_in_user_message is defined %} - {%- set tools_in_user_message = true %} -{%- endif %} -{%- if not date_string is defined %} - {%- set date_string = "26 Jul 2024" %} -{%- endif %} -{%- if not tools is defined %} - {%- set tools = none %} -{%- endif %} - -{#- This block extracts the system message, so we can slot it into the right place. #} -{%- if messages[0]['role'] == 'system' %} - {%- set system_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} -{%- else %} - {%- set system_message = "" %} -{%- endif %} - -{#- System message + builtin tools #} -{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} -{%- if builtin_tools is defined or tools is not none %} - {{- "Environment: ipython\n" }} -{%- endif %} -{%- if builtin_tools is defined %} - {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} -{%- endif %} -{{- "Cutting Knowledge Date: December 2023\n" }} -{{- "Today Date: " + date_string + "\n\n" }} -{%- if tools is not none and not tools_in_user_message %} - {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} -{%- endif %} -{{- system_message }} -{{- "<|eot_id|>" }} - -{#- Custom tools are passed in a user message with some extra guidance #} -{%- if tools_in_user_message and not tools is none %} - {#- Extract the first user message so we can plug it in here #} - {%- if messages | length != 0 %} - {%- set first_user_message = messages[0]['content']|trim %} - {%- set messages = messages[1:] %} - {%- else %} - {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} -{%- endif %} - {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} - {{- "Given the following functions, please respond with a JSON for a function call " }} - {{- "with its proper arguments that best answers the given prompt.\n\n" }} - {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} - {{- "Do not use variables.\n\n" }} - {%- for t in tools %} - {{- t | tojson(indent=4) }} - {{- "\n\n" }} - {%- endfor %} - {{- first_user_message + "<|eot_id|>"}} -{%- endif %} - -{%- for message in messages %} - {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} - {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} - {%- elif 'tool_calls' in message %} - {%- if not message.tool_calls|length == 1 %} - {{- raise_exception("This model only supports single tool-calls at once!") }} - {%- endif %} - {%- set tool_call = message.tool_calls[0].function %} - {%- if builtin_tools is defined and tool_call.name in builtin_tools %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- "<|python_tag|>" + tool_call.name + ".call(" }} - {%- for arg_name, arg_val in tool_call.arguments | items %} - {{- arg_name + '="' + arg_val + '"' }} - {%- if not loop.last %} - {{- ", " }} - {%- endif %} - {%- endfor %} - {{- ")" }} - {%- else %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} - {{- '{"name": "' + tool_call.name + '", ' }} - {{- '"parameters": ' }} - {{- tool_call.arguments | tojson }} - {{- "}" }} - {%- endif %} - {%- if builtin_tools is defined %} - {#- This means we're in ipython mode #} - {{- "<|eom_id|>" }} - {%- else %} - {{- "<|eot_id|>" }} - {%- endif %} - {%- elif message.role == "tool" or message.role == "ipython" %} - {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} - {%- if message.content is mapping or message.content is iterable %} - {{- message.content | tojson }} - {%- else %} - {{- message.content }} - {%- endif %} - {{- "<|eot_id|>" }} - {%- endif %} -{%- endfor %} -{%- if add_generation_prompt %} - {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} -{%- endif %} diff --git a/checkpoint-75/optimizer.pt b/checkpoint-75/optimizer.pt deleted file mode 100644 index 27147cc..0000000 --- a/checkpoint-75/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bb801c3776dd2efe85b061b397ace478f04e91dc7400f739800f4cc96d137c2c -size 341318740 diff --git a/checkpoint-75/rng_state.pth b/checkpoint-75/rng_state.pth deleted file mode 100644 index 8aa7649..0000000 --- a/checkpoint-75/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:92992d002901264afccc896072a8261a977bab77a592be6c8bbf26fd11b43bda -size 14244 diff --git a/checkpoint-75/scheduler.pt b/checkpoint-75/scheduler.pt deleted file mode 100644 index 49a368c..0000000 --- a/checkpoint-75/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:babd43118aa66a4f5266d730539cf7f09611158b169d9e63dbcb83f6bbaa8626 -size 1064 diff --git a/checkpoint-75/tokenizer.json b/checkpoint-75/tokenizer.json deleted file mode 100644 index 1c1d8d5..0000000 --- a/checkpoint-75/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b -size 17209920 diff --git a/checkpoint-75/tokenizer_config.json b/checkpoint-75/tokenizer_config.json deleted file mode 100644 index b2a5914..0000000 --- a/checkpoint-75/tokenizer_config.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "backend": "tokenizers", - "bos_token": "<|begin_of_text|>", - "clean_up_tokenization_spaces": true, - "eos_token": "<|eot_id|>", - "from_slow": true, - "is_local": false, - "legacy": false, - "model_input_names": [ - "input_ids", - "attention_mask" - ], - "model_max_length": 131072, - "pad_token": "<|finetune_right_pad_id|>", - "padding_side": "right", - "tokenizer_class": "TokenizersBackend", - "unk_token": null -} diff --git a/checkpoint-75/trainer_state.json b/checkpoint-75/trainer_state.json deleted file mode 100644 index fb4d8ce..0000000 --- a/checkpoint-75/trainer_state.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "best_global_step": null, - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 3.0, - "eval_steps": 500, - "global_step": 75, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.40404040404040403, - "grad_norm": 0.8408341407775879, - "learning_rate": 0.0001975626331552507, - "loss": 2.052530860900879, - "step": 10 - }, - { - "epoch": 0.8080808080808081, - "grad_norm": 0.45152202248573303, - "learning_rate": 0.00017877079733177184, - "loss": 0.6234618663787842, - "step": 20 - }, - { - "epoch": 1.202020202020202, - "grad_norm": 0.4560008943080902, - "learning_rate": 0.00014480667839875786, - "loss": 0.4599455833435059, - "step": 30 - }, - { - "epoch": 1.606060606060606, - "grad_norm": 0.3496752977371216, - "learning_rate": 0.00010221220871531869, - "loss": 0.40735950469970705, - "step": 40 - }, - { - "epoch": 2.0, - "grad_norm": 0.3579687178134918, - "learning_rate": 5.91916387756535e-05, - "loss": 0.386263370513916, - "step": 50 - }, - { - "epoch": 2.404040404040404, - "grad_norm": 0.3528299629688263, - "learning_rate": 2.403129154167153e-05, - "loss": 0.30219953060150145, - "step": 60 - }, - { - "epoch": 2.808080808080808, - "grad_norm": 0.30917567014694214, - "learning_rate": 3.5035085477190143e-06, - "loss": 0.2843065977096558, - "step": 70 - } - ], - "logging_steps": 10, - "max_steps": 75, - "num_input_tokens_seen": 0, - "num_train_epochs": 3, - "save_steps": 500, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": true - }, - "attributes": {} - } - }, - "total_flos": 3.941111418544128e+16, - "train_batch_size": 4, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-75/training_args.bin b/checkpoint-75/training_args.bin deleted file mode 100644 index 40b0805..0000000 --- a/checkpoint-75/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e490fc50836c737a6dc2fc7c021b5244e2a1cedc0e657b4d12b52ce5a40486cf -size 5176 diff --git a/training_meta.json b/training_meta.json deleted file mode 100644 index 936cd5a..0000000 --- a/training_meta.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "model": "meta-llama/Llama-3.1-8B-Instruct", - "method": "QLoRA", - "lora_rank": 64, - "lora_alpha": 128, - "epochs": 3, - "batch_size": 32, - "learning_rate": 0.0002, - "n_examples": 790, - "max_seq_length": 4096 -} \ No newline at end of file