初始化项目,由ModelHub XC社区提供模型
Model: fava-uw/fava-model Source: Original Platform
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a52b51160518605bb4a367c6663100c28cb9ea35173d3848304eea8e6c581565
|
||||
size 1831
|
||||
32
open_instruct/1694454900.0843773/hparams.yml
Normal file
32
open_instruct/1694454900.0843773/hparams.yml
Normal file
@@ -0,0 +1,32 @@
|
||||
checkpointing_steps: '50'
|
||||
config_name: null
|
||||
dataset_config_name: null
|
||||
dataset_name: null
|
||||
gradient_accumulation_steps: 16
|
||||
learning_rate: 2.0e-05
|
||||
logging_steps: 1
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.1
|
||||
lora_rank: 64
|
||||
low_cpu_mem_usage: false
|
||||
lr_scheduler_type: cosine
|
||||
max_seq_length: 2048
|
||||
max_train_steps: 550
|
||||
model_name_or_path: meta-llama/Llama-2-7b-chat-hf
|
||||
num_train_epochs: 2
|
||||
output_dir: multi_passage_w_qa
|
||||
overwrite_cache: false
|
||||
per_device_train_batch_size: 1
|
||||
preprocessing_num_workers: 16
|
||||
report_to: tensorboard
|
||||
resume_from_checkpoint: null
|
||||
save_merged_lora_model: false
|
||||
seed: null
|
||||
tokenizer_name: meta-llama/Llama-2-7b-chat-hf
|
||||
train_file: /gscratch/h2lab/abhikam/llm-factuality/data/training/open-instruct-multi-qa.json
|
||||
use_flash_attn: true
|
||||
use_lora: false
|
||||
use_slow_tokenizer: true
|
||||
warmup_ratio: 0.03
|
||||
weight_decay: 0.0
|
||||
with_tracking: true
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8a416f1323b83ccf64cef4a6e5115f7fb5422fe96c7430351aad5d473f796c45
|
||||
size 1916
|
||||
32
open_instruct/1694473353.4580002/hparams.yml
Normal file
32
open_instruct/1694473353.4580002/hparams.yml
Normal file
@@ -0,0 +1,32 @@
|
||||
checkpointing_steps: '50'
|
||||
config_name: null
|
||||
dataset_config_name: null
|
||||
dataset_name: null
|
||||
gradient_accumulation_steps: 16
|
||||
learning_rate: 2.0e-05
|
||||
logging_steps: 1
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.1
|
||||
lora_rank: 64
|
||||
low_cpu_mem_usage: false
|
||||
lr_scheduler_type: cosine
|
||||
max_seq_length: 2048
|
||||
max_train_steps: 550
|
||||
model_name_or_path: meta-llama/Llama-2-7b-chat-hf
|
||||
num_train_epochs: 2
|
||||
output_dir: multi_passage_w_qa
|
||||
overwrite_cache: false
|
||||
per_device_train_batch_size: 1
|
||||
preprocessing_num_workers: 16
|
||||
report_to: tensorboard
|
||||
resume_from_checkpoint: multi_passage_w_qa/step_100
|
||||
save_merged_lora_model: false
|
||||
seed: null
|
||||
tokenizer_name: meta-llama/Llama-2-7b-chat-hf
|
||||
train_file: /gscratch/h2lab/abhikam/llm-factuality/data/training/open-instruct-multi-qa.json
|
||||
use_flash_attn: true
|
||||
use_lora: false
|
||||
use_slow_tokenizer: true
|
||||
warmup_ratio: 0.03
|
||||
weight_decay: 0.0
|
||||
with_tracking: true
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:10b74c6f774d729b6af680f4e8ea767cfabf2df025cd74f71df9640b862dba10
|
||||
size 1916
|
||||
32
open_instruct/1694473649.2345717/hparams.yml
Normal file
32
open_instruct/1694473649.2345717/hparams.yml
Normal file
@@ -0,0 +1,32 @@
|
||||
checkpointing_steps: '50'
|
||||
config_name: null
|
||||
dataset_config_name: null
|
||||
dataset_name: null
|
||||
gradient_accumulation_steps: 16
|
||||
learning_rate: 2.0e-05
|
||||
logging_steps: 1
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.1
|
||||
lora_rank: 64
|
||||
low_cpu_mem_usage: false
|
||||
lr_scheduler_type: cosine
|
||||
max_seq_length: 2048
|
||||
max_train_steps: 550
|
||||
model_name_or_path: meta-llama/Llama-2-7b-chat-hf
|
||||
num_train_epochs: 2
|
||||
output_dir: multi_passage_w_qa
|
||||
overwrite_cache: false
|
||||
per_device_train_batch_size: 1
|
||||
preprocessing_num_workers: 16
|
||||
report_to: tensorboard
|
||||
resume_from_checkpoint: multi_passage_w_qa/step_100
|
||||
save_merged_lora_model: false
|
||||
seed: null
|
||||
tokenizer_name: meta-llama/Llama-2-7b-chat-hf
|
||||
train_file: /gscratch/h2lab/abhikam/llm-factuality/data/training/open-instruct-multi-qa.json
|
||||
use_flash_attn: true
|
||||
use_lora: false
|
||||
use_slow_tokenizer: true
|
||||
warmup_ratio: 0.03
|
||||
weight_decay: 0.0
|
||||
with_tracking: true
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c6035e9cf6d2060a3a51c046446e92541d4b25a490c3bda38eb864b7ee1efbc9
|
||||
size 1916
|
||||
32
open_instruct/1694499102.1359086/hparams.yml
Normal file
32
open_instruct/1694499102.1359086/hparams.yml
Normal file
@@ -0,0 +1,32 @@
|
||||
checkpointing_steps: '50'
|
||||
config_name: null
|
||||
dataset_config_name: null
|
||||
dataset_name: null
|
||||
gradient_accumulation_steps: 16
|
||||
learning_rate: 2.0e-05
|
||||
logging_steps: 1
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.1
|
||||
lora_rank: 64
|
||||
low_cpu_mem_usage: false
|
||||
lr_scheduler_type: cosine
|
||||
max_seq_length: 2048
|
||||
max_train_steps: 550
|
||||
model_name_or_path: meta-llama/Llama-2-7b-chat-hf
|
||||
num_train_epochs: 2
|
||||
output_dir: multi_passage_w_qa
|
||||
overwrite_cache: false
|
||||
per_device_train_batch_size: 1
|
||||
preprocessing_num_workers: 16
|
||||
report_to: tensorboard
|
||||
resume_from_checkpoint: multi_passage_w_qa/step_250
|
||||
save_merged_lora_model: false
|
||||
seed: null
|
||||
tokenizer_name: meta-llama/Llama-2-7b-chat-hf
|
||||
train_file: /gscratch/h2lab/abhikam/llm-factuality/data/training/open-instruct-multi-qa.json
|
||||
use_flash_attn: true
|
||||
use_lora: false
|
||||
use_slow_tokenizer: true
|
||||
warmup_ratio: 0.03
|
||||
weight_decay: 0.0
|
||||
with_tracking: true
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:52287e454a884daa449c4f4315dc8768890c0b0c5fdcfdd36f21a6d0026f475d
|
||||
size 1916
|
||||
32
open_instruct/1694517340.3150113/hparams.yml
Normal file
32
open_instruct/1694517340.3150113/hparams.yml
Normal file
@@ -0,0 +1,32 @@
|
||||
checkpointing_steps: '50'
|
||||
config_name: null
|
||||
dataset_config_name: null
|
||||
dataset_name: null
|
||||
gradient_accumulation_steps: 16
|
||||
learning_rate: 2.0e-05
|
||||
logging_steps: 1
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.1
|
||||
lora_rank: 64
|
||||
low_cpu_mem_usage: false
|
||||
lr_scheduler_type: cosine
|
||||
max_seq_length: 2048
|
||||
max_train_steps: 550
|
||||
model_name_or_path: meta-llama/Llama-2-7b-chat-hf
|
||||
num_train_epochs: 2
|
||||
output_dir: multi_passage_w_qa
|
||||
overwrite_cache: false
|
||||
per_device_train_batch_size: 1
|
||||
preprocessing_num_workers: 16
|
||||
report_to: tensorboard
|
||||
resume_from_checkpoint: multi_passage_w_qa/step_400
|
||||
save_merged_lora_model: false
|
||||
seed: null
|
||||
tokenizer_name: meta-llama/Llama-2-7b-chat-hf
|
||||
train_file: /gscratch/h2lab/abhikam/llm-factuality/data/training/open-instruct-multi-qa.json
|
||||
use_flash_attn: true
|
||||
use_lora: false
|
||||
use_slow_tokenizer: true
|
||||
warmup_ratio: 0.03
|
||||
weight_decay: 0.0
|
||||
with_tracking: true
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e0f450f6e7e4caa1407a2ad7d4d1a601ae6946ebe59c4fe8b924b0ed56a0ecbd
|
||||
size 1916
|
||||
32
open_instruct/1694518076.2119565/hparams.yml
Normal file
32
open_instruct/1694518076.2119565/hparams.yml
Normal file
@@ -0,0 +1,32 @@
|
||||
checkpointing_steps: '50'
|
||||
config_name: null
|
||||
dataset_config_name: null
|
||||
dataset_name: null
|
||||
gradient_accumulation_steps: 16
|
||||
learning_rate: 2.0e-05
|
||||
logging_steps: 1
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.1
|
||||
lora_rank: 64
|
||||
low_cpu_mem_usage: false
|
||||
lr_scheduler_type: cosine
|
||||
max_seq_length: 2048
|
||||
max_train_steps: 550
|
||||
model_name_or_path: meta-llama/Llama-2-7b-chat-hf
|
||||
num_train_epochs: 2
|
||||
output_dir: multi_passage_w_qa
|
||||
overwrite_cache: false
|
||||
per_device_train_batch_size: 1
|
||||
preprocessing_num_workers: 16
|
||||
report_to: tensorboard
|
||||
resume_from_checkpoint: multi_passage_w_qa/step_400
|
||||
save_merged_lora_model: false
|
||||
seed: null
|
||||
tokenizer_name: meta-llama/Llama-2-7b-chat-hf
|
||||
train_file: /gscratch/h2lab/abhikam/llm-factuality/data/training/open-instruct-multi-qa.json
|
||||
use_flash_attn: true
|
||||
use_lora: false
|
||||
use_slow_tokenizer: true
|
||||
warmup_ratio: 0.03
|
||||
weight_decay: 0.0
|
||||
with_tracking: true
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5338826a2287655b7c7a2d302534670ad3cad3742b52f3e4fee44f4fb3373397
|
||||
size 1916
|
||||
32
open_instruct/1694518380.279593/hparams.yml
Normal file
32
open_instruct/1694518380.279593/hparams.yml
Normal file
@@ -0,0 +1,32 @@
|
||||
checkpointing_steps: '50'
|
||||
config_name: null
|
||||
dataset_config_name: null
|
||||
dataset_name: null
|
||||
gradient_accumulation_steps: 16
|
||||
learning_rate: 2.0e-05
|
||||
logging_steps: 1
|
||||
lora_alpha: 16
|
||||
lora_dropout: 0.1
|
||||
lora_rank: 64
|
||||
low_cpu_mem_usage: false
|
||||
lr_scheduler_type: cosine
|
||||
max_seq_length: 2048
|
||||
max_train_steps: 550
|
||||
model_name_or_path: meta-llama/Llama-2-7b-chat-hf
|
||||
num_train_epochs: 2
|
||||
output_dir: multi_passage_w_qa
|
||||
overwrite_cache: false
|
||||
per_device_train_batch_size: 1
|
||||
preprocessing_num_workers: 16
|
||||
report_to: tensorboard
|
||||
resume_from_checkpoint: multi_passage_w_qa/step_400
|
||||
save_merged_lora_model: false
|
||||
seed: null
|
||||
tokenizer_name: meta-llama/Llama-2-7b-chat-hf
|
||||
train_file: /gscratch/h2lab/abhikam/llm-factuality/data/training/open-instruct-multi-qa.json
|
||||
use_flash_attn: true
|
||||
use_lora: false
|
||||
use_slow_tokenizer: true
|
||||
warmup_ratio: 0.03
|
||||
weight_decay: 0.0
|
||||
with_tracking: true
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f78aadb6ad02755c808b0772534583a6ead1b5c08241a0cefca5b33f5b53b1f1
|
||||
size 14681
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bf6c9632b71373703f0f87bdbdbe03c8c2112cc8e973a317cddffdfe014aa899
|
||||
size 290
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5b89d08fd51bc5b219476775fc54aaf11dba1f43fe62b6e7c375dbef275780b7
|
||||
size 19325
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4677f0734a8c0a6b9fc733e6e820c7a92da2674e0d495347bc7833151c0fbb83
|
||||
size 20187
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:84c44ce55fde51eb2a2652665577f7cda24d784503bbb1d92e17ffea0031bc85
|
||||
size 189
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4e424e6dffbff8270f4a58b9664c65383e3af16612cbee5332b7c6fa0dea63a6
|
||||
size 88
|
||||
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:94a1b7333712f79560c5d9cca5ccee9ca81c97fe377e6e7b8a20cc5c42a876ef
|
||||
size 15137
|
||||
Reference in New Issue
Block a user