初始化项目,由ModelHub XC社区提供模型

Model: the-jb/tofu_Llama-3.2-3B-Instruct_forget10_GradDiff
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-12 20:44:39 +08:00
commit 7f97a0382d
23 changed files with 42747 additions and 0 deletions

550
evals/.hydra/config.yaml Normal file
View File

@@ -0,0 +1,550 @@
model:
model_args:
device_map: cuda
pretrained_model_name_or_path: saves/unlearn/tofu_Llama-3.2-3B-Instruct_forget10_GradDiff
attn_implementation: flash_attention_2
torch_dtype: bfloat16
tokenizer_args:
pretrained_model_name_or_path: meta-llama/Llama-3.2-3B-Instruct
template_args:
apply_chat_template: true
system_prompt: You are a helpful assistant.
system_prompt_with_special_tokens: '<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful assistant.<|eot_id|>'
user_start_tag: '<|start_header_id|>user<|end_header_id|>
'
user_end_tag: <|eot_id|>
asst_start_tag: '<|start_header_id|>assistant<|end_header_id|>
'
asst_end_tag: <|eot_id|>
date_string: 10 Apr 2025
mode: eval
task_name: tofu_Llama-3.2-3B-Instruct_forget10_GradDiff
seed: 0
eval:
tofu:
metrics:
forget_quality:
pre_compute:
forget_truth_ratio:
pre_compute:
forget_Q_A_PARA_Prob:
datasets:
TOFU_QA_forget_para:
handler: QADataset
args:
hf_args:
name: ${eval.tofu.forget_split}_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: paraphrased_answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: correct
forget_Q_A_PERT_Prob:
datasets:
TOFU_QA_forget_pert:
handler: QADataset
args:
hf_args:
name: ${eval.tofu.forget_split}_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: perturbed_answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: wrong
handler: truth_ratio
aggregator: closer_to_1_better
access_key: forget
reference_logs:
retain_model_logs:
path: ${eval.tofu.retain_logs_path}
include:
forget_truth_ratio:
access_key: retain
handler: ks_test
forget_Q_A_Prob:
datasets:
TOFU_QA_forget:
handler: QADataset
args:
hf_args:
name: ${eval.tofu.forget_split}
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
forget_Q_A_ROUGE:
datasets:
TOFU_QA_forget:
handler: QADataset
args:
hf_args:
name: ${eval.tofu.forget_split}
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
predict_with_generate: true
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: left
index: index
generation_args:
do_sample: false
top_p: null
temperature: null
max_new_tokens: 200
use_cache: true
handler: rouge
rouge_type: rougeL_recall
batch_size: 32
model_utility:
pre_compute:
retain_Q_A_Prob:
datasets:
TOFU_QA_retain_eval:
handler: QADataset
args:
hf_args:
name: retain_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
retain_Q_A_ROUGE:
datasets:
TOFU_QA_retain_eval:
handler: QADataset
args:
hf_args:
name: retain_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
predict_with_generate: true
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: left
index: index
generation_args:
do_sample: false
top_p: null
temperature: null
max_new_tokens: 200
use_cache: true
handler: rouge
rouge_type: rougeL_recall
batch_size: 32
retain_Truth_Ratio:
pre_compute:
retain_Q_A_PARA_Prob:
datasets:
TOFU_QA_retain_para:
handler: QADataset
args:
hf_args:
name: retain_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: paraphrased_answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: correct
retain_Q_A_PERT_Prob:
datasets:
TOFU_QA_retain_pert:
handler: QADataset
args:
hf_args:
name: retain_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: perturbed_answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: wrong
handler: truth_ratio
aggregator: true_better
ra_Q_A_Prob_normalised:
pre_compute:
ra_Q_A_Prob:
datasets:
TOFU_QA_ra:
handler: QADataset
args:
hf_args:
name: real_authors_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: correct
ra_Q_A_PERT_Prob:
datasets:
TOFU_QA_ra_pert:
handler: QADataset
args:
hf_args:
name: real_authors_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: perturbed_answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: wrong
handler: probability_w_options
ra_Q_A_ROUGE:
datasets:
TOFU_QA_ra:
handler: QADataset
args:
hf_args:
name: real_authors_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
predict_with_generate: true
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: left
index: index
generation_args:
do_sample: false
top_p: null
temperature: null
max_new_tokens: 200
use_cache: true
handler: rouge
rouge_type: rougeL_recall
batch_size: 32
ra_Truth_Ratio:
pre_compute:
ra_Q_A_Prob:
datasets:
TOFU_QA_ra:
handler: QADataset
args:
hf_args:
name: real_authors_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: correct
ra_Q_A_PERT_Prob:
datasets:
TOFU_QA_ra_pert:
handler: QADataset
args:
hf_args:
name: real_authors_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: perturbed_answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: wrong
handler: truth_ratio
aggregator: true_better
wf_Q_A_Prob_normalised:
pre_compute:
wf_Q_A_Prob:
datasets:
TOFU_QA_wf:
handler: QADataset
args:
hf_args:
name: world_facts_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: correct
wf_Q_A_PERT_Prob:
datasets:
TOFU_QA_wf_pert:
handler: QADataset
args:
hf_args:
name: world_facts_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: perturbed_answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: wrong
handler: probability_w_options
wf_Q_A_ROUGE:
datasets:
TOFU_QA_wf:
handler: QADataset
args:
hf_args:
name: world_facts_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
predict_with_generate: true
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: left
index: index
generation_args:
do_sample: false
top_p: null
temperature: null
max_new_tokens: 200
use_cache: true
handler: rouge
rouge_type: rougeL_recall
batch_size: 32
wf_Truth_Ratio:
pre_compute:
wf_Q_A_Prob:
datasets:
TOFU_QA_wf:
handler: QADataset
args:
hf_args:
name: world_facts_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: correct
wf_Q_A_PERT_Prob:
datasets:
TOFU_QA_wf_pert:
handler: QADataset
args:
hf_args:
name: world_facts_perturbed
split: train
path: locuslab/TOFU
question_key: question
answer_key: perturbed_answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: probability
batch_size: 32
access_key: wrong
handler: truth_ratio
aggregator: true_better
handler: hm_aggregate
privleak:
pre_compute:
mia_min_k:
datasets:
TOFU_QA_forget:
access_key: forget
handler: QADataset
args:
hf_args:
name: ${eval.tofu.forget_split}
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
TOFU_QA_holdout:
access_key: holdout
handler: QADataset
args:
hf_args:
name: ${eval.tofu.holdout_split}
path: locuslab/TOFU
split: train
question_key: question
answer_key: answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
batch_size: 32
handler: mia_min_k
k: 0.4
access_key: forget
reference_logs:
retain_model_logs:
path: ${eval.tofu.retain_logs_path}
include:
mia_min_k:
access_key: retain
handler: privleak
ref_value: 0.5
extraction_strength:
datasets:
TOFU_QA_forget:
handler: QADataset
args:
hf_args:
name: ${eval.tofu.forget_split}
split: train
path: locuslab/TOFU
question_key: question
answer_key: answer
max_length: 512
collators:
DataCollatorForSupervisedDataset:
handler: DataCollatorForSupervisedDataset
args:
padding_side: right
index: index
handler: extraction_strength
batch_size: 32
handler: TOFUEvaluator
output_dir: ${paths.output_dir}
overwrite: false
forget_split: ${forget_split}
holdout_split: ${holdout_split}
retain_logs_path: ${retain_logs_path}
paths:
root_dir: .
data_dir: ${paths.root_dir}/data/
datasets: ${paths.root_dir}/configs/data/datasets
output_dir: saves/unlearn/tofu_Llama-3.2-3B-Instruct_forget10_GradDiff/evals
work_dir: ${hydra:runtime.cwd}
forget_split: forget10
holdout_split: holdout10
retain_logs_path: saves/eval/tofu_Llama-3.2-3B-Instruct_retain90/TOFU_EVAL.json