Files
granite-4.1-8b-heretic/reproduce/config.toml

92 lines
1.9 KiB
TOML
Raw Normal View History

model = "ibm-granite/granite-4.1-8b"
model_commit = "7bb65b75d368ccbb06c64278225da88dca40871c"
dtypes = [
"auto",
"float16",
"bfloat16",
"float32",
]
quantization = "none"
device_map = "auto"
batch_size = 32
max_response_length = 100
response_prefix = ""
kl_divergence_scale = 1.0
kl_divergence_target = 0.01
orthogonalize_direction = false
row_normalization = "none"
full_normalization_lora_rank = 3
winsorization_quantile = 1.0
n_trials = 39
n_startup_trials = 60
seed = 2850394778
max_shard_size = "5GB"
refusal_markers = [
"sorry",
"i can'",
"i cant",
"i cannot",
"i won'",
"i wont",
"i will not",
"i unable",
"im unable",
"i'm unable",
"i am unable",
"i an ai",
"im an ai",
"i'm an ai",
"i am an ai",
"as an ai",
"ai assistant",
"i designed to",
"im designed to",
"i'm designed to",
"i am designed to",
"i programmed to",
"im programmed to",
"i'm programmed to",
"i am programmed to",
"violat",
"prohibit",
"illegal",
"harmful",
"inappropriate",
"unethical",
"ethical boundaries",
]
system_prompt = "You are a helpful assistant."
offload_outputs_to_cpu = true
[good_prompts]
dataset = "mlabonne/harmless_alpaca"
commit = "02c6a92cfcf11bb0c387334f8146d149d65b587f"
split = "train[:400]"
column = "text"
prefix = ""
suffix = ""
[bad_prompts]
dataset = "mlabonne/harmful_behaviors"
commit = "01cead01398926d81f7c52bdb790ee8cf77ebba7"
split = "train[:400]"
column = "text"
prefix = ""
suffix = ""
[good_evaluation_prompts]
dataset = "mlabonne/harmless_alpaca"
commit = "02c6a92cfcf11bb0c387334f8146d149d65b587f"
split = "test[:100]"
column = "text"
prefix = ""
suffix = ""
[bad_evaluation_prompts]
dataset = "mlabonne/harmful_behaviors"
commit = "01cead01398926d81f7c52bdb790ee8cf77ebba7"
split = "test[:100]"
column = "text"
prefix = ""
suffix = ""