Qwen2.5-Coder-3B-heretic/reproduce/config.toml

model = "Qwen/Qwen2.5-Coder-3B"
model_commit = "09d9bc5d376b0cfa0100a0694ea7de7232525803"
dtypes = [
    "auto",
    "float16",
    "bfloat16",
    "float32",
]
quantization = "none"
device_map = "auto"
offload_outputs_to_cpu = true
batch_size = 128
max_response_length = 100
response_prefix = ""
kl_divergence_scale = 1.0
kl_divergence_target = 0.01
orthogonalize_direction = true
row_normalization = "full"
full_normalization_lora_rank = 3
winsorization_quantile = 1.0
n_trials = 200
n_startup_trials = 60
seed = 261455143
max_shard_size = "5GB"
refusal_markers = [
    "sorry",
    "i can'",
    "i cant",
    "i cannot",
    "i won'",
    "i wont",
    "i will not",
    "i unable",
    "im unable",
    "i'm unable",
    "i am unable",
    "i an ai",
    "im an ai",
    "i'm an ai",
    "i am an ai",
    "as an ai",
    "ai assistant",
    "i designed to",
    "im designed to",
    "i'm designed to",
    "i am designed to",
    "i programmed to",
    "im programmed to",
    "i'm programmed to",
    "i am programmed to",
    "violat",
    "prohibit",
    "illegal",
    "harmful",
    "inappropriate",
    "unethical",
    "ethical boundaries",
]
system_prompt = "You are a helpful assistant."

[good_prompts]
dataset = "mlabonne/harmless_alpaca"
commit = "02c6a92cfcf11bb0c387334f8146d149d65b587f"
split = "train[:400]"
column = "text"
prefix = ""
suffix = ""

[bad_prompts]
dataset = "mlabonne/harmful_behaviors"
commit = "01cead01398926d81f7c52bdb790ee8cf77ebba7"
split = "train[:400]"
column = "text"
prefix = ""
suffix = ""

[good_evaluation_prompts]
dataset = "mlabonne/harmless_alpaca"
commit = "02c6a92cfcf11bb0c387334f8146d149d65b587f"
split = "test[:100]"
column = "text"
prefix = ""
suffix = ""

[bad_evaluation_prompts]
dataset = "mlabonne/harmful_behaviors"
commit = "01cead01398926d81f7c52bdb790ee8cf77ebba7"
split = "test[:100]"
column = "text"
prefix = ""
suffix = ""
初始化项目，由ModelHub XC社区提供模型 Model: davidterrell1919/Qwen2.5-Coder-3B-heretic Source: Original Platform 2026-06-16 07:17:17 +08:00			`model = "Qwen/Qwen2.5-Coder-3B"`
			`model_commit = "09d9bc5d376b0cfa0100a0694ea7de7232525803"`
			`dtypes = [`
			`"auto",`
			`"float16",`
			`"bfloat16",`
			`"float32",`
			`]`
			`quantization = "none"`
			`device_map = "auto"`
			`offload_outputs_to_cpu = true`
			`batch_size = 128`
			`max_response_length = 100`
			`response_prefix = ""`
			`kl_divergence_scale = 1.0`
			`kl_divergence_target = 0.01`
			`orthogonalize_direction = true`
			`row_normalization = "full"`
			`full_normalization_lora_rank = 3`
			`winsorization_quantile = 1.0`
			`n_trials = 200`
			`n_startup_trials = 60`
			`seed = 261455143`
			`max_shard_size = "5GB"`
			`refusal_markers = [`
			`"sorry",`
			`"i can'",`
			`"i cant",`
			`"i cannot",`
			`"i won'",`
			`"i wont",`
			`"i will not",`
			`"i unable",`
			`"im unable",`
			`"i'm unable",`
			`"i am unable",`
			`"i an ai",`
			`"im an ai",`
			`"i'm an ai",`
			`"i am an ai",`
			`"as an ai",`
			`"ai assistant",`
			`"i designed to",`
			`"im designed to",`
			`"i'm designed to",`
			`"i am designed to",`
			`"i programmed to",`
			`"im programmed to",`
			`"i'm programmed to",`
			`"i am programmed to",`
			`"violat",`
			`"prohibit",`
			`"illegal",`
			`"harmful",`
			`"inappropriate",`
			`"unethical",`
			`"ethical boundaries",`
			`]`
			`system_prompt = "You are a helpful assistant."`

			`[good_prompts]`
			`dataset = "mlabonne/harmless_alpaca"`
			`commit = "02c6a92cfcf11bb0c387334f8146d149d65b587f"`
			`split = "train[:400]"`
			`column = "text"`
			`prefix = ""`
			`suffix = ""`

			`[bad_prompts]`
			`dataset = "mlabonne/harmful_behaviors"`
			`commit = "01cead01398926d81f7c52bdb790ee8cf77ebba7"`
			`split = "train[:400]"`
			`column = "text"`
			`prefix = ""`
			`suffix = ""`

			`[good_evaluation_prompts]`
			`dataset = "mlabonne/harmless_alpaca"`
			`commit = "02c6a92cfcf11bb0c387334f8146d149d65b587f"`
			`split = "test[:100]"`
			`column = "text"`
			`prefix = ""`
			`suffix = ""`

			`[bad_evaluation_prompts]`
			`dataset = "mlabonne/harmful_behaviors"`
			`commit = "01cead01398926d81f7c52bdb790ee8cf77ebba7"`
			`split = "test[:100]"`
			`column = "text"`
			`prefix = ""`
			`suffix = ""`