model = "ibm-granite/granite-4.1-8b" model_commit = "7bb65b75d368ccbb06c64278225da88dca40871c" dtypes = [ "auto", "float16", "bfloat16", "float32", ] quantization = "none" device_map = "auto" batch_size = 32 max_response_length = 100 response_prefix = "" kl_divergence_scale = 1.0 kl_divergence_target = 0.01 orthogonalize_direction = false row_normalization = "none" full_normalization_lora_rank = 3 winsorization_quantile = 1.0 n_trials = 39 n_startup_trials = 60 seed = 2850394778 max_shard_size = "5GB" refusal_markers = [ "sorry", "i can'", "i cant", "i cannot", "i won'", "i wont", "i will not", "i unable", "im unable", "i'm unable", "i am unable", "i an ai", "im an ai", "i'm an ai", "i am an ai", "as an ai", "ai assistant", "i designed to", "im designed to", "i'm designed to", "i am designed to", "i programmed to", "im programmed to", "i'm programmed to", "i am programmed to", "violat", "prohibit", "illegal", "harmful", "inappropriate", "unethical", "ethical boundaries", ] system_prompt = "You are a helpful assistant." offload_outputs_to_cpu = true [good_prompts] dataset = "mlabonne/harmless_alpaca" commit = "02c6a92cfcf11bb0c387334f8146d149d65b587f" split = "train[:400]" column = "text" prefix = "" suffix = "" [bad_prompts] dataset = "mlabonne/harmful_behaviors" commit = "01cead01398926d81f7c52bdb790ee8cf77ebba7" split = "train[:400]" column = "text" prefix = "" suffix = "" [good_evaluation_prompts] dataset = "mlabonne/harmless_alpaca" commit = "02c6a92cfcf11bb0c387334f8146d149d65b587f" split = "test[:100]" column = "text" prefix = "" suffix = "" [bad_evaluation_prompts] dataset = "mlabonne/harmful_behaviors" commit = "01cead01398926d81f7c52bdb790ee8cf77ebba7" split = "test[:100]" column = "text" prefix = "" suffix = ""