{ "experiment": "minhash_dedup_08", "method": "minhash_lsh", "base_dataset_repo": "Bykot/c4_ru_200k_split", "source_train_docs": 198000, "cleaned_docs": 197775, "removed_docs": 225, "removed_percent": 0.11363636363636363, "eval_docs": 2000, "created_at_utc": "2026-05-12T01:13:09.279538+00:00" }