{ "experiment": "minhash_dedup_08", "method": "minhash_lsh", "model_name": "ai-forever/rugpt3small_based_on_gpt2", "base_dataset_repo": "Bykot/c4_ru_200k_split", "train_docs": 197775, "removed_docs": 225, "eval_docs": 2000, "eval_loss_before": 3.8492491245269775, "perplexity_before": 46.95779053735424, "eval_loss_after": 3.0526859760284424, "perplexity_after": 21.172135967712943, "train_runtime": 19308.2323, "train_samples_per_second": 10.243, "created_at_utc": "2026-05-12T06:41:45.751863+00:00" }