From 9008e201ad9a9f1dea87d011f520cbc1b61dc28e Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Thu, 14 May 2026 12:27:39 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: 81melody/algerianME5 Source: Original Platform --- .gitattributes | 36 +++ 1_Pooling/config.json | 10 + README.md | 360 ++++++++++++++++++++++++++++++ config.json | 30 +++ config_sentence_transformers.json | 14 ++ model.safetensors | 3 + modules.json | 20 ++ rng_state.pth | 3 + scaler.pt | 3 + scheduler.pt | 3 + sentence_bert_config.json | 4 + tokenizer.json | 3 + tokenizer_config.json | 15 ++ trainer_state.json | 258 +++++++++++++++++++++ training_args.bin | 3 + 15 files changed, 765 insertions(+) create mode 100644 .gitattributes create mode 100644 1_Pooling/config.json create mode 100644 README.md create mode 100644 config.json create mode 100644 config_sentence_transformers.json create mode 100644 model.safetensors create mode 100644 modules.json create mode 100644 rng_state.pth create mode 100644 scaler.pt create mode 100644 scheduler.pt create mode 100644 sentence_bert_config.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 trainer_state.json create mode 100644 training_args.bin diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* 
filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/1_Pooling/config.json b/1_Pooling/config.json new file mode 100644 index 0000000..70ac42b --- /dev/null +++ b/1_Pooling/config.json @@ -0,0 +1,10 @@ +{ + "word_embedding_dimension": 768, + "pooling_mode_cls_token": false, + "pooling_mode_mean_tokens": true, + "pooling_mode_max_tokens": false, + "pooling_mode_mean_sqrt_len_tokens": false, + "pooling_mode_weightedmean_tokens": false, + "pooling_mode_lasttoken": false, + "include_prompt": true +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0d50965 --- /dev/null +++ b/README.md @@ -0,0 +1,360 @@ +--- +tags: +- sentence-transformers +- sentence-similarity +- feature-extraction +- dense +- Algerian AI +- Algerian +- algeria +- darja +- darija +- algerian darija +- 
algerian dialect +- rag +- ar +- multilingual-e5 +- generated_from_trainer +- loss:MultipleNegativesRankingLoss +base_model: intfloat/multilingual-e5-base +widget: +- source_sentence: 'query: Renault Kangoo 2019' + sentences: + - >- + passage: سيارة Renault Kangoo 2019 Confort · مازوت · يدوية · 1.5 DCI 90ch · + المسافة: 199,000 كم · السعر: 420 مليون دج · سيسبونسيو 10/10 + + موتور 10/10 + + سبيغة 0 + + كلشي معاود فيها جديد + - >- + passage: سيارة Dfsq Dfsq 2013 · بنزين · يدوية · 1.1 · المسافة: 280 كم · + السعر: 140 مليون دج · باتنة · مفيهش معاود + + موتور محطوط جديد + - >- + passage: بيع فيلا تيبازة بوسماعيل · فيلا · السعر: 8 مليون دج · تيبازة · + agence immobilier LABID agrée par l'état met en vente trés bel villa r+2 de + sup 250 m² deux facade dans un résidence clôturé et gardée jour et nuit + libre de suite l'villa avec toute commanditée : + + - rdc : deux garage pour 7 véhicule + studio + jardain + + - 1ére étage : salon de chambre + cuisine + salle de bain + sanitaire + + - 2éme étage : salon +3 chambre + sanitaire + Hammam + + - 3éme étage : grand salon + deux terrasse + + - chauffage centrale + + - climatisation + + - caméra de surveillance + + - bâché d'eau + + - acte et livret foncier + + - les prix : 8 milliards nég lég + + - pour plus d'informations consultéz agence labid au : + + - +- source_sentence: 'query: location terrain Oran' + sentences: + - >- + passage: كراء عمارة وهران وهران · ارض · 90 م² · السعر: 6 مليون دج · وهران · + location plusieurs appartements dans un immeuble de 5 étages et avec + ascenseur + + les appartements sont neuf jamais habité + + merci de nous contacter pour savoir plus de détails . 
+ - >- + passage: سيارة Kia Seltos 2025 LUXuRY · بنزين · اوتوماتيك · 1.5 · السعر: 545 + مليون دج · الوادي + - >- + passage: سيارة Peugeot 308 2015 Active · مازوت · يدوية · 1.6 e HDI 112ch · + المسافة: 375,000 كم · وهران · Je vente 308 jdida machya 375000 +- source_sentence: 'query: villa Alger avec jardin' + sentences: + - >- + passage: بيع شقة 3 غرف الجزائر العاشور · شقة · 3 غرف · السعر: 3 مليون دج · + الجزائر ·vente une appartement a el3achour Hawch chawech De 96m F3 en 3 em + etg avec la scenseur tout comoditie chauffage central climatisation cuisine + équipée boxe pour stationnement les caméras de surveillance avec act et + livret foncièr + - >- + passage: كراء شقة دوبلكس 4 غرف الجزائر العاشور · شقة · 4 غرف · مطبخ مجهز · + تدفئة مركزية · تكييف · تيراس · مفروش · جناح غرفة النوم · السعر: 29 مليون دج + · الجزائر · El Achour Location d’un Duplex F4 meublé de haut standing + superficie 213 m² + + + + Le Duplex se compose : + + + Niveau 1: une entrée, un joli séjour avec une salle à manger, une cuisine + équipée haute gamme, sanitaire + hammame, terrasse. + + + Niveau 2 : 3 chambres dont une master bed room, une salle de bain avec + jacuzzi, espace bureau, 2 balcons. + + + Équipements : climatisation, chauffage central, double vitrage, stores + électriques, visiophone, 1 place de parking. + + + Commodités de la résidence : ascenseur, parking, gardiennage 24h/24, aire de + jeux pour enfants, espaces verts pour vos moments de détente. 
+ - >- + passage: كراء شقة 5 غرف البليدة البليدة · شقة · 5 غرف · السعر: 5 مليون دج · + البليدة · 203m plus ascenseur +- source_sentence: 'query: Cuxi Cuxi 2025' + sentences: + - >- + passage: سيارة Volkswagen Golf 7 2016 Trendline + · مازوت · يدوية · 2.0 TDI + 110ch · المسافة: 280,000 كم + - >- + passage: سيارة Opel Corsa 2001 Corsa · مازوت · يدوية · 1.7 D 60ch · المسافة: + 350,000 كم · السعر: 65 مليون دج · موتور نعاود يدور شهرة السبيغة فيها سوباسمو + - >- + passage: سيارة Cuxi Cuxi 2025 · بنزين · اوتوماتيك · Yamaha 110 · المسافة: + 9,250 كم · السعر: 28 مليون دج · قسنطينة · Cuxi 2025 jdida état 10/10 +- source_sentence: 'query: Rani nhawes 3la tonobil Hyundai i10' + sentences: + - 'passage: بيع شقة غرفتين 3 غرف 4 غرف وهران بئر الجير · شقة · 3 غرف · وهران' + - >- + passage: سيارة Kia Cerato 2008 · مازوت · يدوية · المسافة: 230,000 كم · + السعر: 135 مليون دج · سوق اهراس · Problem də terage + - >- + passage: سيارة Hyundai i10 2014 GLS · بنزين · يدوية · 1.1 · المسافة: 300,000 + كم · عين تموشنت · Fiha bantoura +pipeline_tag: sentence-similarity +library_name: sentence-transformers +license: mit +language: +- ar +- fr +--- + +# AlgerianME5 + +**algerianME5** is a specialized **Sentence-Transformer** model designed to map Algerian search queries to a 768-dimensional dense vector space. It is specifically fine-tuned to understand the nuances and vocabulary of the Algerian car and real estate markets, where listings often mix Arabic, French, and Darja in both Arabic and Latin script. + +Note: For more details about the methodology, data synthesis, and evaluation, [please visit the full Medium Story](https://medium.com/@ayoubhimeur/building-a-semantic-search-engine-for-algerian-marketplaces-cc04a0008346) + +## Key Features : +- **Domain Specific**: Optimized for Algerian real estate and automotive vocabulary (e.g. “sbigha”, “f3”, “livret foncier”) + +- **Cross-lingual Retrieval**: Maps informal Latin-script queries (e.g. "tonobil mliha") to formal Arabic or French listing descriptions +
+- **Robust Embeddings**: Based on the powerful intfloat/multilingual-e5-base architecture + +## Use cases : + +- **Semantic Search**: Find relevant listings even if keywords don't match exactly (use it as a second layer) + +- **Textual Similarity**: Compare two listings to find duplicates or similar items + +- **Clustering**: Group listings by sub-market or vehicle/property type + +## Model Details + +### Model Description +- **Model Type:** Sentence Transformer +- **Base model:** [intfloat/multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) +- **Maximum Sequence Length:** 256 tokens +- **Output Dimensionality:** 768 dimensions +- **Similarity Function:** Cosine Similarity + +### Model Sources + +- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) +- **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers) +- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) + +### Full Model Architecture + +``` +SentenceTransformer( + (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'XLMRobertaModel'}) + (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) + (2): Normalize() +) +``` + +## Usage + +### Direct Usage (Sentence Transformers) + +First install the Sentence Transformers library: + +```bash +pip install -U sentence-transformers +``` + +Then you can load this model and run inference.
+```python +from sentence_transformers import SentenceTransformer + + +model = SentenceTransformer("81melody/algerianME5") +sentences = [ + 'query: Rani nhawes 3la tonobil Hyundai i10', + 'passage: سيارة Hyundai i10 2014 GLS · بنزين · يدوية · 1.1 · المسافة: 300,000 كم · عين تموشنت · Fiha bantoura', + 'passage: سيارة Kia Cerato 2008 · مازوت · يدوية · المسافة: 230,000 كم · السعر: 135 مليون دج · سوق اهراس · Problem də terage', +] +embeddings = model.encode(sentences) +print(embeddings.shape) +# [3, 768] + +# Get the similarity scores for the embeddings +similarities = model.similarity(embeddings, embeddings) +print(similarities) +``` +**OR** + +```python +from sentence_transformers import SentenceTransformer , util +model = SentenceTransformer("81melody/algerianME5") +listings = [ + # REAL ESTATE + "بيع شقة 4 غرف الجزائر شراقة · شقة · 4 غرف · السعر: 4 مليون دج · Appartement Composé De 1 Suite Parentale... Résidence sécurisée", + "كراء شقة 4 غرف وهران وهران · شقة · 4 غرف · Location appartement par jour pour familles", + "بيع ارض تلمسان مغنية · ارض · الجزائر · بلان فالسانك مليح", + "كراء محل الجزائر الابيار · محل تجاري · 105 م² · Local avec Deux rideaux", + + # CARS + "سيارة MG Zs Ev 2024 Comfort · بنزين · يدوية · 1.5 VTi-Tech 106ch · المسافة: 67,000 كم · Très beau SUV comme neuf", + "سيارة Hyundai Grand i10 2018 Restylée DZ · بنزين · يدوية · 1.2 ess 87ch · السعر: 265 مليون دج · صبيغة فيها لال و لامان", + "سيارة Renault Clio 4 2018 GT Line + · مازوت · يدوية · 1.5 DCI 85ch · السعر: 330 مليون دج" +] +queries = [ + "شقة 4 غرف الجزائر", + "dar lel bi3 fi Alger centre", + "ard lel bi3 telemcan" , + "chhal souma MG Zs Ev", + "Grand I10 2018 Restylée DZ", + "tonobil mliha fiha sbigha shwia" +] + q_prefix = "query: " + p_prefix = "passage: " + + encoded_listings = model.encode( + [f"{p_prefix}{l}" for l in listings], + convert_to_tensor=True, + show_progress_bar=False + ) + for query in queries: + print(f"\nQuery: '{query}'") + + + query_emb = model.encode(f"{q_prefix}{query}", 
convert_to_tensor=True) + hits = util.semantic_search(query_emb, encoded_listings, top_k=3)[0] + + + for i, hit in enumerate(hits): + score = hit['score'] + doc_id = hit['corpus_id'] + display_text = listings[doc_id][:100] + "..." if len(listings[doc_id]) > 100 else listings[doc_id] + print(f"[Score: {score:.3f}] {display_text}") +``` + +## Training Details + +### Training Dataset + +* Size: 100,000 training samples +* Columns: sentence_0 and sentence_1 +* Approximate statistics based on the first 1000 samples: + | | sentence_0 | sentence_1 | + |:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------| + | type | string | string | + | details | | | +* Samples: + | sentence_0 | sentence_1 | + |:----------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | query: بيع محل وهران بئر | passage: بيع محل وهران بئر الجير · محل تجاري · 750 م² · السعر: 20 مليار دج · وهران · On vous propose en vente un local de 750 m² (550 m² en rez-de-chaussée et 200 m² sous pente) , avec deux rideaux électriques , pour le prix de : 20 Milliards fixe .

Pour plus de détails veuillez nous contacter
| + | query: شقة الجزائر برج | passage: بيع شقة الجزائر برج الكيفان · شقة · 1 غرف · 64 م² · وثائق: دفتر عقاري · عقد موثق · الجزائر · 🔔OPPORTUNITÉ EN OR 🔔
– T2 à vendre +paiement par tranche dans 24mois

❄️À seulement quelques pas de la piscine, dans une site sécurisée et bien située, ce T2 en semi-finis une valeur sûre pour tout investisseur avisé.

Pourquoi ce bien est exceptionnel ?
✅️Localisation stratégique, très demandée
✅️Retour sur investissement rapide
✅️Prêt à être exploité dès l’achat !
✅️Un petit prix pour un grand potentiel.
✅️Les bonnes affaires ne durent jamais longtemps…
Saisissez cette opportunité maintenant !
| + | query: GX3 PRO 2025 X3 Pro | passage: سيارة Geely GX3 PRO 2025 X3 pro livane · بنزين · اوتوماتيك · 1.5 · المسافة: جديدة · بجاية · Vent une livane x3pro neuf carte grise Safia | +* Loss: [MultipleNegativesRankingLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters: + ```json + { + "scale": 20.0, + "similarity_fct": "cos_sim", + "gather_across_devices": false, + "directions": [ + "query_to_doc" + ], + "partition_mode": "joint", + "hardness_mode": null, + "hardness_strength": 0.0 + } + ``` + +### Training Hyperparameters +#### Non-Default Hyperparameters + +- `per_device_train_batch_size`: 16 +- `per_device_eval_batch_size`: 16 +- `fp16`: True +- `multi_dataset_batch_sampler`: round_robin + + + +### Training Logs +| Epoch | Step | Training Loss | +|:------:|:-----:|:-------------:| +| ... | ... | ... | +| 2.32 | 14500 | 0.2827 | +| 2.4 | 15000 | 0.3062 | +| 2.48 | 15500 | 0.3045 | +| 2.56 | 16000 | 0.2841 | + + +### Framework Versions +- Python: 3.12.13 +- Sentence Transformers: 5.3.0 +- Transformers: 5.0.0 +- PyTorch: 2.10.0+cu128 +- Accelerate: 1.13.0 +- Datasets: 4.0.0 +- Tokenizers: 0.22.2 + +## Citation + +### BibTeX + +#### Sentence Transformers +```bibtex +@inproceedings{reimers-2019-sentence-bert, + title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", + author = "Reimers, Nils and Gurevych, Iryna", + booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", + month = "11", + year = "2019", + publisher = "Association for Computational Linguistics", + url = "https://arxiv.org/abs/1908.10084", +} +``` + +#### MultipleNegativesRankingLoss +```bibtex +@misc{oord2019representationlearningcontrastivepredictive, + title={Representation Learning with Contrastive Predictive Coding}, + author={Aaron van den Oord and Yazhe Li and Oriol Vinyals}, + year={2019}, + eprint={1807.03748}, + archivePrefix={arXiv}, + 
primaryClass={cs.LG}, + url={https://arxiv.org/abs/1807.03748}, +} +``` +### Contact +I am interested in any further related work; contact me at mohamed.himeur@student.unamur.be \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..3ba0725 --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "add_cross_attention": false, + "architectures": [ + "XLMRobertaModel" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "dtype": "float32", + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "is_decoder": false, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "tie_word_embeddings": true, + "transformers_version": "5.0.0", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/config_sentence_transformers.json b/config_sentence_transformers.json new file mode 100644 index 0000000..d70271d --- /dev/null +++ b/config_sentence_transformers.json @@ -0,0 +1,14 @@ +{ + "model_type": "SentenceTransformer", + "__version__": { + "sentence_transformers": "5.3.0", + "transformers": "5.0.0", + "pytorch": "2.10.0+cu128" + }, + "prompts": { + "query": "", + "document": "" + }, + "default_prompt_name": null, + "similarity_fn_name": "cosine" +} \ No newline at end of file diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..1691fb7 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b342b6314310b1fdd89c60c05e81b0212c743f54d30430d2968fe9f4667afb3 +size 1112197064 diff --git a/modules.json b/modules.json new file mode 100644 index 0000000..952a9b8 --- /dev/null +++ b/modules.json @@ -0,0 +1,20
@@ +[ + { + "idx": 0, + "name": "0", + "path": "", + "type": "sentence_transformers.models.Transformer" + }, + { + "idx": 1, + "name": "1", + "path": "1_Pooling", + "type": "sentence_transformers.models.Pooling" + }, + { + "idx": 2, + "name": "2", + "path": "2_Normalize", + "type": "sentence_transformers.models.Normalize" + } +] \ No newline at end of file diff --git a/rng_state.pth b/rng_state.pth new file mode 100644 index 0000000..114d55c --- /dev/null +++ b/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:913a5368a7577d2be0e6c0babf983ef3b480fcd6823e79a547c9a24735c8a300 +size 14645 diff --git a/scaler.pt b/scaler.pt new file mode 100644 index 0000000..a32d905 --- /dev/null +++ b/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:492bc78bd83b826eedbe67547210706d1c9a2b14604e5b97f861b508daf02d5c +size 1383 diff --git a/scheduler.pt b/scheduler.pt new file mode 100644 index 0000000..b77041b --- /dev/null +++ b/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:216aaa27aba2a207d4db80b37b87abe496b983a01e4691c445bcc1f4502f7dbf +size 1465 diff --git a/sentence_bert_config.json b/sentence_bert_config.json new file mode 100644 index 0000000..5892b1a --- /dev/null +++ b/sentence_bert_config.json @@ -0,0 +1,4 @@ +{ + "max_seq_length": 256, + "do_lower_case": false +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..511767b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c16d8a2bff758ba6e009849c31b8ffc8ba92bfc907e0bcee96a09f1818fe2da +size 16766387 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..b43c19d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,15 @@ +{ + "add_prefix_space": true, + "backend": "tokenizers", + "bos_token": "<s>", + "clean_up_tokenization_spaces": true, + "cls_token": "<s>", + "eos_token":
"</s>", + "is_local": false, + "mask_token": "<mask>", + "model_max_length": 512, + "pad_token": "<pad>", + "sep_token": "</s>", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "<unk>" +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..68e2471 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,258 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.56, + "eval_steps": 0, + "global_step": 16000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "grad_norm": 9.120098114013672, + "learning_rate": 5.2906666666666675e-06, + "loss": 0.772038330078125, + "step": 500 + }, + { + "epoch": 0.16, + "grad_norm": 16.881511688232422, + "learning_rate": 1.0624e-05, + "loss": 0.37112783813476563, + "step": 1000 + }, + { + "epoch": 0.24, + "grad_norm": 9.139080047607422, + "learning_rate": 1.5957333333333334e-05, + "loss": 0.36207400512695315, + "step": 1500 + }, + { + "epoch": 0.32, + "grad_norm": 3.8323566913604736, + "learning_rate": 1.9856592592592595e-05, + "loss": 0.34735809326171874, + "step": 2000 + }, + { + "epoch": 0.4, + "grad_norm": 4.640134811401367, + "learning_rate": 1.9265185185185186e-05, + "loss": 0.329142578125, + "step": 2500 + }, + { + "epoch": 0.48, + "grad_norm": 7.571471214294434, + "learning_rate": 1.8672592592592594e-05, + "loss": 0.3232558288574219, + "step": 3000 + }, + { + "epoch": 0.56, + "grad_norm": 5.5788984298706055, + "learning_rate": 1.8080000000000003e-05, + "loss": 0.33851583862304685, + "step": 3500 + }, + { + "epoch": 0.64, + "grad_norm": 4.823075771331787, + "learning_rate": 1.748740740740741e-05, + "loss": 0.3265293884277344, + "step": 4000 + }, + { + "epoch": 0.72, + "grad_norm": 5.9052886962890625, + "learning_rate": 1.6897185185185187e-05, + "loss": 0.3228313903808594, + "step": 4500 + }, + { + "epoch": 0.8, + "grad_norm": 2.530646562576294, + "learning_rate":
1.6304592592592593e-05, + "loss": 0.3212389831542969, + "step": 5000 + }, + { + "epoch": 0.88, + "grad_norm": 2.003970146179199, + "learning_rate": 1.5712e-05, + "loss": 0.3108310546875, + "step": 5500 + }, + { + "epoch": 0.96, + "grad_norm": 2.259843111038208, + "learning_rate": 1.511940740740741e-05, + "loss": 0.33287890625, + "step": 6000 + }, + { + "epoch": 1.04, + "grad_norm": 1.1002967357635498, + "learning_rate": 1.4526814814814815e-05, + "loss": 0.3126535949707031, + "step": 6500 + }, + { + "epoch": 1.12, + "grad_norm": 2.696305751800537, + "learning_rate": 1.3934222222222222e-05, + "loss": 0.3110744323730469, + "step": 7000 + }, + { + "epoch": 1.2, + "grad_norm": 3.0759758949279785, + "learning_rate": 1.3341629629629631e-05, + "loss": 0.307027099609375, + "step": 7500 + }, + { + "epoch": 1.28, + "grad_norm": 1.2770161628723145, + "learning_rate": 1.2749037037037038e-05, + "loss": 0.31455657958984373, + "step": 8000 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 2.4967329502105713, + "learning_rate": 1.2156444444444447e-05, + "loss": 0.31456546020507814, + "step": 8500 + }, + { + "epoch": 1.44, + "grad_norm": 5.275321006774902, + "learning_rate": 1.1565037037037039e-05, + "loss": 0.3131004943847656, + "step": 9000 + }, + { + "epoch": 1.52, + "grad_norm": 2.145164966583252, + "learning_rate": 1.0972444444444446e-05, + "loss": 0.30567279052734375, + "step": 9500 + }, + { + "epoch": 1.6, + "grad_norm": 2.0739190578460693, + "learning_rate": 1.0379851851851853e-05, + "loss": 0.28998117065429685, + "step": 10000 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 3.1562881469726562, + "learning_rate": 9.78725925925926e-06, + "loss": 0.29778146362304686, + "step": 10500 + }, + { + "epoch": 1.76, + "grad_norm": 3.498109817504883, + "learning_rate": 9.194666666666667e-06, + "loss": 0.2988756103515625, + "step": 11000 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 3.3291115760803223, + "learning_rate": 8.602074074074076e-06, + "loss": 
0.2985075988769531, + "step": 11500 + }, + { + "epoch": 1.92, + "grad_norm": 1.631378173828125, + "learning_rate": 8.009481481481483e-06, + "loss": 0.2991393737792969, + "step": 12000 + }, + { + "epoch": 2.0, + "grad_norm": 1.3230953216552734, + "learning_rate": 7.416888888888889e-06, + "loss": 0.30236148071289065, + "step": 12500 + }, + { + "epoch": 2.08, + "grad_norm": 2.339695930480957, + "learning_rate": 6.825481481481482e-06, + "loss": 0.295860595703125, + "step": 13000 + }, + { + "epoch": 2.16, + "grad_norm": 1.0685478448867798, + "learning_rate": 6.234074074074075e-06, + "loss": 0.2980207824707031, + "step": 13500 + }, + { + "epoch": 2.24, + "grad_norm": 0.947058379650116, + "learning_rate": 5.6414814814814825e-06, + "loss": 0.29257803344726563, + "step": 14000 + }, + { + "epoch": 2.32, + "grad_norm": 2.2130205631256104, + "learning_rate": 5.0488888888888895e-06, + "loss": 0.2826576843261719, + "step": 14500 + }, + { + "epoch": 2.4, + "grad_norm": 6.699328422546387, + "learning_rate": 4.4562962962962965e-06, + "loss": 0.30620053100585937, + "step": 15000 + }, + { + "epoch": 2.48, + "grad_norm": 0.5939074158668518, + "learning_rate": 3.863703703703704e-06, + "loss": 0.3044532470703125, + "step": 15500 + }, + { + "epoch": 2.56, + "grad_norm": 2.3366057872772217, + "learning_rate": 3.2711111111111117e-06, + "loss": 0.28407180786132813, + "step": 16000 + } + ], + "logging_steps": 500, + "max_steps": 18750, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..9a8c6e3 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:546a615de05e8adf2749ffb2c7c65e652fb8eff7d18b9af7b4f23b45741d3fb4 +size 5521