commit 3e0e7421b3737131822c2408219896df38a76140 Author: ModelHub XC Date: Mon May 18 12:08:49 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: whitefox123/w2v-bert-2.0-arabic-4 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a6344aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..0d86612 --- /dev/null +++ b/README.md @@ -0,0 +1,80 @@ +--- +license: mit +base_model: facebook/w2v-bert-2.0 +tags: +- generated_from_trainer +datasets: +- audiofolder +metrics: +- wer +model-index: +- name: w2v-bert-2.0-arabic-4 + results: + - task: + name: Automatic Speech Recognition + type: automatic-speech-recognition + dataset: + name: audiofolder + type: audiofolder + config: default + split: test + args: default + metrics: + - name: Wer + type: wer + value: 0.1809009009009009 +--- + + + +# w2v-bert-2.0-arabic-4 + +This model is a fine-tuned version of [facebook/w2v-bert-2.0](https://huggingface.co/facebook/w2v-bert-2.0) on the audiofolder dataset. +It achieves the following results on the evaluation set: +- Loss: 0.1952 +- Wer: 0.1809 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-05 +- train_batch_size: 16 +- eval_batch_size: 8 +- seed: 42 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 32 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: linear +- lr_scheduler_warmup_steps: 500 +- num_epochs: 2 +- mixed_precision_training: Native AMP + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Wer | +|:-------------:|:-----:|:----:|:---------------:|:------:| +| 1.5106 | 0.96 | 300 | 0.2448 | 0.2858 | +| 0.2479 | 1.92 | 600 | 0.1952 | 0.1809 | + + +### Framework versions + +- Transformers 4.38.0.dev0 +- Pytorch 2.1.0+cu118 +- Datasets 2.17.1 +- Tokenizers 0.15.2 diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000..cecb0a4 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 48, + "": 47 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000..0365640 --- /dev/null +++ b/config.json @@ -0,0 +1,82 @@ +{ + "_name_or_path": "facebook/w2v-bert-2.0", + "activation_dropout": 0.0, + "adapter_act": "relu", + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": true, + "apply_spec_augment": false, + "architectures": [ + "Wav2Vec2BertForCTC" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "classifier_proj_size": 768, + "codevector_dim": 768, + "conformer_conv_dropout": 0.1, + "contrastive_logits_temperature": 0.1, + "conv_depthwise_kernel_size": 31, + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "eos_token_id": 2, + "feat_proj_dropout": 0.0, + "feat_quantizer_dropout": 0.0, + "feature_projection_input_dim": 160, + "final_dropout": 0.1, + "hidden_act": "swish", + "hidden_dropout": 0.0, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "left_max_position_embeddings": 64, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.0, + "max_source_positions": 5000, + "model_type": "wav2vec2-bert", + "num_adapter_layers": 1, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_hidden_layers": 24, + "num_negatives": 100, + "output_hidden_size": 1024, + "pad_token_id": 46, + "position_embeddings_type": "relative_key", + "proj_codevector_dim": 768, + "right_max_position_embeddings": 8, + "rotary_embedding_base": 10000, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.38.0.dev0", + "use_intermediate_ffn_before_adapter": false, + "use_weighted_layer_sum": false, + "vocab_size": 49, + "xvector_output_dim": 512 +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..b8816ad --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4558f89f3073700b25dc529a24b8802851d91112be130583f6a826798cd403aa +size 2423015460 diff --git a/modelx.safetensors b/modelx.safetensors new file mode 100644 index 0000000..a06f81d --- /dev/null +++ b/modelx.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f28ea5999e079ee9821de807d49f8b6ded90e5661d9fe871a006456123d08d2 +size 2423015460 diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000..b6a54f3 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,11 @@ +{ + "feature_extractor_type": "SeamlessM4TFeatureExtractor", + "feature_size": 80, + "num_mel_bins": 80, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "Wav2Vec2BertProcessor", + "return_attention_mask": true, + "sampling_rate": 16000, + "stride": 2 +} diff --git a/runs/Feb20_13-11-11_2576a65114d0/events.out.tfevents.1708434816.2576a65114d0 b/runs/Feb20_13-11-11_2576a65114d0/events.out.tfevents.1708434816.2576a65114d0 new file mode 100644 index 0000000..d1531c8 --- /dev/null +++ b/runs/Feb20_13-11-11_2576a65114d0/events.out.tfevents.1708434816.2576a65114d0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9cd2d27cca92a93c9e840af885a24083004a9068168aae599b6b03171a3210 +size 7207 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..1c2a036 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + "unk_token": "[UNK]" +} diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..a3558a5 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,48 @@ +{ + "added_tokens_decoder": { + "45": { + "content": "[UNK]", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "46": { + "content": "[PAD]", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "do_lower_case": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "processor_class": "Wav2Vec2BertProcessor", + "replace_word_delimiter_char": " ", + "target_lang": null, + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "unk_token": "[UNK]", + "word_delimiter_token": "|" +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..0c47855 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610083a8803a5481c5f1bb5b8ec62938390ac9321b56ebc9bff02522b248c905 +size 4856 diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..20fa3cb --- /dev/null +++ b/vocab.json @@ -0,0 +1,49 @@ +{ + "[PAD]": 46, + "[UNK]": 45, + "|": 0, + "ء": 1, + "آ": 2, + "أ": 3, + "ؤ": 4, + "إ": 5, + "ئ": 6, + "ا": 7, + "ب": 8, + "ة": 9, + "ت": 10, + "ث": 11, + "ج": 12, + "ح": 13, + "خ": 14, + "د": 15, + "ذ": 16, + "ر": 17, + "ز": 18, + "س": 19, + "ش": 20, + "ص": 21, + "ض": 22, + "ط": 23, + "ظ": 24, + "ع": 25, + "غ": 26, + "ف": 27, + "ق": 28, + "ك": 29, + "ل": 30, + "م": 31, + "ن": 32, + "ه": 33, + "و": 34, + "ى": 35, + "ي": 36, + "ً": 37, + "ٌ": 38, + "ٍ": 39, + "َ": 40, + "ُ": 41, + "ِ": 42, + "ّ": 43, + "ْ": 44 +}