commit 5ca2086cc6e88acfc0c44d5c9d1a9081e2a34002 Author: ModelHub XC Date: Tue May 12 22:15:37 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: MediaCatch/xls-r-300m-danish-mc-v2 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..c286889 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +language_model/unigrams.txt filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new 
file mode 100644 index 0000000..3b32e17 --- /dev/null +++ b/README.md @@ -0,0 +1,168 @@ +--- +library_name: transformers +license: apache-2.0 +base_model: MediaCatch/xls-r-300m-danish-mc +tags: +- automatic-speech-recognition +- generated_from_trainer +metrics: +- wer +model-index: +- name: mc_danish + results: [] +--- + + + +# mc_danish + +This model is a fine-tuned version of [MediaCatch/xls-r-300m-danish-mc](https://huggingface.co/MediaCatch/xls-r-300m-danish-mc) on the Preprocessed Dataset. +It achieves the following results on the evaluation set: +- Loss: 0.1570 +- Wer: 0.0875 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.0001 +- train_batch_size: 3 +- eval_batch_size: 6 +- seed: 69 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 4 +- total_train_batch_size: 48 +- total_eval_batch_size: 24 +- optimizer: Use adamw_torch_fused with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 500 +- num_epochs: 10.0 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Wer | +|:-------------:|:------:|:-----:|:---------------:|:------:| +| No log | 0 | 0 | 3.1518 | 0.1154 | +| 0.1233 | 0.0991 | 500 | 0.1684 | 0.1039 | +| 0.1122 | 0.1982 | 1000 | 0.1536 | 0.1059 | +| 0.109 | 0.2973 | 1500 | 0.1497 | 0.1032 | +| 0.1027 | 0.3965 | 2000 | 0.1471 | 0.1036 | +| 0.1082 | 0.4956 | 2500 | 0.1446 | 0.1053 | +| 0.105 | 0.5947 | 3000 | 0.1440 | 0.1059 | +| 0.0986 | 0.6938 | 3500 | 0.1436 | 0.1041 | +| 0.1037 | 0.7929 | 4000 | 0.1404 | 0.1015 | +| 0.0988 | 0.8920 | 4500 | 0.1391 | 0.1008 | +| 0.0979 | 0.9911 | 5000 | 0.1373 | 0.0995 | +| 0.0833 | 1.0902 | 5500 | 
0.1385 | 0.0991 | +| 0.0855 | 1.1893 | 6000 | 0.1407 | 0.0990 | +| 0.0848 | 1.2884 | 6500 | 0.1390 | 0.0975 | +| 0.0845 | 1.3875 | 7000 | 0.1365 | 0.0986 | +| 0.0824 | 1.4866 | 7500 | 0.1373 | 0.0975 | +| 0.0842 | 1.5858 | 8000 | 0.1353 | 0.0979 | +| 0.0837 | 1.6849 | 8500 | 0.1334 | 0.0968 | +| 0.0823 | 1.7840 | 9000 | 0.1348 | 0.0960 | +| 0.0851 | 1.8831 | 9500 | 0.1331 | 0.0957 | +| 0.0843 | 1.9822 | 10000 | 0.1299 | 0.0949 | +| 0.0707 | 2.0813 | 10500 | 0.1337 | 0.0926 | +| 0.0709 | 2.1804 | 11000 | 0.1332 | 0.0958 | +| 0.0742 | 2.2795 | 11500 | 0.1316 | 0.0944 | +| 0.0742 | 2.3786 | 12000 | 0.1356 | 0.0959 | +| 0.0719 | 2.4777 | 12500 | 0.1323 | 0.0969 | +| 0.0739 | 2.5768 | 13000 | 0.1286 | 0.0951 | +| 0.0695 | 2.6760 | 13500 | 0.1315 | 0.0957 | +| 0.0741 | 2.7751 | 14000 | 0.1310 | 0.0940 | +| 0.0729 | 2.8742 | 14500 | 0.1303 | 0.0970 | +| 0.0695 | 2.9733 | 15000 | 0.1316 | 0.0939 | +| 0.0637 | 3.0724 | 15500 | 0.1353 | 0.0955 | +| 0.0664 | 3.1715 | 16000 | 0.1333 | 0.0940 | +| 0.0635 | 3.2706 | 16500 | 0.1370 | 0.0941 | +| 0.0652 | 3.3697 | 17000 | 0.1334 | 0.0937 | +| 0.0653 | 3.4688 | 17500 | 0.1320 | 0.0957 | +| 0.0654 | 3.5679 | 18000 | 0.1365 | 0.0938 | +| 0.0633 | 3.6670 | 18500 | 0.1363 | 0.0943 | +| 0.0642 | 3.7661 | 19000 | 0.1316 | 0.0926 | +| 0.0622 | 3.8653 | 19500 | 0.1282 | 0.0906 | +| 0.0653 | 3.9644 | 20000 | 0.1334 | 0.0904 | +| 0.0585 | 4.0634 | 20500 | 0.1363 | 0.0914 | +| 0.057 | 4.1625 | 21000 | 0.1334 | 0.0935 | +| 0.0591 | 4.2617 | 21500 | 0.1370 | 0.0914 | +| 0.0538 | 4.3608 | 22000 | 0.1357 | 0.0929 | +| 0.0586 | 4.4599 | 22500 | 0.1379 | 0.0916 | +| 0.0556 | 4.5590 | 23000 | 0.1378 | 0.0925 | +| 0.0574 | 4.6581 | 23500 | 0.1353 | 0.0898 | +| 0.0545 | 4.7572 | 24000 | 0.1371 | 0.0912 | +| 0.0572 | 4.8563 | 24500 | 0.1320 | 0.0895 | +| 0.0546 | 4.9554 | 25000 | 0.1361 | 0.0908 | +| 0.0485 | 5.0545 | 25500 | 0.1429 | 0.0926 | +| 0.054 | 5.1536 | 26000 | 0.1401 | 0.0912 | +| 0.0507 | 5.2527 | 26500 | 0.1406 | 0.0888 | +| 0.0519 | 
5.3519 | 27000 | 0.1416 | 0.0902 | +| 0.0524 | 5.4510 | 27500 | 0.1403 | 0.0903 | +| 0.05 | 5.5501 | 28000 | 0.1395 | 0.0890 | +| 0.0503 | 5.6492 | 28500 | 0.1439 | 0.0892 | +| 0.0528 | 5.7483 | 29000 | 0.1402 | 0.0905 | +| 0.0503 | 5.8474 | 29500 | 0.1424 | 0.0902 | +| 0.051 | 5.9465 | 30000 | 0.1412 | 0.0890 | +| 0.0471 | 6.0456 | 30500 | 0.1447 | 0.0893 | +| 0.0461 | 6.1447 | 31000 | 0.1511 | 0.0885 | +| 0.0436 | 6.2438 | 31500 | 0.1505 | 0.0898 | +| 0.0483 | 6.3429 | 32000 | 0.1458 | 0.0884 | +| 0.0457 | 6.4420 | 32500 | 0.1449 | 0.0886 | +| 0.0465 | 6.5412 | 33000 | 0.1430 | 0.0880 | +| 0.0449 | 6.6403 | 33500 | 0.1487 | 0.0892 | +| 0.0455 | 6.7394 | 34000 | 0.1491 | 0.0883 | +| 0.0483 | 6.8385 | 34500 | 0.1476 | 0.0884 | +| 0.0485 | 6.9376 | 35000 | 0.1449 | 0.0885 | +| 0.0445 | 7.0367 | 35500 | 0.1504 | 0.0878 | +| 0.0429 | 7.1358 | 36000 | 0.1544 | 0.0887 | +| 0.0429 | 7.2349 | 36500 | 0.1507 | 0.0885 | +| 0.0449 | 7.3340 | 37000 | 0.1499 | 0.0890 | +| 0.0414 | 7.4331 | 37500 | 0.1522 | 0.0878 | +| 0.0414 | 7.5322 | 38000 | 0.1519 | 0.0888 | +| 0.0405 | 7.6313 | 38500 | 0.1540 | 0.0878 | +| 0.0424 | 7.7305 | 39000 | 0.1535 | 0.0884 | +| 0.0421 | 7.8296 | 39500 | 0.1533 | 0.0883 | +| 0.0418 | 7.9287 | 40000 | 0.1540 | 0.0884 | +| 0.0404 | 8.0278 | 40500 | 0.1537 | 0.0880 | +| 0.0412 | 8.1269 | 41000 | 0.1570 | 0.0875 | +| 0.0408 | 8.2260 | 41500 | 0.1569 | 0.0880 | +| 0.0408 | 8.3251 | 42000 | 0.1567 | 0.0878 | +| 0.039 | 8.4242 | 42500 | 0.1570 | 0.0881 | +| 0.0392 | 8.5233 | 43000 | 0.1559 | 0.0881 | +| 0.0424 | 8.6224 | 43500 | 0.1555 | 0.0887 | +| 0.0394 | 8.7215 | 44000 | 0.1572 | 0.0883 | +| 0.039 | 8.8207 | 44500 | 0.1581 | 0.0886 | +| 0.0398 | 8.9198 | 45000 | 0.1561 | 0.0880 | +| 0.0401 | 9.0188 | 45500 | 0.1565 | 0.0884 | +| 0.0393 | 9.1179 | 46000 | 0.1578 | 0.0881 | +| 0.04 | 9.2171 | 46500 | 0.1581 | 0.0878 | +| 0.0407 | 9.3162 | 47000 | 0.1579 | 0.0880 | +| 0.039 | 9.4153 | 47500 | 0.1582 | 0.0882 | +| 0.0388 | 9.5144 | 48000 | 0.1580 | 0.0883 
| +| 0.0411 | 9.6135 | 48500 | 0.1580 | 0.0881 | +| 0.0403 | 9.7126 | 49000 | 0.1578 | 0.0880 | +| 0.0398 | 9.8117 | 49500 | 0.1578 | 0.0880 | +| 0.0377 | 9.9108 | 50000 | 0.1577 | 0.0880 | + + +### Framework versions + +- Transformers 4.56.2 +- Pytorch 2.8.0+cu128 +- Datasets 4.1.1 +- Tokenizers 0.22.1 diff --git a/alphabet.json b/alphabet.json new file mode 100644 index 0000000..6a18e4a --- /dev/null +++ b/alphabet.json @@ -0,0 +1 @@ +{"labels": ["", "<s>", "</s>", "\u2047", " ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "\u00e6", "\u00f8", "\u00e5", "\u00e9", "\u00f6", "\u00fc"], "is_bpe": false} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..fccb07d --- /dev/null +++ b/config.json @@ -0,0 +1,108 @@ +{ + "activation_dropout": 0.0, + "adapter_attn_dim": null, + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": false, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 768, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": true, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "dtype": "float32", + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.0, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.0, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 
0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "wav2vec2", + "num_adapter_layers": 3, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 24, + "num_negatives": 100, + "output_hidden_size": 1024, + "pad_token_id": 0, + "proj_codevector_dim": 768, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.54.1", + "use_weighted_layer_sum": false, + "vocab_size": 37, + "xvector_output_dim": 512 +} diff --git a/language_model/attrs.json b/language_model/attrs.json new file mode 100644 index 0000000..3c07595 --- /dev/null +++ b/language_model/attrs.json @@ -0,0 +1 @@ +{"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true} \ No newline at end of file diff --git a/language_model/mc_danish_5gram_v2.bin b/language_model/mc_danish_5gram_v2.bin new file mode 100644 index 0000000..a29f3b8 --- /dev/null +++ b/language_model/mc_danish_5gram_v2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c72f95e6f7c24551a372576041b6ac357d6a6dd3ed8aa8079d5a9d6fe990450 +size 1588272288 diff --git a/language_model/unigrams.txt b/language_model/unigrams.txt new file mode 100644 index 0000000..ac8ae40 --- /dev/null +++ b/language_model/unigrams.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f0e592f6c40c2ca3e6f756af5d04b387d2a126ac249bab5cc1f7f11bcf6e2c +size 32303958 diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..422af46 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9b42ec4bbcdc5677d420658b324bf96958003e26f89bc951a09b0cabc9230257 +size 1261959180 diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000..9f99bca --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,10 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "Wav2Vec2ProcessorWithLM", + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..5a9fa24 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "<s>", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false + }, + "eos_token": { + "content": "</s>", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false + }, + "unk_token": { + "content": "[UNK]", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false + } +} diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..7e9fdb1 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,49 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "[PAD]", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "1": { + "content": "<s>", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "2": { + "content": "</s>", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "3": { + "content": "[UNK]", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + } + }, + "bos_token": "<s>", + "clean_up_tokenization_spaces": false, + 
"do_lower_case": false, + "eos_token": "</s>", + "extra_special_tokens": {}, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "processor_class": "Wav2Vec2ProcessorWithLM", + "replace_word_delimiter_char": " ", + "target_lang": null, + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "unk_token": "[UNK]", + "word_delimiter_token": "|" +} diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..59e2d71 --- /dev/null +++ b/vocab.json @@ -0,0 +1,39 @@ +{ + "</s>": 2, + "<s>": 1, + "[PAD]": 0, + "[UNK]": 3, + "a": 5, + "b": 6, + "c": 7, + "d": 8, + "e": 9, + "f": 10, + "g": 11, + "h": 12, + "i": 13, + "j": 14, + "k": 15, + "l": 16, + "m": 17, + "n": 18, + "o": 19, + "p": 20, + "q": 21, + "r": 22, + "s": 23, + "t": 24, + "u": 25, + "v": 26, + "w": 27, + "x": 28, + "y": 29, + "z": 30, + "|": 4, + "å": 33, + "æ": 31, + "é": 34, + "ö": 35, + "ø": 32, + "ü": 36 +}