commit da9e809435ea7efb66d5afbd2ad590104388df5f Author: ModelHub XC Date: Wed May 27 04:48:16 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: Kittipong/wav2vec2-th-vocal-domain Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..ac481c8 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,27 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..685f5c8 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +--- +license: cc-by-sa-4.0 +--- diff --git a/config.json b/config.json new file mode 100644 index 0000000..e49f699 --- /dev/null +++ b/config.json @@ -0,0 +1,85 @@ +{ + "_name_or_path": "facebook/wav2vec2-large-xlsr-53", + "activation_dropout": 0.0, + "apply_spec_augment": true, + "architectures": [ + "Wav2Vec2ForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "codevector_dim": 768, + "contrastive_logits_temperature": 0.1, + "conv_bias": true, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.0, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.0, + "gradient_checkpointing": true, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_channel_length": 10, + "mask_channel_min_space": 1, + "mask_channel_other": 0.0, + "mask_channel_prob": 0.0, + "mask_channel_selection": "static", + "mask_feature_length": 10, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_space": 1, + "mask_time_other": 0.0, + "mask_time_prob": 0.05, + "mask_time_selection": "static", + "model_type": "wav2vec2", + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 24, + "num_negatives": 100, + "pad_token_id": 69, + "proj_codevector_dim": 768, + "torch_dtype": "float32", + "transformers_version": "4.9.1", + "vocab_size": 70 +} diff --git a/eval.py b/eval.py new file mode 100644 index 0000000..f0af998 --- /dev/null +++ b/eval.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +import argparse +import re +from typing import Dict + +from datasets import Audio, Dataset, load_dataset, load_metric + +from transformers import AutoFeatureExtractor, pipeline + +from pythainlp.tokenize import word_tokenize, syllable_tokenize +from deepcut import tokenize as deepcut_word_tokenize +from functools import partial + + +def log_results(result: Dataset, args: Dict[str, str]): + """DO NOT CHANGE. This function computes and logs the result metrics.""" + + log_outputs = args.log_outputs + dataset_id = "_".join(args.dataset.split("/") + [args.config, args.split]) + + # load metric + wer = load_metric("wer") + cer = load_metric("cer") + + # compute metrics + wer_result = wer.compute(references=result["target"], predictions=result["prediction"]) + cer_result = cer.compute(references=result["target"], predictions=result["prediction"]) + + # print & log results + result_str = f"WER: {wer_result}\n" f"CER: {cer_result}" + print(result_str) + + with open(f"robust-speech-event/{dataset_id}_eval_results_{args.thai_tokenizer}.txt", "w") as f: + f.write(result_str) + + # log all results in text file. Possibly interesting for analysis + if log_outputs is not None: + pred_file = f"robust-speech-event/log_{dataset_id}_predictions_{args.thai_tokenizer}.txt" + target_file = f"robust-speech-event/log_{dataset_id}_targets_{args.thai_tokenizer}.txt" + + with open(pred_file, "w") as p, open(target_file, "w") as t: + + # mapping function to write output + def write_to_file(batch, i): + p.write(f"{i}" + "\n") + p.write(batch["prediction"] + "\n") + t.write(f"{i}" + "\n") + t.write(batch["target"] + "\n") + + result.map(write_to_file, with_indices=True) + + +def normalize_text(text: str, tok_func) -> str: + """DO ADAPT FOR YOUR USE CASE. this function normalizes the target text.""" + + chars_to_ignore_regex = '[,?.!\-\;\:"“%‘”�—’…–]' # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training + + text = re.sub(chars_to_ignore_regex, "", text.lower()) + + # In addition, we can normalize the target text, e.g. removing new lines characters etc... + # note that order is important here! + token_sequences_to_ignore = ["\n\n", "\n", " ", " "] + + for t in token_sequences_to_ignore: + text = " ".join(text.split(t)) + + #thai tokenize + text = " ".join(tok_func(text)) + + return text + +def retokenize(text:str, tok_func) -> str: + """tokenize and rejoin prediction outputs without cleaning""" + return " ".join(tok_func("".join(text.split()))) + + +def main(args): + # load dataset + dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True) + + # for testing: only process the first two examples as a test + dataset = dataset.select(range(10)) + + # load processor + feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id) + sampling_rate = feature_extractor.sampling_rate + + # resample audio + dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate)) + + # load eval pipeline + asr = pipeline("automatic-speech-recognition", model=args.model_id) + + #select tokenizer + if args.thai_tokenizer=='deepcut': + tok_func = deepcut_word_tokenize + elif args.thai_tokenizer=='newmm': + tok_func = word_tokenize + elif args.thai_tokenizer=='syllable': + tok_func = syllable_tokenize + else: + tok_func = lambda x: x.replace(' ','') + + # map function to decode audio + def map_to_pred(batch, tok_func): + prediction = asr( + batch["audio"]["array"], chunk_length_s=args.chunk_length_s, stride_length_s=args.stride_length_s + ) + + batch["prediction"] = retokenize(prediction["text"], tok_func) + batch["target"] = normalize_text(batch["sentence"], tok_func) + return batch + + # run inference on all examples + result = dataset.map(partial(map_to_pred, tok_func=tok_func), + remove_columns=dataset.column_names) + + # compute and log_results + # do not change function below + log_results(result, args) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_id", type=str, required=True, help="Model identifier. Should be loadable with 🤗 Transformers" + ) + parser.add_argument( + "--thai_tokenizer", type=str, default="newmm", + required=True, help="newmm, syllable, or deepcut; if not specified, remove all spaces (used for CER calculation)" + ) + parser.add_argument( + "--dataset", + type=str, + required=True, + help="Dataset name to evaluate the `model_id`. Should be loadable with 🤗 Datasets", + ) + parser.add_argument( + "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice" + ) + parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`") + parser.add_argument( + "--chunk_length_s", type=float, default=None, help="Chunk length in seconds. Defaults to 5 seconds." + ) + parser.add_argument( + "--stride_length_s", type=float, default=None, help="Stride of the audio chunks. Defaults to 1 second." + ) + parser.add_argument( + "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis." + ) + args = parser.parse_args() + + main(args) \ No newline at end of file diff --git a/optimizer-002.pt b/optimizer-002.pt new file mode 100644 index 0000000..ed65d80 --- /dev/null +++ b/optimizer-002.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63480cd0bddf763bbfa016f364b5af6262bbd4584e1e3f1223bdf042feaf7080 +size 2490632977 diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000..a0b7227 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..2a368c6 --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dbb3a749c3f6e08fe2af7227d4857723fd1e525157b79ccb0dba255a69fdce8 +size 1262210673 diff --git a/rng_state.pth b/rng_state.pth new file mode 100644 index 0000000..0742e20 --- /dev/null +++ b/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e9b980149921cf7dc2fe91f225c1c3f931cc22ff40d7e30639d7236b390d622 +size 14567 diff --git a/scheduler.pt b/scheduler.pt new file mode 100644 index 0000000..e440370 --- /dev/null +++ b/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b9ed0945bc75b9b51a6a34f17bea3f2eca616815e76668e99905ed244654b22 +size 623 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..9abf719 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{"bos_token": "", "eos_token": "", "unk_token": "[UNK]", "pad_token": "[PAD]"} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..f5118ab --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"unk_token": "[UNK]", "bos_token": "", "eos_token": "", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..d776f24 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,616 @@ +{ + "best_metric": 0.472552783109405, + "best_model_checkpoint": "/content/drive/MyDrive/new_dataset/wav2vec2-large-xlsr-53-thai-finetune/checkpoint-500", + "epoch": 99.98765432098766, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 2.49, + "learning_rate": 0.0001, + "loss": 1.9911, + "step": 50 + }, + { + "epoch": 2.49, + "eval_loss": 1.2750070095062256, + "eval_runtime": 34.892, + "eval_samples_per_second": 12.238, + "eval_steps_per_second": 0.774, + "eval_wer": 0.6767754318618042, + "step": 50 + }, + { + "epoch": 4.99, + "learning_rate": 9.743589743589744e-05, + "loss": 1.2107, + "step": 100 + }, + { + "epoch": 4.99, + "eval_loss": 1.1088056564331055, + "eval_runtime": 34.7796, + "eval_samples_per_second": 12.277, + "eval_steps_per_second": 0.776, + "eval_wer": 0.5927063339731286, + "step": 100 + }, + { + "epoch": 7.49, + "learning_rate": 9.487179487179487e-05, + "loss": 0.9686, + "step": 150 + }, + { + "epoch": 7.49, + "eval_loss": 1.0476980209350586, + "eval_runtime": 34.6203, + "eval_samples_per_second": 12.334, + "eval_steps_per_second": 0.78, + "eval_wer": 0.5712092130518234, + "step": 150 + }, + { + "epoch": 9.99, + "learning_rate": 9.230769230769232e-05, + "loss": 0.7504, + "step": 200 + }, + { + "epoch": 9.99, + "eval_loss": 1.0640665292739868, + "eval_runtime": 34.843, + "eval_samples_per_second": 12.255, + "eval_steps_per_second": 0.775, + "eval_wer": 0.5504798464491363, + "step": 200 + }, + { + "epoch": 12.49, + "learning_rate": 8.974358974358975e-05, + "loss": 0.6352, + "step": 250 + }, + { + "epoch": 12.49, + "eval_loss": 1.095747709274292, + "eval_runtime": 33.9281, + "eval_samples_per_second": 12.585, + "eval_steps_per_second": 0.796, + "eval_wer": 0.5324376199616123, + "step": 250 + }, + { + "epoch": 14.99, + "learning_rate": 8.717948717948718e-05, + "loss": 0.5313, + "step": 300 + }, + { + "epoch": 14.99, + "eval_loss": 1.0491594076156616, + "eval_runtime": 33.7693, + "eval_samples_per_second": 12.645, + "eval_steps_per_second": 0.8, + "eval_wer": 0.5163147792706334, + "step": 300 + }, + { + "epoch": 17.49, + "learning_rate": 8.461538461538461e-05, + "loss": 0.4461, + "step": 350 + }, + { + "epoch": 17.49, + "eval_loss": 1.0721960067749023, + "eval_runtime": 33.9128, + "eval_samples_per_second": 12.591, + "eval_steps_per_second": 0.796, + "eval_wer": 0.5124760076775432, + "step": 350 + }, + { + "epoch": 19.99, + "learning_rate": 8.205128205128205e-05, + "loss": 0.4094, + "step": 400 + }, + { + "epoch": 19.99, + "eval_loss": 1.057926058769226, + "eval_runtime": 34.0347, + "eval_samples_per_second": 12.546, + "eval_steps_per_second": 0.793, + "eval_wer": 0.49750479846449136, + "step": 400 + }, + { + "epoch": 22.49, + "learning_rate": 7.948717948717948e-05, + "loss": 0.3467, + "step": 450 + }, + { + "epoch": 22.49, + "eval_loss": 1.0208101272583008, + "eval_runtime": 34.3752, + "eval_samples_per_second": 12.422, + "eval_steps_per_second": 0.785, + "eval_wer": 0.491362763915547, + "step": 450 + }, + { + "epoch": 24.99, + "learning_rate": 7.692307692307693e-05, + "loss": 0.3195, + "step": 500 + }, + { + "epoch": 24.99, + "eval_loss": 1.0337833166122437, + "eval_runtime": 33.8794, + "eval_samples_per_second": 12.604, + "eval_steps_per_second": 0.797, + "eval_wer": 0.472552783109405, + "step": 500 + }, + { + "epoch": 27.49, + "learning_rate": 7.435897435897436e-05, + "loss": 0.3005, + "step": 550 + }, + { + "epoch": 27.49, + "eval_loss": 1.0594605207443237, + "eval_runtime": 33.9955, + "eval_samples_per_second": 12.56, + "eval_steps_per_second": 0.794, + "eval_wer": 0.47946257197696734, + "step": 550 + }, + { + "epoch": 29.99, + "learning_rate": 7.17948717948718e-05, + "loss": 0.2933, + "step": 600 + }, + { + "epoch": 29.99, + "eval_loss": 1.017477035522461, + "eval_runtime": 33.8622, + "eval_samples_per_second": 12.61, + "eval_steps_per_second": 0.797, + "eval_wer": 0.472936660268714, + "step": 600 + }, + { + "epoch": 32.49, + "learning_rate": 6.923076923076924e-05, + "loss": 0.2601, + "step": 650 + }, + { + "epoch": 32.49, + "eval_loss": 1.099133014678955, + "eval_runtime": 34.4437, + "eval_samples_per_second": 12.397, + "eval_steps_per_second": 0.784, + "eval_wer": 0.4652591170825336, + "step": 650 + }, + { + "epoch": 34.99, + "learning_rate": 6.666666666666667e-05, + "loss": 0.2226, + "step": 700 + }, + { + "epoch": 34.99, + "eval_loss": 1.1290050745010376, + "eval_runtime": 34.6356, + "eval_samples_per_second": 12.328, + "eval_steps_per_second": 0.78, + "eval_wer": 0.47063339731285986, + "step": 700 + }, + { + "epoch": 37.49, + "learning_rate": 6.410256410256412e-05, + "loss": 0.2262, + "step": 750 + }, + { + "epoch": 37.49, + "eval_loss": 1.0954631567001343, + "eval_runtime": 35.3666, + "eval_samples_per_second": 12.074, + "eval_steps_per_second": 0.763, + "eval_wer": 0.45681381957773515, + "step": 750 + }, + { + "epoch": 39.99, + "learning_rate": 6.153846153846155e-05, + "loss": 0.2236, + "step": 800 + }, + { + "epoch": 39.99, + "eval_loss": 1.1119202375411987, + "eval_runtime": 35.0365, + "eval_samples_per_second": 12.187, + "eval_steps_per_second": 0.771, + "eval_wer": 0.4491362763915547, + "step": 800 + }, + { + "epoch": 42.49, + "learning_rate": 5.897435897435898e-05, + "loss": 0.2029, + "step": 850 + }, + { + "epoch": 42.49, + "eval_loss": 1.1375640630722046, + "eval_runtime": 35.2676, + "eval_samples_per_second": 12.107, + "eval_steps_per_second": 0.766, + "eval_wer": 0.45220729366602685, + "step": 850 + }, + { + "epoch": 44.99, + "learning_rate": 5.6410256410256414e-05, + "loss": 0.1876, + "step": 900 + }, + { + "epoch": 44.99, + "eval_loss": 1.142003059387207, + "eval_runtime": 35.6836, + "eval_samples_per_second": 11.966, + "eval_steps_per_second": 0.757, + "eval_wer": 0.4476007677543186, + "step": 900 + }, + { + "epoch": 47.49, + "learning_rate": 5.384615384615385e-05, + "loss": 0.1733, + "step": 950 + }, + { + "epoch": 47.49, + "eval_loss": 1.1137712001800537, + "eval_runtime": 34.9014, + "eval_samples_per_second": 12.234, + "eval_steps_per_second": 0.774, + "eval_wer": 0.44337811900191937, + "step": 950 + }, + { + "epoch": 49.99, + "learning_rate": 5.128205128205128e-05, + "loss": 0.161, + "step": 1000 + }, + { + "epoch": 49.99, + "eval_loss": 1.195468783378601, + "eval_runtime": 35.1209, + "eval_samples_per_second": 12.158, + "eval_steps_per_second": 0.769, + "eval_wer": 0.44798464491362766, + "step": 1000 + }, + { + "epoch": 52.49, + "learning_rate": 4.871794871794872e-05, + "loss": 0.1661, + "step": 1050 + }, + { + "epoch": 52.49, + "eval_loss": 1.1598896980285645, + "eval_runtime": 34.8413, + "eval_samples_per_second": 12.256, + "eval_steps_per_second": 0.775, + "eval_wer": 0.44606525911708256, + "step": 1050 + }, + { + "epoch": 54.99, + "learning_rate": 4.615384615384616e-05, + "loss": 0.1533, + "step": 1100 + }, + { + "epoch": 54.99, + "eval_loss": 1.143904447555542, + "eval_runtime": 34.3532, + "eval_samples_per_second": 12.43, + "eval_steps_per_second": 0.786, + "eval_wer": 0.4383877159309021, + "step": 1100 + }, + { + "epoch": 57.49, + "learning_rate": 4.358974358974359e-05, + "loss": 0.1565, + "step": 1150 + }, + { + "epoch": 57.49, + "eval_loss": 1.1354175806045532, + "eval_runtime": 34.4974, + "eval_samples_per_second": 12.378, + "eval_steps_per_second": 0.783, + "eval_wer": 0.4418426103646833, + "step": 1150 + }, + { + "epoch": 59.99, + "learning_rate": 4.1025641025641023e-05, + "loss": 0.1476, + "step": 1200 + }, + { + "epoch": 59.99, + "eval_loss": 1.1580840349197388, + "eval_runtime": 34.4611, + "eval_samples_per_second": 12.391, + "eval_steps_per_second": 0.783, + "eval_wer": 0.4410748560460653, + "step": 1200 + }, + { + "epoch": 62.49, + "learning_rate": 3.846153846153846e-05, + "loss": 0.1378, + "step": 1250 + }, + { + "epoch": 62.49, + "eval_loss": 1.1266977787017822, + "eval_runtime": 34.2613, + "eval_samples_per_second": 12.463, + "eval_steps_per_second": 0.788, + "eval_wer": 0.4303262955854127, + "step": 1250 + }, + { + "epoch": 64.99, + "learning_rate": 3.58974358974359e-05, + "loss": 0.1214, + "step": 1300 + }, + { + "epoch": 64.99, + "eval_loss": 1.165766716003418, + "eval_runtime": 33.9904, + "eval_samples_per_second": 12.562, + "eval_steps_per_second": 0.794, + "eval_wer": 0.42879078694817657, + "step": 1300 + }, + { + "epoch": 67.49, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.133, + "step": 1350 + }, + { + "epoch": 67.49, + "eval_loss": 1.137351632118225, + "eval_runtime": 34.008, + "eval_samples_per_second": 12.556, + "eval_steps_per_second": 0.794, + "eval_wer": 0.43339731285988486, + "step": 1350 + }, + { + "epoch": 69.99, + "learning_rate": 3.0769230769230774e-05, + "loss": 0.1147, + "step": 1400 + }, + { + "epoch": 69.99, + "eval_loss": 1.1521999835968018, + "eval_runtime": 33.8683, + "eval_samples_per_second": 12.608, + "eval_steps_per_second": 0.797, + "eval_wer": 0.4245681381957774, + "step": 1400 + }, + { + "epoch": 72.49, + "learning_rate": 2.8205128205128207e-05, + "loss": 0.125, + "step": 1450 + }, + { + "epoch": 72.49, + "eval_loss": 1.1379369497299194, + "eval_runtime": 34.1582, + "eval_samples_per_second": 12.501, + "eval_steps_per_second": 0.79, + "eval_wer": 0.43570057581573896, + "step": 1450 + }, + { + "epoch": 74.99, + "learning_rate": 2.564102564102564e-05, + "loss": 0.1189, + "step": 1500 + }, + { + "epoch": 74.99, + "eval_loss": 1.1502233743667603, + "eval_runtime": 33.9539, + "eval_samples_per_second": 12.576, + "eval_steps_per_second": 0.795, + "eval_wer": 0.4284069097888676, + "step": 1500 + }, + { + "epoch": 77.49, + "learning_rate": 2.307692307692308e-05, + "loss": 0.1122, + "step": 1550 + }, + { + "epoch": 77.49, + "eval_loss": 1.1575168371200562, + "eval_runtime": 34.1792, + "eval_samples_per_second": 12.493, + "eval_steps_per_second": 0.79, + "eval_wer": 0.4314779270633397, + "step": 1550 + }, + { + "epoch": 79.99, + "learning_rate": 2.0512820512820512e-05, + "loss": 0.1235, + "step": 1600 + }, + { + "epoch": 79.99, + "eval_loss": 1.142774224281311, + "eval_runtime": 33.8121, + "eval_samples_per_second": 12.629, + "eval_steps_per_second": 0.799, + "eval_wer": 0.42610364683301344, + "step": 1600 + }, + { + "epoch": 82.49, + "learning_rate": 1.794871794871795e-05, + "loss": 0.1092, + "step": 1650 + }, + { + "epoch": 82.49, + "eval_loss": 1.1530485153198242, + "eval_runtime": 33.6852, + "eval_samples_per_second": 12.676, + "eval_steps_per_second": 0.802, + "eval_wer": 0.4341650671785029, + "step": 1650 + }, + { + "epoch": 84.99, + "learning_rate": 1.5384615384615387e-05, + "loss": 0.1185, + "step": 1700 + }, + { + "epoch": 84.99, + "eval_loss": 1.1284310817718506, + "eval_runtime": 33.8486, + "eval_samples_per_second": 12.615, + "eval_steps_per_second": 0.798, + "eval_wer": 0.42994241842610365, + "step": 1700 + }, + { + "epoch": 87.49, + "learning_rate": 1.282051282051282e-05, + "loss": 0.1048, + "step": 1750 + }, + { + "epoch": 87.49, + "eval_loss": 1.1434178352355957, + "eval_runtime": 33.9335, + "eval_samples_per_second": 12.583, + "eval_steps_per_second": 0.796, + "eval_wer": 0.4276391554702495, + "step": 1750 + }, + { + "epoch": 89.99, + "learning_rate": 1.0256410256410256e-05, + "loss": 0.1038, + "step": 1800 + }, + { + "epoch": 89.99, + "eval_loss": 1.1609505414962769, + "eval_runtime": 34.1384, + "eval_samples_per_second": 12.508, + "eval_steps_per_second": 0.791, + "eval_wer": 0.42418426103646834, + "step": 1800 + }, + { + "epoch": 92.49, + "learning_rate": 7.692307692307694e-06, + "loss": 0.1073, + "step": 1850 + }, + { + "epoch": 92.49, + "eval_loss": 1.1562278270721436, + "eval_runtime": 33.8455, + "eval_samples_per_second": 12.616, + "eval_steps_per_second": 0.798, + "eval_wer": 0.4238003838771593, + "step": 1850 + }, + { + "epoch": 94.99, + "learning_rate": 5.128205128205128e-06, + "loss": 0.1055, + "step": 1900 + }, + { + "epoch": 94.99, + "eval_loss": 1.1589001417160034, + "eval_runtime": 34.1041, + "eval_samples_per_second": 12.52, + "eval_steps_per_second": 0.792, + "eval_wer": 0.4214971209213052, + "step": 1900 + }, + { + "epoch": 97.49, + "learning_rate": 2.564102564102564e-06, + "loss": 0.1133, + "step": 1950 + }, + { + "epoch": 97.49, + "eval_loss": 1.1536645889282227, + "eval_runtime": 33.9701, + "eval_samples_per_second": 12.57, + "eval_steps_per_second": 0.795, + "eval_wer": 0.4238003838771593, + "step": 1950 + }, + { + "epoch": 99.99, + "learning_rate": 0.0, + "loss": 0.0992, + "step": 2000 + }, + { + "epoch": 99.99, + "eval_loss": 1.1553977727890015, + "eval_runtime": 33.8655, + "eval_samples_per_second": 12.609, + "eval_steps_per_second": 0.797, + "eval_wer": 0.42418426103646834, + "step": 2000 + } + ], + "max_steps": 2000, + "num_train_epochs": 100, + "total_flos": 1.6479245346890066e+19, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..8934fac --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a007a4ec524a3af0145655a9608fca61329594a670f7bbbd2bb694f2d8648c8 +size 2799 diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..0046279 --- /dev/null +++ b/vocab.json @@ -0,0 +1 @@ +{"ฑ": 0, "ๅ": 1, "ก": 2, "ง": 3, "ฒ": 4, "ะ": 5, "๊": 6, "้": 7, "ฌ": 8, "ซ": 9, "ด": 10, "ฯ": 11, "ใ": 12, "ึ": 13, "ญ": 14, "่": 15, "า": 16, "ฤ": 17, "๋": 18, "อ": 19, "ฬ": 20, "ท": 21, "โ": 22, "ภ": 23, "ย": 24, "็": 25, "ล": 26, "ุ": 27, "เ": 28, "ฮ": 29, "ฝ": 30, "ป": 31, "ี": 32, "บ": 33, "ฐ": 34, "ต": 35, "ถ": 36, "ศ": 37, "ฟ": 38, "ณ": 39, "ห": 40, "ร": 41, "พ": 43, "ฆ": 44, "ั": 45, "ค": 46, "ว": 47, "ฏ": 48, "จ": 49, "แ": 50, "ม": 51, "ฎ": 52, "ฉ": 53, "์": 54, "ษ": 55, "ำ": 56, "ผ": 57, "ข": 58, "ไ": 59, "ู": 60, "ื": 61, "น": 62, "ช": 63, "ิ": 64, "ธ": 65, "ฃ": 66, "ส": 67, "|": 42, "[UNK]": 68, "[PAD]": 69} \ No newline at end of file