初始化项目,由ModelHub XC社区提供模型

Model: Kittipong/wav2vec2-th-vocal-domain
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-27 04:48:16 +08:00
commit da9e809435
14 changed files with 912 additions and 0 deletions

27
.gitattributes vendored Normal file
View File

@@ -0,0 +1,27 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zstandard filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

3
README.md Normal file
View File

@@ -0,0 +1,3 @@
---
license: cc-by-sa-4.0
---

85
config.json Normal file
View File

@@ -0,0 +1,85 @@
{
"_name_or_path": "facebook/wav2vec2-large-xlsr-53",
"activation_dropout": 0.0,
"apply_spec_augment": true,
"architectures": [
"Wav2Vec2ForCTC"
],
"attention_dropout": 0.1,
"bos_token_id": 1,
"codevector_dim": 768,
"contrastive_logits_temperature": 0.1,
"conv_bias": true,
"conv_dim": [
512,
512,
512,
512,
512,
512,
512
],
"conv_kernel": [
10,
3,
3,
3,
3,
2,
2
],
"conv_stride": [
5,
2,
2,
2,
2,
2,
2
],
"ctc_loss_reduction": "mean",
"ctc_zero_infinity": false,
"diversity_loss_weight": 0.1,
"do_stable_layer_norm": true,
"eos_token_id": 2,
"feat_extract_activation": "gelu",
"feat_extract_dropout": 0.0,
"feat_extract_norm": "layer",
"feat_proj_dropout": 0.0,
"feat_quantizer_dropout": 0.0,
"final_dropout": 0.0,
"gradient_checkpointing": true,
"hidden_act": "gelu",
"hidden_dropout": 0.1,
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-05,
"layerdrop": 0.1,
"mask_channel_length": 10,
"mask_channel_min_space": 1,
"mask_channel_other": 0.0,
"mask_channel_prob": 0.0,
"mask_channel_selection": "static",
"mask_feature_length": 10,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_min_space": 1,
"mask_time_other": 0.0,
"mask_time_prob": 0.05,
"mask_time_selection": "static",
"model_type": "wav2vec2",
"num_attention_heads": 16,
"num_codevector_groups": 2,
"num_codevectors_per_group": 320,
"num_conv_pos_embedding_groups": 16,
"num_conv_pos_embeddings": 128,
"num_feat_extract_layers": 7,
"num_hidden_layers": 24,
"num_negatives": 100,
"pad_token_id": 69,
"proj_codevector_dim": 768,
"torch_dtype": "float32",
"transformers_version": "4.9.1",
"vocab_size": 70
}

154
eval.py Normal file
View File

@@ -0,0 +1,154 @@
#!/usr/bin/env python3
import argparse
import re
from typing import Dict
from datasets import Audio, Dataset, load_dataset, load_metric
from transformers import AutoFeatureExtractor, pipeline
from pythainlp.tokenize import word_tokenize, syllable_tokenize
from deepcut import tokenize as deepcut_word_tokenize
from functools import partial
def log_results(result: Dataset, args: argparse.Namespace):
    """Compute WER and CER for ``result``, print them and write them to disk.

    The metric computation mirrors the evaluation template this script is
    based on (originally marked "DO NOT CHANGE") and is left untouched; only
    the file handling around it was hardened.

    Args:
        result: Dataset with a ``"prediction"`` and a ``"target"`` column.
        args: Parsed command-line arguments. Reads ``log_outputs``,
            ``dataset``, ``config``, ``split`` and ``thai_tokenizer``.

    Side effects:
        Writes ``robust-speech-event/<dataset_id>_eval_results_<tokenizer>.txt``
        and, when ``args.log_outputs`` is truthy, one predictions file and one
        targets file next to it.
    """
    import os  # local import: only needed to create the output directory

    log_outputs = args.log_outputs
    dataset_id = "_".join(args.dataset.split("/") + [args.config, args.split])

    # load metric
    wer = load_metric("wer")
    cer = load_metric("cer")

    # compute metrics
    wer_result = wer.compute(references=result["target"], predictions=result["prediction"])
    cer_result = cer.compute(references=result["target"], predictions=result["prediction"])

    # print & log results
    result_str = f"WER: {wer_result}\n" f"CER: {cer_result}"
    print(result_str)

    # The output directory is hard-coded in the file names below; create it so
    # the first run on a fresh checkout does not fail with FileNotFoundError.
    os.makedirs("robust-speech-event", exist_ok=True)

    # Explicit UTF-8: the logged text is Thai and must not depend on the
    # platform's default encoding.
    with open(
        f"robust-speech-event/{dataset_id}_eval_results_{args.thai_tokenizer}.txt", "w", encoding="utf-8"
    ) as f:
        f.write(result_str)

    # log all results in text file. Possibly interesting for analysis
    # `--log_outputs` is a store_true flag (False/True, never None), so test
    # its truthiness; an `is not None` check would always be true.
    if log_outputs:
        pred_file = f"robust-speech-event/log_{dataset_id}_predictions_{args.thai_tokenizer}.txt"
        target_file = f"robust-speech-event/log_{dataset_id}_targets_{args.thai_tokenizer}.txt"

        with open(pred_file, "w", encoding="utf-8") as p, open(target_file, "w", encoding="utf-8") as t:

            # mapping function to write output: index line, then the text line
            def write_to_file(batch, i):
                p.write(f"{i}" + "\n")
                p.write(batch["prediction"] + "\n")
                t.write(f"{i}" + "\n")
                t.write(batch["target"] + "\n")

            # map() is used purely for its side effect of visiting every row
            result.map(write_to_file, with_indices=True)
def normalize_text(text: str, tok_func) -> str:
    """Normalize a target transcript and re-join it on tokenizer boundaries.

    DO ADAPT FOR YOUR USE CASE.

    Steps, in order:
      1. lower-case the text and delete the ignored punctuation characters;
      2. turn newlines into spaces and collapse runs of spaces into one;
      3. tokenize with ``tok_func`` and join the tokens with single spaces.

    Args:
        text: Raw reference sentence.
        tok_func: Callable taking a string and returning an iterable of
            string tokens.

    Returns:
        The normalized, space-joined token string.
    """
    # IMPORTANT: this should correspond to the chars that were ignored during training.
    # Raw string avoids invalid escape sequences; \ufffd is the Unicode
    # replacement character (it previously appeared as the mis-encoded literal
    # text "<EFBFBD>", which made the class strip '<' and '>' instead).
    chars_to_ignore_regex = r'[,?.!\-;:"%\ufffd]'
    text = re.sub(chars_to_ignore_regex, "", text.lower())

    # Replace newlines by spaces and collapse repeated spaces. The previous
    # split/join on a single space was a no-op and never collapsed anything.
    text = re.sub(r"[\n ]+", " ", text)

    # thai tokenize
    return " ".join(tok_func(text))
def retokenize(text: str, tok_func) -> str:
    """Re-segment a prediction: drop all whitespace, tokenize, join with spaces.

    No character cleaning is applied; only the whitespace layout changes.

    Args:
        text: Model output text.
        tok_func: Callable taking a string and returning an iterable of
            string tokens.

    Returns:
        The tokens produced by ``tok_func`` joined by single spaces.
    """
    # str.split() with no argument splits on any whitespace run, so joining
    # the pieces back with "" removes every whitespace character.
    compact = "".join(text.split())
    tokens = tok_func(compact)
    return " ".join(tokens)
def main(args):
    """Transcribe a dataset split with the given model and log WER/CER.

    Args:
        args: Parsed command-line arguments. Reads ``dataset``, ``config``,
            ``split``, ``model_id``, ``thai_tokenizer``, ``chunk_length_s``
            and ``stride_length_s`` here; the whole namespace is forwarded to
            ``log_results``.
    """
    # load dataset
    dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)

    # A leftover debugging line used to truncate the dataset to its first 10
    # rows here, which silently limited the reported metrics to 10 samples and
    # raised on splits with fewer rows. For a quick smoke test, re-add e.g.
    # `dataset = dataset.select(range(min(10, len(dataset))))` locally.

    # load processor
    feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
    sampling_rate = feature_extractor.sampling_rate

    # resample audio to the rate the feature extractor expects
    dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))

    # load eval pipeline
    asr = pipeline("automatic-speech-recognition", model=args.model_id)

    # select tokenizer
    def remove_spaces(x):
        # Fallback "tokenizer": returns a plain string, so the " ".join(...)
        # done by the callers ends up separating individual characters.
        return x.replace(' ', '')

    tokenizers = {
        'deepcut': deepcut_word_tokenize,
        'newmm': word_tokenize,
        'syllable': syllable_tokenize,
    }
    tok_func = tokenizers.get(args.thai_tokenizer, remove_spaces)

    # map function to decode audio
    def map_to_pred(batch, tok_func):
        prediction = asr(
            batch["audio"]["array"], chunk_length_s=args.chunk_length_s, stride_length_s=args.stride_length_s
        )

        # prediction and target go through the same tokenizer so that they
        # are compared on identical token boundaries
        batch["prediction"] = retokenize(prediction["text"], tok_func)
        batch["target"] = normalize_text(batch["sentence"], tok_func)
        return batch

    # run inference on all examples, keeping only the two new columns
    result = dataset.map(partial(map_to_pred, tok_func=tok_func),
                         remove_columns=dataset.column_names)

    # compute and log_results
    # do not change function below
    log_results(result, args)
if __name__ == "__main__":
    # Command-line entry point: parse the evaluation options and run main().
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--model_id", type=str, required=True, help="Model identifier. Should be loadable with 🤗 Transformers"
    )
    # Previously declared with both default="newmm" and required=True, which
    # made the default unreachable and contradicted the help text. The option
    # is now optional; explicit invocations behave exactly as before.
    parser.add_argument(
        "--thai_tokenizer",
        type=str,
        default="newmm",
        help="newmm, syllable, or deepcut; any other value removes all spaces (used for CER calculation). "
        "Defaults to newmm.",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help="Dataset name to evaluate the `model_id`. Should be loadable with 🤗 Datasets",
    )
    parser.add_argument(
        "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
    )
    parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
    # The help strings below used to advertise defaults of 5 s / 1 s, but the
    # actual argparse default is None.
    parser.add_argument(
        "--chunk_length_s", type=float, default=None, help="Chunk length in seconds. Defaults to None."
    )
    parser.add_argument(
        "--stride_length_s", type=float, default=None, help="Stride of the audio chunks. Defaults to None."
    )
    parser.add_argument(
        "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
    )
    args = parser.parse_args()

    main(args)

3
optimizer-002.pt Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:63480cd0bddf763bbfa016f364b5af6262bbd4584e1e3f1223bdf042feaf7080
size 2490632977

9
preprocessor_config.json Normal file
View File

@@ -0,0 +1,9 @@
{
"do_normalize": true,
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
"feature_size": 1,
"padding_side": "right",
"padding_value": 0.0,
"return_attention_mask": false,
"sampling_rate": 16000
}

3
pytorch_model.bin Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1dbb3a749c3f6e08fe2af7227d4857723fd1e525157b79ccb0dba255a69fdce8
size 1262210673

3
rng_state.pth Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0e9b980149921cf7dc2fe91f225c1c3f931cc22ff40d7e30639d7236b390d622
size 14567

3
scheduler.pt Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0b9ed0945bc75b9b51a6a34f17bea3f2eca616815e76668e99905ed244654b22
size 623

1
special_tokens_map.json Normal file
View File

@@ -0,0 +1 @@
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}

1
tokenizer_config.json Normal file
View File

@@ -0,0 +1 @@
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}

616
trainer_state.json Normal file
View File

@@ -0,0 +1,616 @@
{
"best_metric": 0.472552783109405,
"best_model_checkpoint": "/content/drive/MyDrive/new_dataset/wav2vec2-large-xlsr-53-thai-finetune/checkpoint-500",
"epoch": 99.98765432098766,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.49,
"learning_rate": 0.0001,
"loss": 1.9911,
"step": 50
},
{
"epoch": 2.49,
"eval_loss": 1.2750070095062256,
"eval_runtime": 34.892,
"eval_samples_per_second": 12.238,
"eval_steps_per_second": 0.774,
"eval_wer": 0.6767754318618042,
"step": 50
},
{
"epoch": 4.99,
"learning_rate": 9.743589743589744e-05,
"loss": 1.2107,
"step": 100
},
{
"epoch": 4.99,
"eval_loss": 1.1088056564331055,
"eval_runtime": 34.7796,
"eval_samples_per_second": 12.277,
"eval_steps_per_second": 0.776,
"eval_wer": 0.5927063339731286,
"step": 100
},
{
"epoch": 7.49,
"learning_rate": 9.487179487179487e-05,
"loss": 0.9686,
"step": 150
},
{
"epoch": 7.49,
"eval_loss": 1.0476980209350586,
"eval_runtime": 34.6203,
"eval_samples_per_second": 12.334,
"eval_steps_per_second": 0.78,
"eval_wer": 0.5712092130518234,
"step": 150
},
{
"epoch": 9.99,
"learning_rate": 9.230769230769232e-05,
"loss": 0.7504,
"step": 200
},
{
"epoch": 9.99,
"eval_loss": 1.0640665292739868,
"eval_runtime": 34.843,
"eval_samples_per_second": 12.255,
"eval_steps_per_second": 0.775,
"eval_wer": 0.5504798464491363,
"step": 200
},
{
"epoch": 12.49,
"learning_rate": 8.974358974358975e-05,
"loss": 0.6352,
"step": 250
},
{
"epoch": 12.49,
"eval_loss": 1.095747709274292,
"eval_runtime": 33.9281,
"eval_samples_per_second": 12.585,
"eval_steps_per_second": 0.796,
"eval_wer": 0.5324376199616123,
"step": 250
},
{
"epoch": 14.99,
"learning_rate": 8.717948717948718e-05,
"loss": 0.5313,
"step": 300
},
{
"epoch": 14.99,
"eval_loss": 1.0491594076156616,
"eval_runtime": 33.7693,
"eval_samples_per_second": 12.645,
"eval_steps_per_second": 0.8,
"eval_wer": 0.5163147792706334,
"step": 300
},
{
"epoch": 17.49,
"learning_rate": 8.461538461538461e-05,
"loss": 0.4461,
"step": 350
},
{
"epoch": 17.49,
"eval_loss": 1.0721960067749023,
"eval_runtime": 33.9128,
"eval_samples_per_second": 12.591,
"eval_steps_per_second": 0.796,
"eval_wer": 0.5124760076775432,
"step": 350
},
{
"epoch": 19.99,
"learning_rate": 8.205128205128205e-05,
"loss": 0.4094,
"step": 400
},
{
"epoch": 19.99,
"eval_loss": 1.057926058769226,
"eval_runtime": 34.0347,
"eval_samples_per_second": 12.546,
"eval_steps_per_second": 0.793,
"eval_wer": 0.49750479846449136,
"step": 400
},
{
"epoch": 22.49,
"learning_rate": 7.948717948717948e-05,
"loss": 0.3467,
"step": 450
},
{
"epoch": 22.49,
"eval_loss": 1.0208101272583008,
"eval_runtime": 34.3752,
"eval_samples_per_second": 12.422,
"eval_steps_per_second": 0.785,
"eval_wer": 0.491362763915547,
"step": 450
},
{
"epoch": 24.99,
"learning_rate": 7.692307692307693e-05,
"loss": 0.3195,
"step": 500
},
{
"epoch": 24.99,
"eval_loss": 1.0337833166122437,
"eval_runtime": 33.8794,
"eval_samples_per_second": 12.604,
"eval_steps_per_second": 0.797,
"eval_wer": 0.472552783109405,
"step": 500
},
{
"epoch": 27.49,
"learning_rate": 7.435897435897436e-05,
"loss": 0.3005,
"step": 550
},
{
"epoch": 27.49,
"eval_loss": 1.0594605207443237,
"eval_runtime": 33.9955,
"eval_samples_per_second": 12.56,
"eval_steps_per_second": 0.794,
"eval_wer": 0.47946257197696734,
"step": 550
},
{
"epoch": 29.99,
"learning_rate": 7.17948717948718e-05,
"loss": 0.2933,
"step": 600
},
{
"epoch": 29.99,
"eval_loss": 1.017477035522461,
"eval_runtime": 33.8622,
"eval_samples_per_second": 12.61,
"eval_steps_per_second": 0.797,
"eval_wer": 0.472936660268714,
"step": 600
},
{
"epoch": 32.49,
"learning_rate": 6.923076923076924e-05,
"loss": 0.2601,
"step": 650
},
{
"epoch": 32.49,
"eval_loss": 1.099133014678955,
"eval_runtime": 34.4437,
"eval_samples_per_second": 12.397,
"eval_steps_per_second": 0.784,
"eval_wer": 0.4652591170825336,
"step": 650
},
{
"epoch": 34.99,
"learning_rate": 6.666666666666667e-05,
"loss": 0.2226,
"step": 700
},
{
"epoch": 34.99,
"eval_loss": 1.1290050745010376,
"eval_runtime": 34.6356,
"eval_samples_per_second": 12.328,
"eval_steps_per_second": 0.78,
"eval_wer": 0.47063339731285986,
"step": 700
},
{
"epoch": 37.49,
"learning_rate": 6.410256410256412e-05,
"loss": 0.2262,
"step": 750
},
{
"epoch": 37.49,
"eval_loss": 1.0954631567001343,
"eval_runtime": 35.3666,
"eval_samples_per_second": 12.074,
"eval_steps_per_second": 0.763,
"eval_wer": 0.45681381957773515,
"step": 750
},
{
"epoch": 39.99,
"learning_rate": 6.153846153846155e-05,
"loss": 0.2236,
"step": 800
},
{
"epoch": 39.99,
"eval_loss": 1.1119202375411987,
"eval_runtime": 35.0365,
"eval_samples_per_second": 12.187,
"eval_steps_per_second": 0.771,
"eval_wer": 0.4491362763915547,
"step": 800
},
{
"epoch": 42.49,
"learning_rate": 5.897435897435898e-05,
"loss": 0.2029,
"step": 850
},
{
"epoch": 42.49,
"eval_loss": 1.1375640630722046,
"eval_runtime": 35.2676,
"eval_samples_per_second": 12.107,
"eval_steps_per_second": 0.766,
"eval_wer": 0.45220729366602685,
"step": 850
},
{
"epoch": 44.99,
"learning_rate": 5.6410256410256414e-05,
"loss": 0.1876,
"step": 900
},
{
"epoch": 44.99,
"eval_loss": 1.142003059387207,
"eval_runtime": 35.6836,
"eval_samples_per_second": 11.966,
"eval_steps_per_second": 0.757,
"eval_wer": 0.4476007677543186,
"step": 900
},
{
"epoch": 47.49,
"learning_rate": 5.384615384615385e-05,
"loss": 0.1733,
"step": 950
},
{
"epoch": 47.49,
"eval_loss": 1.1137712001800537,
"eval_runtime": 34.9014,
"eval_samples_per_second": 12.234,
"eval_steps_per_second": 0.774,
"eval_wer": 0.44337811900191937,
"step": 950
},
{
"epoch": 49.99,
"learning_rate": 5.128205128205128e-05,
"loss": 0.161,
"step": 1000
},
{
"epoch": 49.99,
"eval_loss": 1.195468783378601,
"eval_runtime": 35.1209,
"eval_samples_per_second": 12.158,
"eval_steps_per_second": 0.769,
"eval_wer": 0.44798464491362766,
"step": 1000
},
{
"epoch": 52.49,
"learning_rate": 4.871794871794872e-05,
"loss": 0.1661,
"step": 1050
},
{
"epoch": 52.49,
"eval_loss": 1.1598896980285645,
"eval_runtime": 34.8413,
"eval_samples_per_second": 12.256,
"eval_steps_per_second": 0.775,
"eval_wer": 0.44606525911708256,
"step": 1050
},
{
"epoch": 54.99,
"learning_rate": 4.615384615384616e-05,
"loss": 0.1533,
"step": 1100
},
{
"epoch": 54.99,
"eval_loss": 1.143904447555542,
"eval_runtime": 34.3532,
"eval_samples_per_second": 12.43,
"eval_steps_per_second": 0.786,
"eval_wer": 0.4383877159309021,
"step": 1100
},
{
"epoch": 57.49,
"learning_rate": 4.358974358974359e-05,
"loss": 0.1565,
"step": 1150
},
{
"epoch": 57.49,
"eval_loss": 1.1354175806045532,
"eval_runtime": 34.4974,
"eval_samples_per_second": 12.378,
"eval_steps_per_second": 0.783,
"eval_wer": 0.4418426103646833,
"step": 1150
},
{
"epoch": 59.99,
"learning_rate": 4.1025641025641023e-05,
"loss": 0.1476,
"step": 1200
},
{
"epoch": 59.99,
"eval_loss": 1.1580840349197388,
"eval_runtime": 34.4611,
"eval_samples_per_second": 12.391,
"eval_steps_per_second": 0.783,
"eval_wer": 0.4410748560460653,
"step": 1200
},
{
"epoch": 62.49,
"learning_rate": 3.846153846153846e-05,
"loss": 0.1378,
"step": 1250
},
{
"epoch": 62.49,
"eval_loss": 1.1266977787017822,
"eval_runtime": 34.2613,
"eval_samples_per_second": 12.463,
"eval_steps_per_second": 0.788,
"eval_wer": 0.4303262955854127,
"step": 1250
},
{
"epoch": 64.99,
"learning_rate": 3.58974358974359e-05,
"loss": 0.1214,
"step": 1300
},
{
"epoch": 64.99,
"eval_loss": 1.165766716003418,
"eval_runtime": 33.9904,
"eval_samples_per_second": 12.562,
"eval_steps_per_second": 0.794,
"eval_wer": 0.42879078694817657,
"step": 1300
},
{
"epoch": 67.49,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.133,
"step": 1350
},
{
"epoch": 67.49,
"eval_loss": 1.137351632118225,
"eval_runtime": 34.008,
"eval_samples_per_second": 12.556,
"eval_steps_per_second": 0.794,
"eval_wer": 0.43339731285988486,
"step": 1350
},
{
"epoch": 69.99,
"learning_rate": 3.0769230769230774e-05,
"loss": 0.1147,
"step": 1400
},
{
"epoch": 69.99,
"eval_loss": 1.1521999835968018,
"eval_runtime": 33.8683,
"eval_samples_per_second": 12.608,
"eval_steps_per_second": 0.797,
"eval_wer": 0.4245681381957774,
"step": 1400
},
{
"epoch": 72.49,
"learning_rate": 2.8205128205128207e-05,
"loss": 0.125,
"step": 1450
},
{
"epoch": 72.49,
"eval_loss": 1.1379369497299194,
"eval_runtime": 34.1582,
"eval_samples_per_second": 12.501,
"eval_steps_per_second": 0.79,
"eval_wer": 0.43570057581573896,
"step": 1450
},
{
"epoch": 74.99,
"learning_rate": 2.564102564102564e-05,
"loss": 0.1189,
"step": 1500
},
{
"epoch": 74.99,
"eval_loss": 1.1502233743667603,
"eval_runtime": 33.9539,
"eval_samples_per_second": 12.576,
"eval_steps_per_second": 0.795,
"eval_wer": 0.4284069097888676,
"step": 1500
},
{
"epoch": 77.49,
"learning_rate": 2.307692307692308e-05,
"loss": 0.1122,
"step": 1550
},
{
"epoch": 77.49,
"eval_loss": 1.1575168371200562,
"eval_runtime": 34.1792,
"eval_samples_per_second": 12.493,
"eval_steps_per_second": 0.79,
"eval_wer": 0.4314779270633397,
"step": 1550
},
{
"epoch": 79.99,
"learning_rate": 2.0512820512820512e-05,
"loss": 0.1235,
"step": 1600
},
{
"epoch": 79.99,
"eval_loss": 1.142774224281311,
"eval_runtime": 33.8121,
"eval_samples_per_second": 12.629,
"eval_steps_per_second": 0.799,
"eval_wer": 0.42610364683301344,
"step": 1600
},
{
"epoch": 82.49,
"learning_rate": 1.794871794871795e-05,
"loss": 0.1092,
"step": 1650
},
{
"epoch": 82.49,
"eval_loss": 1.1530485153198242,
"eval_runtime": 33.6852,
"eval_samples_per_second": 12.676,
"eval_steps_per_second": 0.802,
"eval_wer": 0.4341650671785029,
"step": 1650
},
{
"epoch": 84.99,
"learning_rate": 1.5384615384615387e-05,
"loss": 0.1185,
"step": 1700
},
{
"epoch": 84.99,
"eval_loss": 1.1284310817718506,
"eval_runtime": 33.8486,
"eval_samples_per_second": 12.615,
"eval_steps_per_second": 0.798,
"eval_wer": 0.42994241842610365,
"step": 1700
},
{
"epoch": 87.49,
"learning_rate": 1.282051282051282e-05,
"loss": 0.1048,
"step": 1750
},
{
"epoch": 87.49,
"eval_loss": 1.1434178352355957,
"eval_runtime": 33.9335,
"eval_samples_per_second": 12.583,
"eval_steps_per_second": 0.796,
"eval_wer": 0.4276391554702495,
"step": 1750
},
{
"epoch": 89.99,
"learning_rate": 1.0256410256410256e-05,
"loss": 0.1038,
"step": 1800
},
{
"epoch": 89.99,
"eval_loss": 1.1609505414962769,
"eval_runtime": 34.1384,
"eval_samples_per_second": 12.508,
"eval_steps_per_second": 0.791,
"eval_wer": 0.42418426103646834,
"step": 1800
},
{
"epoch": 92.49,
"learning_rate": 7.692307692307694e-06,
"loss": 0.1073,
"step": 1850
},
{
"epoch": 92.49,
"eval_loss": 1.1562278270721436,
"eval_runtime": 33.8455,
"eval_samples_per_second": 12.616,
"eval_steps_per_second": 0.798,
"eval_wer": 0.4238003838771593,
"step": 1850
},
{
"epoch": 94.99,
"learning_rate": 5.128205128205128e-06,
"loss": 0.1055,
"step": 1900
},
{
"epoch": 94.99,
"eval_loss": 1.1589001417160034,
"eval_runtime": 34.1041,
"eval_samples_per_second": 12.52,
"eval_steps_per_second": 0.792,
"eval_wer": 0.4214971209213052,
"step": 1900
},
{
"epoch": 97.49,
"learning_rate": 2.564102564102564e-06,
"loss": 0.1133,
"step": 1950
},
{
"epoch": 97.49,
"eval_loss": 1.1536645889282227,
"eval_runtime": 33.9701,
"eval_samples_per_second": 12.57,
"eval_steps_per_second": 0.795,
"eval_wer": 0.4238003838771593,
"step": 1950
},
{
"epoch": 99.99,
"learning_rate": 0.0,
"loss": 0.0992,
"step": 2000
},
{
"epoch": 99.99,
"eval_loss": 1.1553977727890015,
"eval_runtime": 33.8655,
"eval_samples_per_second": 12.609,
"eval_steps_per_second": 0.797,
"eval_wer": 0.42418426103646834,
"step": 2000
}
],
"max_steps": 2000,
"num_train_epochs": 100,
"total_flos": 1.6479245346890066e+19,
"trial_name": null,
"trial_params": null
}

3
training_args.bin Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0a007a4ec524a3af0145655a9608fca61329594a670f7bbbd2bb694f2d8648c8
size 2799

1
vocab.json Normal file
View File

@@ -0,0 +1 @@
{"ฑ": 0, "ๅ": 1, "ก": 2, "ง": 3, "ฒ": 4, "ะ": 5, "๊": 6, "้": 7, "ฌ": 8, "ซ": 9, "ด": 10, "ฯ": 11, "ใ": 12, "ึ": 13, "ญ": 14, "่": 15, "า": 16, "ฤ": 17, "๋": 18, "อ": 19, "ฬ": 20, "ท": 21, "โ": 22, "ภ": 23, "ย": 24, "็": 25, "ล": 26, "ุ": 27, "เ": 28, "ฮ": 29, "ฝ": 30, "ป": 31, "ี": 32, "บ": 33, "ฐ": 34, "ต": 35, "ถ": 36, "ศ": 37, "ฟ": 38, "ณ": 39, "ห": 40, "ร": 41, "พ": 43, "ฆ": 44, "ั": 45, "ค": 46, "ว": 47, "ฏ": 48, "จ": 49, "แ": 50, "ม": 51, "ฎ": 52, "ฉ": 53, "์": 54, "ษ": 55, "ำ": 56, "ผ": 57, "ข": 58, "ไ": 59, "ู": 60, "ื": 61, "น": 62, "ช": 63, "ิ": 64, "ธ": 65, "ฃ": 66, "ส": 67, "|": 42, "[UNK]": 68, "[PAD]": 69}