初始化项目,由ModelHub XC社区提供模型
Model: Kittipong/wav2vec2-th-vocal-domain Source: Original Platform
This commit is contained in:
27
.gitattributes
vendored
Normal file
27
.gitattributes
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
85
config.json
Normal file
85
config.json
Normal file
@@ -0,0 +1,85 @@
|
||||
{
|
||||
"_name_or_path": "facebook/wav2vec2-large-xlsr-53",
|
||||
"activation_dropout": 0.0,
|
||||
"apply_spec_augment": true,
|
||||
"architectures": [
|
||||
"Wav2Vec2ForCTC"
|
||||
],
|
||||
"attention_dropout": 0.1,
|
||||
"bos_token_id": 1,
|
||||
"codevector_dim": 768,
|
||||
"contrastive_logits_temperature": 0.1,
|
||||
"conv_bias": true,
|
||||
"conv_dim": [
|
||||
512,
|
||||
512,
|
||||
512,
|
||||
512,
|
||||
512,
|
||||
512,
|
||||
512
|
||||
],
|
||||
"conv_kernel": [
|
||||
10,
|
||||
3,
|
||||
3,
|
||||
3,
|
||||
3,
|
||||
2,
|
||||
2
|
||||
],
|
||||
"conv_stride": [
|
||||
5,
|
||||
2,
|
||||
2,
|
||||
2,
|
||||
2,
|
||||
2,
|
||||
2
|
||||
],
|
||||
"ctc_loss_reduction": "mean",
|
||||
"ctc_zero_infinity": false,
|
||||
"diversity_loss_weight": 0.1,
|
||||
"do_stable_layer_norm": true,
|
||||
"eos_token_id": 2,
|
||||
"feat_extract_activation": "gelu",
|
||||
"feat_extract_dropout": 0.0,
|
||||
"feat_extract_norm": "layer",
|
||||
"feat_proj_dropout": 0.0,
|
||||
"feat_quantizer_dropout": 0.0,
|
||||
"final_dropout": 0.0,
|
||||
"gradient_checkpointing": true,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout": 0.1,
|
||||
"hidden_size": 1024,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 4096,
|
||||
"layer_norm_eps": 1e-05,
|
||||
"layerdrop": 0.1,
|
||||
"mask_channel_length": 10,
|
||||
"mask_channel_min_space": 1,
|
||||
"mask_channel_other": 0.0,
|
||||
"mask_channel_prob": 0.0,
|
||||
"mask_channel_selection": "static",
|
||||
"mask_feature_length": 10,
|
||||
"mask_feature_prob": 0.0,
|
||||
"mask_time_length": 10,
|
||||
"mask_time_min_space": 1,
|
||||
"mask_time_other": 0.0,
|
||||
"mask_time_prob": 0.05,
|
||||
"mask_time_selection": "static",
|
||||
"model_type": "wav2vec2",
|
||||
"num_attention_heads": 16,
|
||||
"num_codevector_groups": 2,
|
||||
"num_codevectors_per_group": 320,
|
||||
"num_conv_pos_embedding_groups": 16,
|
||||
"num_conv_pos_embeddings": 128,
|
||||
"num_feat_extract_layers": 7,
|
||||
"num_hidden_layers": 24,
|
||||
"num_negatives": 100,
|
||||
"pad_token_id": 69,
|
||||
"proj_codevector_dim": 768,
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": "4.9.1",
|
||||
"vocab_size": 70
|
||||
}
|
||||
154
eval.py
Normal file
154
eval.py
Normal file
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import re
|
||||
from typing import Dict
|
||||
|
||||
from datasets import Audio, Dataset, load_dataset, load_metric
|
||||
|
||||
from transformers import AutoFeatureExtractor, pipeline
|
||||
|
||||
from pythainlp.tokenize import word_tokenize, syllable_tokenize
|
||||
from deepcut import tokenize as deepcut_word_tokenize
|
||||
from functools import partial
|
||||
|
||||
|
||||
def log_results(result: Dataset, args: argparse.Namespace):
    """DO NOT CHANGE the metric computation. Compute WER/CER and log them.

    Args:
        result: Dataset exposing string columns ``"prediction"`` and
            ``"target"``.
        args: Parsed command-line arguments. Reads ``dataset``, ``config``,
            ``split``, ``thai_tokenizer`` and ``log_outputs``.

    Side effects:
        Prints the metrics, writes them to
        ``robust-speech-event/<dataset_id>_eval_results_<tokenizer>.txt`` and,
        when ``args.log_outputs`` is truthy, writes one prediction log and one
        target log into the same directory.
    """
    import os  # local import so this function does not rely on file-level changes

    output_dir = "robust-speech-event"
    dataset_id = "_".join(args.dataset.split("/") + [args.config, args.split])

    # load metric
    wer = load_metric("wer")
    cer = load_metric("cer")

    # compute metrics
    wer_result = wer.compute(references=result["target"], predictions=result["prediction"])
    cer_result = cer.compute(references=result["target"], predictions=result["prediction"])

    # print & log results
    result_str = f"WER: {wer_result}\n" f"CER: {cer_result}"
    print(result_str)

    # The output directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(output_dir, exist_ok=True)

    # Explicit UTF-8: the logged text is Thai and must not depend on the
    # platform's default encoding.
    results_file = f"{output_dir}/{dataset_id}_eval_results_{args.thai_tokenizer}.txt"
    with open(results_file, "w", encoding="utf-8") as f:
        f.write(result_str)

    # log all results in text file. Possibly interesting for analysis
    # `--log_outputs` is a store_true flag (False/True, never None), so test
    # its truthiness rather than comparing against None.
    if args.log_outputs:
        pred_file = f"{output_dir}/log_{dataset_id}_predictions_{args.thai_tokenizer}.txt"
        target_file = f"{output_dir}/log_{dataset_id}_targets_{args.thai_tokenizer}.txt"

        with open(pred_file, "w", encoding="utf-8") as p, open(target_file, "w", encoding="utf-8") as t:

            # mapping function to write output: index line, then text line
            def write_to_file(batch, i):
                p.write(f"{i}" + "\n")
                p.write(batch["prediction"] + "\n")
                t.write(f"{i}" + "\n")
                t.write(batch["target"] + "\n")

            result.map(write_to_file, with_indices=True)
|
||||
|
||||
|
||||
def normalize_text(text: str, tok_func) -> str:
    """DO ADAPT FOR YOUR USE CASE. Normalize the target text for scoring.

    The text is lower-cased, ignored punctuation is removed, newlines and
    runs of spaces are collapsed, and the result is tokenized with
    ``tok_func`` and re-joined with single spaces.

    Args:
        text: Reference transcription.
        tok_func: Callable taking a string and returning an iterable of
            string tokens (a plain string is treated as a sequence of
            characters by the final join).

    Returns:
        The normalized, space-separated token string.
    """
    # IMPORTANT: this should correspond to the chars that were ignored during
    # training. Raw string avoids invalid-escape warnings; \ufffd is the
    # Unicode replacement character.
    chars_to_ignore_regex = r'[,?.!\-;:"“%‘”\ufffd—’…–]'

    text = re.sub(chars_to_ignore_regex, "", text.lower())

    # In addition, we can normalize the target text, e.g. removing new lines
    # characters etc... Order is important here: longer sequences must be
    # replaced before the shorter ones they contain.
    token_sequences_to_ignore = ["\n\n", "\n", "   ", "  "]

    for seq in token_sequences_to_ignore:
        text = " ".join(text.split(seq))

    # thai tokenize
    return " ".join(tok_func(text))
|
||||
|
||||
def retokenize(text: str, tok_func) -> str:
    """Strip all whitespace from a prediction, tokenize it and re-join.

    No other cleaning is applied. ``tok_func`` receives the whitespace-free
    string and its tokens are joined with single spaces.
    """
    compact = "".join(text.split())
    tokens = tok_func(compact)
    return " ".join(tokens)
|
||||
|
||||
|
||||
def main(args):
    """Run speech recognition over a dataset split and log WER/CER.

    Args:
        args: Parsed command-line arguments. Reads ``dataset``, ``config``,
            ``split``, ``model_id``, ``thai_tokenizer``, ``chunk_length_s``
            and ``stride_length_s``, then passes the namespace on to
            ``log_results``.
    """
    # load dataset
    dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)

    # NOTE: a leftover smoke-test line used to truncate the split to its first
    # 10 examples (and failed on splits shorter than that), so the reported
    # metrics did not cover the whole split. For a quick local check,
    # temporarily re-add: dataset = dataset.select(range(10))

    # load processor
    feature_extractor = AutoFeatureExtractor.from_pretrained(args.model_id)
    sampling_rate = feature_extractor.sampling_rate

    # resample audio to the rate the feature extractor reports
    dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))

    # load eval pipeline
    asr = pipeline("automatic-speech-recognition", model=args.model_id)

    # select tokenizer
    def remove_spaces(text):
        # Returns a plain string; the callers join it with " ", which yields
        # one token per character.
        return text.replace(' ', '')

    tokenizers = {
        'deepcut': deepcut_word_tokenize,
        'newmm': word_tokenize,
        'syllable': syllable_tokenize,
    }
    # Any unrecognized tokenizer name falls back to space removal.
    tok_func = tokenizers.get(args.thai_tokenizer, remove_spaces)

    # map function to decode audio
    def map_to_pred(batch, tok_func):
        prediction = asr(
            batch["audio"]["array"], chunk_length_s=args.chunk_length_s, stride_length_s=args.stride_length_s
        )

        # Prediction and target go through the same tokenizer so they are
        # compared on the same token boundaries.
        batch["prediction"] = retokenize(prediction["text"], tok_func)
        batch["target"] = normalize_text(batch["sentence"], tok_func)
        return batch

    # run inference on all examples
    result = dataset.map(partial(map_to_pred, tok_func=tok_func),
                         remove_columns=dataset.column_names)

    # compute and log_results
    # do not change function below
    log_results(result, args)
|
||||
|
||||
|
||||
def build_parser():
    """Build the command-line parser for the evaluation script.

    Returns:
        An ``argparse.ArgumentParser`` with the required ``--model_id``,
        ``--dataset``, ``--config`` and ``--split`` options and the optional
        ``--thai_tokenizer``, ``--chunk_length_s``, ``--stride_length_s`` and
        ``--log_outputs`` options.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--model_id", type=str, required=True, help="Model identifier. Should be loadable with 🤗 Transformers"
    )
    # Not required: an option that is both required and defaulted can never
    # use its default. Omitting the flag now selects "newmm".
    parser.add_argument(
        "--thai_tokenizer", type=str, default="newmm",
        help="newmm (default), syllable, or deepcut; any other value removes all spaces (used for CER calculation)"
    )
    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help="Dataset name to evaluate the `model_id`. Should be loadable with 🤗 Datasets",
    )
    parser.add_argument(
        "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
    )
    parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
    # The help text used to advertise 5 s / 1 s defaults that this script
    # never set; the value is None unless the flag is given.
    parser.add_argument(
        "--chunk_length_s", type=float, default=None,
        help="Chunk length in seconds. Defaults to None, which is passed to the pipeline unchanged."
    )
    parser.add_argument(
        "--stride_length_s", type=float, default=None,
        help="Stride of the audio chunks in seconds. Defaults to None, which is passed to the pipeline unchanged."
    )
    parser.add_argument(
        "--log_outputs", action="store_true", help="If defined, write outputs to log file for analysis."
    )
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()

    main(args)
|
||||
3
optimizer-002.pt
Normal file
3
optimizer-002.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:63480cd0bddf763bbfa016f364b5af6262bbd4584e1e3f1223bdf042feaf7080
|
||||
size 2490632977
|
||||
9
preprocessor_config.json
Normal file
9
preprocessor_config.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"do_normalize": true,
|
||||
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
||||
"feature_size": 1,
|
||||
"padding_side": "right",
|
||||
"padding_value": 0.0,
|
||||
"return_attention_mask": false,
|
||||
"sampling_rate": 16000
|
||||
}
|
||||
3
pytorch_model.bin
Normal file
3
pytorch_model.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1dbb3a749c3f6e08fe2af7227d4857723fd1e525157b79ccb0dba255a69fdce8
|
||||
size 1262210673
|
||||
3
rng_state.pth
Normal file
3
rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0e9b980149921cf7dc2fe91f225c1c3f931cc22ff40d7e30639d7236b390d622
|
||||
size 14567
|
||||
3
scheduler.pt
Normal file
3
scheduler.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0b9ed0945bc75b9b51a6a34f17bea3f2eca616815e76668e99905ed244654b22
|
||||
size 623
|
||||
1
special_tokens_map.json
Normal file
1
special_tokens_map.json
Normal file
@@ -0,0 +1 @@
|
||||
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
||||
1
tokenizer_config.json
Normal file
1
tokenizer_config.json
Normal file
@@ -0,0 +1 @@
|
||||
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
||||
616
trainer_state.json
Normal file
616
trainer_state.json
Normal file
@@ -0,0 +1,616 @@
|
||||
{
|
||||
"best_metric": 0.472552783109405,
|
||||
"best_model_checkpoint": "/content/drive/MyDrive/new_dataset/wav2vec2-large-xlsr-53-thai-finetune/checkpoint-500",
|
||||
"epoch": 99.98765432098766,
|
||||
"global_step": 2000,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"epoch": 2.49,
|
||||
"learning_rate": 0.0001,
|
||||
"loss": 1.9911,
|
||||
"step": 50
|
||||
},
|
||||
{
|
||||
"epoch": 2.49,
|
||||
"eval_loss": 1.2750070095062256,
|
||||
"eval_runtime": 34.892,
|
||||
"eval_samples_per_second": 12.238,
|
||||
"eval_steps_per_second": 0.774,
|
||||
"eval_wer": 0.6767754318618042,
|
||||
"step": 50
|
||||
},
|
||||
{
|
||||
"epoch": 4.99,
|
||||
"learning_rate": 9.743589743589744e-05,
|
||||
"loss": 1.2107,
|
||||
"step": 100
|
||||
},
|
||||
{
|
||||
"epoch": 4.99,
|
||||
"eval_loss": 1.1088056564331055,
|
||||
"eval_runtime": 34.7796,
|
||||
"eval_samples_per_second": 12.277,
|
||||
"eval_steps_per_second": 0.776,
|
||||
"eval_wer": 0.5927063339731286,
|
||||
"step": 100
|
||||
},
|
||||
{
|
||||
"epoch": 7.49,
|
||||
"learning_rate": 9.487179487179487e-05,
|
||||
"loss": 0.9686,
|
||||
"step": 150
|
||||
},
|
||||
{
|
||||
"epoch": 7.49,
|
||||
"eval_loss": 1.0476980209350586,
|
||||
"eval_runtime": 34.6203,
|
||||
"eval_samples_per_second": 12.334,
|
||||
"eval_steps_per_second": 0.78,
|
||||
"eval_wer": 0.5712092130518234,
|
||||
"step": 150
|
||||
},
|
||||
{
|
||||
"epoch": 9.99,
|
||||
"learning_rate": 9.230769230769232e-05,
|
||||
"loss": 0.7504,
|
||||
"step": 200
|
||||
},
|
||||
{
|
||||
"epoch": 9.99,
|
||||
"eval_loss": 1.0640665292739868,
|
||||
"eval_runtime": 34.843,
|
||||
"eval_samples_per_second": 12.255,
|
||||
"eval_steps_per_second": 0.775,
|
||||
"eval_wer": 0.5504798464491363,
|
||||
"step": 200
|
||||
},
|
||||
{
|
||||
"epoch": 12.49,
|
||||
"learning_rate": 8.974358974358975e-05,
|
||||
"loss": 0.6352,
|
||||
"step": 250
|
||||
},
|
||||
{
|
||||
"epoch": 12.49,
|
||||
"eval_loss": 1.095747709274292,
|
||||
"eval_runtime": 33.9281,
|
||||
"eval_samples_per_second": 12.585,
|
||||
"eval_steps_per_second": 0.796,
|
||||
"eval_wer": 0.5324376199616123,
|
||||
"step": 250
|
||||
},
|
||||
{
|
||||
"epoch": 14.99,
|
||||
"learning_rate": 8.717948717948718e-05,
|
||||
"loss": 0.5313,
|
||||
"step": 300
|
||||
},
|
||||
{
|
||||
"epoch": 14.99,
|
||||
"eval_loss": 1.0491594076156616,
|
||||
"eval_runtime": 33.7693,
|
||||
"eval_samples_per_second": 12.645,
|
||||
"eval_steps_per_second": 0.8,
|
||||
"eval_wer": 0.5163147792706334,
|
||||
"step": 300
|
||||
},
|
||||
{
|
||||
"epoch": 17.49,
|
||||
"learning_rate": 8.461538461538461e-05,
|
||||
"loss": 0.4461,
|
||||
"step": 350
|
||||
},
|
||||
{
|
||||
"epoch": 17.49,
|
||||
"eval_loss": 1.0721960067749023,
|
||||
"eval_runtime": 33.9128,
|
||||
"eval_samples_per_second": 12.591,
|
||||
"eval_steps_per_second": 0.796,
|
||||
"eval_wer": 0.5124760076775432,
|
||||
"step": 350
|
||||
},
|
||||
{
|
||||
"epoch": 19.99,
|
||||
"learning_rate": 8.205128205128205e-05,
|
||||
"loss": 0.4094,
|
||||
"step": 400
|
||||
},
|
||||
{
|
||||
"epoch": 19.99,
|
||||
"eval_loss": 1.057926058769226,
|
||||
"eval_runtime": 34.0347,
|
||||
"eval_samples_per_second": 12.546,
|
||||
"eval_steps_per_second": 0.793,
|
||||
"eval_wer": 0.49750479846449136,
|
||||
"step": 400
|
||||
},
|
||||
{
|
||||
"epoch": 22.49,
|
||||
"learning_rate": 7.948717948717948e-05,
|
||||
"loss": 0.3467,
|
||||
"step": 450
|
||||
},
|
||||
{
|
||||
"epoch": 22.49,
|
||||
"eval_loss": 1.0208101272583008,
|
||||
"eval_runtime": 34.3752,
|
||||
"eval_samples_per_second": 12.422,
|
||||
"eval_steps_per_second": 0.785,
|
||||
"eval_wer": 0.491362763915547,
|
||||
"step": 450
|
||||
},
|
||||
{
|
||||
"epoch": 24.99,
|
||||
"learning_rate": 7.692307692307693e-05,
|
||||
"loss": 0.3195,
|
||||
"step": 500
|
||||
},
|
||||
{
|
||||
"epoch": 24.99,
|
||||
"eval_loss": 1.0337833166122437,
|
||||
"eval_runtime": 33.8794,
|
||||
"eval_samples_per_second": 12.604,
|
||||
"eval_steps_per_second": 0.797,
|
||||
"eval_wer": 0.472552783109405,
|
||||
"step": 500
|
||||
},
|
||||
{
|
||||
"epoch": 27.49,
|
||||
"learning_rate": 7.435897435897436e-05,
|
||||
"loss": 0.3005,
|
||||
"step": 550
|
||||
},
|
||||
{
|
||||
"epoch": 27.49,
|
||||
"eval_loss": 1.0594605207443237,
|
||||
"eval_runtime": 33.9955,
|
||||
"eval_samples_per_second": 12.56,
|
||||
"eval_steps_per_second": 0.794,
|
||||
"eval_wer": 0.47946257197696734,
|
||||
"step": 550
|
||||
},
|
||||
{
|
||||
"epoch": 29.99,
|
||||
"learning_rate": 7.17948717948718e-05,
|
||||
"loss": 0.2933,
|
||||
"step": 600
|
||||
},
|
||||
{
|
||||
"epoch": 29.99,
|
||||
"eval_loss": 1.017477035522461,
|
||||
"eval_runtime": 33.8622,
|
||||
"eval_samples_per_second": 12.61,
|
||||
"eval_steps_per_second": 0.797,
|
||||
"eval_wer": 0.472936660268714,
|
||||
"step": 600
|
||||
},
|
||||
{
|
||||
"epoch": 32.49,
|
||||
"learning_rate": 6.923076923076924e-05,
|
||||
"loss": 0.2601,
|
||||
"step": 650
|
||||
},
|
||||
{
|
||||
"epoch": 32.49,
|
||||
"eval_loss": 1.099133014678955,
|
||||
"eval_runtime": 34.4437,
|
||||
"eval_samples_per_second": 12.397,
|
||||
"eval_steps_per_second": 0.784,
|
||||
"eval_wer": 0.4652591170825336,
|
||||
"step": 650
|
||||
},
|
||||
{
|
||||
"epoch": 34.99,
|
||||
"learning_rate": 6.666666666666667e-05,
|
||||
"loss": 0.2226,
|
||||
"step": 700
|
||||
},
|
||||
{
|
||||
"epoch": 34.99,
|
||||
"eval_loss": 1.1290050745010376,
|
||||
"eval_runtime": 34.6356,
|
||||
"eval_samples_per_second": 12.328,
|
||||
"eval_steps_per_second": 0.78,
|
||||
"eval_wer": 0.47063339731285986,
|
||||
"step": 700
|
||||
},
|
||||
{
|
||||
"epoch": 37.49,
|
||||
"learning_rate": 6.410256410256412e-05,
|
||||
"loss": 0.2262,
|
||||
"step": 750
|
||||
},
|
||||
{
|
||||
"epoch": 37.49,
|
||||
"eval_loss": 1.0954631567001343,
|
||||
"eval_runtime": 35.3666,
|
||||
"eval_samples_per_second": 12.074,
|
||||
"eval_steps_per_second": 0.763,
|
||||
"eval_wer": 0.45681381957773515,
|
||||
"step": 750
|
||||
},
|
||||
{
|
||||
"epoch": 39.99,
|
||||
"learning_rate": 6.153846153846155e-05,
|
||||
"loss": 0.2236,
|
||||
"step": 800
|
||||
},
|
||||
{
|
||||
"epoch": 39.99,
|
||||
"eval_loss": 1.1119202375411987,
|
||||
"eval_runtime": 35.0365,
|
||||
"eval_samples_per_second": 12.187,
|
||||
"eval_steps_per_second": 0.771,
|
||||
"eval_wer": 0.4491362763915547,
|
||||
"step": 800
|
||||
},
|
||||
{
|
||||
"epoch": 42.49,
|
||||
"learning_rate": 5.897435897435898e-05,
|
||||
"loss": 0.2029,
|
||||
"step": 850
|
||||
},
|
||||
{
|
||||
"epoch": 42.49,
|
||||
"eval_loss": 1.1375640630722046,
|
||||
"eval_runtime": 35.2676,
|
||||
"eval_samples_per_second": 12.107,
|
||||
"eval_steps_per_second": 0.766,
|
||||
"eval_wer": 0.45220729366602685,
|
||||
"step": 850
|
||||
},
|
||||
{
|
||||
"epoch": 44.99,
|
||||
"learning_rate": 5.6410256410256414e-05,
|
||||
"loss": 0.1876,
|
||||
"step": 900
|
||||
},
|
||||
{
|
||||
"epoch": 44.99,
|
||||
"eval_loss": 1.142003059387207,
|
||||
"eval_runtime": 35.6836,
|
||||
"eval_samples_per_second": 11.966,
|
||||
"eval_steps_per_second": 0.757,
|
||||
"eval_wer": 0.4476007677543186,
|
||||
"step": 900
|
||||
},
|
||||
{
|
||||
"epoch": 47.49,
|
||||
"learning_rate": 5.384615384615385e-05,
|
||||
"loss": 0.1733,
|
||||
"step": 950
|
||||
},
|
||||
{
|
||||
"epoch": 47.49,
|
||||
"eval_loss": 1.1137712001800537,
|
||||
"eval_runtime": 34.9014,
|
||||
"eval_samples_per_second": 12.234,
|
||||
"eval_steps_per_second": 0.774,
|
||||
"eval_wer": 0.44337811900191937,
|
||||
"step": 950
|
||||
},
|
||||
{
|
||||
"epoch": 49.99,
|
||||
"learning_rate": 5.128205128205128e-05,
|
||||
"loss": 0.161,
|
||||
"step": 1000
|
||||
},
|
||||
{
|
||||
"epoch": 49.99,
|
||||
"eval_loss": 1.195468783378601,
|
||||
"eval_runtime": 35.1209,
|
||||
"eval_samples_per_second": 12.158,
|
||||
"eval_steps_per_second": 0.769,
|
||||
"eval_wer": 0.44798464491362766,
|
||||
"step": 1000
|
||||
},
|
||||
{
|
||||
"epoch": 52.49,
|
||||
"learning_rate": 4.871794871794872e-05,
|
||||
"loss": 0.1661,
|
||||
"step": 1050
|
||||
},
|
||||
{
|
||||
"epoch": 52.49,
|
||||
"eval_loss": 1.1598896980285645,
|
||||
"eval_runtime": 34.8413,
|
||||
"eval_samples_per_second": 12.256,
|
||||
"eval_steps_per_second": 0.775,
|
||||
"eval_wer": 0.44606525911708256,
|
||||
"step": 1050
|
||||
},
|
||||
{
|
||||
"epoch": 54.99,
|
||||
"learning_rate": 4.615384615384616e-05,
|
||||
"loss": 0.1533,
|
||||
"step": 1100
|
||||
},
|
||||
{
|
||||
"epoch": 54.99,
|
||||
"eval_loss": 1.143904447555542,
|
||||
"eval_runtime": 34.3532,
|
||||
"eval_samples_per_second": 12.43,
|
||||
"eval_steps_per_second": 0.786,
|
||||
"eval_wer": 0.4383877159309021,
|
||||
"step": 1100
|
||||
},
|
||||
{
|
||||
"epoch": 57.49,
|
||||
"learning_rate": 4.358974358974359e-05,
|
||||
"loss": 0.1565,
|
||||
"step": 1150
|
||||
},
|
||||
{
|
||||
"epoch": 57.49,
|
||||
"eval_loss": 1.1354175806045532,
|
||||
"eval_runtime": 34.4974,
|
||||
"eval_samples_per_second": 12.378,
|
||||
"eval_steps_per_second": 0.783,
|
||||
"eval_wer": 0.4418426103646833,
|
||||
"step": 1150
|
||||
},
|
||||
{
|
||||
"epoch": 59.99,
|
||||
"learning_rate": 4.1025641025641023e-05,
|
||||
"loss": 0.1476,
|
||||
"step": 1200
|
||||
},
|
||||
{
|
||||
"epoch": 59.99,
|
||||
"eval_loss": 1.1580840349197388,
|
||||
"eval_runtime": 34.4611,
|
||||
"eval_samples_per_second": 12.391,
|
||||
"eval_steps_per_second": 0.783,
|
||||
"eval_wer": 0.4410748560460653,
|
||||
"step": 1200
|
||||
},
|
||||
{
|
||||
"epoch": 62.49,
|
||||
"learning_rate": 3.846153846153846e-05,
|
||||
"loss": 0.1378,
|
||||
"step": 1250
|
||||
},
|
||||
{
|
||||
"epoch": 62.49,
|
||||
"eval_loss": 1.1266977787017822,
|
||||
"eval_runtime": 34.2613,
|
||||
"eval_samples_per_second": 12.463,
|
||||
"eval_steps_per_second": 0.788,
|
||||
"eval_wer": 0.4303262955854127,
|
||||
"step": 1250
|
||||
},
|
||||
{
|
||||
"epoch": 64.99,
|
||||
"learning_rate": 3.58974358974359e-05,
|
||||
"loss": 0.1214,
|
||||
"step": 1300
|
||||
},
|
||||
{
|
||||
"epoch": 64.99,
|
||||
"eval_loss": 1.165766716003418,
|
||||
"eval_runtime": 33.9904,
|
||||
"eval_samples_per_second": 12.562,
|
||||
"eval_steps_per_second": 0.794,
|
||||
"eval_wer": 0.42879078694817657,
|
||||
"step": 1300
|
||||
},
|
||||
{
|
||||
"epoch": 67.49,
|
||||
"learning_rate": 3.3333333333333335e-05,
|
||||
"loss": 0.133,
|
||||
"step": 1350
|
||||
},
|
||||
{
|
||||
"epoch": 67.49,
|
||||
"eval_loss": 1.137351632118225,
|
||||
"eval_runtime": 34.008,
|
||||
"eval_samples_per_second": 12.556,
|
||||
"eval_steps_per_second": 0.794,
|
||||
"eval_wer": 0.43339731285988486,
|
||||
"step": 1350
|
||||
},
|
||||
{
|
||||
"epoch": 69.99,
|
||||
"learning_rate": 3.0769230769230774e-05,
|
||||
"loss": 0.1147,
|
||||
"step": 1400
|
||||
},
|
||||
{
|
||||
"epoch": 69.99,
|
||||
"eval_loss": 1.1521999835968018,
|
||||
"eval_runtime": 33.8683,
|
||||
"eval_samples_per_second": 12.608,
|
||||
"eval_steps_per_second": 0.797,
|
||||
"eval_wer": 0.4245681381957774,
|
||||
"step": 1400
|
||||
},
|
||||
{
|
||||
"epoch": 72.49,
|
||||
"learning_rate": 2.8205128205128207e-05,
|
||||
"loss": 0.125,
|
||||
"step": 1450
|
||||
},
|
||||
{
|
||||
"epoch": 72.49,
|
||||
"eval_loss": 1.1379369497299194,
|
||||
"eval_runtime": 34.1582,
|
||||
"eval_samples_per_second": 12.501,
|
||||
"eval_steps_per_second": 0.79,
|
||||
"eval_wer": 0.43570057581573896,
|
||||
"step": 1450
|
||||
},
|
||||
{
|
||||
"epoch": 74.99,
|
||||
"learning_rate": 2.564102564102564e-05,
|
||||
"loss": 0.1189,
|
||||
"step": 1500
|
||||
},
|
||||
{
|
||||
"epoch": 74.99,
|
||||
"eval_loss": 1.1502233743667603,
|
||||
"eval_runtime": 33.9539,
|
||||
"eval_samples_per_second": 12.576,
|
||||
"eval_steps_per_second": 0.795,
|
||||
"eval_wer": 0.4284069097888676,
|
||||
"step": 1500
|
||||
},
|
||||
{
|
||||
"epoch": 77.49,
|
||||
"learning_rate": 2.307692307692308e-05,
|
||||
"loss": 0.1122,
|
||||
"step": 1550
|
||||
},
|
||||
{
|
||||
"epoch": 77.49,
|
||||
"eval_loss": 1.1575168371200562,
|
||||
"eval_runtime": 34.1792,
|
||||
"eval_samples_per_second": 12.493,
|
||||
"eval_steps_per_second": 0.79,
|
||||
"eval_wer": 0.4314779270633397,
|
||||
"step": 1550
|
||||
},
|
||||
{
|
||||
"epoch": 79.99,
|
||||
"learning_rate": 2.0512820512820512e-05,
|
||||
"loss": 0.1235,
|
||||
"step": 1600
|
||||
},
|
||||
{
|
||||
"epoch": 79.99,
|
||||
"eval_loss": 1.142774224281311,
|
||||
"eval_runtime": 33.8121,
|
||||
"eval_samples_per_second": 12.629,
|
||||
"eval_steps_per_second": 0.799,
|
||||
"eval_wer": 0.42610364683301344,
|
||||
"step": 1600
|
||||
},
|
||||
{
|
||||
"epoch": 82.49,
|
||||
"learning_rate": 1.794871794871795e-05,
|
||||
"loss": 0.1092,
|
||||
"step": 1650
|
||||
},
|
||||
{
|
||||
"epoch": 82.49,
|
||||
"eval_loss": 1.1530485153198242,
|
||||
"eval_runtime": 33.6852,
|
||||
"eval_samples_per_second": 12.676,
|
||||
"eval_steps_per_second": 0.802,
|
||||
"eval_wer": 0.4341650671785029,
|
||||
"step": 1650
|
||||
},
|
||||
{
|
||||
"epoch": 84.99,
|
||||
"learning_rate": 1.5384615384615387e-05,
|
||||
"loss": 0.1185,
|
||||
"step": 1700
|
||||
},
|
||||
{
|
||||
"epoch": 84.99,
|
||||
"eval_loss": 1.1284310817718506,
|
||||
"eval_runtime": 33.8486,
|
||||
"eval_samples_per_second": 12.615,
|
||||
"eval_steps_per_second": 0.798,
|
||||
"eval_wer": 0.42994241842610365,
|
||||
"step": 1700
|
||||
},
|
||||
{
|
||||
"epoch": 87.49,
|
||||
"learning_rate": 1.282051282051282e-05,
|
||||
"loss": 0.1048,
|
||||
"step": 1750
|
||||
},
|
||||
{
|
||||
"epoch": 87.49,
|
||||
"eval_loss": 1.1434178352355957,
|
||||
"eval_runtime": 33.9335,
|
||||
"eval_samples_per_second": 12.583,
|
||||
"eval_steps_per_second": 0.796,
|
||||
"eval_wer": 0.4276391554702495,
|
||||
"step": 1750
|
||||
},
|
||||
{
|
||||
"epoch": 89.99,
|
||||
"learning_rate": 1.0256410256410256e-05,
|
||||
"loss": 0.1038,
|
||||
"step": 1800
|
||||
},
|
||||
{
|
||||
"epoch": 89.99,
|
||||
"eval_loss": 1.1609505414962769,
|
||||
"eval_runtime": 34.1384,
|
||||
"eval_samples_per_second": 12.508,
|
||||
"eval_steps_per_second": 0.791,
|
||||
"eval_wer": 0.42418426103646834,
|
||||
"step": 1800
|
||||
},
|
||||
{
|
||||
"epoch": 92.49,
|
||||
"learning_rate": 7.692307692307694e-06,
|
||||
"loss": 0.1073,
|
||||
"step": 1850
|
||||
},
|
||||
{
|
||||
"epoch": 92.49,
|
||||
"eval_loss": 1.1562278270721436,
|
||||
"eval_runtime": 33.8455,
|
||||
"eval_samples_per_second": 12.616,
|
||||
"eval_steps_per_second": 0.798,
|
||||
"eval_wer": 0.4238003838771593,
|
||||
"step": 1850
|
||||
},
|
||||
{
|
||||
"epoch": 94.99,
|
||||
"learning_rate": 5.128205128205128e-06,
|
||||
"loss": 0.1055,
|
||||
"step": 1900
|
||||
},
|
||||
{
|
||||
"epoch": 94.99,
|
||||
"eval_loss": 1.1589001417160034,
|
||||
"eval_runtime": 34.1041,
|
||||
"eval_samples_per_second": 12.52,
|
||||
"eval_steps_per_second": 0.792,
|
||||
"eval_wer": 0.4214971209213052,
|
||||
"step": 1900
|
||||
},
|
||||
{
|
||||
"epoch": 97.49,
|
||||
"learning_rate": 2.564102564102564e-06,
|
||||
"loss": 0.1133,
|
||||
"step": 1950
|
||||
},
|
||||
{
|
||||
"epoch": 97.49,
|
||||
"eval_loss": 1.1536645889282227,
|
||||
"eval_runtime": 33.9701,
|
||||
"eval_samples_per_second": 12.57,
|
||||
"eval_steps_per_second": 0.795,
|
||||
"eval_wer": 0.4238003838771593,
|
||||
"step": 1950
|
||||
},
|
||||
{
|
||||
"epoch": 99.99,
|
||||
"learning_rate": 0.0,
|
||||
"loss": 0.0992,
|
||||
"step": 2000
|
||||
},
|
||||
{
|
||||
"epoch": 99.99,
|
||||
"eval_loss": 1.1553977727890015,
|
||||
"eval_runtime": 33.8655,
|
||||
"eval_samples_per_second": 12.609,
|
||||
"eval_steps_per_second": 0.797,
|
||||
"eval_wer": 0.42418426103646834,
|
||||
"step": 2000
|
||||
}
|
||||
],
|
||||
"max_steps": 2000,
|
||||
"num_train_epochs": 100,
|
||||
"total_flos": 1.6479245346890066e+19,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0a007a4ec524a3af0145655a9608fca61329594a670f7bbbd2bb694f2d8648c8
|
||||
size 2799
|
||||
1
vocab.json
Normal file
1
vocab.json
Normal file
@@ -0,0 +1 @@
|
||||
{"ฑ": 0, "ๅ": 1, "ก": 2, "ง": 3, "ฒ": 4, "ะ": 5, "๊": 6, "้": 7, "ฌ": 8, "ซ": 9, "ด": 10, "ฯ": 11, "ใ": 12, "ึ": 13, "ญ": 14, "่": 15, "า": 16, "ฤ": 17, "๋": 18, "อ": 19, "ฬ": 20, "ท": 21, "โ": 22, "ภ": 23, "ย": 24, "็": 25, "ล": 26, "ุ": 27, "เ": 28, "ฮ": 29, "ฝ": 30, "ป": 31, "ี": 32, "บ": 33, "ฐ": 34, "ต": 35, "ถ": 36, "ศ": 37, "ฟ": 38, "ณ": 39, "ห": 40, "ร": 41, "พ": 43, "ฆ": 44, "ั": 45, "ค": 46, "ว": 47, "ฏ": 48, "จ": 49, "แ": 50, "ม": 51, "ฎ": 52, "ฉ": 53, "์": 54, "ษ": 55, "ำ": 56, "ผ": 57, "ข": 58, "ไ": 59, "ู": 60, "ื": 61, "น": 62, "ช": 63, "ิ": 64, "ธ": 65, "ฃ": 66, "ส": 67, "|": 42, "[UNK]": 68, "[PAD]": 69}
|
||||
Reference in New Issue
Block a user