初始化项目,由ModelHub XC社区提供模型
Model: projecte-aina/whisper-large-v3-ca-3catparla Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
377
README.md
Normal file
377
README.md
Normal file
@@ -0,0 +1,377 @@
|
|||||||
|
---
|
||||||
|
language: ca
|
||||||
|
datasets:
|
||||||
|
- projecte-aina/3catparla_asr
|
||||||
|
tags:
|
||||||
|
- audio
|
||||||
|
- automatic-speech-recognition
|
||||||
|
- catalan
|
||||||
|
- whisper-large-v3
|
||||||
|
- projecte-aina
|
||||||
|
- barcelona-supercomputing-center
|
||||||
|
- bsc
|
||||||
|
license: apache-2.0
|
||||||
|
model-index:
|
||||||
|
- name: whisper-large-v3-ca-3catparla
|
||||||
|
results:
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: 3CatParla (Test)
|
||||||
|
type: projecte-aina/3catparla_asr
|
||||||
|
split: test
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 0.96
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: 3CatParla (Dev)
|
||||||
|
type: projecte-aina/3catparla_asr
|
||||||
|
split: dev
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 0.92
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: Mozilla Common Voice 17.0 (Test)
|
||||||
|
type: mozilla-foundation/common_voice_17_0
|
||||||
|
split: test
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 10.32
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: Mozilla Common Voice 17.0 (Dev)
|
||||||
|
type: mozilla-foundation/common_voice_17_0
|
||||||
|
split: validation
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 9.26
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: CV Benchmark Catalan Accents (Balearic fem)
|
||||||
|
type: projecte-aina/commonvoice_benchmark_catalan_accents
|
||||||
|
split: Balearic female
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 12.25
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: CV Benchmark Catalan Accents (Balearic male)
|
||||||
|
type: projecte-aina/commonvoice_benchmark_catalan_accents
|
||||||
|
split: Balearic male
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 12.18
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: CV Benchmark Catalan Accents (Central fem)
|
||||||
|
type: projecte-aina/commonvoice_benchmark_catalan_accents
|
||||||
|
split: Central female
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 8.51
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: CV Benchmark Catalan Accents (Central male)
|
||||||
|
type: projecte-aina/commonvoice_benchmark_catalan_accents
|
||||||
|
split: Central male
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 8.73
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: CV Benchmark Catalan Accents (Northern fem)
|
||||||
|
type: projecte-aina/commonvoice_benchmark_catalan_accents
|
||||||
|
split: Northern female
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 8.09
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: CV Benchmark Catalan Accents (Northern male)
|
||||||
|
type: projecte-aina/commonvoice_benchmark_catalan_accents
|
||||||
|
split: Northern male
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 8.28
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: CV Benchmark Catalan Accents (Northwestern fem)
|
||||||
|
type: projecte-aina/commonvoice_benchmark_catalan_accents
|
||||||
|
split: Northwestern female
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 7.88
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: CV Benchmark Catalan Accents (Northwestern male)
|
||||||
|
type: projecte-aina/commonvoice_benchmark_catalan_accents
|
||||||
|
split: Northwestern male
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 8.44
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: CV Benchmark Catalan Accents (Valencian fem)
|
||||||
|
type: projecte-aina/commonvoice_benchmark_catalan_accents
|
||||||
|
split: Valencian female
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 9.58
|
||||||
|
- task:
|
||||||
|
name: Automatic Speech Recognition
|
||||||
|
type: automatic-speech-recognition
|
||||||
|
dataset:
|
||||||
|
name: CV Benchmark Catalan Accents (Valencian male)
|
||||||
|
type: projecte-aina/commonvoice_benchmark_catalan_accents
|
||||||
|
split: Valencian male
|
||||||
|
args:
|
||||||
|
language: ca
|
||||||
|
metrics:
|
||||||
|
- name: WER
|
||||||
|
type: wer
|
||||||
|
value: 9.1
|
||||||
|
library_name: transformers
|
||||||
|
---
|
||||||
|
# whisper-large-v3-ca-3catparla
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
<details>
|
||||||
|
<summary>Click to expand</summary>
|
||||||
|
|
||||||
|
- [Paper](#paper)
|
||||||
|
- [Model Description](#model-description)
|
||||||
|
- [Intended Uses and Limitations](#intended-uses-and-limitations)
|
||||||
|
- [How to Get Started with the Model](#how-to-get-started-with-the-model)
|
||||||
|
- [Training Details](#training-details)
|
||||||
|
- [Citation](#citation)
|
||||||
|
- [Additional Information](#additional-information)
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## Paper
|
||||||
|
|
||||||
|
**PDF:** [3CatParla: A New Open-Source Corpus of Broadcast TV in Catalan for Automatic Speech Recognition](https://www.isca-archive.org/iberspeech_2024/hernandezmena24_iberspeech.pdf)
|
||||||
|
|
||||||
|
## Model Description
|
||||||
|
|
||||||
|
The "whisper-large-v3-ca-3catparla" is an acoustic model suitable for Automatic Speech Recognition in Catalan. It is the result of finetuning the model ["openai/whisper-large-v3"](https://huggingface.co/openai/whisper-large-v3) with 710 hours of Catalan data released by the [Projecte AINA](https://projecteaina.cat/) from Barcelona, Spain.
|
||||||
|
|
||||||
|
## Intended Uses and Limitations
|
||||||
|
|
||||||
|
This model can be used for Automatic Speech Recognition (ASR) in Catalan. The model is intended to transcribe audio files in Catalan to plain text without punctuation.
|
||||||
|
|
||||||
|
## How to Get Started with the Model
|
||||||
|
|
||||||
|
To see an updated and functional version of this code, please see our [Notebook](https://colab.research.google.com/drive/1MHiPrffNTwiyWeUyMQvSdSbfkef_8aJC?usp=sharing).
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
In order to use this model, you may install [datasets](https://huggingface.co/docs/datasets/installation) and [transformers](https://huggingface.co/docs/transformers/installation):
|
||||||
|
|
||||||
|
Create a virtual environment:
|
||||||
|
```bash
|
||||||
|
python -m venv /path/to/venv
|
||||||
|
```
|
||||||
|
Activate the environment:
|
||||||
|
```bash
|
||||||
|
source /path/to/venv/bin/activate
|
||||||
|
```
|
||||||
|
Install the modules:
|
||||||
|
```bash
|
||||||
|
pip install datasets transformers
|
||||||
|
```
|
||||||
|
|
||||||
|
### For Inference
|
||||||
|
In order to transcribe audio in Catalan using this model, you can follow this example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
#Install Prerequisites
|
||||||
|
pip install torch
|
||||||
|
pip install datasets
|
||||||
|
pip install 'transformers[torch]'
|
||||||
|
pip install evaluate
|
||||||
|
pip install jiwer
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
#This code works with GPU
|
||||||
|
|
||||||
|
#Notice that: load_metric is no longer part of datasets.
|
||||||
|
#you have to remove it and use evaluate's load instead.
|
||||||
|
#(Note from November 2024)
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from transformers import WhisperForConditionalGeneration, WhisperProcessor
|
||||||
|
|
||||||
|
#Load the processor and model.
|
||||||
|
MODEL_NAME="projecte-aina/whisper-large-v3-ca-3catparla"
|
||||||
|
processor = WhisperProcessor.from_pretrained(MODEL_NAME)
|
||||||
|
model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME).to("cuda")
|
||||||
|
|
||||||
|
#Load the dataset
|
||||||
|
from datasets import load_dataset, Audio
|
||||||
|
ds=load_dataset("projecte-aina/3catparla_asr",split='test')
|
||||||
|
|
||||||
|
#Downsample to 16kHz
|
||||||
|
ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
|
||||||
|
|
||||||
|
#Process the dataset
|
||||||
|
def map_to_pred(batch):
|
||||||
|
audio = batch["audio"]
|
||||||
|
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
|
||||||
|
batch["reference"] = processor.tokenizer._normalize(batch['normalized_text'])
|
||||||
|
|
||||||
|
with torch.no_grad():
|
||||||
|
predicted_ids = model.generate(input_features.to("cuda"))[0]
|
||||||
|
|
||||||
|
transcription = processor.decode(predicted_ids)
|
||||||
|
batch["prediction"] = processor.tokenizer._normalize(transcription)
|
||||||
|
|
||||||
|
return batch
|
||||||
|
|
||||||
|
#Do the evaluation
|
||||||
|
result = ds.map(map_to_pred)
|
||||||
|
|
||||||
|
#Compute the overall WER now.
|
||||||
|
from evaluate import load
|
||||||
|
|
||||||
|
wer = load("wer")
|
||||||
|
WER=100 * wer.compute(references=result["reference"], predictions=result["prediction"])
|
||||||
|
print(WER)
|
||||||
|
```
|
||||||
|
**Test Result**: 0.96
|
||||||
|
|
||||||
|
## Training Details
|
||||||
|
|
||||||
|
### Training data
|
||||||
|
|
||||||
|
The specific dataset used to create the model is called ["3CatParla"](https://huggingface.co/datasets/projecte-aina/3catparla_asr).
|
||||||
|
|
||||||
|
### Training procedure
|
||||||
|
|
||||||
|
This model is the result of finetuning the model ["openai/whisper-large-v3"](https://huggingface.co/openai/whisper-large-v3) by following this [tutorial](https://huggingface.co/blog/fine-tune-whisper) provided by Hugging Face.
|
||||||
|
|
||||||
|
### Training Hyperparameters
|
||||||
|
|
||||||
|
* language: catalan
|
||||||
|
* hours of training audio: 710
|
||||||
|
* learning rate: 1.95e-07
|
||||||
|
* sample rate: 16000
|
||||||
|
* train batch size: 32 (x4 GPUs)
|
||||||
|
* gradient accumulation steps: 1
|
||||||
|
* eval batch size: 32
|
||||||
|
* save total limit: 3
|
||||||
|
* max steps: 19842
|
||||||
|
* warmup steps: 1984
|
||||||
|
* eval steps: 3307
|
||||||
|
* save steps: 3307
|
||||||
|
* shuffle buffer size: 480
|
||||||
|
|
||||||
|
## Citation
|
||||||
|
If this model contributes to your research, please cite the work:
|
||||||
|
```bibtex
|
||||||
|
@inproceedings{hernandez20243catparla,
|
||||||
|
title={3CatParla: A New Open-Source Corpus of Broadcast TV in Catalan for Automatic Speech Recognition},
|
||||||
|
author={Hern{\'a}ndez Mena, Carlos Daniel and Armentano Oller, Carme and Solito, Sarah and K{\"u}lebi, Baybars},
|
||||||
|
booktitle={Proc. IberSPEECH 2024},
|
||||||
|
pages={176--180},
|
||||||
|
year={2024}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
<!--
|
||||||
|
@misc{mena2024whisperlarge3catparla,
|
||||||
|
title={Acoustic Model in Catalan: whisper-large-v3-ca-3catparla.},
|
||||||
|
author={Hernandez Mena, Carlos Daniel; Armentano-Oller, Carme; Solito, Sarah; Külebi, Baybars},
|
||||||
|
organization={Barcelona Supercomputing Center},
|
||||||
|
url={https://huggingface.co/projecte-aina/whisper-large-v3-ca-3catparla},
|
||||||
|
year={2024}
|
||||||
|
}
|
||||||
|
-->
|
||||||
|
|
||||||
|
## Additional Information
|
||||||
|
|
||||||
|
### Author
|
||||||
|
|
||||||
|
The fine-tuning process was performed during July 2024 in the [Language Technologies Unit](https://huggingface.co/BSC-LT) of the [Barcelona Supercomputing Center](https://www.bsc.es/) by [Carlos Daniel Hernández Mena](https://huggingface.co/carlosdanielhernandezmena).
|
||||||
|
|
||||||
|
### Contact
|
||||||
|
For further information, please send an email to <langtech@bsc.es>.
|
||||||
|
|
||||||
|
### Copyright
|
||||||
|
Copyright(c) 2024 by Language Technologies Unit, Barcelona Supercomputing Center.
|
||||||
|
|
||||||
|
### License
|
||||||
|
|
||||||
|
[Apache-2.0](https://www.apache.org/licenses/LICENSE-2.0)
|
||||||
|
|
||||||
|
### Funding
|
||||||
|
This work has been promoted and financed by the Generalitat de Catalunya through the [Aina project](https://projecteaina.cat/).
|
||||||
|
|
||||||
|
The training of the model was possible thanks to the compute time provided by [Barcelona Supercomputing Center](https://www.bsc.es/) through MareNostrum 5.
|
||||||
1611
added_tokens.json
Normal file
1611
added_tokens.json
Normal file
File diff suppressed because it is too large
Load Diff
8
all_results.json
Normal file
8
all_results.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"epoch": 11.083056143533918,
|
||||||
|
"total_flos": 8.628884758428616e+21,
|
||||||
|
"train_loss": 0.04815149868992161,
|
||||||
|
"train_runtime": 124240.955,
|
||||||
|
"train_samples_per_second": 20.442,
|
||||||
|
"train_steps_per_second": 0.16
|
||||||
|
}
|
||||||
52
config.json
Normal file
52
config.json
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
{
|
||||||
|
"_name_or_path": "/gpfs/projects/bsc88/speech/ASR/models/whisper-large-v3",
|
||||||
|
"activation_dropout": 0.0,
|
||||||
|
"activation_function": "gelu",
|
||||||
|
"apply_spec_augment": false,
|
||||||
|
"architectures": [
|
||||||
|
"WhisperForConditionalGeneration"
|
||||||
|
],
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"begin_suppress_tokens": [
|
||||||
|
220,
|
||||||
|
50257
|
||||||
|
],
|
||||||
|
"bos_token_id": 50257,
|
||||||
|
"classifier_proj_size": 256,
|
||||||
|
"d_model": 1280,
|
||||||
|
"decoder_attention_heads": 20,
|
||||||
|
"decoder_ffn_dim": 5120,
|
||||||
|
"decoder_layerdrop": 0.0,
|
||||||
|
"decoder_layers": 32,
|
||||||
|
"decoder_start_token_id": 50258,
|
||||||
|
"dropout": 0.0,
|
||||||
|
"encoder_attention_heads": 20,
|
||||||
|
"encoder_ffn_dim": 5120,
|
||||||
|
"encoder_layerdrop": 0.0,
|
||||||
|
"encoder_layers": 32,
|
||||||
|
"eos_token_id": 50257,
|
||||||
|
"forced_decoder_ids": null,
|
||||||
|
"init_std": 0.02,
|
||||||
|
"is_encoder_decoder": true,
|
||||||
|
"mask_feature_length": 10,
|
||||||
|
"mask_feature_min_masks": 0,
|
||||||
|
"mask_feature_prob": 0.0,
|
||||||
|
"mask_time_length": 10,
|
||||||
|
"mask_time_min_masks": 2,
|
||||||
|
"mask_time_prob": 0.05,
|
||||||
|
"max_length": 448,
|
||||||
|
"max_source_positions": 1500,
|
||||||
|
"max_target_positions": 448,
|
||||||
|
"median_filter_width": 7,
|
||||||
|
"model_type": "whisper",
|
||||||
|
"num_hidden_layers": 32,
|
||||||
|
"num_mel_bins": 128,
|
||||||
|
"pad_token_id": 50256,
|
||||||
|
"scale_embedding": false,
|
||||||
|
"suppress_tokens": [],
|
||||||
|
"torch_dtype": "float32",
|
||||||
|
"transformers_version": "4.40.2",
|
||||||
|
"use_cache": false,
|
||||||
|
"use_weighted_layer_sum": false,
|
||||||
|
"vocab_size": 51866
|
||||||
|
}
|
||||||
266
generation_config.json
Normal file
266
generation_config.json
Normal file
@@ -0,0 +1,266 @@
|
|||||||
|
{
|
||||||
|
"alignment_heads": [
|
||||||
|
[
|
||||||
|
7,
|
||||||
|
0
|
||||||
|
],
|
||||||
|
[
|
||||||
|
10,
|
||||||
|
17
|
||||||
|
],
|
||||||
|
[
|
||||||
|
12,
|
||||||
|
18
|
||||||
|
],
|
||||||
|
[
|
||||||
|
13,
|
||||||
|
12
|
||||||
|
],
|
||||||
|
[
|
||||||
|
16,
|
||||||
|
1
|
||||||
|
],
|
||||||
|
[
|
||||||
|
17,
|
||||||
|
14
|
||||||
|
],
|
||||||
|
[
|
||||||
|
19,
|
||||||
|
11
|
||||||
|
],
|
||||||
|
[
|
||||||
|
21,
|
||||||
|
4
|
||||||
|
],
|
||||||
|
[
|
||||||
|
24,
|
||||||
|
1
|
||||||
|
],
|
||||||
|
[
|
||||||
|
25,
|
||||||
|
6
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"begin_suppress_tokens": [
|
||||||
|
220,
|
||||||
|
50257
|
||||||
|
],
|
||||||
|
"bos_token_id": 50257,
|
||||||
|
"decoder_start_token_id": 50258,
|
||||||
|
"eos_token_id": 50257,
|
||||||
|
"forced_decoder_ids": [
|
||||||
|
[
|
||||||
|
1,
|
||||||
|
null
|
||||||
|
],
|
||||||
|
[
|
||||||
|
2,
|
||||||
|
50360
|
||||||
|
]
|
||||||
|
],
|
||||||
|
"is_multilingual": true,
|
||||||
|
"lang_to_id": {
|
||||||
|
"<|af|>": 50327,
|
||||||
|
"<|am|>": 50334,
|
||||||
|
"<|ar|>": 50272,
|
||||||
|
"<|as|>": 50350,
|
||||||
|
"<|az|>": 50304,
|
||||||
|
"<|ba|>": 50355,
|
||||||
|
"<|be|>": 50330,
|
||||||
|
"<|bg|>": 50292,
|
||||||
|
"<|bn|>": 50302,
|
||||||
|
"<|bo|>": 50347,
|
||||||
|
"<|br|>": 50309,
|
||||||
|
"<|bs|>": 50315,
|
||||||
|
"<|ca|>": 50270,
|
||||||
|
"<|cs|>": 50283,
|
||||||
|
"<|cy|>": 50297,
|
||||||
|
"<|da|>": 50285,
|
||||||
|
"<|de|>": 50261,
|
||||||
|
"<|el|>": 50281,
|
||||||
|
"<|en|>": 50259,
|
||||||
|
"<|es|>": 50262,
|
||||||
|
"<|et|>": 50307,
|
||||||
|
"<|eu|>": 50310,
|
||||||
|
"<|fa|>": 50300,
|
||||||
|
"<|fi|>": 50277,
|
||||||
|
"<|fo|>": 50338,
|
||||||
|
"<|fr|>": 50265,
|
||||||
|
"<|gl|>": 50319,
|
||||||
|
"<|gu|>": 50333,
|
||||||
|
"<|haw|>": 50352,
|
||||||
|
"<|ha|>": 50354,
|
||||||
|
"<|he|>": 50279,
|
||||||
|
"<|hi|>": 50276,
|
||||||
|
"<|hr|>": 50291,
|
||||||
|
"<|ht|>": 50339,
|
||||||
|
"<|hu|>": 50286,
|
||||||
|
"<|hy|>": 50312,
|
||||||
|
"<|id|>": 50275,
|
||||||
|
"<|is|>": 50311,
|
||||||
|
"<|it|>": 50274,
|
||||||
|
"<|ja|>": 50266,
|
||||||
|
"<|jw|>": 50356,
|
||||||
|
"<|ka|>": 50329,
|
||||||
|
"<|kk|>": 50316,
|
||||||
|
"<|km|>": 50323,
|
||||||
|
"<|kn|>": 50306,
|
||||||
|
"<|ko|>": 50264,
|
||||||
|
"<|la|>": 50294,
|
||||||
|
"<|lb|>": 50345,
|
||||||
|
"<|ln|>": 50353,
|
||||||
|
"<|lo|>": 50336,
|
||||||
|
"<|lt|>": 50293,
|
||||||
|
"<|lv|>": 50301,
|
||||||
|
"<|mg|>": 50349,
|
||||||
|
"<|mi|>": 50295,
|
||||||
|
"<|mk|>": 50308,
|
||||||
|
"<|ml|>": 50296,
|
||||||
|
"<|mn|>": 50314,
|
||||||
|
"<|mr|>": 50320,
|
||||||
|
"<|ms|>": 50282,
|
||||||
|
"<|mt|>": 50343,
|
||||||
|
"<|my|>": 50346,
|
||||||
|
"<|ne|>": 50313,
|
||||||
|
"<|nl|>": 50271,
|
||||||
|
"<|nn|>": 50342,
|
||||||
|
"<|no|>": 50288,
|
||||||
|
"<|oc|>": 50328,
|
||||||
|
"<|pa|>": 50321,
|
||||||
|
"<|pl|>": 50269,
|
||||||
|
"<|ps|>": 50340,
|
||||||
|
"<|pt|>": 50267,
|
||||||
|
"<|ro|>": 50284,
|
||||||
|
"<|ru|>": 50263,
|
||||||
|
"<|sa|>": 50344,
|
||||||
|
"<|sd|>": 50332,
|
||||||
|
"<|si|>": 50322,
|
||||||
|
"<|sk|>": 50298,
|
||||||
|
"<|sl|>": 50305,
|
||||||
|
"<|sn|>": 50324,
|
||||||
|
"<|so|>": 50326,
|
||||||
|
"<|sq|>": 50317,
|
||||||
|
"<|sr|>": 50303,
|
||||||
|
"<|su|>": 50357,
|
||||||
|
"<|sv|>": 50273,
|
||||||
|
"<|sw|>": 50318,
|
||||||
|
"<|ta|>": 50287,
|
||||||
|
"<|te|>": 50299,
|
||||||
|
"<|tg|>": 50331,
|
||||||
|
"<|th|>": 50289,
|
||||||
|
"<|tk|>": 50341,
|
||||||
|
"<|tl|>": 50348,
|
||||||
|
"<|tr|>": 50268,
|
||||||
|
"<|tt|>": 50351,
|
||||||
|
"<|uk|>": 50280,
|
||||||
|
"<|ur|>": 50290,
|
||||||
|
"<|uz|>": 50337,
|
||||||
|
"<|vi|>": 50278,
|
||||||
|
"<|yi|>": 50335,
|
||||||
|
"<|yo|>": 50325,
|
||||||
|
"<|yue|>": 50358,
|
||||||
|
"<|zh|>": 50260
|
||||||
|
},
|
||||||
|
"language": "catalan",
|
||||||
|
"max_initial_timestamp_index": 50,
|
||||||
|
"max_length": 448,
|
||||||
|
"no_timestamps_token_id": 50364,
|
||||||
|
"pad_token_id": 50257,
|
||||||
|
"prev_sot_token_id": 50362,
|
||||||
|
"return_timestamps": false,
|
||||||
|
"suppress_tokens": [
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
7,
|
||||||
|
8,
|
||||||
|
9,
|
||||||
|
10,
|
||||||
|
14,
|
||||||
|
25,
|
||||||
|
26,
|
||||||
|
27,
|
||||||
|
28,
|
||||||
|
29,
|
||||||
|
31,
|
||||||
|
58,
|
||||||
|
59,
|
||||||
|
60,
|
||||||
|
61,
|
||||||
|
62,
|
||||||
|
63,
|
||||||
|
90,
|
||||||
|
91,
|
||||||
|
92,
|
||||||
|
93,
|
||||||
|
359,
|
||||||
|
503,
|
||||||
|
522,
|
||||||
|
542,
|
||||||
|
873,
|
||||||
|
893,
|
||||||
|
902,
|
||||||
|
918,
|
||||||
|
922,
|
||||||
|
931,
|
||||||
|
1350,
|
||||||
|
1853,
|
||||||
|
1982,
|
||||||
|
2460,
|
||||||
|
2627,
|
||||||
|
3246,
|
||||||
|
3253,
|
||||||
|
3268,
|
||||||
|
3536,
|
||||||
|
3846,
|
||||||
|
3961,
|
||||||
|
4183,
|
||||||
|
4667,
|
||||||
|
6585,
|
||||||
|
6647,
|
||||||
|
7273,
|
||||||
|
9061,
|
||||||
|
9383,
|
||||||
|
10428,
|
||||||
|
10929,
|
||||||
|
11938,
|
||||||
|
12033,
|
||||||
|
12331,
|
||||||
|
12562,
|
||||||
|
13793,
|
||||||
|
14157,
|
||||||
|
14635,
|
||||||
|
15265,
|
||||||
|
15618,
|
||||||
|
16553,
|
||||||
|
16604,
|
||||||
|
18362,
|
||||||
|
18956,
|
||||||
|
20075,
|
||||||
|
21675,
|
||||||
|
22520,
|
||||||
|
26130,
|
||||||
|
26161,
|
||||||
|
26435,
|
||||||
|
28279,
|
||||||
|
29464,
|
||||||
|
31650,
|
||||||
|
32302,
|
||||||
|
32470,
|
||||||
|
36865,
|
||||||
|
42863,
|
||||||
|
47425,
|
||||||
|
49870,
|
||||||
|
50254,
|
||||||
|
50258,
|
||||||
|
50359,
|
||||||
|
50360,
|
||||||
|
50361,
|
||||||
|
50362,
|
||||||
|
50363
|
||||||
|
],
|
||||||
|
"task_to_id": {
|
||||||
|
"transcribe": 50360,
|
||||||
|
"translate": 50359
|
||||||
|
},
|
||||||
|
"transformers_version": "4.40.2"
|
||||||
|
}
|
||||||
50001
merges.txt
Normal file
50001
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
1742
normalizer.json
Normal file
1742
normalizer.json
Normal file
File diff suppressed because it is too large
Load Diff
14
preprocessor_config.json
Normal file
14
preprocessor_config.json
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
{
|
||||||
|
"chunk_length": 30,
|
||||||
|
"feature_extractor_type": "WhisperFeatureExtractor",
|
||||||
|
"feature_size": 128,
|
||||||
|
"hop_length": 160,
|
||||||
|
"n_fft": 400,
|
||||||
|
"n_samples": 480000,
|
||||||
|
"nb_max_frames": 3000,
|
||||||
|
"padding_side": "right",
|
||||||
|
"padding_value": 0.0,
|
||||||
|
"processor_class": "WhisperProcessor",
|
||||||
|
"return_attention_mask": false,
|
||||||
|
"sampling_rate": 16000
|
||||||
|
}
|
||||||
3
pytorch_model-00001-of-00002.bin
Normal file
3
pytorch_model-00001-of-00002.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:b5dd9863724f0133ef9e2fc7fe7c438c9d8728d6a31ef999b85a5099285d0cac
|
||||||
|
size 4993686017
|
||||||
3
pytorch_model-00002-of-00002.bin
Normal file
3
pytorch_model-00002-of-00002.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:ac4eb29e216f51ba613a65495e00e036a1ca27c11ea1892548c4b69dafbf87da
|
||||||
|
size 1180727888
|
||||||
1267
pytorch_model.bin.index.json
Normal file
1267
pytorch_model.bin.index.json
Normal file
File diff suppressed because it is too large
Load Diff
139
special_tokens_map.json
Normal file
139
special_tokens_map.json
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
{
|
||||||
|
"additional_special_tokens": [
|
||||||
|
"<|startoftranscript|>",
|
||||||
|
"<|en|>",
|
||||||
|
"<|zh|>",
|
||||||
|
"<|de|>",
|
||||||
|
"<|es|>",
|
||||||
|
"<|ru|>",
|
||||||
|
"<|ko|>",
|
||||||
|
"<|fr|>",
|
||||||
|
"<|ja|>",
|
||||||
|
"<|pt|>",
|
||||||
|
"<|tr|>",
|
||||||
|
"<|pl|>",
|
||||||
|
"<|ca|>",
|
||||||
|
"<|nl|>",
|
||||||
|
"<|ar|>",
|
||||||
|
"<|sv|>",
|
||||||
|
"<|it|>",
|
||||||
|
"<|id|>",
|
||||||
|
"<|hi|>",
|
||||||
|
"<|fi|>",
|
||||||
|
"<|vi|>",
|
||||||
|
"<|he|>",
|
||||||
|
"<|uk|>",
|
||||||
|
"<|el|>",
|
||||||
|
"<|ms|>",
|
||||||
|
"<|cs|>",
|
||||||
|
"<|ro|>",
|
||||||
|
"<|da|>",
|
||||||
|
"<|hu|>",
|
||||||
|
"<|ta|>",
|
||||||
|
"<|no|>",
|
||||||
|
"<|th|>",
|
||||||
|
"<|ur|>",
|
||||||
|
"<|hr|>",
|
||||||
|
"<|bg|>",
|
||||||
|
"<|lt|>",
|
||||||
|
"<|la|>",
|
||||||
|
"<|mi|>",
|
||||||
|
"<|ml|>",
|
||||||
|
"<|cy|>",
|
||||||
|
"<|sk|>",
|
||||||
|
"<|te|>",
|
||||||
|
"<|fa|>",
|
||||||
|
"<|lv|>",
|
||||||
|
"<|bn|>",
|
||||||
|
"<|sr|>",
|
||||||
|
"<|az|>",
|
||||||
|
"<|sl|>",
|
||||||
|
"<|kn|>",
|
||||||
|
"<|et|>",
|
||||||
|
"<|mk|>",
|
||||||
|
"<|br|>",
|
||||||
|
"<|eu|>",
|
||||||
|
"<|is|>",
|
||||||
|
"<|hy|>",
|
||||||
|
"<|ne|>",
|
||||||
|
"<|mn|>",
|
||||||
|
"<|bs|>",
|
||||||
|
"<|kk|>",
|
||||||
|
"<|sq|>",
|
||||||
|
"<|sw|>",
|
||||||
|
"<|gl|>",
|
||||||
|
"<|mr|>",
|
||||||
|
"<|pa|>",
|
||||||
|
"<|si|>",
|
||||||
|
"<|km|>",
|
||||||
|
"<|sn|>",
|
||||||
|
"<|yo|>",
|
||||||
|
"<|so|>",
|
||||||
|
"<|af|>",
|
||||||
|
"<|oc|>",
|
||||||
|
"<|ka|>",
|
||||||
|
"<|be|>",
|
||||||
|
"<|tg|>",
|
||||||
|
"<|sd|>",
|
||||||
|
"<|gu|>",
|
||||||
|
"<|am|>",
|
||||||
|
"<|yi|>",
|
||||||
|
"<|lo|>",
|
||||||
|
"<|uz|>",
|
||||||
|
"<|fo|>",
|
||||||
|
"<|ht|>",
|
||||||
|
"<|ps|>",
|
||||||
|
"<|tk|>",
|
||||||
|
"<|nn|>",
|
||||||
|
"<|mt|>",
|
||||||
|
"<|sa|>",
|
||||||
|
"<|lb|>",
|
||||||
|
"<|my|>",
|
||||||
|
"<|bo|>",
|
||||||
|
"<|tl|>",
|
||||||
|
"<|mg|>",
|
||||||
|
"<|as|>",
|
||||||
|
"<|tt|>",
|
||||||
|
"<|haw|>",
|
||||||
|
"<|ln|>",
|
||||||
|
"<|ha|>",
|
||||||
|
"<|ba|>",
|
||||||
|
"<|jw|>",
|
||||||
|
"<|su|>",
|
||||||
|
"<|yue|>",
|
||||||
|
"<|translate|>",
|
||||||
|
"<|transcribe|>",
|
||||||
|
"<|startoflm|>",
|
||||||
|
"<|startofprev|>",
|
||||||
|
"<|nospeech|>",
|
||||||
|
"<|notimestamps|>"
|
||||||
|
],
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"unk_token": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
12996
tokenizer_config.json
Normal file
12996
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
8
train_results.json
Normal file
8
train_results.json
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"epoch": 11.083056143533918,
|
||||||
|
"total_flos": 8.628884758428616e+21,
|
||||||
|
"train_loss": 0.04815149868992161,
|
||||||
|
"train_runtime": 124240.955,
|
||||||
|
"train_samples_per_second": 20.442,
|
||||||
|
"train_steps_per_second": 0.16
|
||||||
|
}
|
||||||
357
trainer_state.json
Normal file
357
trainer_state.json
Normal file
@@ -0,0 +1,357 @@
|
|||||||
|
{
|
||||||
|
"best_metric": 0.8985657508208054,
|
||||||
|
"best_model_checkpoint": "CHECKPOINTS/checkpoint-3307",
|
||||||
|
"epoch": 11.083056143533918,
|
||||||
|
"eval_steps": 3307,
|
||||||
|
"global_step": 19842,
|
||||||
|
"is_hyper_param_search": false,
|
||||||
|
"is_local_process_zero": true,
|
||||||
|
"is_world_process_zero": true,
|
||||||
|
"log_history": [
|
||||||
|
{
|
||||||
|
"epoch": 0.02519907267412559,
|
||||||
|
"grad_norm": 0.7393302321434021,
|
||||||
|
"learning_rate": 2.4899193548387098e-06,
|
||||||
|
"loss": 0.3059,
|
||||||
|
"step": 500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.05039814534825118,
|
||||||
|
"grad_norm": 0.6863130331039429,
|
||||||
|
"learning_rate": 5.010080645161291e-06,
|
||||||
|
"loss": 0.1492,
|
||||||
|
"step": 1000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 0.07559721802237677,
|
||||||
|
"grad_norm": 0.703405499458313,
|
||||||
|
"learning_rate": 7.5302419354838715e-06,
|
||||||
|
"loss": 0.1428,
|
||||||
|
"step": 1500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.017437758290495,
|
||||||
|
"grad_norm": 0.6589027643203735,
|
||||||
|
"learning_rate": 9.994400268787099e-06,
|
||||||
|
"loss": 0.1261,
|
||||||
|
"step": 2000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.0426368309646206,
|
||||||
|
"grad_norm": 0.6031370759010315,
|
||||||
|
"learning_rate": 9.71441370814201e-06,
|
||||||
|
"loss": 0.1202,
|
||||||
|
"step": 2500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.0678359036387461,
|
||||||
|
"grad_norm": 0.6696850657463074,
|
||||||
|
"learning_rate": 9.434427147496921e-06,
|
||||||
|
"loss": 0.1171,
|
||||||
|
"step": 3000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 1.0833081342606592,
|
||||||
|
"eval_loss": 0.017358383163809776,
|
||||||
|
"eval_runtime": 504.91,
|
||||||
|
"eval_samples_per_second": 0.824,
|
||||||
|
"eval_steps_per_second": 0.008,
|
||||||
|
"eval_wer": 0.8985657508208054,
|
||||||
|
"step": 3307
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 2.0096764439068644,
|
||||||
|
"grad_norm": 0.591613233089447,
|
||||||
|
"learning_rate": 9.154440586851832e-06,
|
||||||
|
"loss": 0.1042,
|
||||||
|
"step": 3500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 2.03487551658099,
|
||||||
|
"grad_norm": 0.5527406930923462,
|
||||||
|
"learning_rate": 8.874454026206742e-06,
|
||||||
|
"loss": 0.0861,
|
||||||
|
"step": 4000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 2.0600745892551156,
|
||||||
|
"grad_norm": 0.5651601552963257,
|
||||||
|
"learning_rate": 8.594467465561653e-06,
|
||||||
|
"loss": 0.0878,
|
||||||
|
"step": 4500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 3.0019151295232334,
|
||||||
|
"grad_norm": 0.4751633107662201,
|
||||||
|
"learning_rate": 8.314480904916565e-06,
|
||||||
|
"loss": 0.0865,
|
||||||
|
"step": 5000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 3.027114202197359,
|
||||||
|
"grad_norm": 0.5415444374084473,
|
||||||
|
"learning_rate": 8.034494344271475e-06,
|
||||||
|
"loss": 0.0625,
|
||||||
|
"step": 5500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 3.0523132748714845,
|
||||||
|
"grad_norm": 0.5294741988182068,
|
||||||
|
"learning_rate": 7.754507783626388e-06,
|
||||||
|
"loss": 0.0636,
|
||||||
|
"step": 6000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 3.0775123475456105,
|
||||||
|
"grad_norm": 0.538415789604187,
|
||||||
|
"learning_rate": 7.474521222981298e-06,
|
||||||
|
"loss": 0.0652,
|
||||||
|
"step": 6500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 3.083257736115311,
|
||||||
|
"eval_loss": 0.01661744900047779,
|
||||||
|
"eval_runtime": 517.2779,
|
||||||
|
"eval_samples_per_second": 0.804,
|
||||||
|
"eval_steps_per_second": 0.008,
|
||||||
|
"eval_wer": 0.9369659965823781,
|
||||||
|
"step": 6614
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 4.019352887813729,
|
||||||
|
"grad_norm": 0.6536675691604614,
|
||||||
|
"learning_rate": 7.194534662336209e-06,
|
||||||
|
"loss": 0.0474,
|
||||||
|
"step": 7000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 4.044551960487854,
|
||||||
|
"grad_norm": 0.5821442604064941,
|
||||||
|
"learning_rate": 6.91454810169112e-06,
|
||||||
|
"loss": 0.0441,
|
||||||
|
"step": 7500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 4.06975103316198,
|
||||||
|
"grad_norm": 0.5527841448783875,
|
||||||
|
"learning_rate": 6.6345615410460304e-06,
|
||||||
|
"loss": 0.0449,
|
||||||
|
"step": 8000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 5.011591573430098,
|
||||||
|
"grad_norm": 0.453218549489975,
|
||||||
|
"learning_rate": 6.354574980400942e-06,
|
||||||
|
"loss": 0.0369,
|
||||||
|
"step": 8500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 5.036790646104223,
|
||||||
|
"grad_norm": 0.5013980865478516,
|
||||||
|
"learning_rate": 6.074588419755852e-06,
|
||||||
|
"loss": 0.0273,
|
||||||
|
"step": 9000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 5.061989718778349,
|
||||||
|
"grad_norm": 0.5885359644889832,
|
||||||
|
"learning_rate": 5.7946018591107636e-06,
|
||||||
|
"loss": 0.0288,
|
||||||
|
"step": 9500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 5.083207337969963,
|
||||||
|
"eval_loss": 0.017346344888210297,
|
||||||
|
"eval_runtime": 530.4397,
|
||||||
|
"eval_samples_per_second": 0.784,
|
||||||
|
"eval_steps_per_second": 0.008,
|
||||||
|
"eval_wer": 1.0060864389532094,
|
||||||
|
"step": 9921
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 6.003830259046467,
|
||||||
|
"grad_norm": 0.4446285665035248,
|
||||||
|
"learning_rate": 5.514615298465674e-06,
|
||||||
|
"loss": 0.0276,
|
||||||
|
"step": 10000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 6.029029331720593,
|
||||||
|
"grad_norm": 0.46283265948295593,
|
||||||
|
"learning_rate": 5.234628737820585e-06,
|
||||||
|
"loss": 0.016,
|
||||||
|
"step": 10500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 6.054228404394718,
|
||||||
|
"grad_norm": 0.42813611030578613,
|
||||||
|
"learning_rate": 4.954642177175496e-06,
|
||||||
|
"loss": 0.0173,
|
||||||
|
"step": 11000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 6.079427477068844,
|
||||||
|
"grad_norm": 0.48960232734680176,
|
||||||
|
"learning_rate": 4.674655616530407e-06,
|
||||||
|
"loss": 0.018,
|
||||||
|
"step": 11500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 7.021268017336962,
|
||||||
|
"grad_norm": 0.3988407254219055,
|
||||||
|
"learning_rate": 4.394669055885318e-06,
|
||||||
|
"loss": 0.0109,
|
||||||
|
"step": 12000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 7.046467090011087,
|
||||||
|
"grad_norm": 0.3698909282684326,
|
||||||
|
"learning_rate": 4.114682495240229e-06,
|
||||||
|
"loss": 0.0101,
|
||||||
|
"step": 12500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 7.071666162685213,
|
||||||
|
"grad_norm": 0.4073663055896759,
|
||||||
|
"learning_rate": 3.8346959345951395e-06,
|
||||||
|
"loss": 0.0109,
|
||||||
|
"step": 13000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 7.083156939824614,
|
||||||
|
"eval_loss": 0.0192726943641901,
|
||||||
|
"eval_runtime": 518.0983,
|
||||||
|
"eval_samples_per_second": 0.803,
|
||||||
|
"eval_steps_per_second": 0.008,
|
||||||
|
"eval_wer": 0.9907263406485801,
|
||||||
|
"step": 13228
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 8.01350670295333,
|
||||||
|
"grad_norm": 0.33067503571510315,
|
||||||
|
"learning_rate": 3.5547093739500504e-06,
|
||||||
|
"loss": 0.0082,
|
||||||
|
"step": 13500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 8.038705775627458,
|
||||||
|
"grad_norm": 0.3731881380081177,
|
||||||
|
"learning_rate": 3.2747228133049617e-06,
|
||||||
|
"loss": 0.0062,
|
||||||
|
"step": 14000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 8.063904848301583,
|
||||||
|
"grad_norm": 0.2673242688179016,
|
||||||
|
"learning_rate": 2.9947362526598727e-06,
|
||||||
|
"loss": 0.0066,
|
||||||
|
"step": 14500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 9.0057453885697,
|
||||||
|
"grad_norm": 0.18087884783744812,
|
||||||
|
"learning_rate": 2.7147496920147836e-06,
|
||||||
|
"loss": 0.0062,
|
||||||
|
"step": 15000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 9.030944461243827,
|
||||||
|
"grad_norm": 0.250787615776062,
|
||||||
|
"learning_rate": 2.4347631313696945e-06,
|
||||||
|
"loss": 0.0038,
|
||||||
|
"step": 15500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 9.056143533917952,
|
||||||
|
"grad_norm": 0.2255438268184662,
|
||||||
|
"learning_rate": 2.1547765707246054e-06,
|
||||||
|
"loss": 0.0041,
|
||||||
|
"step": 16000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 9.081342606592077,
|
||||||
|
"grad_norm": 0.29366812109947205,
|
||||||
|
"learning_rate": 1.8747900100795163e-06,
|
||||||
|
"loss": 0.0044,
|
||||||
|
"step": 16500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 9.083106541679266,
|
||||||
|
"eval_loss": 0.020848926156759262,
|
||||||
|
"eval_runtime": 513.0393,
|
||||||
|
"eval_samples_per_second": 0.811,
|
||||||
|
"eval_steps_per_second": 0.008,
|
||||||
|
"eval_wer": 0.9772862546320297,
|
||||||
|
"step": 16535
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 10.023183146860196,
|
||||||
|
"grad_norm": 0.1837795376777649,
|
||||||
|
"learning_rate": 1.5948034494344272e-06,
|
||||||
|
"loss": 0.0026,
|
||||||
|
"step": 17000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 10.048382219534322,
|
||||||
|
"grad_norm": 0.24585728347301483,
|
||||||
|
"learning_rate": 1.314816888789338e-06,
|
||||||
|
"loss": 0.0026,
|
||||||
|
"step": 17500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 10.073581292208447,
|
||||||
|
"grad_norm": 0.15548868477344513,
|
||||||
|
"learning_rate": 1.0348303281442492e-06,
|
||||||
|
"loss": 0.0026,
|
||||||
|
"step": 18000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 11.015421832476564,
|
||||||
|
"grad_norm": 0.08757825195789337,
|
||||||
|
"learning_rate": 7.554037406204502e-07,
|
||||||
|
"loss": 0.002,
|
||||||
|
"step": 18500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 11.040620905150691,
|
||||||
|
"grad_norm": 0.1045205295085907,
|
||||||
|
"learning_rate": 4.7541717997536123e-07,
|
||||||
|
"loss": 0.0016,
|
||||||
|
"step": 19000
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 11.065819977824816,
|
||||||
|
"grad_norm": 0.07768367975950241,
|
||||||
|
"learning_rate": 1.9543061933027217e-07,
|
||||||
|
"loss": 0.0016,
|
||||||
|
"step": 19500
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 11.083056143533918,
|
||||||
|
"eval_loss": 0.02412882074713707,
|
||||||
|
"eval_runtime": 518.0169,
|
||||||
|
"eval_samples_per_second": 0.803,
|
||||||
|
"eval_steps_per_second": 0.008,
|
||||||
|
"eval_wer": 0.9926463529366589,
|
||||||
|
"step": 19842
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"epoch": 11.083056143533918,
|
||||||
|
"step": 19842,
|
||||||
|
"total_flos": 8.628884758428616e+21,
|
||||||
|
"train_loss": 0.04815149868992161,
|
||||||
|
"train_runtime": 124240.955,
|
||||||
|
"train_samples_per_second": 20.442,
|
||||||
|
"train_steps_per_second": 0.16
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"logging_steps": 500,
|
||||||
|
"max_steps": 19842,
|
||||||
|
"num_input_tokens_seen": 0,
|
||||||
|
"num_train_epochs": 9223372036854775807,
|
||||||
|
"save_steps": 3307,
|
||||||
|
"total_flos": 8.628884758428616e+21,
|
||||||
|
"train_batch_size": 32,
|
||||||
|
"trial_name": null,
|
||||||
|
"trial_params": null
|
||||||
|
}
|
||||||
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:74768d83badcff2bde356dd7f72dc7ffe861663843fa21b5a97e286a5eae8f3e
|
||||||
|
size 5176
|
||||||
50259
vocab.json
Normal file
50259
vocab.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user