Upload speaker embedding models to huggingface (#1428)
See also https://huggingface.co/spaces/k2-fsa/speaker-diarization
This commit is contained in:
27
.github/workflows/export-3dspeaker-to-onnx.yaml
vendored
27
.github/workflows/export-3dspeaker-to-onnx.yaml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [macos-latest]
|
||||
os: [ubuntu-latest]
|
||||
python-version: ["3.8"]
|
||||
|
||||
steps:
|
||||
@@ -43,3 +43,28 @@ jobs:
|
||||
repo_name: k2-fsa/sherpa-onnx
|
||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
tag: speaker-recongition-models
|
||||
|
||||
- name: Publish to huggingface
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
max_attempts: 20
|
||||
timeout_seconds: 200
|
||||
shell: bash
|
||||
command: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
d=speaker-embedding-models
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
export GIT_CLONE_PROTECTION_ACTIVE=false
|
||||
git clone https://huggingface.co/csukuangfj/$d huggingface
|
||||
mv -v ./*.onnx ./huggingface
|
||||
cd huggingface
|
||||
git lfs track "*.onnx"
|
||||
git status
|
||||
git add .
|
||||
git status
|
||||
git commit -m "add models"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
os: [macos-latest]
|
||||
python-version: ["3.10"]
|
||||
|
||||
steps:
|
||||
@@ -43,3 +43,28 @@ jobs:
|
||||
repo_name: k2-fsa/sherpa-onnx
|
||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
tag: speaker-recongition-models
|
||||
|
||||
- name: Publish to huggingface
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
max_attempts: 20
|
||||
timeout_seconds: 200
|
||||
shell: bash
|
||||
command: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
d=speaker-embedding-models
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
export GIT_CLONE_PROTECTION_ACTIVE=false
|
||||
git clone https://huggingface.co/csukuangfj/$d huggingface
|
||||
mv -v ./*.onnx ./huggingface
|
||||
cd huggingface
|
||||
git lfs track "*.onnx"
|
||||
git status
|
||||
git add .
|
||||
git status
|
||||
git commit -m "add models"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
|
||||
|
||||
25
.github/workflows/export-wespeaker-to-onnx.yaml
vendored
25
.github/workflows/export-wespeaker-to-onnx.yaml
vendored
@@ -48,3 +48,28 @@ jobs:
|
||||
repo_name: k2-fsa/sherpa-onnx
|
||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
tag: speaker-recongition-models
|
||||
|
||||
- name: Publish to huggingface
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v3
|
||||
with:
|
||||
max_attempts: 20
|
||||
timeout_seconds: 200
|
||||
shell: bash
|
||||
command: |
|
||||
git config --global user.email "csukuangfj@gmail.com"
|
||||
git config --global user.name "Fangjun Kuang"
|
||||
|
||||
d=speaker-embedding-models
|
||||
export GIT_LFS_SKIP_SMUDGE=1
|
||||
export GIT_CLONE_PROTECTION_ACTIVE=false
|
||||
git clone https://huggingface.co/csukuangfj/$d huggingface
|
||||
mv -v ./*.onnx ./huggingface
|
||||
cd huggingface
|
||||
git lfs track "*.onnx"
|
||||
git status
|
||||
git add .
|
||||
git status
|
||||
git commit -m "add models"
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
|
||||
|
||||
18
README.md
18
README.md
@@ -88,14 +88,15 @@ with the following APIs
|
||||
<summary>You can visit the following Huggingface spaces to try sherpa-onnx without
|
||||
installing anything. All you need is a browser.</summary>
|
||||
|
||||
| Description | URL |
|
||||
|-------------------------------------------------------|------------------------------------|
|
||||
| Speech recognition | [Click me][hf-space-asr] |
|
||||
| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] |
|
||||
| Speech synthesis | [Click me][hf-space-tts] |
|
||||
| Generate subtitles | [Click me][hf-space-subtitle] |
|
||||
| Audio tagging | [Click me][hf-space-audio-tagging] |
|
||||
| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] |
|
||||
| Description | URL |
|
||||
|-------------------------------------------------------|-----------------------------------------|
|
||||
| Speaker diarization | [Click me][hf-space-speaker-diarization]|
|
||||
| Speech recognition | [Click me][hf-space-asr] |
|
||||
| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] |
|
||||
| Speech synthesis | [Click me][hf-space-tts] |
|
||||
| Generate subtitles | [Click me][hf-space-subtitle] |
|
||||
| Audio tagging | [Click me][hf-space-audio-tagging] |
|
||||
| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] |
|
||||
|
||||
We also have spaces built using WebAssembly. They are listed below:
|
||||
|
||||
@@ -240,6 +241,7 @@ Video demo in Chinese: [爆了!炫神教你开打字挂!真正影响胜率
|
||||
[VisionFive 2]: https://www.starfivetech.com/en/site/boards
|
||||
[旭日X3派]: https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html
|
||||
[爱芯派]: https://wiki.sipeed.com/hardware/zh/maixIII/ax-pi/axpi.html
|
||||
[hf-space-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/speaker-diarization
|
||||
[hf-space-asr]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition
|
||||
[Whisper]: https://github.com/openai/whisper
|
||||
[hf-space-asr-whisper]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper
|
||||
|
||||
@@ -4,10 +4,10 @@ set -e
|
||||
|
||||
function install_3d_speaker() {
|
||||
echo "Install 3D-Speaker"
|
||||
git clone https://github.com/alibaba-damo-academy/3D-Speaker.git
|
||||
git clone https://github.com/modelscope/3D-Speaker
|
||||
pushd 3D-Speaker
|
||||
pip install -q -r ./requirements.txt
|
||||
pip install -q modelscope onnx onnxruntime kaldi-native-fbank
|
||||
pip install -q modelscope==1.14.0 onnx onnxruntime kaldi-native-fbank
|
||||
popd
|
||||
}
|
||||
|
||||
|
||||
@@ -7,14 +7,17 @@ function install_nemo() {
|
||||
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
|
||||
python3 get-pip.py
|
||||
|
||||
pip install torch==2.1.0+cpu torchaudio==2.1.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
pip install torch==2.1.0 torchaudio==2.1.0 -f https://download.pytorch.org/whl/torch_stable.html
|
||||
|
||||
pip install wget text-unidecode matplotlib>=3.3.2 onnx onnxruntime pybind11 Cython einops kaldi-native-fbank soundfile
|
||||
pip install -qq wget text-unidecode matplotlib>=3.3.2 onnx onnxruntime pybind11 Cython einops kaldi-native-fbank soundfile
|
||||
pip install -qq ipython
|
||||
|
||||
sudo apt-get install -q -y sox libsndfile1 ffmpeg python3-pip
|
||||
# sudo apt-get install -q -y sox libsndfile1 ffmpeg python3-pip ipython
|
||||
|
||||
BRANCH='main'
|
||||
python3 -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]
|
||||
|
||||
pip install numpy==1.26.4
|
||||
}
|
||||
|
||||
install_nemo
|
||||
|
||||
Reference in New Issue
Block a user