Add Python API for source separation (#2283)

This commit is contained in:
Fangjun Kuang
2025-06-05 20:44:26 +08:00
committed by GitHub
parent 6f0fac2064
commit d57e4f84de
20 changed files with 599 additions and 23 deletions

View File

@@ -8,6 +8,32 @@ log() {
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
# Source-separation smoke tests: run the spleeter and UVR Python examples,
# then gather whatever spleeter-*.wav / uvr-*.wav files are present in the
# current directory into ./source-separation.
log "test spleeter"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/sherpa-onnx-spleeter-2stems-fp16.tar.bz2
tar xvf sherpa-onnx-spleeter-2stems-fp16.tar.bz2
rm sherpa-onnx-spleeter-2stems-fp16.tar.bz2
# The same test recording URL is used for the UVR run below, so it is
# downloaded only once here and deleted after both examples have finished.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/qi-feng-le-zh.wav
./python-api-examples/offline-source-separation-spleeter.py
rm -rf sherpa-onnx-spleeter-2stems-fp16
log "test UVR"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/source-separation-models/UVR_MDXNET_9482.onnx
./python-api-examples/offline-source-separation-uvr.py
rm UVR_MDXNET_9482.onnx
rm qi-feng-le-zh.wav
# -p keeps this step from failing when the directory already exists
# (for example when the script is re-run in the same workspace).
mkdir -p source-separation
mv spleeter-*.wav source-separation
mv uvr-*.wav source-separation
ls -lh source-separation
log "test offline dolphin ctc"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2

View File

@@ -97,6 +97,11 @@ jobs:
.github/scripts/test-python.sh
.github/scripts/test-speaker-recognition-python.sh
# Upload the contents of ./source-separation as a build artifact; the name
# embeds the matrix OS and Python version so each matrix job gets its own
# artifact name.
- uses: actions/upload-artifact@v4
with:
name: source-separation-${{ matrix.os }}-${{ matrix.python-version }}
path: ./source-separation
- uses: actions/upload-artifact@v4
with:
name: tts-generated-test-files-${{ matrix.os }}-${{ matrix.python-version }}

View File

@@ -36,22 +36,18 @@ jobs:
fail-fast: false
matrix:
include:
# it fails to install ffmpeg on ubuntu 20.04
#
# - os: ubuntu-20.04
# python-version: "3.7"
# - os: ubuntu-20.04
# python-version: "3.8"
# - os: ubuntu-20.04
# python-version: "3.9"
- os: ubuntu-24.04
python-version: "3.8"
- os: ubuntu-24.04
python-version: "3.9"
- os: ubuntu-22.04
- os: ubuntu-24.04
python-version: "3.10"
- os: ubuntu-22.04
- os: ubuntu-24.04
python-version: "3.11"
- os: ubuntu-22.04
- os: ubuntu-24.04
python-version: "3.12"
- os: ubuntu-22.04
- os: ubuntu-24.04
python-version: "3.13"
steps:
@@ -81,10 +77,12 @@ jobs:
# The version specifier must be quoted: unquoted, the shell parses `>=0.1.96`
# as a redirection of stdout to a file named `=0.1.96`, so pip would install
# an unconstrained sentencepiece and its output would land in that file.
python3 -m pip install --upgrade pip numpy pypinyin "sentencepiece>=0.1.96" soundfile
python3 -m pip install wheel twine setuptools
- name: Install ffmpeg
shell: bash
run: |
sudo apt-get install ffmpeg
# Install ffmpeg through the afoley587/setup-ffmpeg action.
# NOTE(review): the action is referenced by the mutable `main` branch rather
# than a tag or commit SHA — consider pinning it for reproducible, auditable runs.
- uses: afoley587/setup-ffmpeg@main
id: setup-ffmpeg
with:
ffmpeg-version: release
architecture: ''
# Pass the workflow token only when running on github.com; otherwise pass an
# empty string.
github-token: ${{ github.server_url == 'https://github.com' && github.token || '' }}
- name: Install ninja
shell: bash
@@ -189,6 +187,11 @@ jobs:
.github/scripts/test-python.sh
.github/scripts/test-speaker-recognition-python.sh
# Upload the contents of ./source-separation as a build artifact; the name
# embeds the matrix OS and Python version plus a `-whl` suffix.
- uses: actions/upload-artifact@v4
with:
name: source-separation-${{ matrix.os }}-${{ matrix.python-version }}-whl
path: ./source-separation
- uses: actions/upload-artifact@v4
with:
name: tts-generated-test-files-${{ matrix.os }}-${{ matrix.python-version }}