add more models for speaker diarization (#1440)
This commit is contained in:
@@ -17,8 +17,9 @@ val segmentationModel = "segmentation.onnx"
|
|||||||
|
|
||||||
// please download it from
|
// please download it from
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||||
|
// and rename it to embedding.onnx
|
||||||
// and move it to the assets folder
|
// and move it to the assets folder
|
||||||
val embeddingModel = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
|
val embeddingModel = "embedding.onnx"
|
||||||
|
|
||||||
// in the end, your assets folder should look like below
|
// in the end, your assets folder should look like below
|
||||||
/*
|
/*
|
||||||
@@ -26,7 +27,7 @@ val embeddingModel = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
|||||||
/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxSpeakerDiarization/app/src/main/assets
|
/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxSpeakerDiarization/app/src/main/assets
|
||||||
(py38) fangjuns-MacBook-Pro:assets fangjun$ ls -lh
|
(py38) fangjuns-MacBook-Pro:assets fangjun$ ls -lh
|
||||||
total 89048
|
total 89048
|
||||||
-rw-r--r-- 1 fangjun staff 38M Oct 12 20:28 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
-rw-r--r-- 1 fangjun staff 38M Oct 12 20:28 embedding.onnx
|
||||||
-rw-r--r-- 1 fangjun staff 5.7M Oct 12 20:28 segmentation.onnx
|
-rw-r--r-- 1 fangjun staff 5.7M Oct 12 20:28 segmentation.onnx
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -63,4 +64,4 @@ object SpeakerDiarizationObject {
|
|||||||
_sd = OfflineSpeakerDiarization(assetManager = assetManager, config = config)
|
_sd = OfflineSpeakerDiarization(assetManager = assetManager, config = config)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,18 +37,20 @@ pushd ./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/
|
|||||||
|
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
model_name={{ model.model_name }}
|
segmentation_model_name={{ model.segmentation.model_name }}
|
||||||
short_name={{ model.short_name }}
|
segmentation_short_name={{ model.segmentation.short_name }}
|
||||||
|
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/$model_name.tar.bz2
|
embedding_model_name={{ model.embedding.model_name }}
|
||||||
tar xvf $model_name.tar.bz2
|
embedding_short_name={{ model.embedding.short_name }}
|
||||||
rm $model_name.tar.bz2
|
|
||||||
mv $model_name/model.onnx segmentation.onnx
|
|
||||||
rm -rf $model_name
|
|
||||||
|
|
||||||
if [ ! -f 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/$segmentation_model_name.tar.bz2
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
tar xvf $segmentation_model_name.tar.bz2
|
||||||
fi
|
rm $segmentation_model_name.tar.bz2
|
||||||
|
mv $segmentation_model_name/model.onnx segmentation.onnx
|
||||||
|
rm -rf $segmentation_model_name
|
||||||
|
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$embedding_model_name.onnx
|
||||||
|
mv $embedding_model_name.onnx embedding.onnx
|
||||||
|
|
||||||
echo "pwd: $PWD"
|
echo "pwd: $PWD"
|
||||||
ls -lh
|
ls -lh
|
||||||
@@ -74,12 +76,12 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
|
|||||||
./gradlew build
|
./gradlew build
|
||||||
popd
|
popd
|
||||||
|
|
||||||
mv android/SherpaOnnxSpeakerDiarization/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-speaker-diarization-$short_name-3dspeaker.apk
|
mv android/SherpaOnnxSpeakerDiarization/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-speaker-diarization-$segmentation_short_name-$embedding_short_name.apk
|
||||||
ls -lh apks
|
ls -lh apks
|
||||||
rm -v ./android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/$arch/*.so
|
rm -v ./android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/$arch/*.so
|
||||||
done
|
done
|
||||||
|
|
||||||
rm -rf ./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/segmentation.onnx
|
rm -rf ./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/*.onnx
|
||||||
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
|
|||||||
@@ -27,10 +27,22 @@ def get_args():
|
|||||||
@dataclass
|
@dataclass
|
||||||
class SpeakerSegmentationModel:
|
class SpeakerSegmentationModel:
|
||||||
model_name: str
|
model_name: str
|
||||||
short_name: str = ""
|
short_name: str
|
||||||
|
|
||||||
|
|
||||||
def get_models() -> List[SpeakerSegmentationModel]:
|
@dataclass
|
||||||
|
class SpeakerEmbeddingModel:
|
||||||
|
model_name: str
|
||||||
|
short_name: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class Model:
|
||||||
|
segmentation: SpeakerSegmentationModel
|
||||||
|
embedding: SpeakerEmbeddingModel
|
||||||
|
|
||||||
|
|
||||||
|
def get_segmentation_models() -> List[SpeakerSegmentationModel]:
|
||||||
models = [
|
models = [
|
||||||
SpeakerSegmentationModel(
|
SpeakerSegmentationModel(
|
||||||
model_name="sherpa-onnx-pyannote-segmentation-3-0",
|
model_name="sherpa-onnx-pyannote-segmentation-3-0",
|
||||||
@@ -45,13 +57,33 @@ def get_models() -> List[SpeakerSegmentationModel]:
|
|||||||
return models
|
return models
|
||||||
|
|
||||||
|
|
||||||
|
def get_embedding_models() -> List[SpeakerEmbeddingModel]:
|
||||||
|
models = [
|
||||||
|
SpeakerSegmentationModel(
|
||||||
|
model_name="3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k",
|
||||||
|
short_name="3dspeaker",
|
||||||
|
),
|
||||||
|
SpeakerSegmentationModel(
|
||||||
|
model_name="nemo_en_titanet_small",
|
||||||
|
short_name="nemo",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
return models
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
args = get_args()
|
args = get_args()
|
||||||
index = args.index
|
index = args.index
|
||||||
total = args.total
|
total = args.total
|
||||||
assert 0 <= index < total, (index, total)
|
assert 0 <= index < total, (index, total)
|
||||||
|
|
||||||
all_model_list = get_models()
|
segmentation_models = get_segmentation_models()
|
||||||
|
embedding_models = get_embedding_models()
|
||||||
|
|
||||||
|
all_model_list = []
|
||||||
|
for s in segmentation_models:
|
||||||
|
for e in embedding_models:
|
||||||
|
all_model_list.append(Model(segmentation=s, embedding=e))
|
||||||
|
|
||||||
num_models = len(all_model_list)
|
num_models = len(all_model_list)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user