Add C API for spoken language identification. (#695)
This commit is contained in:
26
.github/scripts/test-c-api.sh
vendored
Executable file
26
.github/scripts/test-c-api.sh
vendored
Executable file
@@ -0,0 +1,26 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Smoke test for the spoken-language-identification C API example.
#
# Expects the environment variable SLID_EXE to name the example executable
# (resolvable through PATH). The script downloads the whisper tiny model into
# the current directory, runs the executable, and removes the model afterwards.

set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

echo "SLID_EXE is $SLID_EXE"
echo "PATH: $PATH"

# An unset/empty SLID_EXE would expand to an empty command below, which
# succeeds without running anything. Abort instead of reporting a false pass.
: "${SLID_EXE:?SLID_EXE must be set to the spoken language identification executable}"

log "------------------------------------------------------------"
log "Download whisper tiny for spoken language identification "
log "------------------------------------------------------------"

rm -rf sherpa-onnx-whisper-tiny*
# -f: fail on HTTP errors instead of saving the error page as the tarball.
curl -fLS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.tar.bz2
rm sherpa-onnx-whisper-tiny.tar.bz2

"$SLID_EXE"

rm -rf sherpa-onnx-whisper-tiny*
|
||||||
@@ -28,32 +28,32 @@ ar-arabic.wav
|
|||||||
bg-bulgarian.wav
|
bg-bulgarian.wav
|
||||||
cs-czech.wav
|
cs-czech.wav
|
||||||
da-danish.wav
|
da-danish.wav
|
||||||
de-german.wav
|
# de-german.wav
|
||||||
el-greek.wav
|
# el-greek.wav
|
||||||
en-english.wav
|
# en-english.wav
|
||||||
es-spanish.wav
|
# es-spanish.wav
|
||||||
fa-persian.wav
|
# fa-persian.wav
|
||||||
fi-finnish.wav
|
# fi-finnish.wav
|
||||||
fr-french.wav
|
# fr-french.wav
|
||||||
hi-hindi.wav
|
# hi-hindi.wav
|
||||||
hr-croatian.wav
|
# hr-croatian.wav
|
||||||
id-indonesian.wav
|
# id-indonesian.wav
|
||||||
it-italian.wav
|
# it-italian.wav
|
||||||
ja-japanese.wav
|
# ja-japanese.wav
|
||||||
ko-korean.wav
|
# ko-korean.wav
|
||||||
nl-dutch.wav
|
# nl-dutch.wav
|
||||||
no-norwegian.wav
|
# no-norwegian.wav
|
||||||
po-polish.wav
|
# po-polish.wav
|
||||||
pt-portuguese.wav
|
# pt-portuguese.wav
|
||||||
ro-romanian.wav
|
# ro-romanian.wav
|
||||||
ru-russian.wav
|
# ru-russian.wav
|
||||||
sk-slovak.wav
|
# sk-slovak.wav
|
||||||
sv-swedish.wav
|
# sv-swedish.wav
|
||||||
ta-tamil.wav
|
# ta-tamil.wav
|
||||||
tl-tagalog.wav
|
# tl-tagalog.wav
|
||||||
tr-turkish.wav
|
# tr-turkish.wav
|
||||||
uk-ukrainian.wav
|
# uk-ukrainian.wav
|
||||||
zh-chinese.wav
|
# zh-chinese.wav
|
||||||
)
|
)
|
||||||
|
|
||||||
for wav in ${waves[@]}; do
|
for wav in ${waves[@]}; do
|
||||||
|
|||||||
1
.github/workflows/android.yaml
vendored
1
.github/workflows/android.yaml
vendored
@@ -113,6 +113,7 @@ jobs:
|
|||||||
git config --global user.email "csukuangfj@gmail.com"
|
git config --global user.email "csukuangfj@gmail.com"
|
||||||
git config --global user.name "Fangjun Kuang"
|
git config --global user.name "Fangjun Kuang"
|
||||||
|
|
||||||
|
rm -rf huggingface
|
||||||
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
|
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
|
||||||
|
|
||||||
cd huggingface
|
cd huggingface
|
||||||
|
|||||||
1
.github/workflows/build-xcframework.yaml
vendored
1
.github/workflows/build-xcframework.yaml
vendored
@@ -90,6 +90,7 @@ jobs:
|
|||||||
git config --global user.email "csukuangfj@gmail.com"
|
git config --global user.email "csukuangfj@gmail.com"
|
||||||
git config --global user.name "Fangjun Kuang"
|
git config --global user.name "Fangjun Kuang"
|
||||||
|
|
||||||
|
rm -rf huggingface
|
||||||
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
|
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
|
||||||
|
|
||||||
cd huggingface
|
cd huggingface
|
||||||
|
|||||||
12
.github/workflows/linux.yaml
vendored
12
.github/workflows/linux.yaml
vendored
@@ -123,8 +123,15 @@ jobs:
|
|||||||
name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }}
|
name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }}
|
||||||
path: build/bin/*
|
path: build/bin/*
|
||||||
|
|
||||||
- name: Test spoken language identification
|
- name: Test spoken language identification (C API)
|
||||||
if: matrix.build_type != 'Debug'
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=$PWD/build/bin:$PATH
|
||||||
|
export SLID_EXE=spoken-language-identification-c-api
|
||||||
|
|
||||||
|
.github/scripts/test-c-api.sh
|
||||||
|
|
||||||
|
- name: Test spoken language identification (C++ API)
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
export PATH=$PWD/build/bin:$PATH
|
export PATH=$PWD/build/bin:$PATH
|
||||||
@@ -243,6 +250,7 @@ jobs:
|
|||||||
git config --global user.email "csukuangfj@gmail.com"
|
git config --global user.email "csukuangfj@gmail.com"
|
||||||
git config --global user.name "Fangjun Kuang"
|
git config --global user.name "Fangjun Kuang"
|
||||||
|
|
||||||
|
rm -rf huggingface
|
||||||
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
|
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
|
||||||
|
|
||||||
cd huggingface
|
cd huggingface
|
||||||
|
|||||||
11
.github/workflows/macos.yaml
vendored
11
.github/workflows/macos.yaml
vendored
@@ -102,8 +102,15 @@ jobs:
|
|||||||
otool -L build/bin/sherpa-onnx
|
otool -L build/bin/sherpa-onnx
|
||||||
otool -l build/bin/sherpa-onnx
|
otool -l build/bin/sherpa-onnx
|
||||||
|
|
||||||
- name: Test spoken language identification
|
- name: Test spoken language identification (C API)
|
||||||
if: matrix.build_type != 'Debug'
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=$PWD/build/bin:$PATH
|
||||||
|
export SLID_EXE=spoken-language-identification-c-api
|
||||||
|
|
||||||
|
.github/scripts/test-c-api.sh
|
||||||
|
|
||||||
|
- name: Test spoken language identification (C++ API)
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
export PATH=$PWD/build/bin:$PATH
|
export PATH=$PWD/build/bin:$PATH
|
||||||
|
|||||||
10
.github/workflows/windows-x64.yaml
vendored
10
.github/workflows/windows-x64.yaml
vendored
@@ -68,7 +68,15 @@ jobs:
|
|||||||
|
|
||||||
ls -lh ./bin/Release/sherpa-onnx.exe
|
ls -lh ./bin/Release/sherpa-onnx.exe
|
||||||
|
|
||||||
- name: Test spoken language identification
|
- name: Test spoken language identification (C API)
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=$PWD/build/bin/Release:$PATH
|
||||||
|
export SLID_EXE=spoken-language-identification-c-api.exe
|
||||||
|
|
||||||
|
.github/scripts/test-c-api.sh
|
||||||
|
|
||||||
|
- name: Test spoken language identification (C++ API)
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
export PATH=$PWD/build/bin/Release:$PATH
|
export PATH=$PWD/build/bin/Release:$PATH
|
||||||
|
|||||||
8
.github/workflows/windows-x86.yaml
vendored
8
.github/workflows/windows-x86.yaml
vendored
@@ -69,6 +69,14 @@ jobs:
|
|||||||
|
|
||||||
ls -lh ./bin/Release/sherpa-onnx.exe
|
ls -lh ./bin/Release/sherpa-onnx.exe
|
||||||
|
|
||||||
|
- name: Test spoken language identification (C API)
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PATH=$PWD/build/bin/Release:$PATH
|
||||||
|
export SLID_EXE=spoken-language-identification-c-api.exe
|
||||||
|
|
||||||
|
.github/scripts/test-c-api.sh
|
||||||
|
|
||||||
# - name: Test spoken language identification
|
# - name: Test spoken language identification
|
||||||
# shell: bash
|
# shell: bash
|
||||||
# run: |
|
# run: |
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -85,3 +85,4 @@ log
|
|||||||
vits-piper-*
|
vits-piper-*
|
||||||
vits-coqui-*
|
vits-coqui-*
|
||||||
vits-mms-*
|
vits-mms-*
|
||||||
|
*.tar.bz2
|
||||||
|
|||||||
@@ -7,8 +7,11 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
|
|||||||
add_executable(offline-tts-c-api offline-tts-c-api.c)
|
add_executable(offline-tts-c-api offline-tts-c-api.c)
|
||||||
target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)
|
target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)
|
||||||
|
|
||||||
|
add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c)
|
||||||
|
target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api)
|
||||||
|
|
||||||
if(SHERPA_ONNX_HAS_ALSA)
|
if(SHERPA_ONNX_HAS_ALSA)
|
||||||
add_subdirectory(./asr-microphone-example)
|
add_subdirectory(./asr-microphone-example)
|
||||||
else()
|
elseif((UNIX AND NOT APPLE) OR LINUX)
|
||||||
message(WARNING "Not include ./asr-microphone-example since alsa is not available")
|
message(WARNING "Not include ./asr-microphone-example since alsa is not available")
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd)
|
|||||||
CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
|
CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
|
||||||
LDFLAGS := -L ../build/lib
|
LDFLAGS := -L ../build/lib
|
||||||
LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
|
LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
|
||||||
LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lcargs
|
LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
|
||||||
LDFLAGS += -framework Foundation
|
LDFLAGS += -framework Foundation
|
||||||
LDFLAGS += -lc++
|
LDFLAGS += -lc++
|
||||||
LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
|
LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
|
||||||
|
|||||||
@@ -169,29 +169,30 @@ int32_t main(int32_t argc, char *argv[]) {
|
|||||||
int32_t segment_id = 0;
|
int32_t segment_id = 0;
|
||||||
|
|
||||||
const char *wav_filename = argv[context.index];
|
const char *wav_filename = argv[context.index];
|
||||||
FILE *fp = fopen(wav_filename, "rb");
|
const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
|
||||||
if (!fp) {
|
if (wave == NULL) {
|
||||||
fprintf(stderr, "Failed to open %s\n", wav_filename);
|
fprintf(stderr, "Failed to read %s\n", wav_filename);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assume the wave header occupies 44 bytes.
|
|
||||||
fseek(fp, 44, SEEK_SET);
|
|
||||||
|
|
||||||
// simulate streaming
|
// simulate streaming
|
||||||
|
|
||||||
#define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz
|
#define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz
|
||||||
|
|
||||||
int16_t buffer[N];
|
int16_t buffer[N];
|
||||||
float samples[N];
|
float samples[N];
|
||||||
|
fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
|
||||||
|
wave->sample_rate, wave->num_samples,
|
||||||
|
(float)wave->num_samples / wave->sample_rate);
|
||||||
|
|
||||||
while (!feof(fp)) {
|
int32_t k = 0;
|
||||||
size_t n = fread((void *)buffer, sizeof(int16_t), N, fp);
|
while (k < wave->num_samples) {
|
||||||
if (n > 0) {
|
int32_t start = k;
|
||||||
for (size_t i = 0; i != n; ++i) {
|
int32_t end =
|
||||||
samples[i] = buffer[i] / 32768.;
|
(start + N > wave->num_samples) ? wave->num_samples : (start + N);
|
||||||
}
|
k += N;
|
||||||
AcceptWaveform(stream, 16000, samples, n);
|
|
||||||
|
AcceptWaveform(stream, wave->sample_rate, wave->samples + start,
|
||||||
|
end - start);
|
||||||
while (IsOnlineStreamReady(recognizer, stream)) {
|
while (IsOnlineStreamReady(recognizer, stream)) {
|
||||||
DecodeOnlineStream(recognizer, stream);
|
DecodeOnlineStream(recognizer, stream);
|
||||||
}
|
}
|
||||||
@@ -212,12 +213,12 @@ int32_t main(int32_t argc, char *argv[]) {
|
|||||||
|
|
||||||
DestroyOnlineRecognizerResult(r);
|
DestroyOnlineRecognizerResult(r);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
fclose(fp);
|
|
||||||
|
|
||||||
// add some tail padding
|
// add some tail padding
|
||||||
float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate
|
float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate
|
||||||
AcceptWaveform(stream, 16000, tail_paddings, 4800);
|
AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800);
|
||||||
|
|
||||||
|
SherpaOnnxFreeWave(wave);
|
||||||
|
|
||||||
InputFinished(stream);
|
InputFinished(stream);
|
||||||
while (IsOnlineStreamReady(recognizer, stream)) {
|
while (IsOnlineStreamReady(recognizer, stream)) {
|
||||||
|
|||||||
65
c-api-examples/spoken-language-identification-c-api.c
Normal file
65
c-api-examples/spoken-language-identification-c-api.c
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
|
||||||
|
// We assume you have pre-downloaded the whisper multi-lingual models
|
||||||
|
// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
// An example command to download the "tiny" whisper model is given below:
|
||||||
|
//
|
||||||
|
// clang-format off
|
||||||
|
//
|
||||||
|
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
|
||||||
|
// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
|
||||||
|
// rm sherpa-onnx-whisper-tiny.tar.bz2
|
||||||
|
//
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "sherpa-onnx/c-api/c-api.h"
|
||||||
|
|
||||||
|
int32_t main() {
|
||||||
|
SherpaOnnxSpokenLanguageIdentificationConfig config;
|
||||||
|
|
||||||
|
memset(&config, 0, sizeof(config));
|
||||||
|
|
||||||
|
config.whisper.encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
|
||||||
|
config.whisper.decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";
|
||||||
|
config.num_threads = 1;
|
||||||
|
config.debug = 1;
|
||||||
|
config.provider = "cpu";
|
||||||
|
|
||||||
|
const SherpaOnnxSpokenLanguageIdentification *slid =
|
||||||
|
SherpaOnnxCreateSpokenLanguageIdentification(&config);
|
||||||
|
if (!slid) {
|
||||||
|
fprintf(stderr, "Failed to create spoken language identifier");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// You can find more test waves from
|
||||||
|
// https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs
|
||||||
|
const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
|
||||||
|
const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
|
||||||
|
if (wave == NULL) {
|
||||||
|
fprintf(stderr, "Failed to read %s\n", wav_filename);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
SherpaOnnxOfflineStream *stream =
|
||||||
|
SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid);
|
||||||
|
|
||||||
|
AcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
|
||||||
|
wave->num_samples);
|
||||||
|
|
||||||
|
const SherpaOnnxSpokenLanguageIdentificationResult *result =
|
||||||
|
SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream);
|
||||||
|
|
||||||
|
fprintf(stderr, "wav_filename: %s\n", wav_filename);
|
||||||
|
fprintf(stderr, "Detected language: %s\n", result->lang);
|
||||||
|
|
||||||
|
SherpaOnnxDestroySpokenLanguageIdentificationResult(result);
|
||||||
|
DestroyOfflineStream(stream);
|
||||||
|
SherpaOnnxFreeWave(wave);
|
||||||
|
SherpaOnnxDestroySpokenLanguageIdentification(slid);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
set -ex
|
set -ex
|
||||||
|
|
||||||
if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
|
if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
|
||||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
||||||
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
||||||
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
set -ex
|
set -ex
|
||||||
|
|
||||||
if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
|
if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
|
||||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
||||||
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
||||||
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
set -ex
|
set -ex
|
||||||
if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
|
if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
|
||||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@@ -16,7 +17,9 @@
|
|||||||
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||||
#include "sherpa-onnx/csrc/offline-tts.h"
|
#include "sherpa-onnx/csrc/offline-tts.h"
|
||||||
#include "sherpa-onnx/csrc/online-recognizer.h"
|
#include "sherpa-onnx/csrc/online-recognizer.h"
|
||||||
|
#include "sherpa-onnx/csrc/spoken-language-identification.h"
|
||||||
#include "sherpa-onnx/csrc/voice-activity-detector.h"
|
#include "sherpa-onnx/csrc/voice-activity-detector.h"
|
||||||
|
#include "sherpa-onnx/csrc/wave-reader.h"
|
||||||
#include "sherpa-onnx/csrc/wave-writer.h"
|
#include "sherpa-onnx/csrc/wave-writer.h"
|
||||||
|
|
||||||
struct SherpaOnnxOnlineRecognizer {
|
struct SherpaOnnxOnlineRecognizer {
|
||||||
@@ -859,3 +862,97 @@ int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
|
|||||||
int32_t sample_rate, const char *filename) {
|
int32_t sample_rate, const char *filename) {
|
||||||
return sherpa_onnx::WriteWave(filename, sample_rate, samples, n);
|
return sherpa_onnx::WriteWave(filename, sample_rate, samples, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read `filename` with sherpa_onnx::ReadWave() and hand the samples back in a
// heap-allocated SherpaOnnxWave.
//
// Returns nullptr when ReadWave() reports failure through its is_ok flag.
// A non-null result must be released with SherpaOnnxFreeWave().
const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename) {
  bool ok = false;
  int32_t rate = -1;
  std::vector<float> data = sherpa_onnx::ReadWave(filename, &rate, &ok);
  if (!ok) {
    return nullptr;
  }

  // Copy into a plain array so that the C caller owns memory that outlives
  // the local vector.
  float *buffer = new float[data.size()];
  std::copy(data.begin(), data.end(), buffer);

  SherpaOnnxWave *result = new SherpaOnnxWave;
  result->samples = buffer;
  result->sample_rate = rate;
  result->num_samples = data.size();

  return result;
}
|
||||||
|
|
||||||
|
void SherpaOnnxFreeWave(const SherpaOnnxWave *wave) {
|
||||||
|
if (wave) {
|
||||||
|
delete[] wave->samples;
|
||||||
|
delete wave;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SherpaOnnxSpokenLanguageIdentification {
|
||||||
|
std::unique_ptr<sherpa_onnx::SpokenLanguageIdentification> impl;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Build a spoken language identifier from a C config struct.
//
// Each field is passed through SHERPA_ONNX_OR together with a fallback value
// (the macro is defined outside this view; presumably it substitutes the
// fallback when the field is 0/NULL -- confirm against its definition).
//
// Returns nullptr if the translated config fails Validate(). Otherwise the
// returned handle must be released with
// SherpaOnnxDestroySpokenLanguageIdentification().
const SherpaOnnxSpokenLanguageIdentification *
SherpaOnnxCreateSpokenLanguageIdentification(
    const SherpaOnnxSpokenLanguageIdentificationConfig *config) {
  sherpa_onnx::SpokenLanguageIdentificationConfig cfg;

  auto &whisper = cfg.whisper;
  whisper.encoder = SHERPA_ONNX_OR(config->whisper.encoder, "");
  whisper.decoder = SHERPA_ONNX_OR(config->whisper.decoder, "");
  whisper.tail_paddings = SHERPA_ONNX_OR(config->whisper.tail_paddings, -1);

  cfg.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
  cfg.debug = config->debug;
  cfg.provider = SHERPA_ONNX_OR(config->provider, "cpu");

  // In debug mode, dump the effective configuration before validating it.
  if (cfg.debug) {
    SHERPA_ONNX_LOGE("%s\n", cfg.ToString().c_str());
  }

  if (!cfg.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  auto *handle = new SherpaOnnxSpokenLanguageIdentification;
  handle->impl =
      std::make_unique<sherpa_onnx::SpokenLanguageIdentification>(cfg);

  return handle;
}
|
||||||
|
|
||||||
|
void SherpaOnnxDestroySpokenLanguageIdentification(
|
||||||
|
const SherpaOnnxSpokenLanguageIdentification *slid) {
|
||||||
|
delete slid;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wrap a stream created by the identifier in a heap-allocated
// SherpaOnnxOfflineStream and return it. Ownership passes to the caller.
SherpaOnnxOfflineStream *
SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
    const SherpaOnnxSpokenLanguageIdentification *slid) {
  return new SherpaOnnxOfflineStream(slid->impl->CreateStream());
}
|
||||||
|
|
||||||
|
// Run language identification on stream `s` and return the language string
// produced by the identifier in a heap-allocated result.
//
// The result and its NUL-terminated `lang` string are allocated here and must
// be released with SherpaOnnxDestroySpokenLanguageIdentificationResult().
const SherpaOnnxSpokenLanguageIdentificationResult *
SherpaOnnxSpokenLanguageIdentificationCompute(
    const SherpaOnnxSpokenLanguageIdentification *slid,
    const SherpaOnnxOfflineStream *s) {
  const std::string code = slid->impl->Compute(s->impl.get());
  const auto len = code.size();

  // Duplicate the string into a C buffer, with one extra byte for the
  // terminator.
  char *text = new char[len + 1];
  std::copy(code.begin(), code.end(), text);
  text[len] = '\0';

  auto *result = new SherpaOnnxSpokenLanguageIdentificationResult;
  result->lang = text;

  return result;
}
|
||||||
|
|
||||||
|
void SherpaOnnxDestroySpokenLanguageIdentificationResult(
|
||||||
|
const SherpaOnnxSpokenLanguageIdentificationResult *r) {
|
||||||
|
if (r) {
|
||||||
|
delete[] r->lang;
|
||||||
|
delete r;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -820,6 +820,76 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
|
|||||||
int32_t sample_rate,
|
int32_t sample_rate,
|
||||||
const char *filename);
|
const char *filename);
|
||||||
|
|
||||||
|
// Audio data returned by SherpaOnnxReadWave() and released with
// SherpaOnnxFreeWave().
SHERPA_ONNX_API typedef struct SherpaOnnxWave {
  // Array of `num_samples` samples normalized to the range [-1, 1].
  const float *samples;
  // Sample rate as reported by the wave reader.
  int32_t sample_rate;
  // Number of elements in `samples`.
  int32_t num_samples;
} SherpaOnnxWave;
|
||||||
|
|
||||||
|
// Return a NULL pointer on error. It supports only standard WAVE files.
|
||||||
|
// Each sample should be 16-bit. It supports only a single channel.
|
||||||
|
//
|
||||||
|
// If the returned pointer is not NULL, the user has to invoke
|
||||||
|
// SherpaOnnxFreeWave() to free the returned pointer to avoid memory leak.
|
||||||
|
SHERPA_ONNX_API const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename);
|
||||||
|
|
||||||
|
SHERPA_ONNX_API void SherpaOnnxFreeWave(const SherpaOnnxWave *wave);
|
||||||
|
|
||||||
|
// Spoken language identification
|
||||||
|
|
||||||
|
// Whisper model settings used for spoken language identification.
SHERPA_ONNX_API typedef struct
    SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
  // Path to the whisper encoder model, e.g. tiny-encoder.int8.onnx
  const char *encoder;
  // Path to the whisper decoder model, e.g. tiny-decoder.int8.onnx
  const char *decoder;
  // NOTE(review): the implementation passes this through
  // SHERPA_ONNX_OR(..., -1), so 0 presumably selects the default -- confirm.
  int32_t tail_paddings;
} SherpaOnnxSpokenLanguageIdentificationWhisperConfig;
|
||||||
|
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationConfig {
|
||||||
|
SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper;
|
||||||
|
int32_t num_threads;
|
||||||
|
int32_t debug;
|
||||||
|
const char *provider;
|
||||||
|
} SherpaOnnxSpokenLanguageIdentificationConfig;
|
||||||
|
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentification
|
||||||
|
SherpaOnnxSpokenLanguageIdentification;
|
||||||
|
|
||||||
|
// Create an instance of SpokenLanguageIdentification.
|
||||||
|
// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentification()
|
||||||
|
// to free the returned pointer to avoid memory leak.
|
||||||
|
SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentification *
|
||||||
|
SherpaOnnxCreateSpokenLanguageIdentification(
|
||||||
|
const SherpaOnnxSpokenLanguageIdentificationConfig *config);
|
||||||
|
|
||||||
|
SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentification(
|
||||||
|
const SherpaOnnxSpokenLanguageIdentification *slid);
|
||||||
|
|
||||||
|
// The user has to invoke DestroyOfflineStream()
|
||||||
|
// to free the returned pointer to avoid memory leak
|
||||||
|
SHERPA_ONNX_API SherpaOnnxOfflineStream *
|
||||||
|
SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
|
||||||
|
const SherpaOnnxSpokenLanguageIdentification *slid);
|
||||||
|
|
||||||
|
// Result of SherpaOnnxSpokenLanguageIdentificationCompute().
SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationResult {
  // NUL-terminated language code owned by this result, for example:
  //   en for English
  //   de for German
  //   zh for Chinese
  //   es for Spanish
  //   ...
  const char *lang;
} SherpaOnnxSpokenLanguageIdentificationResult;
|
||||||
|
|
||||||
|
// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentificationResult()
|
||||||
|
// to free the returned pointer to avoid memory leak
|
||||||
|
SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentificationResult *
|
||||||
|
SherpaOnnxSpokenLanguageIdentificationCompute(
|
||||||
|
const SherpaOnnxSpokenLanguageIdentification *slid,
|
||||||
|
const SherpaOnnxOfflineStream *s);
|
||||||
|
|
||||||
|
SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentificationResult(
|
||||||
|
const SherpaOnnxSpokenLanguageIdentificationResult *r);
|
||||||
|
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user