Add C++ microphone examples for audio tagging (#749)
This commit is contained in:
2
.github/workflows/test-build-wheel.yaml
vendored
2
.github/workflows/test-build-wheel.yaml
vendored
@@ -89,7 +89,7 @@ jobs:
|
|||||||
export PATH=/c/hostedtoolcache/windows/Python/3.8.10/x64/bin:$PATH
|
export PATH=/c/hostedtoolcache/windows/Python/3.8.10/x64/bin:$PATH
|
||||||
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
|
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
|
||||||
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
|
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
|
||||||
export PATH=/c/hostedtoolcache/windows/Python/3.11.8/x64/bin:$PATH
|
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
|
||||||
export PATH=/c/hostedtoolcache/windows/Python/3.12.2/x64/bin:$PATH
|
export PATH=/c/hostedtoolcache/windows/Python/3.12.2/x64/bin:$PATH
|
||||||
|
|
||||||
which sherpa-onnx
|
which sherpa-onnx
|
||||||
|
|||||||
2
.github/workflows/test-pip-install.yaml
vendored
2
.github/workflows/test-pip-install.yaml
vendored
@@ -67,7 +67,7 @@ jobs:
|
|||||||
export PATH=/c/hostedtoolcache/windows/Python/3.8.10/x64/bin:$PATH
|
export PATH=/c/hostedtoolcache/windows/Python/3.8.10/x64/bin:$PATH
|
||||||
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
|
export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH
|
||||||
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
|
export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH
|
||||||
export PATH=/c/hostedtoolcache/windows/Python/3.11.8/x64/bin:$PATH
|
export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH
|
||||||
export PATH=/c/hostedtoolcache/windows/Python/3.12.2/x64/bin:$PATH
|
export PATH=/c/hostedtoolcache/windows/Python/3.12.2/x64/bin:$PATH
|
||||||
|
|
||||||
sherpa-onnx --help
|
sherpa-onnx --help
|
||||||
|
|||||||
33
README.md
33
README.md
@@ -2,23 +2,48 @@
|
|||||||
|
|
||||||
This repository supports running the following functions **locally**
|
This repository supports running the following functions **locally**
|
||||||
|
|
||||||
- Speech-to-text (i.e., ASR)
|
- Speech-to-text (i.e., ASR); both streaming and non-streaming are supported
|
||||||
- Text-to-speech (i.e., TTS)
|
- Text-to-speech (i.e., TTS)
|
||||||
- Speaker identification
|
- Speaker identification
|
||||||
|
- Speaker verification
|
||||||
|
- Spoken language identification
|
||||||
|
- Audio tagging
|
||||||
|
- VAD (e.g., [silero-vad](https://github.com/snakers4/silero-vad))
|
||||||
|
|
||||||
on the following platforms and operating systems:
|
on the following platforms and operating systems:
|
||||||
|
|
||||||
- Linux, macOS, Windows
|
- x86, ``x86_64``, 32-bit ARM, 64-bit ARM (arm64, aarch64), RISC-V (riscv64)
|
||||||
- Android
|
- Linux, macOS, Windows, openKylin
|
||||||
|
- Android, WearOS
|
||||||
- iOS
|
- iOS
|
||||||
- Raspberry Pi
|
- NodeJS
|
||||||
|
- WebAssembly
|
||||||
|
- [Raspberry Pi](https://www.raspberrypi.com/)
|
||||||
|
- [RV1126](https://www.rock-chips.com/uploads/pdf/2022.8.26/191/RV1126%20Brief%20Datasheet.pdf)
|
||||||
|
- [LicheePi4A](https://sipeed.com/licheepi4a)
|
||||||
|
- [VisionFive 2](https://www.starfivetech.com/en/site/boards)
|
||||||
|
- [旭日X3派](https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html)
|
||||||
- etc
|
- etc
|
||||||
|
|
||||||
|
with the following APIs
|
||||||
|
|
||||||
|
- C++
|
||||||
|
- C
|
||||||
|
- Python
|
||||||
|
- Go
|
||||||
|
- ``C#``
|
||||||
|
- Javascript
|
||||||
|
- Java
|
||||||
|
- Kotlin
|
||||||
|
- Swift
|
||||||
|
|
||||||
# Useful links
|
# Useful links
|
||||||
|
|
||||||
- Documentation: https://k2-fsa.github.io/sherpa/onnx/
|
- Documentation: https://k2-fsa.github.io/sherpa/onnx/
|
||||||
- APK for the text-to-speech engine: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html
|
- APK for the text-to-speech engine: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html
|
||||||
- APK for speaker identification: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html
|
- APK for speaker identification: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html
|
||||||
|
- APK for speech recognition: https://github.com/k2-fsa/sherpa-onnx/releases/
|
||||||
|
- Bilibili 演示视频: https://search.bilibili.com/all?keyword=%E6%96%B0%E4%B8%80%E4%BB%A3Kaldi
|
||||||
|
|
||||||
# How to reach us
|
# How to reach us
|
||||||
|
|
||||||
|
|||||||
@@ -7,14 +7,22 @@ for usage.
|
|||||||
- [SherpaOnnx](./SherpaOnnx) It uses a streaming ASR model.
|
- [SherpaOnnx](./SherpaOnnx) It uses a streaming ASR model.
|
||||||
|
|
||||||
- [SherpaOnnx2Pass](./SherpaOnnx2Pass) It uses a streaming ASR model
|
- [SherpaOnnx2Pass](./SherpaOnnx2Pass) It uses a streaming ASR model
|
||||||
for the first pass and use a non-streaming ASR model for the second pass.
|
for the first pass and uses a non-streaming ASR model for the second pass
|
||||||
|
|
||||||
|
- [SherpaOnnxKws](./SherpaOnnxKws) It demonstrates how to use keyword spotting
|
||||||
|
|
||||||
|
- [SherpaOnnxSpeakerIdentification](./SherpaOnnxSpeakerIdentification) It demonstrates
|
||||||
|
how to use speaker identification
|
||||||
|
|
||||||
|
- [SherpaOnnxTts](./SherpaOnnxTts) It is for standalone text-to-speech.
|
||||||
|
|
||||||
|
- [SherpaOnnxTtsEngine](./SherpaOnnxTtsEngine) It is for text-to-speech engine;
|
||||||
|
you can use it to replace the system TTS engine.
|
||||||
|
|
||||||
- [SherpaOnnxVad](./SherpaOnnxVad) It demonstrates how to use a VAD
|
- [SherpaOnnxVad](./SherpaOnnxVad) It demonstrates how to use a VAD
|
||||||
|
|
||||||
- [SherpaOnnxVadAsr](./SherpaOnnxVadAsr) It uses a VAD with a non-streaming
|
- [SherpaOnnxVadAsr](./SherpaOnnxVadAsr) It uses a VAD with a non-streaming
|
||||||
ASR model.
|
ASR model.
|
||||||
|
|
||||||
- [SherpaOnnxTts](./SherpaOnnxTts) It is for standalone text-to-speech.
|
- [SherpaOnnxWebSocket](./SherpaOnnxWebSocket) It shows how to write a websocket
|
||||||
|
client for the Python streaming websocket server.
|
||||||
- [SherpaOnnxTtsEngine](./SherpaOnnxTtsEngine) It is for text-to-speech engine;
|
|
||||||
you can use it to replace the system TTS engine.
|
|
||||||
|
|||||||
@@ -99,7 +99,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
|
|||||||
Subdevices: 1/1
|
Subdevices: 1/1
|
||||||
Subdevice #0: subdevice #0
|
Subdevice #0: subdevice #0
|
||||||
|
|
||||||
and if you want to select card 3 and the device 0 on that card, please use:
|
and if you want to select card 3 and device 0 on that card, please use:
|
||||||
|
|
||||||
plughw:3,0
|
plughw:3,0
|
||||||
|
|
||||||
|
|||||||
@@ -50,6 +50,7 @@ def get_binaries():
|
|||||||
"sherpa-onnx-keyword-spotter",
|
"sherpa-onnx-keyword-spotter",
|
||||||
"sherpa-onnx-microphone",
|
"sherpa-onnx-microphone",
|
||||||
"sherpa-onnx-microphone-offline",
|
"sherpa-onnx-microphone-offline",
|
||||||
|
"sherpa-onnx-microphone-offline-audio-tagging",
|
||||||
"sherpa-onnx-microphone-offline-speaker-identification",
|
"sherpa-onnx-microphone-offline-speaker-identification",
|
||||||
"sherpa-onnx-offline",
|
"sherpa-onnx-offline",
|
||||||
"sherpa-onnx-offline-language-identification",
|
"sherpa-onnx-offline-language-identification",
|
||||||
@@ -69,6 +70,7 @@ def get_binaries():
|
|||||||
"sherpa-onnx-alsa-offline-speaker-identification",
|
"sherpa-onnx-alsa-offline-speaker-identification",
|
||||||
"sherpa-onnx-offline-tts-play-alsa",
|
"sherpa-onnx-offline-tts-play-alsa",
|
||||||
"sherpa-onnx-vad-alsa",
|
"sherpa-onnx-vad-alsa",
|
||||||
|
"sherpa-onnx-alsa-offline-audio-tagging",
|
||||||
]
|
]
|
||||||
|
|
||||||
if is_windows():
|
if is_windows():
|
||||||
|
|||||||
@@ -123,7 +123,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
|
|||||||
Subdevices: 1/1
|
Subdevices: 1/1
|
||||||
Subdevice #0: subdevice #0
|
Subdevice #0: subdevice #0
|
||||||
|
|
||||||
and if you want to select card 3 and the device 0 on that card, please use:
|
and if you want to select card 3 and device 0 on that card, please use:
|
||||||
|
|
||||||
plughw:3,0
|
plughw:3,0
|
||||||
|
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
|
|||||||
Subdevices: 1/1
|
Subdevices: 1/1
|
||||||
Subdevice #0: subdevice #0
|
Subdevice #0: subdevice #0
|
||||||
|
|
||||||
and if you want to select card 3 and the device 0 on that card, please use:
|
and if you want to select card 3 and device 0 on that card, please use:
|
||||||
|
|
||||||
plughw:3,0
|
plughw:3,0
|
||||||
|
|
||||||
|
|||||||
@@ -68,7 +68,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
|
|||||||
Subdevices: 1/1
|
Subdevices: 1/1
|
||||||
Subdevice #0: subdevice #0
|
Subdevice #0: subdevice #0
|
||||||
|
|
||||||
and if you want to select card 3 and the device 0 on that card, please use:
|
and if you want to select card 3 and device 0 on that card, please use:
|
||||||
|
|
||||||
plughw:3,0
|
plughw:3,0
|
||||||
|
|
||||||
|
|||||||
@@ -264,6 +264,7 @@ if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY)
|
|||||||
add_executable(sherpa-onnx-alsa-offline sherpa-onnx-alsa-offline.cc alsa.cc)
|
add_executable(sherpa-onnx-alsa-offline sherpa-onnx-alsa-offline.cc alsa.cc)
|
||||||
add_executable(sherpa-onnx-alsa-offline-speaker-identification sherpa-onnx-alsa-offline-speaker-identification.cc alsa.cc)
|
add_executable(sherpa-onnx-alsa-offline-speaker-identification sherpa-onnx-alsa-offline-speaker-identification.cc alsa.cc)
|
||||||
add_executable(sherpa-onnx-vad-alsa sherpa-onnx-vad-alsa.cc alsa.cc)
|
add_executable(sherpa-onnx-vad-alsa sherpa-onnx-vad-alsa.cc alsa.cc)
|
||||||
|
add_executable(sherpa-onnx-alsa-offline-audio-tagging sherpa-onnx-alsa-offline-audio-tagging.cc alsa.cc)
|
||||||
|
|
||||||
|
|
||||||
if(SHERPA_ONNX_ENABLE_TTS)
|
if(SHERPA_ONNX_ENABLE_TTS)
|
||||||
@@ -276,6 +277,7 @@ if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY)
|
|||||||
sherpa-onnx-alsa-offline-speaker-identification
|
sherpa-onnx-alsa-offline-speaker-identification
|
||||||
sherpa-onnx-keyword-spotter-alsa
|
sherpa-onnx-keyword-spotter-alsa
|
||||||
sherpa-onnx-vad-alsa
|
sherpa-onnx-vad-alsa
|
||||||
|
sherpa-onnx-alsa-offline-audio-tagging
|
||||||
)
|
)
|
||||||
|
|
||||||
if(SHERPA_ONNX_ENABLE_TTS)
|
if(SHERPA_ONNX_ENABLE_TTS)
|
||||||
@@ -354,6 +356,11 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY)
|
|||||||
microphone.cc
|
microphone.cc
|
||||||
)
|
)
|
||||||
|
|
||||||
|
add_executable(sherpa-onnx-microphone-offline-audio-tagging
|
||||||
|
sherpa-onnx-microphone-offline-audio-tagging.cc
|
||||||
|
microphone.cc
|
||||||
|
)
|
||||||
|
|
||||||
if(BUILD_SHARED_LIBS)
|
if(BUILD_SHARED_LIBS)
|
||||||
set(PA_LIB portaudio)
|
set(PA_LIB portaudio)
|
||||||
else()
|
else()
|
||||||
@@ -365,6 +372,7 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY)
|
|||||||
sherpa-onnx-keyword-spotter-microphone
|
sherpa-onnx-keyword-spotter-microphone
|
||||||
sherpa-onnx-microphone-offline
|
sherpa-onnx-microphone-offline
|
||||||
sherpa-onnx-microphone-offline-speaker-identification
|
sherpa-onnx-microphone-offline-speaker-identification
|
||||||
|
sherpa-onnx-microphone-offline-audio-tagging
|
||||||
sherpa-onnx-vad-microphone
|
sherpa-onnx-vad-microphone
|
||||||
sherpa-onnx-vad-microphone-offline-asr
|
sherpa-onnx-vad-microphone-offline-asr
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
|
|||||||
Subdevices: 1/1
|
Subdevices: 1/1
|
||||||
Subdevice #0: subdevice #0
|
Subdevice #0: subdevice #0
|
||||||
|
|
||||||
and if you want to select card 3 and the device 0 on that card, please use:
|
and if you want to select card 3 and device 0 on that card, please use:
|
||||||
|
|
||||||
plughw:3,0
|
plughw:3,0
|
||||||
|
|
||||||
|
|||||||
190
sherpa-onnx/csrc/sherpa-onnx-alsa-offline-audio-tagging.cc
Normal file
190
sherpa-onnx/csrc/sherpa-onnx-alsa-offline-audio-tagging.cc
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
// sherpa-onnx/csrc/sherpa-onnx-alsa-offline-audio-tagging.cc
|
||||||
|
//
|
||||||
|
// Copyright (c) 2022-2024 Xiaomi Corporation
|
||||||
|
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <mutex> // NOLINT
|
||||||
|
#include <thread> // NOLINT
|
||||||
|
|
||||||
|
#include "sherpa-onnx/csrc/alsa.h"
|
||||||
|
#include "sherpa-onnx/csrc/audio-tagging.h"
|
||||||
|
#include "sherpa-onnx/csrc/macros.h"
|
||||||
|
|
||||||
|
// Phases of the record/decode cycle. DetectKeyPress() moves kIdle ->
// kRecording -> kDecoding on successive Enter presses; main() moves
// kDecoding back to kIdle once the tagger has run.
enum class State {
  kIdle,       // waiting for Enter to begin a recording
  kRecording,  // a recording is in progress
  kDecoding,   // recording finished; main() is expected to run the tagger
};

// Current phase of the cycle.
// NOTE(review): accessed from several threads without synchronization.
State state{State::kIdle};

// true to stop the program and exit
bool stop{false};

// Captured audio samples; every access in this file holds samples_mutex.
std::vector<float> samples;
std::mutex samples_mutex;
|
||||||
|
|
||||||
|
static void DetectKeyPress() {
|
||||||
|
SHERPA_ONNX_LOGE("Press Enter to start");
|
||||||
|
int32_t key;
|
||||||
|
while (!stop && (key = getchar())) {
|
||||||
|
if (key != 0x0a) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (state) {
|
||||||
|
case State::kIdle:
|
||||||
|
SHERPA_ONNX_LOGE("Start recording. Press Enter to stop recording");
|
||||||
|
state = State::kRecording;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(samples_mutex);
|
||||||
|
samples.clear();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case State::kRecording:
|
||||||
|
SHERPA_ONNX_LOGE("Stop recording. Decoding ...");
|
||||||
|
state = State::kDecoding;
|
||||||
|
break;
|
||||||
|
case State::kDecoding:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Record(const char *device_name, int32_t expected_sample_rate) {
|
||||||
|
sherpa_onnx::Alsa alsa(device_name);
|
||||||
|
|
||||||
|
if (alsa.GetExpectedSampleRate() != expected_sample_rate) {
|
||||||
|
fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
|
||||||
|
expected_sample_rate);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t chunk = 0.1 * alsa.GetActualSampleRate();
|
||||||
|
while (!stop) {
|
||||||
|
const std::vector<float> &s = alsa.Read(chunk);
|
||||||
|
std::lock_guard<std::mutex> lock(samples_mutex);
|
||||||
|
samples.insert(samples.end(), s.begin(), s.end());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Signal handler installed for SIGINT by main(): raises the global `stop`
// flag and prints a hint that Enter must be pressed for the program to exit.
// The signal number is not used.
//
// NOTE(review): fprintf is not on the list of async-signal-safe functions;
// consider emitting the hint from a regular thread instead.
static void Handler(int32_t /*sig*/) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Press Enter to exit\n");
}
|
||||||
|
|
||||||
|
int32_t main(int32_t argc, char *argv[]) {
|
||||||
|
signal(SIGINT, Handler);
|
||||||
|
|
||||||
|
const char *kUsageMessage = R"usage(
|
||||||
|
Audio tagging from microphone (Linux only).
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
|
||||||
|
./bin/sherpa-onnx-alsa-offline-audio-tagging \
|
||||||
|
--zipformer-model=./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.onnx \
|
||||||
|
--labels=./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv \
|
||||||
|
device_name
|
||||||
|
|
||||||
|
Please refer to
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
|
||||||
|
for a list of pre-trained models to download.
|
||||||
|
|
||||||
|
The device name specifies which microphone to use in case there are several
|
||||||
|
on your system. You can use
|
||||||
|
|
||||||
|
arecord -l
|
||||||
|
|
||||||
|
to find all available microphones on your computer. For instance, if it outputs
|
||||||
|
|
||||||
|
**** List of CAPTURE Hardware Devices ****
|
||||||
|
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
|
||||||
|
Subdevices: 1/1
|
||||||
|
Subdevice #0: subdevice #0
|
||||||
|
|
||||||
|
and if you want to select card 3 and device 0 on that card, please use:
|
||||||
|
|
||||||
|
plughw:3,0
|
||||||
|
|
||||||
|
as the device_name.
|
||||||
|
)usage";
|
||||||
|
|
||||||
|
sherpa_onnx::ParseOptions po(kUsageMessage);
|
||||||
|
sherpa_onnx::AudioTaggingConfig config;
|
||||||
|
config.Register(&po);
|
||||||
|
|
||||||
|
po.Read(argc, argv);
|
||||||
|
if (po.NumArgs() != 1) {
|
||||||
|
fprintf(stderr, "Please provide only 1 argument: the device name\n");
|
||||||
|
po.PrintUsage();
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "%s\n", config.ToString().c_str());
|
||||||
|
|
||||||
|
if (!config.Validate()) {
|
||||||
|
fprintf(stderr, "Errors in config!\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
SHERPA_ONNX_LOGE("Creating audio tagger ...");
|
||||||
|
sherpa_onnx::AudioTagging tagger(config);
|
||||||
|
SHERPA_ONNX_LOGE("Audio tagger created created!");
|
||||||
|
|
||||||
|
std::string device_name = po.GetArg(1);
|
||||||
|
fprintf(stderr, "Use recording device: %s\n", device_name.c_str());
|
||||||
|
|
||||||
|
int32_t sample_rate = 16000; // fixed to 16000Hz for all models from icefall
|
||||||
|
|
||||||
|
std::thread t2(Record, device_name.c_str(), sample_rate);
|
||||||
|
using namespace std::chrono_literals; // NOLINT
|
||||||
|
std::this_thread::sleep_for(100ms); // sleep for 100ms
|
||||||
|
std::thread t(DetectKeyPress);
|
||||||
|
|
||||||
|
while (!stop) {
|
||||||
|
switch (state) {
|
||||||
|
case State::kIdle:
|
||||||
|
break;
|
||||||
|
case State::kRecording:
|
||||||
|
break;
|
||||||
|
case State::kDecoding: {
|
||||||
|
std::vector<float> buf;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(samples_mutex);
|
||||||
|
buf = std::move(samples);
|
||||||
|
}
|
||||||
|
SHERPA_ONNX_LOGE("Computing...");
|
||||||
|
auto s = tagger.CreateStream();
|
||||||
|
s->AcceptWaveform(sample_rate, buf.data(), buf.size());
|
||||||
|
auto results = tagger.Compute(s.get());
|
||||||
|
SHERPA_ONNX_LOGE("Result is:");
|
||||||
|
|
||||||
|
int32_t i = 0;
|
||||||
|
std::ostringstream os;
|
||||||
|
for (const auto &event : results) {
|
||||||
|
os << i << ": " << event.ToString() << "\n";
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
SHERPA_ONNX_LOGE("\n%s\n", os.str().c_str());
|
||||||
|
|
||||||
|
state = State::kIdle;
|
||||||
|
SHERPA_ONNX_LOGE("Press Enter to start");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::this_thread::sleep_for(20ms); // sleep for 20ms
|
||||||
|
}
|
||||||
|
t.join();
|
||||||
|
t2.join();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -71,8 +71,8 @@ static void Record(const char *device_name, int32_t expected_sample_rate) {
|
|||||||
|
|
||||||
int32_t chunk = 0.1 * alsa.GetActualSampleRate();
|
int32_t chunk = 0.1 * alsa.GetActualSampleRate();
|
||||||
while (!stop) {
|
while (!stop) {
|
||||||
std::lock_guard<std::mutex> lock(samples_mutex);
|
|
||||||
const std::vector<float> &s = alsa.Read(chunk);
|
const std::vector<float> &s = alsa.Read(chunk);
|
||||||
|
std::lock_guard<std::mutex> lock(samples_mutex);
|
||||||
samples.insert(samples.end(), s.begin(), s.end());
|
samples.insert(samples.end(), s.begin(), s.end());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -193,7 +193,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
|
|||||||
Subdevices: 1/1
|
Subdevices: 1/1
|
||||||
Subdevice #0: subdevice #0
|
Subdevice #0: subdevice #0
|
||||||
|
|
||||||
and if you want to select card 3 and the device 0 on that card, please use:
|
and if you want to select card 3 and device 0 on that card, please use:
|
||||||
plughw:3,0
|
plughw:3,0
|
||||||
as the device_name.
|
as the device_name.
|
||||||
|
|
||||||
|
|||||||
@@ -68,8 +68,8 @@ static void Record(const char *device_name, int32_t expected_sample_rate) {
|
|||||||
|
|
||||||
int32_t chunk = 0.1 * alsa.GetActualSampleRate();
|
int32_t chunk = 0.1 * alsa.GetActualSampleRate();
|
||||||
while (!stop) {
|
while (!stop) {
|
||||||
std::lock_guard<std::mutex> lock(samples_mutex);
|
|
||||||
const std::vector<float> &s = alsa.Read(chunk);
|
const std::vector<float> &s = alsa.Read(chunk);
|
||||||
|
std::lock_guard<std::mutex> lock(samples_mutex);
|
||||||
samples.insert(samples.end(), s.begin(), s.end());
|
samples.insert(samples.end(), s.begin(), s.end());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -119,7 +119,7 @@ https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
|||||||
for a list of pre-trained models to download.
|
for a list of pre-trained models to download.
|
||||||
|
|
||||||
The device name specifies which microphone to use in case there are several
|
The device name specifies which microphone to use in case there are several
|
||||||
on you system. You can use
|
on your system. You can use
|
||||||
|
|
||||||
arecord -l
|
arecord -l
|
||||||
|
|
||||||
@@ -130,7 +130,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
|
|||||||
Subdevices: 1/1
|
Subdevices: 1/1
|
||||||
Subdevice #0: subdevice #0
|
Subdevice #0: subdevice #0
|
||||||
|
|
||||||
and if you want to select card 3 and the device 0 on that card, please use:
|
and if you want to select card 3 and device 0 on that card, please use:
|
||||||
|
|
||||||
plughw:3,0
|
plughw:3,0
|
||||||
|
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
|
|||||||
Subdevices: 1/1
|
Subdevices: 1/1
|
||||||
Subdevice #0: subdevice #0
|
Subdevice #0: subdevice #0
|
||||||
|
|
||||||
and if you want to select card 3 and the device 0 on that card, please use:
|
and if you want to select card 3 and device 0 on that card, please use:
|
||||||
|
|
||||||
plughw:3,0
|
plughw:3,0
|
||||||
|
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
|
|||||||
for a list of pre-trained models to download.
|
for a list of pre-trained models to download.
|
||||||
|
|
||||||
The device name specifies which microphone to use in case there are several
|
The device name specifies which microphone to use in case there are several
|
||||||
on you system. You can use
|
on your system. You can use
|
||||||
|
|
||||||
arecord -l
|
arecord -l
|
||||||
|
|
||||||
@@ -51,7 +51,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
|
|||||||
Subdevices: 1/1
|
Subdevices: 1/1
|
||||||
Subdevice #0: subdevice #0
|
Subdevice #0: subdevice #0
|
||||||
|
|
||||||
and if you want to select card 3 and the device 0 on that card, please use:
|
and if you want to select card 3 and device 0 on that card, please use:
|
||||||
|
|
||||||
plughw:3,0
|
plughw:3,0
|
||||||
|
|
||||||
|
|||||||
@@ -10,10 +10,11 @@
|
|||||||
|
|
||||||
#include "portaudio.h" // NOLINT
|
#include "portaudio.h" // NOLINT
|
||||||
#include "sherpa-onnx/csrc/display.h"
|
#include "sherpa-onnx/csrc/display.h"
|
||||||
#include "sherpa-onnx/csrc/microphone.h"
|
|
||||||
#include "sherpa-onnx/csrc/keyword-spotter.h"
|
#include "sherpa-onnx/csrc/keyword-spotter.h"
|
||||||
|
#include "sherpa-onnx/csrc/microphone.h"
|
||||||
|
|
||||||
bool stop = false;
|
bool stop = false;
|
||||||
|
float mic_sample_rate = 16000;
|
||||||
|
|
||||||
static int32_t RecordCallback(const void *input_buffer,
|
static int32_t RecordCallback(const void *input_buffer,
|
||||||
void * /*output_buffer*/,
|
void * /*output_buffer*/,
|
||||||
@@ -23,7 +24,8 @@ static int32_t RecordCallback(const void *input_buffer,
|
|||||||
void *user_data) {
|
void *user_data) {
|
||||||
auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(user_data);
|
auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(user_data);
|
||||||
|
|
||||||
stream->AcceptWaveform(16000, reinterpret_cast<const float *>(input_buffer),
|
stream->AcceptWaveform(mic_sample_rate,
|
||||||
|
reinterpret_cast<const float *>(input_buffer),
|
||||||
frames_per_buffer);
|
frames_per_buffer);
|
||||||
|
|
||||||
return stop ? paComplete : paContinue;
|
return stop ? paComplete : paContinue;
|
||||||
@@ -80,14 +82,31 @@ for a list of pre-trained models to download.
|
|||||||
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
||||||
fprintf(stderr, "Num devices: %d\n", num_devices);
|
fprintf(stderr, "Num devices: %d\n", num_devices);
|
||||||
|
|
||||||
PaStreamParameters param;
|
int32_t device_index = Pa_GetDefaultInputDevice();
|
||||||
|
|
||||||
param.device = Pa_GetDefaultInputDevice();
|
if (device_index == paNoDevice) {
|
||||||
if (param.device == paNoDevice) {
|
|
||||||
fprintf(stderr, "No default input device found\n");
|
fprintf(stderr, "No default input device found\n");
|
||||||
|
fprintf(stderr, "If you are using Linux, please switch to \n");
|
||||||
|
fprintf(stderr, " ./bin/sherpa-onnx-keyword-spotter-alsa \n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "Use default device: %d\n", param.device);
|
|
||||||
|
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
|
||||||
|
if (pDeviceIndex) {
|
||||||
|
fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
|
||||||
|
device_index = atoi(pDeviceIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int32_t i = 0; i != num_devices; ++i) {
|
||||||
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
|
||||||
|
fprintf(stderr, " %s %d %s\n", (i == device_index) ? "*" : " ", i,
|
||||||
|
info->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
PaStreamParameters param;
|
||||||
|
param.device = device_index;
|
||||||
|
|
||||||
|
fprintf(stderr, "Use device: %d\n", param.device);
|
||||||
|
|
||||||
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
||||||
fprintf(stderr, " Name: %s\n", info->name);
|
fprintf(stderr, " Name: %s\n", info->name);
|
||||||
@@ -98,12 +117,19 @@ for a list of pre-trained models to download.
|
|||||||
|
|
||||||
param.suggestedLatency = info->defaultLowInputLatency;
|
param.suggestedLatency = info->defaultLowInputLatency;
|
||||||
param.hostApiSpecificStreamInfo = nullptr;
|
param.hostApiSpecificStreamInfo = nullptr;
|
||||||
|
|
||||||
|
const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
|
||||||
|
if (pSampleRateStr) {
|
||||||
|
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
|
||||||
|
mic_sample_rate = atof(pSampleRateStr);
|
||||||
|
}
|
||||||
|
|
||||||
float sample_rate = 16000;
|
float sample_rate = 16000;
|
||||||
|
|
||||||
PaStream *stream;
|
PaStream *stream;
|
||||||
PaError err =
|
PaError err =
|
||||||
Pa_OpenStream(&stream, ¶m, nullptr, /* &outputParameters, */
|
Pa_OpenStream(&stream, ¶m, nullptr, /* &outputParameters, */
|
||||||
sample_rate,
|
mic_sample_rate,
|
||||||
0, // frames per buffer
|
0, // frames per buffer
|
||||||
paClipOff, // we won't output out of range samples
|
paClipOff, // we won't output out of range samples
|
||||||
// so don't bother clipping them
|
// so don't bother clipping them
|
||||||
|
|||||||
238
sherpa-onnx/csrc/sherpa-onnx-microphone-offline-audio-tagging.cc
Normal file
238
sherpa-onnx/csrc/sherpa-onnx-microphone-offline-audio-tagging.cc
Normal file
@@ -0,0 +1,238 @@
|
|||||||
|
// sherpa-onnx/csrc/sherpa-onnx-microphone-offline-audio-tagging.cc
|
||||||
|
//
|
||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cctype> // std::tolower
|
||||||
|
#include <mutex> // NOLINT
|
||||||
|
#include <thread> // NOLINT
|
||||||
|
|
||||||
|
#include "portaudio.h" // NOLINT
|
||||||
|
#include "sherpa-onnx/csrc/audio-tagging.h"
|
||||||
|
#include "sherpa-onnx/csrc/macros.h"
|
||||||
|
#include "sherpa-onnx/csrc/microphone.h"
|
||||||
|
|
||||||
|
// Phases of the record/decode cycle. DetectKeyPress() moves kIdle ->
// kRecording -> kDecoding on successive Enter presses.
enum class State {
  kIdle,       // waiting for Enter to begin a recording
  kRecording,  // a recording is in progress
  kDecoding,   // recording finished; the samples are ready to be processed
};

// Current phase of the cycle.
// NOTE(review): accessed from more than one thread without synchronization.
State state{State::kIdle};

// true to stop the program and exit
bool stop{false};

// Captured audio samples; the accesses visible in this file hold
// samples_mutex.
std::vector<float> samples;
std::mutex samples_mutex;
|
||||||
|
|
||||||
|
static void DetectKeyPress() {
|
||||||
|
SHERPA_ONNX_LOGE("Press Enter to start");
|
||||||
|
int32_t key;
|
||||||
|
while (!stop && (key = getchar())) {
|
||||||
|
if (key != 0x0a) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (state) {
|
||||||
|
case State::kIdle:
|
||||||
|
SHERPA_ONNX_LOGE("Start recording. Press Enter to stop recording");
|
||||||
|
state = State::kRecording;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(samples_mutex);
|
||||||
|
samples.clear();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case State::kRecording:
|
||||||
|
SHERPA_ONNX_LOGE("Stop recording. Decoding ...");
|
||||||
|
state = State::kDecoding;
|
||||||
|
break;
|
||||||
|
case State::kDecoding:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int32_t RecordCallback(const void *input_buffer,
|
||||||
|
void * /*output_buffer*/,
|
||||||
|
unsigned long frames_per_buffer, // NOLINT
|
||||||
|
const PaStreamCallbackTimeInfo * /*time_info*/,
|
||||||
|
PaStreamCallbackFlags /*status_flags*/,
|
||||||
|
void *user_data) {
|
||||||
|
std::lock_guard<std::mutex> lock(samples_mutex);
|
||||||
|
|
||||||
|
auto p = reinterpret_cast<const float *>(input_buffer);
|
||||||
|
samples.insert(samples.end(), p, p + frames_per_buffer);
|
||||||
|
|
||||||
|
return stop ? paComplete : paContinue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Signal handler installed for SIGINT by main(): raises the global `stop`
// flag and prints a hint that Enter must be pressed for the program to exit.
// The signal number is not used.
//
// NOTE(review): fprintf is not on the list of async-signal-safe functions;
// consider emitting the hint from a regular thread instead.
static void Handler(int32_t /*sig*/) {
  stop = true;
  fprintf(stderr, "\nCaught Ctrl + C. Press Enter to exit\n");
}
|
||||||
|
|
||||||
|
int32_t main(int32_t argc, char *argv[]) {
|
||||||
|
signal(SIGINT, Handler);
|
||||||
|
|
||||||
|
const char *kUsageMessage = R"usage(
|
||||||
|
Audio tagging from microphone.
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
|
|
||||||
|
./bin/sherpa-onnx-microphone-offline-audio-tagging \
|
||||||
|
--zipformer-model=./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.onnx \
|
||||||
|
--labels=./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv
|
||||||
|
|
||||||
|
Please see
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
|
||||||
|
for more models.
|
||||||
|
)usage";
|
||||||
|
|
||||||
|
sherpa_onnx::ParseOptions po(kUsageMessage);
|
||||||
|
sherpa_onnx::AudioTaggingConfig config;
|
||||||
|
config.Register(&po);
|
||||||
|
|
||||||
|
po.Read(argc, argv);
|
||||||
|
if (po.NumArgs() != 0) {
|
||||||
|
fprintf(stderr, "\nThis program does not support positional arguments\n\n");
|
||||||
|
po.PrintUsage();
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "%s\n", config.ToString().c_str());
|
||||||
|
|
||||||
|
if (!config.Validate()) {
|
||||||
|
fprintf(stderr, "Errors in config!\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
SHERPA_ONNX_LOGE("Creating audio tagger ...");
|
||||||
|
sherpa_onnx::AudioTagging tagger(config);
|
||||||
|
SHERPA_ONNX_LOGE("Audio tagger created created!");
|
||||||
|
|
||||||
|
sherpa_onnx::Microphone mic;
|
||||||
|
|
||||||
|
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
||||||
|
fprintf(stderr, "Num devices: %d\n", num_devices);
|
||||||
|
|
||||||
|
int32_t device_index = Pa_GetDefaultInputDevice();
|
||||||
|
|
||||||
|
if (device_index == paNoDevice) {
|
||||||
|
fprintf(stderr, "No default input device found\n");
|
||||||
|
fprintf(stderr, "If you are using Linux, please switch to \n");
|
||||||
|
fprintf(stderr, " ./bin/sherpa-onnx-alsa-offline-audio-tagging \n");
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
|
||||||
|
if (pDeviceIndex) {
|
||||||
|
fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
|
||||||
|
device_index = atoi(pDeviceIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int32_t i = 0; i != num_devices; ++i) {
|
||||||
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
|
||||||
|
fprintf(stderr, " %s %d %s\n", (i == device_index) ? "*" : " ", i,
|
||||||
|
info->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
PaStreamParameters param;
|
||||||
|
param.device = device_index;
|
||||||
|
|
||||||
|
fprintf(stderr, "Use device: %d\n", param.device);
|
||||||
|
|
||||||
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
||||||
|
fprintf(stderr, " Name: %s\n", info->name);
|
||||||
|
fprintf(stderr, " Max input channels: %d\n", info->maxInputChannels);
|
||||||
|
|
||||||
|
param.channelCount = 1;
|
||||||
|
param.sampleFormat = paFloat32;
|
||||||
|
|
||||||
|
param.suggestedLatency = info->defaultLowInputLatency;
|
||||||
|
param.hostApiSpecificStreamInfo = nullptr;
|
||||||
|
float mic_sample_rate = 16000;
|
||||||
|
const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
|
||||||
|
if (pSampleRateStr) {
|
||||||
|
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
|
||||||
|
mic_sample_rate = atof(pSampleRateStr);
|
||||||
|
}
|
||||||
|
|
||||||
|
float sample_rate = 16000;
|
||||||
|
|
||||||
|
PaStream *stream;
|
||||||
|
PaError err =
|
||||||
|
Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
|
||||||
|
mic_sample_rate,
|
||||||
|
0, // frames per buffer
|
||||||
|
paClipOff, // we won't output out of range samples
|
||||||
|
// so don't bother clipping them
|
||||||
|
RecordCallback, nullptr);
|
||||||
|
if (err != paNoError) {
|
||||||
|
fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
err = Pa_StartStream(stream);
|
||||||
|
fprintf(stderr, "Started\n");
|
||||||
|
|
||||||
|
if (err != paNoError) {
|
||||||
|
fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::thread t(DetectKeyPress);
|
||||||
|
while (!stop) {
|
||||||
|
switch (state) {
|
||||||
|
case State::kIdle:
|
||||||
|
break;
|
||||||
|
case State::kRecording:
|
||||||
|
break;
|
||||||
|
case State::kDecoding: {
|
||||||
|
std::vector<float> buf;
|
||||||
|
{
|
||||||
|
std::lock_guard<std::mutex> lock(samples_mutex);
|
||||||
|
buf = std::move(samples);
|
||||||
|
}
|
||||||
|
|
||||||
|
SHERPA_ONNX_LOGE("Computing...");
|
||||||
|
auto s = tagger.CreateStream();
|
||||||
|
s->AcceptWaveform(mic_sample_rate, buf.data(), buf.size());
|
||||||
|
auto results = tagger.Compute(s.get());
|
||||||
|
|
||||||
|
SHERPA_ONNX_LOGE("Result is:");
|
||||||
|
|
||||||
|
int32_t i = 0;
|
||||||
|
std::ostringstream os;
|
||||||
|
for (const auto &event : results) {
|
||||||
|
os << i << ": " << event.ToString() << "\n";
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
SHERPA_ONNX_LOGE("\n%s\n", os.str().c_str());
|
||||||
|
|
||||||
|
state = State::kIdle;
|
||||||
|
SHERPA_ONNX_LOGE("Press Enter to start");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Pa_Sleep(20); // sleep for 20ms
|
||||||
|
}
|
||||||
|
t.join();
|
||||||
|
|
||||||
|
err = Pa_CloseStream(stream);
|
||||||
|
if (err != paNoError) {
|
||||||
|
fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
|
||||||
|
exit(EXIT_FAILURE);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
@@ -223,14 +223,31 @@ Note that `zh` means Chinese, while `en` means English.
|
|||||||
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
||||||
fprintf(stderr, "Num devices: %d\n", num_devices);
|
fprintf(stderr, "Num devices: %d\n", num_devices);
|
||||||
|
|
||||||
PaStreamParameters param;
|
int32_t device_index = Pa_GetDefaultInputDevice();
|
||||||
|
if (device_index == paNoDevice) {
|
||||||
param.device = Pa_GetDefaultInputDevice();
|
|
||||||
if (param.device == paNoDevice) {
|
|
||||||
fprintf(stderr, "No default input device found\n");
|
fprintf(stderr, "No default input device found\n");
|
||||||
|
fprintf(stderr, "If you are using Linux, please switch to \n");
|
||||||
|
fprintf(stderr,
|
||||||
|
" ./bin/sherpa-onnx-alsa-offline-speaker-identification \n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "Use default device: %d\n", param.device);
|
|
||||||
|
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
|
||||||
|
if (pDeviceIndex) {
|
||||||
|
fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
|
||||||
|
device_index = atoi(pDeviceIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int32_t i = 0; i != num_devices; ++i) {
|
||||||
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
|
||||||
|
fprintf(stderr, " %s %d %s\n", (i == device_index) ? "*" : " ", i,
|
||||||
|
info->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
PaStreamParameters param;
|
||||||
|
param.device = device_index;
|
||||||
|
|
||||||
|
fprintf(stderr, "Use device: %d\n", param.device);
|
||||||
|
|
||||||
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
||||||
fprintf(stderr, " Name: %s\n", info->name);
|
fprintf(stderr, " Name: %s\n", info->name);
|
||||||
@@ -241,12 +258,18 @@ Note that `zh` means Chinese, while `en` means English.
|
|||||||
|
|
||||||
param.suggestedLatency = info->defaultLowInputLatency;
|
param.suggestedLatency = info->defaultLowInputLatency;
|
||||||
param.hostApiSpecificStreamInfo = nullptr;
|
param.hostApiSpecificStreamInfo = nullptr;
|
||||||
|
float mic_sample_rate = 16000;
|
||||||
|
const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
|
||||||
|
if (pSampleRateStr) {
|
||||||
|
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
|
||||||
|
mic_sample_rate = atof(pSampleRateStr);
|
||||||
|
}
|
||||||
float sample_rate = 16000;
|
float sample_rate = 16000;
|
||||||
|
|
||||||
PaStream *stream;
|
PaStream *stream;
|
||||||
PaError err =
|
PaError err =
|
||||||
Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
|
Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
|
||||||
sample_rate,
|
mic_sample_rate,
|
||||||
0, // frames per buffer
|
0, // frames per buffer
|
||||||
paClipOff, // we won't output out of range samples
|
paClipOff, // we won't output out of range samples
|
||||||
// so don't bother clipping them
|
// so don't bother clipping them
|
||||||
@@ -279,7 +302,7 @@ Note that `zh` means Chinese, while `en` means English.
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto s = extractor.CreateStream();
|
auto s = extractor.CreateStream();
|
||||||
s->AcceptWaveform(sample_rate, buf.data(), buf.size());
|
s->AcceptWaveform(mic_sample_rate, buf.data(), buf.size());
|
||||||
s->InputFinished();
|
s->InputFinished();
|
||||||
auto embedding = extractor.Compute(s.get());
|
auto embedding = extractor.Compute(s.get());
|
||||||
auto name = manager.Search(embedding.data(), threshold);
|
auto name = manager.Search(embedding.data(), threshold);
|
||||||
|
|||||||
@@ -139,14 +139,31 @@ for a list of pre-trained models to download.
|
|||||||
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
||||||
fprintf(stderr, "Num devices: %d\n", num_devices);
|
fprintf(stderr, "Num devices: %d\n", num_devices);
|
||||||
|
|
||||||
PaStreamParameters param;
|
int32_t device_index = Pa_GetDefaultInputDevice();
|
||||||
|
|
||||||
param.device = Pa_GetDefaultInputDevice();
|
if (device_index == paNoDevice) {
|
||||||
if (param.device == paNoDevice) {
|
|
||||||
fprintf(stderr, "No default input device found\n");
|
fprintf(stderr, "No default input device found\n");
|
||||||
|
fprintf(stderr, "If you are using Linux, please switch to \n");
|
||||||
|
fprintf(stderr, " ./bin/sherpa-onnx-alsa-offline \n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "Use default device: %d\n", param.device);
|
|
||||||
|
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
|
||||||
|
if (pDeviceIndex) {
|
||||||
|
fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
|
||||||
|
device_index = atoi(pDeviceIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int32_t i = 0; i != num_devices; ++i) {
|
||||||
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
|
||||||
|
fprintf(stderr, " %s %d %s\n", (i == device_index) ? "*" : " ", i,
|
||||||
|
info->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
PaStreamParameters param;
|
||||||
|
param.device = device_index;
|
||||||
|
|
||||||
|
fprintf(stderr, "Use device: %d\n", param.device);
|
||||||
|
|
||||||
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
||||||
fprintf(stderr, " Name: %s\n", info->name);
|
fprintf(stderr, " Name: %s\n", info->name);
|
||||||
@@ -157,12 +174,18 @@ for a list of pre-trained models to download.
|
|||||||
|
|
||||||
param.suggestedLatency = info->defaultLowInputLatency;
|
param.suggestedLatency = info->defaultLowInputLatency;
|
||||||
param.hostApiSpecificStreamInfo = nullptr;
|
param.hostApiSpecificStreamInfo = nullptr;
|
||||||
|
float mic_sample_rate = 16000;
|
||||||
|
const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
|
||||||
|
if (pSampleRateStr) {
|
||||||
|
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
|
||||||
|
mic_sample_rate = atof(pSampleRateStr);
|
||||||
|
}
|
||||||
float sample_rate = 16000;
|
float sample_rate = 16000;
|
||||||
|
|
||||||
PaStream *stream;
|
PaStream *stream;
|
||||||
PaError err =
|
PaError err =
|
||||||
Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
|
Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
|
||||||
sample_rate,
|
mic_sample_rate,
|
||||||
0, // frames per buffer
|
0, // frames per buffer
|
||||||
paClipOff, // we won't output out of range samples
|
paClipOff, // we won't output out of range samples
|
||||||
// so don't bother clipping them
|
// so don't bother clipping them
|
||||||
@@ -195,7 +218,7 @@ for a list of pre-trained models to download.
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto s = recognizer.CreateStream();
|
auto s = recognizer.CreateStream();
|
||||||
s->AcceptWaveform(sample_rate, buf.data(), buf.size());
|
s->AcceptWaveform(mic_sample_rate, buf.data(), buf.size());
|
||||||
recognizer.DecodeStream(s.get());
|
recognizer.DecodeStream(s.get());
|
||||||
SHERPA_ONNX_LOGE("Decoding Done! Result is:");
|
SHERPA_ONNX_LOGE("Decoding Done! Result is:");
|
||||||
SHERPA_ONNX_LOGE("%s", s->GetResult().text.c_str());
|
SHERPA_ONNX_LOGE("%s", s->GetResult().text.c_str());
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
#include "sherpa-onnx/csrc/online-recognizer.h"
|
#include "sherpa-onnx/csrc/online-recognizer.h"
|
||||||
|
|
||||||
bool stop = false;
|
bool stop = false;
|
||||||
|
float mic_sample_rate = 16000;
|
||||||
|
|
||||||
static int32_t RecordCallback(const void *input_buffer,
|
static int32_t RecordCallback(const void *input_buffer,
|
||||||
void * /*output_buffer*/,
|
void * /*output_buffer*/,
|
||||||
@@ -24,7 +25,8 @@ static int32_t RecordCallback(const void *input_buffer,
|
|||||||
void *user_data) {
|
void *user_data) {
|
||||||
auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(user_data);
|
auto stream = reinterpret_cast<sherpa_onnx::OnlineStream *>(user_data);
|
||||||
|
|
||||||
stream->AcceptWaveform(16000, reinterpret_cast<const float *>(input_buffer),
|
stream->AcceptWaveform(mic_sample_rate,
|
||||||
|
reinterpret_cast<const float *>(input_buffer),
|
||||||
frames_per_buffer);
|
frames_per_buffer);
|
||||||
|
|
||||||
return stop ? paComplete : paContinue;
|
return stop ? paComplete : paContinue;
|
||||||
@@ -81,14 +83,31 @@ for a list of pre-trained models to download.
|
|||||||
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
||||||
fprintf(stderr, "Num devices: %d\n", num_devices);
|
fprintf(stderr, "Num devices: %d\n", num_devices);
|
||||||
|
|
||||||
PaStreamParameters param;
|
int32_t device_index = Pa_GetDefaultInputDevice();
|
||||||
|
|
||||||
param.device = Pa_GetDefaultInputDevice();
|
if (device_index == paNoDevice) {
|
||||||
if (param.device == paNoDevice) {
|
|
||||||
fprintf(stderr, "No default input device found\n");
|
fprintf(stderr, "No default input device found\n");
|
||||||
|
fprintf(stderr, "If you are using Linux, please switch to \n");
|
||||||
|
fprintf(stderr, " ./bin/sherpa-onnx-alsa \n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "Use default device: %d\n", param.device);
|
|
||||||
|
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
|
||||||
|
if (pDeviceIndex) {
|
||||||
|
fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
|
||||||
|
device_index = atoi(pDeviceIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int32_t i = 0; i != num_devices; ++i) {
|
||||||
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
|
||||||
|
fprintf(stderr, " %s %d %s\n", (i == device_index) ? "*" : " ", i,
|
||||||
|
info->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
PaStreamParameters param;
|
||||||
|
param.device = device_index;
|
||||||
|
|
||||||
|
fprintf(stderr, "Use device: %d\n", param.device);
|
||||||
|
|
||||||
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
||||||
fprintf(stderr, " Name: %s\n", info->name);
|
fprintf(stderr, " Name: %s\n", info->name);
|
||||||
@@ -99,6 +118,11 @@ for a list of pre-trained models to download.
|
|||||||
|
|
||||||
param.suggestedLatency = info->defaultLowInputLatency;
|
param.suggestedLatency = info->defaultLowInputLatency;
|
||||||
param.hostApiSpecificStreamInfo = nullptr;
|
param.hostApiSpecificStreamInfo = nullptr;
|
||||||
|
const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
|
||||||
|
if (pSampleRateStr) {
|
||||||
|
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
|
||||||
|
mic_sample_rate = atof(pSampleRateStr);
|
||||||
|
}
|
||||||
float sample_rate = 16000;
|
float sample_rate = 16000;
|
||||||
|
|
||||||
PaStream *stream;
|
PaStream *stream;
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
|
|||||||
Subdevices: 1/1
|
Subdevices: 1/1
|
||||||
Subdevice #0: subdevice #0
|
Subdevice #0: subdevice #0
|
||||||
|
|
||||||
and if you want to select card 3 and the device 0 on that card, please use:
|
and if you want to select card 3 and device 0 on that card, please use:
|
||||||
|
|
||||||
plughw:3,0
|
plughw:3,0
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
#include "sherpa-onnx/csrc/circular-buffer.h"
|
#include "sherpa-onnx/csrc/circular-buffer.h"
|
||||||
#include "sherpa-onnx/csrc/microphone.h"
|
#include "sherpa-onnx/csrc/microphone.h"
|
||||||
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||||
|
#include "sherpa-onnx/csrc/resample.h"
|
||||||
#include "sherpa-onnx/csrc/voice-activity-detector.h"
|
#include "sherpa-onnx/csrc/voice-activity-detector.h"
|
||||||
|
|
||||||
bool stop = false;
|
bool stop = false;
|
||||||
@@ -115,14 +116,29 @@ to download models for offline ASR.
|
|||||||
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
||||||
fprintf(stderr, "Num devices: %d\n", num_devices);
|
fprintf(stderr, "Num devices: %d\n", num_devices);
|
||||||
|
|
||||||
PaStreamParameters param;
|
int32_t device_index = Pa_GetDefaultInputDevice();
|
||||||
|
|
||||||
param.device = Pa_GetDefaultInputDevice();
|
if (device_index == paNoDevice) {
|
||||||
if (param.device == paNoDevice) {
|
|
||||||
fprintf(stderr, "No default input device found\n");
|
fprintf(stderr, "No default input device found\n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "Use default device: %d\n", param.device);
|
|
||||||
|
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
|
||||||
|
if (pDeviceIndex) {
|
||||||
|
fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
|
||||||
|
device_index = atoi(pDeviceIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int32_t i = 0; i != num_devices; ++i) {
|
||||||
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
|
||||||
|
fprintf(stderr, " %s %d %s\n", (i == device_index) ? "*" : " ", i,
|
||||||
|
info->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
PaStreamParameters param;
|
||||||
|
param.device = device_index;
|
||||||
|
|
||||||
|
fprintf(stderr, "Use device: %d\n", param.device);
|
||||||
|
|
||||||
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
||||||
fprintf(stderr, " Name: %s\n", info->name);
|
fprintf(stderr, " Name: %s\n", info->name);
|
||||||
@@ -133,12 +149,27 @@ to download models for offline ASR.
|
|||||||
|
|
||||||
param.suggestedLatency = info->defaultLowInputLatency;
|
param.suggestedLatency = info->defaultLowInputLatency;
|
||||||
param.hostApiSpecificStreamInfo = nullptr;
|
param.hostApiSpecificStreamInfo = nullptr;
|
||||||
|
float mic_sample_rate = 16000;
|
||||||
|
const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
|
||||||
|
if (pSampleRateStr) {
|
||||||
|
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
|
||||||
|
mic_sample_rate = atof(pSampleRateStr);
|
||||||
|
}
|
||||||
float sample_rate = 16000;
|
float sample_rate = 16000;
|
||||||
|
std::unique_ptr<sherpa_onnx::LinearResample> resampler;
|
||||||
|
if (mic_sample_rate != sample_rate) {
|
||||||
|
float min_freq = std::min(mic_sample_rate, sample_rate);
|
||||||
|
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
|
||||||
|
|
||||||
|
int32_t lowpass_filter_width = 6;
|
||||||
|
resampler = std::make_unique<sherpa_onnx::LinearResample>(
|
||||||
|
mic_sample_rate, sample_rate, lowpass_cutoff, lowpass_filter_width);
|
||||||
|
}
|
||||||
|
|
||||||
PaStream *stream;
|
PaStream *stream;
|
||||||
PaError err =
|
PaError err =
|
||||||
Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
|
Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
|
||||||
sample_rate,
|
mic_sample_rate,
|
||||||
0, // frames per buffer
|
0, // frames per buffer
|
||||||
paClipOff, // we won't output out of range samples
|
paClipOff, // we won't output out of range samples
|
||||||
// so don't bother clipping them
|
// so don't bother clipping them
|
||||||
@@ -168,6 +199,13 @@ to download models for offline ASR.
|
|||||||
while (buffer.Size() >= window_size) {
|
while (buffer.Size() >= window_size) {
|
||||||
std::vector<float> samples = buffer.Get(buffer.Head(), window_size);
|
std::vector<float> samples = buffer.Get(buffer.Head(), window_size);
|
||||||
buffer.Pop(window_size);
|
buffer.Pop(window_size);
|
||||||
|
|
||||||
|
if (resampler) {
|
||||||
|
std::vector<float> tmp;
|
||||||
|
resampler->Resample(samples.data(), samples.size(), true, &tmp);
|
||||||
|
samples = std::move(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
vad->AcceptWaveform(samples.data(), samples.size());
|
vad->AcceptWaveform(samples.data(), samples.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,6 +12,7 @@
|
|||||||
#include "portaudio.h" // NOLINT
|
#include "portaudio.h" // NOLINT
|
||||||
#include "sherpa-onnx/csrc/circular-buffer.h"
|
#include "sherpa-onnx/csrc/circular-buffer.h"
|
||||||
#include "sherpa-onnx/csrc/microphone.h"
|
#include "sherpa-onnx/csrc/microphone.h"
|
||||||
|
#include "sherpa-onnx/csrc/resample.h"
|
||||||
#include "sherpa-onnx/csrc/voice-activity-detector.h"
|
#include "sherpa-onnx/csrc/voice-activity-detector.h"
|
||||||
#include "sherpa-onnx/csrc/wave-writer.h"
|
#include "sherpa-onnx/csrc/wave-writer.h"
|
||||||
|
|
||||||
@@ -76,14 +77,31 @@ wget https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
|
|||||||
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
PaDeviceIndex num_devices = Pa_GetDeviceCount();
|
||||||
fprintf(stderr, "Num devices: %d\n", num_devices);
|
fprintf(stderr, "Num devices: %d\n", num_devices);
|
||||||
|
|
||||||
PaStreamParameters param;
|
int32_t device_index = Pa_GetDefaultInputDevice();
|
||||||
|
|
||||||
param.device = Pa_GetDefaultInputDevice();
|
if (device_index == paNoDevice) {
|
||||||
if (param.device == paNoDevice) {
|
|
||||||
fprintf(stderr, "No default input device found\n");
|
fprintf(stderr, "No default input device found\n");
|
||||||
|
fprintf(stderr, "If you are using Linux, please switch to \n");
|
||||||
|
fprintf(stderr, " ./bin/sherpa-onnx-vad-alsa \n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "Use default device: %d\n", param.device);
|
|
||||||
|
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
|
||||||
|
if (pDeviceIndex) {
|
||||||
|
fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
|
||||||
|
device_index = atoi(pDeviceIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int32_t i = 0; i != num_devices; ++i) {
|
||||||
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
|
||||||
|
fprintf(stderr, " %s %d %s\n", (i == device_index) ? "*" : " ", i,
|
||||||
|
info->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
PaStreamParameters param;
|
||||||
|
param.device = device_index;
|
||||||
|
|
||||||
|
fprintf(stderr, "Use device: %d\n", param.device);
|
||||||
|
|
||||||
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
|
||||||
fprintf(stderr, " Name: %s\n", info->name);
|
fprintf(stderr, " Name: %s\n", info->name);
|
||||||
@@ -94,12 +112,28 @@ wget https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
|
|||||||
|
|
||||||
param.suggestedLatency = info->defaultLowInputLatency;
|
param.suggestedLatency = info->defaultLowInputLatency;
|
||||||
param.hostApiSpecificStreamInfo = nullptr;
|
param.hostApiSpecificStreamInfo = nullptr;
|
||||||
|
float mic_sample_rate = 16000;
|
||||||
|
const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
|
||||||
|
if (pSampleRateStr) {
|
||||||
|
fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
|
||||||
|
mic_sample_rate = atof(pSampleRateStr);
|
||||||
|
}
|
||||||
float sample_rate = 16000;
|
float sample_rate = 16000;
|
||||||
|
|
||||||
|
std::unique_ptr<sherpa_onnx::LinearResample> resampler;
|
||||||
|
if (mic_sample_rate != sample_rate) {
|
||||||
|
float min_freq = std::min(mic_sample_rate, sample_rate);
|
||||||
|
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
|
||||||
|
|
||||||
|
int32_t lowpass_filter_width = 6;
|
||||||
|
resampler = std::make_unique<sherpa_onnx::LinearResample>(
|
||||||
|
mic_sample_rate, sample_rate, lowpass_cutoff, lowpass_filter_width);
|
||||||
|
}
|
||||||
|
|
||||||
PaStream *stream;
|
PaStream *stream;
|
||||||
PaError err =
|
PaError err =
|
||||||
Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
|
Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
|
||||||
sample_rate,
|
mic_sample_rate,
|
||||||
0, // frames per buffer
|
0, // frames per buffer
|
||||||
paClipOff, // we won't output out of range samples
|
paClipOff, // we won't output out of range samples
|
||||||
// so don't bother clipping them
|
// so don't bother clipping them
|
||||||
@@ -131,6 +165,13 @@ wget https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
|
|||||||
while (buffer.Size() >= window_size) {
|
while (buffer.Size() >= window_size) {
|
||||||
std::vector<float> samples = buffer.Get(buffer.Head(), window_size);
|
std::vector<float> samples = buffer.Get(buffer.Head(), window_size);
|
||||||
buffer.Pop(window_size);
|
buffer.Pop(window_size);
|
||||||
|
|
||||||
|
if (resampler) {
|
||||||
|
std::vector<float> tmp;
|
||||||
|
resampler->Resample(samples.data(), samples.size(), true, &tmp);
|
||||||
|
samples = std::move(tmp);
|
||||||
|
}
|
||||||
|
|
||||||
vad->AcceptWaveform(samples.data(), samples.size());
|
vad->AcceptWaveform(samples.data(), samples.size());
|
||||||
|
|
||||||
if (vad->IsSpeechDetected() && !printed) {
|
if (vad->IsSpeechDetected() && !printed) {
|
||||||
@@ -149,7 +190,7 @@ wget https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
|
|||||||
char filename[128];
|
char filename[128];
|
||||||
snprintf(filename, sizeof(filename), "seg-%d-%.3fs.wav", k, duration);
|
snprintf(filename, sizeof(filename), "seg-%d-%.3fs.wav", k, duration);
|
||||||
k += 1;
|
k += 1;
|
||||||
sherpa_onnx::WriteWave(filename, 16000, segment.samples.data(),
|
sherpa_onnx::WriteWave(filename, sample_rate, segment.samples.data(),
|
||||||
segment.samples.size());
|
segment.samples.size());
|
||||||
fprintf(stderr, "Saved to %s\n", filename);
|
fprintf(stderr, "Saved to %s\n", filename);
|
||||||
fprintf(stderr, "----------\n");
|
fprintf(stderr, "----------\n");
|
||||||
|
|||||||
Reference in New Issue
Block a user