Add C++ microphone examples for audio tagging (#749)

This commit is contained in:
Fangjun Kuang
2024-04-10 21:00:35 +08:00
committed by GitHub
parent f20291cadc
commit 042976ea6e
24 changed files with 707 additions and 61 deletions

View File

@@ -223,14 +223,31 @@ Note that `zh` means Chinese, while `en` means English.
PaDeviceIndex num_devices = Pa_GetDeviceCount();
fprintf(stderr, "Num devices: %d\n", num_devices);
PaStreamParameters param;
param.device = Pa_GetDefaultInputDevice();
if (param.device == paNoDevice) {
int32_t device_index = Pa_GetDefaultInputDevice();
if (device_index == paNoDevice) {
fprintf(stderr, "No default input device found\n");
fprintf(stderr, "If you are using Linux, please switch to \n");
fprintf(stderr,
" ./bin/sherpa-onnx-alsa-offline-speaker-identification \n");
exit(EXIT_FAILURE);
}
fprintf(stderr, "Use default device: %d\n", param.device);
// Optional override: SHERPA_ONNX_MIC_DEVICE selects the PortAudio input
// device by index instead of the default input device.
const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
if (pDeviceIndex) {
  fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
  device_index = atoi(pDeviceIndex);
  // Pa_GetDeviceInfo() returns nullptr for an out-of-range index and its
  // result is dereferenced further down, so reject bad values here instead
  // of crashing.
  if (device_index < 0 || device_index >= num_devices) {
    fprintf(stderr, "Invalid device index %d. Valid range is [0, %d)\n",
            device_index, num_devices);
    exit(EXIT_FAILURE);
  }
}
for (int32_t i = 0; i != num_devices; ++i) {
const PaDeviceInfo *info = Pa_GetDeviceInfo(i);
fprintf(stderr, " %s %d %s\n", (i == device_index) ? "*" : " ", i,
info->name);
}
PaStreamParameters param;
param.device = device_index;
fprintf(stderr, "Use device: %d\n", param.device);
const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
fprintf(stderr, " Name: %s\n", info->name);
@@ -241,12 +258,18 @@ Note that `zh` means Chinese, while `en` means English.
param.suggestedLatency = info->defaultLowInputLatency;
param.hostApiSpecificStreamInfo = nullptr;
// Sample rate used to open the microphone stream. Defaults to 16000 and can
// be overridden with the SHERPA_ONNX_MIC_SAMPLE_RATE environment variable.
float mic_sample_rate = 16000;
const char *pSampleRateStr = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
if (pSampleRateStr) {
  // Parse first, then log, so the message reports the rate actually in use
  // rather than the 16000 default.
  mic_sample_rate = atof(pSampleRateStr);
  fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
}
float sample_rate = 16000;
PaStream *stream;
PaError err =
Pa_OpenStream(&stream, &param, nullptr, /* &outputParameters, */
sample_rate,
mic_sample_rate,
0, // frames per buffer
paClipOff, // we won't output out of range samples
// so don't bother clipping them
@@ -279,7 +302,7 @@ Note that `zh` means Chinese, while `en` means English.
}
auto s = extractor.CreateStream();
s->AcceptWaveform(sample_rate, buf.data(), buf.size());
s->AcceptWaveform(mic_sample_rate, buf.data(), buf.size());
s->InputFinished();
auto embedding = extractor.Compute(s.get());
auto name = manager.Search(embedding.data(), threshold);