Add function 'tolowerUnicode' in sherpa-onnx-microphone (fix #791) (#812)

This commit is contained in:
Daniel Doña
2024-04-26 03:19:32 +02:00
committed by GitHub
parent f7b3735621
commit fa2429920f

View File

@@ -7,7 +7,8 @@
#include <stdlib.h> #include <stdlib.h>
#include <algorithm> #include <algorithm>
#include <cctype> // std::tolower #include <clocale>
#include <cwctype>
#include "portaudio.h" // NOLINT #include "portaudio.h" // NOLINT
#include "sherpa-onnx/csrc/display.h" #include "sherpa-onnx/csrc/display.h"
@@ -37,6 +38,31 @@ static void Handler(int32_t sig) {
fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n"); fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
} }
// Returns a lowercase copy of input_str, converting through wide characters
// so that multibyte (e.g. UTF-8) letters are lowercased according to the
// locale selected by the environment.
//
// @param input_str Multibyte string encoded in the environment's locale.
//                  Conversion stops at the first embedded '\0', if any.
// @return The lowercased multibyte string, sized exactly to its content
//         (no trailing '\0' padding). If input_str cannot be converted in
//         the active locale, it is returned unchanged.
//
// Side effect: switches the process-wide C locale to the environment's
// locale on every call.
static std::string tolowerUnicode(const std::string& input_str) {
  if (input_str.empty()) {
    return std::string();
  }

  // Use system locale
  // NOTE(review): only LC_CTYPE is needed for the conversions below; LC_ALL
  // is kept so the global locale side effect stays the same as before.
  std::setlocale(LC_ALL, "");

  // From char string to wchar string. A multibyte string never decodes to
  // more wide characters than it has bytes, so size() slots are enough.
  std::wstring wide_str(input_str.size(), L'\0');
  const size_t wide_len =
      std::mbstowcs(&wide_str[0], input_str.c_str(), wide_str.size());
  if (wide_len == static_cast<size_t>(-1)) {
    // Invalid multibyte sequence for the active locale: leave text as is.
    return input_str;
  }
  wide_str.resize(wide_len);

  // towlower() returns its argument unchanged when there is no lowercase
  // mapping, so no separate iswupper() check is required.
  for (wchar_t& wc : wide_str) {
    wc = static_cast<wchar_t>(std::towlower(wc));
  }

  // Back to char string. Each wide character needs at most MB_CUR_MAX bytes,
  // which also covers lowercase forms that are longer than the uppercase ones.
  std::string lowercase_str(wide_str.size() * MB_CUR_MAX, '\0');
  const size_t byte_len = std::wcstombs(&lowercase_str[0], wide_str.c_str(),
                                        lowercase_str.size());
  if (byte_len == static_cast<size_t>(-1)) {
    // A wide character is not representable in the active locale.
    return input_str;
  }
  lowercase_str.resize(byte_len);
  return lowercase_str;
}
int32_t main(int32_t argc, char *argv[]) { int32_t main(int32_t argc, char *argv[]) {
signal(SIGINT, Handler); signal(SIGINT, Handler);
@@ -172,11 +198,7 @@ for a list of pre-trained models to download.
if (!text.empty() && last_text != text) { if (!text.empty() && last_text != text) {
last_text = text; last_text = text;
display.Print(segment_index, tolowerUnicode(text));
std::transform(text.begin(), text.end(), text.begin(),
[](auto c) { return std::tolower(c); });
display.Print(segment_index, text);
fflush(stderr); fflush(stderr);
} }