// Patch context: sherpa-onnx/csrc/sherpa-onnx-microphone.cc
//
// This change replaces the byte-wise std::transform/std::tolower lowercasing
// that main() applied to the recognized text with a locale-aware helper, and
// the call site becomes:
//
//   display.Print(segment_index, tolowerUnicode(text));

#include <clocale>
#include <cstdlib>
#include <cwctype>
#include <string>

/// @brief Lowercase a multibyte string using the user's LC_CTYPE locale.
///
/// The input is decoded to wide characters with std::mbstowcs, each wide
/// character is mapped through std::towlower, and the result is encoded back
/// with std::wcstombs.
///
/// @param input_str Text encoded in the multibyte encoding of the environment
///                  locale. Decoding stops at the first NUL byte, so text
///                  after an embedded NUL is not part of the result.
/// @return The lowercased text, with no padding or trailing NUL bytes. If the
///         text cannot be decoded or re-encoded in the current locale, only
///         the ASCII letters A-Z are lowercased and every other byte is
///         returned unchanged.
static std::string tolowerUnicode(const std::string& input_str) {
  if (input_str.empty()) {
    return {};
  }

  // Fallback used when the locale cannot round-trip the text: touching only
  // bytes in 'A'..'Z' keeps all bytes >= 0x80 intact.
  const auto lower_ascii_only = [&input_str]() {
    std::string lowered(input_str);
    for (char& ch : lowered) {
      if (ch >= 'A' && ch <= 'Z') {
        ch = static_cast<char>(ch - 'A' + 'a');
      }
    }
    return lowered;
  };

  // Adopt the environment's character classification once. Only LC_CTYPE is
  // switched so that number formatting/parsing (LC_NUMERIC) elsewhere in the
  // program keeps its current behavior.
  static const bool locale_initialized =
      (std::setlocale(LC_CTYPE, "") != nullptr);
  (void)locale_initialized;

  // A multibyte string never decodes to more wide characters than it has
  // bytes, so input_str.size() elements are always enough.
  std::wstring wide(input_str.size(), L'\0');
  const std::size_t wide_len =
      std::mbstowcs(&wide[0], input_str.c_str(), wide.size());
  if (wide_len == static_cast<std::size_t>(-1)) {
    return lower_ascii_only();  // invalid sequence for this locale
  }
  wide.resize(wide_len);

  for (wchar_t& wc : wide) {
    wc = static_cast<wchar_t>(std::towlower(static_cast<std::wint_t>(wc)));
  }

  // Lowercasing may change the encoded length of a character, so size the
  // output for the worst case (MB_CUR_MAX bytes per wide character) instead
  // of reusing the input's byte count.
  std::string lowercase_str(wide.size() * MB_CUR_MAX, '\0');
  const std::size_t byte_len =
      std::wcstombs(&lowercase_str[0], wide.c_str(), lowercase_str.size());
  if (byte_len == static_cast<std::size_t>(-1)) {
    return lower_ascii_only();  // character not representable in this locale
  }
  lowercase_str.resize(byte_len);

  return lowercase_str;
}