// sherpa-onnx/csrc/text-utils.cc // // Copyright 2009-2011 Saarland University; Microsoft Corporation // Copyright 2023 Xiaomi Corporation #include "sherpa-onnx/csrc/text-utils.h" #include #include #include #include #include #include #include // This file is copied/modified from // https://github.com/kaldi-asr/kaldi/blob/master/src/util/text-utils.cc namespace sherpa_onnx { // copied from kaldi/src/util/text-util.cc template class NumberIstream { public: explicit NumberIstream(std::istream &i) : in_(i) {} NumberIstream &operator>>(T &x) { if (!in_.good()) return *this; in_ >> x; if (!in_.fail() && RemainderIsOnlySpaces()) return *this; return ParseOnFail(&x); } private: std::istream &in_; bool RemainderIsOnlySpaces() { if (in_.tellg() != std::istream::pos_type(-1)) { std::string rem; in_ >> rem; if (rem.find_first_not_of(' ') != std::string::npos) { // there is not only spaces return false; } } in_.clear(); return true; } NumberIstream &ParseOnFail(T *x) { std::string str; in_.clear(); in_.seekg(0); // If the stream is broken even before trying // to read from it or if there are many tokens, // it's pointless to try. if (!(in_ >> str) || !RemainderIsOnlySpaces()) { in_.setstate(std::ios_base::failbit); return *this; } std::unordered_map inf_nan_map; // we'll keep just uppercase values. inf_nan_map["INF"] = std::numeric_limits::infinity(); inf_nan_map["+INF"] = std::numeric_limits::infinity(); inf_nan_map["-INF"] = -std::numeric_limits::infinity(); inf_nan_map["INFINITY"] = std::numeric_limits::infinity(); inf_nan_map["+INFINITY"] = std::numeric_limits::infinity(); inf_nan_map["-INFINITY"] = -std::numeric_limits::infinity(); inf_nan_map["NAN"] = std::numeric_limits::quiet_NaN(); inf_nan_map["+NAN"] = std::numeric_limits::quiet_NaN(); inf_nan_map["-NAN"] = -std::numeric_limits::quiet_NaN(); // MSVC inf_nan_map["1.#INF"] = std::numeric_limits::infinity(); inf_nan_map["-1.#INF"] = -std::numeric_limits::infinity(); inf_nan_map["1.#QNAN"] = std::numeric_limits::quiet_NaN(); inf_nan_map["-1.#QNAN"] = -std::numeric_limits::quiet_NaN(); std::transform(str.begin(), str.end(), str.begin(), ::toupper); if (inf_nan_map.find(str) != inf_nan_map.end()) { *x = inf_nan_map[str]; } else { in_.setstate(std::ios_base::failbit); } return *this; } }; /// ConvertStringToReal converts a string into either float or double /// and returns false if there was any kind of problem (i.e. the string /// was not a floating point number or contained extra non-whitespace junk). /// Be careful- this function will successfully read inf's or nan's. template bool ConvertStringToReal(const std::string &str, T *out) { std::istringstream iss(str); NumberIstream i(iss); i >> *out; if (iss.fail()) { // Number conversion failed. return false; } return true; } template bool ConvertStringToReal(const std::string &str, float *out); template bool ConvertStringToReal(const std::string &str, double *out); void SplitStringToVector(const std::string &full, const char *delim, bool omit_empty_strings, std::vector *out) { size_t start = 0, found = 0, end = full.size(); out->clear(); while (found != std::string::npos) { found = full.find_first_of(delim, start); // start != end condition is for when the delimiter is at the end if (!omit_empty_strings || (found != start && start != end)) out->push_back(full.substr(start, found - start)); start = found + 1; } } template bool SplitStringToFloats(const std::string &full, const char *delim, bool omit_empty_strings, // typically false std::vector *out) { assert(out != nullptr); if (*(full.c_str()) == '\0') { out->clear(); return true; } std::vector split; SplitStringToVector(full, delim, omit_empty_strings, &split); out->resize(split.size()); for (size_t i = 0; i < split.size(); ++i) { // assume atof never fails F f = 0; if (!ConvertStringToReal(split[i], &f)) return false; (*out)[i] = f; } return true; } // Instantiate the template above for float and double. template bool SplitStringToFloats(const std::string &full, const char *delim, bool omit_empty_strings, std::vector *out); template bool SplitStringToFloats(const std::string &full, const char *delim, bool omit_empty_strings, std::vector *out); } // namespace sherpa_onnx