Fix reading tokens.txt on Windows. (#1497)
This commit is contained in:
@@ -23,6 +23,29 @@
|
|||||||
|
|
||||||
namespace sherpa_onnx {
|
namespace sherpa_onnx {
|
||||||
|
|
||||||
|
namespace {

// Whitespace-trimming helpers, adapted from
// https://stackoverflow.com/questions/216823/how-to-trim-a-stdstring

// Default set of characters stripped by the helpers below: space, tab,
// newline, carriage return, form feed and vertical tab.
//
// Declared constexpr so the pointer itself is immutable: nothing in this
// translation unit can rebind it and thereby change the default trimming
// behaviour of TrimRight/TrimLeft/Trim.
constexpr const char *ws = " \t\n\r\f\v";

// Removes, in place, every trailing character of `s` that occurs in `t`.
//
// @param s  String to modify.
// @param t  NUL-terminated set of characters to strip; defaults to `ws`.
// @return   Reference to `s`.
inline std::string &TrimRight(std::string &s, const char *t = ws) {
  // find_last_not_of() yields npos when `s` is empty or consists solely of
  // characters from `t`; npos + 1 wraps to 0, so the whole string is erased.
  s.erase(s.find_last_not_of(t) + 1);
  return s;
}

// Removes, in place, every leading character of `s` that occurs in `t`.
//
// @param s  String to modify.
// @param t  NUL-terminated set of characters to strip; defaults to `ws`.
// @return   Reference to `s`.
inline std::string &TrimLeft(std::string &s, const char *t = ws) {
  // find_first_not_of() yields npos when no character outside `t` exists;
  // erase(0, npos) then clears the string.
  s.erase(0, s.find_first_not_of(t));
  return s;
}

// Removes, in place, characters of `t` from both ends of `s`. The right end
// is trimmed first, then the left end.
//
// @param s  String to modify.
// @param t  NUL-terminated set of characters to strip; defaults to `ws`.
// @return   Reference to `s`.
inline std::string &Trim(std::string &s, const char *t = ws) {
  return TrimLeft(TrimRight(s, t), t);
}

}  // namespace
|
||||||
|
|
||||||
std::unordered_map<std::string, int32_t> ReadTokens(
|
std::unordered_map<std::string, int32_t> ReadTokens(
|
||||||
std::istream &is,
|
std::istream &is,
|
||||||
std::unordered_map<int32_t, std::string> *id2token /*= nullptr*/) {
|
std::unordered_map<int32_t, std::string> *id2token /*= nullptr*/) {
|
||||||
@@ -33,6 +56,7 @@ std::unordered_map<std::string, int32_t> ReadTokens(
|
|||||||
std::string sym;
|
std::string sym;
|
||||||
int32_t id = -1;
|
int32_t id = -1;
|
||||||
while (std::getline(is, line)) {
|
while (std::getline(is, line)) {
|
||||||
|
Trim(line);
|
||||||
std::istringstream iss(line);
|
std::istringstream iss(line);
|
||||||
iss >> sym;
|
iss >> sym;
|
||||||
if (iss.eof()) {
|
if (iss.eof()) {
|
||||||
|
|||||||
Reference in New Issue
Block a user