Add Streaming zipformer (#50)
This commit is contained in:
85
sherpa-onnx/csrc/text-utils.h
Normal file
85
sherpa-onnx/csrc/text-utils.h
Normal file
@@ -0,0 +1,85 @@
|
||||
// sherpa-onnx/csrc/text-utils.h
|
||||
//
|
||||
// Copyright 2009-2011 Saarland University; Microsoft Corporation
|
||||
// Copyright 2023 Xiaomi Corporation
|
||||
#ifndef SHERPA_ONNX_CSRC_TEXT_UTILS_H_
|
||||
#define SHERPA_ONNX_CSRC_TEXT_UTILS_H_
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define SHERPA_ONNX_STRTOLL(cur_cstr, end_cstr) \
|
||||
_strtoi64(cur_cstr, end_cstr, 10);
|
||||
#else
|
||||
#define SHERPA_ONNX_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10);
|
||||
#endif
|
||||
|
||||
// This file is copied/modified from
|
||||
// https://github.com/kaldi-asr/kaldi/blob/master/src/util/text-utils.h
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
/// Split a string using any of the single character delimiters.
|
||||
/// If omit_empty_strings == true, the output will contain any
|
||||
/// nonempty strings after splitting on any of the
|
||||
/// characters in the delimiter. If omit_empty_strings == false,
|
||||
/// the output will contain n+1 strings if there are n characters
|
||||
/// in the set "delim" within the input string. In this case
|
||||
/// the empty string is split to a single empty string.
|
||||
void SplitStringToVector(const std::string &full, const char *delim,
|
||||
bool omit_empty_strings,
|
||||
std::vector<std::string> *out);
|
||||
|
||||
/**
|
||||
\brief Split a string (e.g. 1:2:3) into a vector of integers.
|
||||
|
||||
\param [in] delim String containing a list of characters, any of which
|
||||
is allowed as a delimiter.
|
||||
\param [in] omit_empty_strings If true, empty strings between delimiters are
|
||||
allowed and will not produce an output integer; if false,
|
||||
instances of characters in 'delim' that are consecutive or
|
||||
at the start or end of the string would be an error.
|
||||
You'll normally want this to be true if 'delim' consists
|
||||
of spaces, and false otherwise.
|
||||
\param [out] out The output list of integers.
|
||||
*/
|
||||
template <class I>
|
||||
bool SplitStringToIntegers(const std::string &full, const char *delim,
|
||||
bool omit_empty_strings, // typically false [but
|
||||
// should probably be true
|
||||
// if "delim" is spaces].
|
||||
std::vector<I> *out) {
|
||||
static_assert(std::is_integral<I>::value, "");
|
||||
if (*(full.c_str()) == '\0') {
|
||||
out->clear();
|
||||
return true;
|
||||
}
|
||||
std::vector<std::string> split;
|
||||
SplitStringToVector(full, delim, omit_empty_strings, &split);
|
||||
out->resize(split.size());
|
||||
for (size_t i = 0; i < split.size(); i++) {
|
||||
const char *this_str = split[i].c_str();
|
||||
char *end = NULL;
|
||||
int64_t j = 0;
|
||||
j = SHERPA_ONNX_STRTOLL(this_str, &end);
|
||||
if (end == this_str || *end != '\0') {
|
||||
out->clear();
|
||||
return false;
|
||||
} else {
|
||||
I jI = static_cast<I>(j);
|
||||
if (static_cast<int64_t>(jI) != j) {
|
||||
// output type cannot fit this integer.
|
||||
out->clear();
|
||||
return false;
|
||||
}
|
||||
(*out)[i] = jI;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_CSRC_TEXT_UTILS_H_
|
||||
Reference in New Issue
Block a user