Add a Python API for offline decoding (#110)

This commit is contained in:
manyeyes
2023-04-02 13:17:43 +08:00
committed by GitHub
parent 94d77fa52e
commit 3f7e0c23ac
17 changed files with 712 additions and 15 deletions

View File

@@ -16,20 +16,7 @@
namespace sherpa_onnx {
// Holds the output of offline recognition: the recognized text, its
// token-level decomposition, and one timestamp per token.
struct OfflineRecognitionResult {
// Recognition results.
// For English, it consists of space-separated words.
// For Chinese, it consists of Chinese words without spaces.
std::string text;
// Decoded results at the token level.
// For instance, for BPE-based models it consists of a list of BPE tokens.
std::vector<std::string> tokens;
// Per-token timestamps; timestamps.size() == tokens.size().
// timestamps[i] records the time in seconds when tokens[i] is decoded.
// NOTE(review): presumably measured from the start of the audio - confirm.
std::vector<float> timestamps;
};
struct OfflineRecognitionResult;
struct OfflineRecognizerConfig {
OfflineFeatureExtractorConfig feat_config;

View File

@@ -13,7 +13,21 @@
#include "sherpa-onnx/csrc/parse-options.h"
namespace sherpa_onnx {
struct OfflineRecognitionResult;
// Holds the output of offline recognition: the recognized text, its
// token-level decomposition, and one timestamp per token.
struct OfflineRecognitionResult {
// Recognition results.
// For English, it consists of space-separated words.
// For Chinese, it consists of Chinese words without spaces.
std::string text;
// Decoded results at the token level.
// For instance, for BPE-based models it consists of a list of BPE tokens.
std::vector<std::string> tokens;
// Per-token timestamps; timestamps.size() == tokens.size().
// timestamps[i] records the time in seconds when tokens[i] is decoded.
// NOTE(review): presumably measured from the start of the audio - confirm.
std::vector<float> timestamps;
};
struct OfflineFeatureExtractorConfig {
// Sampling rate used by the feature extractor. If it is different from