Support GigaAM CTC models for Russian ASR (#1464)

See also https://github.com/salute-developers/GigaAM
This commit is contained in:
Fangjun Kuang
2024-10-25 10:55:16 +08:00
committed by GitHub
parent 2b40079faf
commit b41f6d2c94
24 changed files with 641 additions and 160 deletions

View File

@@ -5,8 +5,10 @@
#ifndef SHERPA_ONNX_CSRC_SYMBOL_TABLE_H_
#define SHERPA_ONNX_CSRC_SYMBOL_TABLE_H_
#include <istream>
#include <string>
#include <unordered_map>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
@@ -15,6 +17,16 @@
namespace sherpa_onnx {
// Reads tokens from the input stream `is` and returns a map from token
// string to integer ID.
//
// The same token can be mapped to different integer IDs, so
// we need an id2token argument here. (The returned map is keyed by token and
// can therefore hold only one ID per token; the reverse direction has to be
// recorded separately.)
//
// @param is        Input stream to read from.
//                  NOTE(review): the expected text format is not visible in
//                  this header; see the definition.
// @param id2token  Optional output pointer; defaults to nullptr.
//                  Presumably filled with the ID -> token mapping when it is
//                  non-null -- confirm against the definition.
// @return A map from token string to integer ID.
std::unordered_map<std::string, int32_t> ReadTokens(
std::istream &is,
std::unordered_map<int32_t, std::string> *id2token = nullptr);
// Converts a list of token strings to integer IDs using `token2id`.
//
// @param token2id  Map from token string to integer ID; this is the same map
//                  type that ReadTokens() returns.
// @param tokens    Token strings to convert.
// @return The resulting integer IDs.
//
// NOTE(review): how a token that is absent from `token2id` is handled cannot
// be determined from this declaration -- check the definition before relying
// on the size or contents of the result.
std::vector<int32_t> ConvertTokensToIds(
const std::unordered_map<std::string, int32_t> &token2id,
const std::vector<std::string> &tokens);
/// It manages mapping between symbols and integer IDs.
class SymbolTable {
public: