122 lines
3.9 KiB
C++
122 lines
3.9 KiB
C++
// sherpa-onnx/sharp-api/offline-api.h
|
|
//
|
|
// Copyright (c) 2023 Manyeyes Corporation
|
|
|
|
#pragma once
|
|
|
|
#include <cstdint>
#include <list>
|
|
|
|
namespace sherpa_onnx
|
|
{
|
|
/// Model files of an offline transducer model.
///
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download pre-trained models. That is, you can find encoder-xxx.onnx,
/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct
/// from there.
typedef struct SherpaOnnxOfflineTransducer {
  /// Encoder model file (encoder-xxx.onnx).
  const char* encoder_filename;
  /// Decoder model file (decoder-xxx.onnx).
  const char* decoder_filename;
  /// Joiner model file (joiner-xxx.onnx).
  const char* joiner_filename;
} SherpaOnnxOfflineTransducer;

/// Settings for an offline Paraformer model.
typedef struct SherpaOnnxOfflineParaformer {
  /// Model file; presumably the path to the Paraformer ONNX model
  /// (TODO confirm against the implementation).
  const char* model;
} SherpaOnnxOfflineParaformer;

/// Settings for an offline NeMo EncDecCTC model.
typedef struct SherpaOnnxOfflineNemoEncDecCtc {
  /// Model file; presumably the path to the NeMo CTC ONNX model
  /// (TODO confirm against the implementation).
  const char* model;
} SherpaOnnxOfflineNemoEncDecCtc;

typedef struct SherpaOnnxOfflineModelConfig {
|
|
SherpaOnnxOfflineTransducer transducer;
|
|
SherpaOnnxOfflineParaformer paraformer;
|
|
SherpaOnnxOfflineNemoEncDecCtc nemo_ctc;
|
|
const char* tokens;
|
|
const int32_t num_threads;
|
|
const bool debug;
|
|
} SherpaOnnxOfflineModelConfig;
|
|
|
|
/// Feature extraction settings.
///
/// It expects 16 kHz 16-bit single channel wave format.
typedef struct SherpaOnnxFeatureConfig {
  /// Sample rate of the input data. MUST match the one expected
  /// by the model. For instance, it should be 16000 for models provided
  /// by us.
  int32_t sample_rate;

  /// Feature dimension of the model.
  /// For instance, it should be 80 for models provided by us.
  int32_t feature_dim;
} SherpaOnnxFeatureConfig;

typedef struct SherpaOnnxOfflineRecognizerConfig {
|
|
SherpaOnnxFeatureConfig feat_config;
|
|
SherpaOnnxOfflineModelConfig model_config;
|
|
|
|
/// Possible values are: greedy_search, modified_beam_search
|
|
const char* decoding_method;
|
|
|
|
} SherpaOnnxOfflineRecognizerConfig;
|
|
|
|
/// Recognition result of one stream, as returned by GetOfflineStreamResult().
///
/// Presumably released with DestroyOfflineRecognizerResult() - TODO confirm
/// against the implementation.
typedef struct SherpaOnnxOfflineRecognizerResult {
  // Recognition results.
  // For English, it consists of space separated words.
  // For Chinese, it consists of Chinese words without spaces.
  char* text;

  // Length of `text`. Presumably a byte count - TODO confirm whether a
  // terminating NUL is included.
  int text_len;

  // The token-level fields below are not exposed yet; they are kept as
  // comments to record the intended layout.

  // Decoded results at the token level.
  // For instance, for BPE-based models it consists of a list of BPE tokens.
  // std::vector<std::string> tokens;

  // timestamps.size() == tokens.size()
  // timestamps[i] records the time in seconds when tokens[i] is decoded.
  // std::vector<float> timestamps;
} SherpaOnnxOfflineRecognizerResult;

/// Opaque handle to an offline recognizer.
///
/// Note: OfflineRecognizer here means a non-streaming recognizer.
/// "Offline" does not refer to network access: it does not need to access
/// the Internet during recognition. Everything is run locally.
typedef struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer;

/// Opaque handle to an offline stream; only ever used through a pointer
/// in this header.
typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream;

/// Creates an offline recognizer from `config`.
///
/// @param config  Recognizer configuration.
/// @return Pointer to the new recognizer. Presumably the caller releases it
///         with DestroyOfflineRecognizer() - TODO confirm.
extern "C" __declspec(dllexport)
SherpaOnnxOfflineRecognizer* __stdcall CreateOfflineRecognizer(
    const SherpaOnnxOfflineRecognizerConfig* config);

/// Creates a stream associated with the given recognizer.
///
/// @param sherpaOnnxOfflineRecognizer  Recognizer handle.
/// @return Pointer to the new stream. Presumably the caller releases it
///         with DestroyOfflineStream() - TODO confirm.
extern "C" __declspec(dllexport)
SherpaOnnxOfflineStream* __stdcall CreateOfflineStream(
    SherpaOnnxOfflineRecognizer* sherpaOnnxOfflineRecognizer);

/// Feeds audio samples into `stream`.
///
/// @param stream        Stream handle.
/// @param sample_rate   Sample rate of `samples`.
/// @param samples       Pointer to the audio samples as floats; presumably
///                      `samples_size` elements - TODO confirm.
/// @param samples_size  Number of entries in `samples`.
extern "C" __declspec(dllexport)
void __stdcall AcceptWaveform(
    SherpaOnnxOfflineStream* stream, int32_t sample_rate,
    const float* samples, int32_t samples_size);

/// Decodes a single stream with the given recognizer.
///
/// @param recognizer  Recognizer handle.
/// @param stream      Stream handle to decode.
extern "C" __declspec(dllexport)
void __stdcall DecodeOfflineStream(
    SherpaOnnxOfflineRecognizer* recognizer,
    SherpaOnnxOfflineStream* stream);

/// Decodes several streams with the given recognizer.
///
/// @param recognizer  Recognizer handle.
/// @param streams     Pointer to stream handles; presumably an array of
///                    `n` entries - TODO confirm.
/// @param n           Number of streams.
extern "C" __declspec(dllexport)
void __stdcall DecodeMultipleOfflineStreams(
    SherpaOnnxOfflineRecognizer* recognizer,
    SherpaOnnxOfflineStream** streams, int32_t n);

/// Returns the recognition result of `stream`.
///
/// @param stream  Stream handle.
/// @return Pointer to the result. Presumably the caller releases it with
///         DestroyOfflineRecognizerResult() - TODO confirm.
extern "C" __declspec(dllexport)
SherpaOnnxOfflineRecognizerResult* __stdcall GetOfflineStreamResult(
    SherpaOnnxOfflineStream* stream);

/// Destroys a recognizer.
///
/// @param recognizer  Recognizer handle; presumably one obtained from
///                    CreateOfflineRecognizer() - TODO confirm.
extern "C" __declspec(dllexport)
void __stdcall DestroyOfflineRecognizer(
    SherpaOnnxOfflineRecognizer* recognizer);

/// Destroys a stream.
///
/// @param stream  Stream handle; presumably one obtained from
///                CreateOfflineStream() - TODO confirm.
extern "C" __declspec(dllexport)
void __stdcall DestroyOfflineStream(
    SherpaOnnxOfflineStream* stream);

/// Destroys a recognition result.
///
/// @param r  Result pointer; presumably one obtained from
///           GetOfflineStreamResult() - TODO confirm.
extern "C" __declspec(dllexport)
void __stdcall DestroyOfflineRecognizerResult(
    SherpaOnnxOfflineRecognizerResult* r);

}// namespace sherpa_onnx
|