This repository has been archived on 2025-08-26. You can view files and clone it, but cannot push or open issues or pull requests.
Files
enginex_bi_series-sherpa-onnx/sherpa-onnx/csrc/transducer-keyword-decoder.h
Wei Kang b6c020901a decoder for open vocabulary keyword spotting (#505)
* various fixes to ContextGraph to support open vocabulary keywords decoder

* Add keyword spotter runtime

* Add binary

* First version works

* Minor fixes

* update text2token

* default values

* Add jni for kws

* add kws android project

* Minor fixes

* Remove unused interface

* Minor fixes

* Add workflow

* handle extra info in texts

* Minor fixes

* Add more comments

* Fix ci

* fix cpp style

* Add input box in android demo so that users can specify their keywords

* Fix cpp style

* Fix comments

* Minor fixes

* Minor fixes

* minor fixes

* Minor fixes

* Minor fixes

* Add CI

* Fix code style

* cpplint

* Fix comments

* Fix error
2024-01-20 22:52:41 +08:00

63 lines
1.6 KiB
C++

// sherpa-onnx/csrc/transducer-keyword-decoder.h
//
// Copyright (c) 2023-2024 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_TRANSDUCER_KEYWORD_DECODER_H_
#define SHERPA_ONNX_CSRC_TRANSDUCER_KEYWORD_DECODER_H_
#include <string>
#include <utility>
#include <vector>
#include "sherpa-onnx/csrc/online-stream.h"
#include "sherpa-onnx/csrc/online-transducer-model.h"
namespace sherpa_onnx {
/// Per-stream decoding state and output of the keyword decoder.
struct TransducerKeywordResult {
  /// Count of frames (after subsampling) that have been decoded up to now.
  int32_t frame_offset{0};

  /// Token IDs decoded for the keywords.
  std::vector<int64_t> tokens;

  /// The keyword that was triggered.
  std::string keyword;

  /// Count of trailing blank frames decoded up to now.
  int32_t num_trailing_blanks{0};

  /// Output frame indexes: timestamps[i] is the frame at which tokens[i]
  /// was decoded.
  std::vector<int32_t> timestamps;

  /// Hypotheses container; only used by modified beam_search.
  Hypotheses hyps;
};
class TransducerKeywordDecoder {
public:
TransducerKeywordDecoder(OnlineTransducerModel *model,
int32_t max_active_paths,
int32_t num_trailing_blanks, int32_t unk_id)
: model_(model),
max_active_paths_(max_active_paths),
num_trailing_blanks_(num_trailing_blanks),
unk_id_(unk_id) {}
TransducerKeywordResult GetEmptyResult() const;
void Decode(Ort::Value encoder_out, OnlineStream **ss,
std::vector<TransducerKeywordResult> *result);
private:
OnlineTransducerModel *model_; // Not owned
int32_t max_active_paths_;
int32_t num_trailing_blanks_;
int32_t unk_id_;
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_TRANSDUCER_KEYWORD_DECODER_H_