decoder for open vocabulary keyword spotting (#505)
* various fixes to ContextGraph to support open vocabulary keywords decoder
* Add keyword spotter runtime
* Add binary
* First version works
* Minor fixes
* update text2token
* default values
* Add jni for kws
* add kws android project
* Minor fixes
* Remove unused interface
* Minor fixes
* Add workflow
* handle extra info in texts
* Minor fixes
* Add more comments
* Fix ci
* fix cpp style
* Add input box in android demo so that users can specify their keywords
* Fix cpp style
* Fix comments
* Minor fixes
* Minor fixes
* minor fixes
* Minor fixes
* Minor fixes
* Add CI
* Fix code style
* cpplint
* Fix comments
* Fix error
This commit is contained in:
@@ -6,6 +6,8 @@
|
||||
#define SHERPA_ONNX_CSRC_CONTEXT_GRAPH_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@@ -22,34 +24,55 @@ struct ContextState {
|
||||
float token_score;
|
||||
float node_score;
|
||||
float output_score;
|
||||
int32_t level;
|
||||
float ac_threshold;
|
||||
bool is_end;
|
||||
std::string phrase;
|
||||
std::unordered_map<int32_t, std::unique_ptr<ContextState>> next;
|
||||
const ContextState *fail = nullptr;
|
||||
const ContextState *output = nullptr;
|
||||
|
||||
ContextState() = default;
|
||||
ContextState(int32_t token, float token_score, float node_score,
|
||||
float output_score, bool is_end)
|
||||
float output_score, int32_t level = 0, float ac_threshold = 0.0f,
|
||||
bool is_end = false, const std::string &phrase = {})
|
||||
: token(token),
|
||||
token_score(token_score),
|
||||
node_score(node_score),
|
||||
output_score(output_score),
|
||||
is_end(is_end) {}
|
||||
level(level),
|
||||
ac_threshold(ac_threshold),
|
||||
is_end(is_end),
|
||||
phrase(phrase) {}
|
||||
};
|
||||
|
||||
class ContextGraph {
|
||||
public:
|
||||
ContextGraph() = default;
|
||||
ContextGraph(const std::vector<std::vector<int32_t>> &token_ids,
|
||||
float context_score)
|
||||
: context_score_(context_score) {
|
||||
root_ = std::make_unique<ContextState>(-1, 0, 0, 0, false);
|
||||
float context_score, float ac_threshold,
|
||||
const std::vector<float> &scores = {},
|
||||
const std::vector<std::string> &phrases = {},
|
||||
const std::vector<float> &ac_thresholds = {})
|
||||
: context_score_(context_score), ac_threshold_(ac_threshold) {
|
||||
root_ = std::make_unique<ContextState>(-1, 0, 0, 0);
|
||||
root_->fail = root_.get();
|
||||
Build(token_ids);
|
||||
Build(token_ids, scores, phrases, ac_thresholds);
|
||||
}
|
||||
|
||||
std::pair<float, const ContextState *> ForwardOneStep(
|
||||
const ContextState *state, int32_t token_id) const;
|
||||
ContextGraph(const std::vector<std::vector<int32_t>> &token_ids,
|
||||
float context_score, const std::vector<float> &scores = {},
|
||||
const std::vector<std::string> &phrases = {})
|
||||
: ContextGraph(token_ids, context_score, 0.0f, scores, phrases,
|
||||
std::vector<float>()) {}
|
||||
|
||||
std::tuple<float, const ContextState *, const ContextState *> ForwardOneStep(
|
||||
const ContextState *state, int32_t token_id,
|
||||
bool strict_mode = true) const;
|
||||
|
||||
std::pair<bool, const ContextState *> IsMatched(
|
||||
const ContextState *state) const;
|
||||
|
||||
std::pair<float, const ContextState *> Finalize(
|
||||
const ContextState *state) const;
|
||||
|
||||
@@ -57,8 +80,12 @@ class ContextGraph {
|
||||
|
||||
private:
|
||||
float context_score_;
|
||||
float ac_threshold_;
|
||||
std::unique_ptr<ContextState> root_;
|
||||
void Build(const std::vector<std::vector<int32_t>> &token_ids) const;
|
||||
void Build(const std::vector<std::vector<int32_t>> &token_ids,
|
||||
const std::vector<float> &scores,
|
||||
const std::vector<std::string> &phrases,
|
||||
const std::vector<float> &ac_thresholds) const;
|
||||
void FillFailOutput() const;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user