Stack and streaming conformer support (#141)

* added csrc/stack.cc

* stack: added checks

* added copyright info

* passed cpp style checks

* formatted code

* added initial support for streaming conformer models (not verified)

* code lint

* made more progress with streaming conformer support (not working yet)

* passed style check

* changes as suggested by @csukuangfj

* added some debug info

* fixed style check

* Use Cat to replace Stack

* remove debug statements

---------

Co-authored-by: Jingzhao Ou (jou2019) <jou2019@cisco.com>
Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com>
This commit is contained in:
Jingzhao Ou
2023-05-10 23:30:39 -07:00
committed by GitHub
parent 824b0809a4
commit 0992063de8
15 changed files with 836 additions and 8 deletions

View File

@@ -9,6 +9,7 @@
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <memory>
#include <sstream>
#include <utility>
@@ -187,11 +188,14 @@ class OnlineRecognizer::Impl {
std::vector<OnlineTransducerDecoderResult> results(n);
std::vector<float> features_vec(n * chunk_size * feature_dim);
std::vector<std::vector<Ort::Value>> states_vec(n);
std::vector<int64_t> all_processed_frames(n);
for (int32_t i = 0; i != n; ++i) {
const auto num_processed_frames = ss[i]->GetNumProcessedFrames();
std::vector<float> features =
ss[i]->GetFrames(ss[i]->GetNumProcessedFrames(), chunk_size);
ss[i]->GetFrames(num_processed_frames, chunk_size);
// Question: should num_processed_frames include chunk_shift?
ss[i]->GetNumProcessedFrames() += chunk_shift;
std::copy(features.begin(), features.end(),
@@ -199,6 +203,7 @@ class OnlineRecognizer::Impl {
results[i] = std::move(ss[i]->GetResult());
states_vec[i] = std::move(ss[i]->GetStates());
all_processed_frames[i] = num_processed_frames;
}
auto memory_info =
@@ -210,9 +215,20 @@ class OnlineRecognizer::Impl {
features_vec.size(), x_shape.data(),
x_shape.size());
std::array<int64_t, 1> processed_frames_shape{
static_cast<int64_t>(all_processed_frames.size())};
Ort::Value processed_frames = Ort::Value::CreateTensor(
memory_info,
all_processed_frames.data(),
all_processed_frames.size(),
processed_frames_shape.data(),
processed_frames_shape.size());
auto states = model_->StackStates(states_vec);
auto pair = model_->RunEncoder(std::move(x), std::move(states));
auto pair = model_->RunEncoder(
std::move(x), std::move(states), std::move(processed_frames));
decoder_->Decode(std::move(pair.first), &results);