Stack and streaming conformer support (#141)
* added csrc/stack.cc
* stack: added checks
* added copyright info
* passed cpp style checks
* formatted code
* added some support for the streaming conformer model (not verified)
* code lint
* made more progress with streaming conformer support (not working yet)
* passed style check
* changes as suggested by @csukuangfj
* added some debug info
* fixed style check
* Use Cat to replace Stack
* remove debug statements

---------

Co-authored-by: Jingzhao Ou (jou2019) <jou2019@cisco.com>
Co-authored-by: Fangjun Kuang <csukuangfj@gmail.com>
This commit is contained in:
@@ -9,6 +9,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
@@ -187,11 +188,14 @@ class OnlineRecognizer::Impl {
|
||||
std::vector<OnlineTransducerDecoderResult> results(n);
|
||||
std::vector<float> features_vec(n * chunk_size * feature_dim);
|
||||
std::vector<std::vector<Ort::Value>> states_vec(n);
|
||||
std::vector<int64_t> all_processed_frames(n);
|
||||
|
||||
for (int32_t i = 0; i != n; ++i) {
|
||||
const auto num_processed_frames = ss[i]->GetNumProcessedFrames();
|
||||
std::vector<float> features =
|
||||
ss[i]->GetFrames(ss[i]->GetNumProcessedFrames(), chunk_size);
|
||||
ss[i]->GetFrames(num_processed_frames, chunk_size);
|
||||
|
||||
// Question: should num_processed_frames include chunk_shift?
|
||||
ss[i]->GetNumProcessedFrames() += chunk_shift;
|
||||
|
||||
std::copy(features.begin(), features.end(),
|
||||
@@ -199,6 +203,7 @@ class OnlineRecognizer::Impl {
|
||||
|
||||
results[i] = std::move(ss[i]->GetResult());
|
||||
states_vec[i] = std::move(ss[i]->GetStates());
|
||||
all_processed_frames[i] = num_processed_frames;
|
||||
}
|
||||
|
||||
auto memory_info =
|
||||
@@ -210,9 +215,20 @@ class OnlineRecognizer::Impl {
|
||||
features_vec.size(), x_shape.data(),
|
||||
x_shape.size());
|
||||
|
||||
std::array<int64_t, 1> processed_frames_shape{
|
||||
static_cast<int64_t>(all_processed_frames.size())};
|
||||
|
||||
Ort::Value processed_frames = Ort::Value::CreateTensor(
|
||||
memory_info,
|
||||
all_processed_frames.data(),
|
||||
all_processed_frames.size(),
|
||||
processed_frames_shape.data(),
|
||||
processed_frames_shape.size());
|
||||
|
||||
auto states = model_->StackStates(states_vec);
|
||||
|
||||
auto pair = model_->RunEncoder(std::move(x), std::move(states));
|
||||
auto pair = model_->RunEncoder(
|
||||
std::move(x), std::move(states), std::move(processed_frames));
|
||||
|
||||
decoder_->Decode(std::move(pair.first), &results);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user