Add timestamps for offline paraformer (#310)

This commit is contained in:
Fangjun Kuang
2023-09-14 19:33:41 +08:00
committed by GitHub
parent 47184f9db7
commit e2be532b32
11 changed files with 175 additions and 33 deletions

View File

@@ -5,13 +5,18 @@
#include "sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.h"
#include <algorithm>
#include <utility>
#include <vector>
#include "sherpa-onnx/csrc/macros.h"
namespace sherpa_onnx {
std::vector<OfflineParaformerDecoderResult>
OfflineParaformerGreedySearchDecoder::Decode(Ort::Value log_probs,
Ort::Value /*token_num*/) {
OfflineParaformerGreedySearchDecoder::Decode(
Ort::Value log_probs, Ort::Value /*token_num*/,
Ort::Value us_cif_peak /*=Ort::Value(nullptr)*/
) {
std::vector<int64_t> shape = log_probs.GetTensorTypeAndShapeInfo().GetShape();
int32_t batch_size = shape[0];
int32_t num_tokens = shape[1];
@@ -25,12 +30,43 @@ OfflineParaformerGreedySearchDecoder::Decode(Ort::Value log_probs,
for (int32_t k = 0; k != num_tokens; ++k) {
auto max_idx = static_cast<int64_t>(
std::distance(p, std::max_element(p, p + vocab_size)));
if (max_idx == eos_id_) break;
if (max_idx == eos_id_) {
break;
}
results[i].tokens.push_back(max_idx);
p += vocab_size;
}
if (us_cif_peak) {
  // Timestamps are only computed when a us_cif_peak tensor was supplied.
  // NOTE(review): assumes us_cif_peak is a contiguous float tensor indexed as
  // (batch, frame), so row i starts at offset i * dim -- confirm against the
  // model that produces it.
  int32_t dim = us_cif_peak.GetTensorTypeAndShapeInfo().GetShape()[1];
  const auto *peak = us_cif_peak.GetTensorData<float>() + i * dim;

  std::vector<float> timestamps;
  timestamps.reserve(results[i].tokens.size());

  // 10.0: frameshift is 10 milliseconds
  // 6: LfrWindowSize
  // 3: us_cif_peak is upsampled by a factor of 3
  // 1000: milliseconds to seconds
  float scale = 10.0 * 6 / 3 / 1000;

  // Every frame whose value is (almost) 1 is treated as a peak; its frame
  // index is converted to seconds.
  for (int32_t k = 0; k != dim; ++k) {
    if (peak[k] > 1 - 1e-4) {
      timestamps.push_back(k * scale);
    }
  }

  // Drop the last detected peak.
  // NOTE(review): presumably the final peak belongs to the end-of-sentence
  // token, which is not stored in results[i].tokens -- confirm.
  // Calling pop_back() on an empty vector is undefined behavior, so only do
  // it when at least one peak was found.
  if (!timestamps.empty()) {
    timestamps.pop_back();
  }

  // Only attach timestamps when there is exactly one per decoded token;
  // otherwise report the mismatch and leave results[i].timestamps untouched.
  if (timestamps.size() == results[i].tokens.size()) {
    results[i].timestamps = std::move(timestamps);
  } else {
    SHERPA_ONNX_LOGE("time stamp for batch: %d, %d vs %d", i,
                     static_cast<int32_t>(results[i].tokens.size()),
                     static_cast<int32_t>(timestamps.size()));
  }
}
}
return results;