First working version.
This commit is contained in:
@@ -38,6 +38,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
|
||||
|
||||
include(cmake/kaldi_native_io.cmake)
|
||||
include(cmake/kaldi-native-fbank.cmake)
|
||||
|
||||
add_subdirectory(sherpa-onnx)
|
||||
|
||||
27
cmake/kaldi_native_io.cmake
Normal file
27
cmake/kaldi_native_io.cmake
Normal file
@@ -0,0 +1,27 @@
|
||||
# Locate kaldi_native_io and expose it to the rest of the build.
#
# Lookup order for the CMake prefix path:
#   1. The KALDI_NATIVE_IO_INSTALL_PREFIX environment variable, when defined.
#   2. Otherwise, whatever the installed Python package reports as
#      kaldi_native_io.cmake_prefix_path.
#
# After find_package() succeeds, KALDI_NATIVE_IO_CXX_FLAGS is appended to
# CMAKE_CXX_FLAGS.
if(DEFINED ENV{KALDI_NATIVE_IO_INSTALL_PREFIX})
  message(STATUS "Using environment variable KALDI_NATIVE_IO_INSTALL_PREFIX: $ENV{KALDI_NATIVE_IO_INSTALL_PREFIX}")
  # Quoted so that a prefix containing spaces or semicolons stays one value.
  set(KALDI_NATIVE_IO_CMAKE_PREFIX_PATH "$ENV{KALDI_NATIVE_IO_INSTALL_PREFIX}")
else()
  # PYTHON_EXECUTABLE is set by cmake/pybind11.cmake
  message(STATUS "Python executable: ${PYTHON_EXECUTABLE}")

  execute_process(
    COMMAND "${PYTHON_EXECUTABLE}" -c "import kaldi_native_io; print(kaldi_native_io.cmake_prefix_path)"
    RESULT_VARIABLE kaldi_native_io_query_result
    OUTPUT_VARIABLE KALDI_NATIVE_IO_CMAKE_PREFIX_PATH
    ERROR_VARIABLE kaldi_native_io_query_error
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_STRIP_TRAILING_WHITESPACE
  )

  # Only warn: the package may still be discoverable through a prefix path the
  # user configured elsewhere, and find_package(... REQUIRED) below is the
  # hard failure point.
  if(NOT "${kaldi_native_io_query_result}" STREQUAL "0")
    message(WARNING
      "Could not query kaldi_native_io.cmake_prefix_path with "
      "'${PYTHON_EXECUTABLE}' (result: ${kaldi_native_io_query_result}): "
      "${kaldi_native_io_query_error}")
    set(KALDI_NATIVE_IO_CMAKE_PREFIX_PATH "")
  endif()
endif()

message(STATUS "KALDI_NATIVE_IO_CMAKE_PREFIX_PATH: ${KALDI_NATIVE_IO_CMAKE_PREFIX_PATH}")

# Avoid pushing an empty element onto CMAKE_PREFIX_PATH when nothing was found.
if(NOT "${KALDI_NATIVE_IO_CMAKE_PREFIX_PATH}" STREQUAL "")
  list(APPEND CMAKE_PREFIX_PATH "${KALDI_NATIVE_IO_CMAKE_PREFIX_PATH}")
endif()

find_package(kaldi_native_io REQUIRED)

message(STATUS "KALDI_NATIVE_IO_FOUND: ${KALDI_NATIVE_IO_FOUND}")
message(STATUS "KALDI_NATIVE_IO_VERSION: ${KALDI_NATIVE_IO_VERSION}")
message(STATUS "KALDI_NATIVE_IO_INCLUDE_DIRS: ${KALDI_NATIVE_IO_INCLUDE_DIRS}")
message(STATUS "KALDI_NATIVE_IO_CXX_FLAGS: ${KALDI_NATIVE_IO_CXX_FLAGS}")
message(STATUS "KALDI_NATIVE_IO_LIBRARIES: ${KALDI_NATIVE_IO_LIBRARIES}")

# The flags exported by the package are applied globally, as before.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${KALDI_NATIVE_IO_CXX_FLAGS}")
message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
|
||||
@@ -1,2 +1,13 @@
|
||||
# Feature-extraction smoke test; depends only on kaldi-native-fbank.
add_executable(online-fbank-test online-fbank-test.cc)
target_link_libraries(online-fbank-test PRIVATE kaldi-native-fbank-core)

# End-to-end transducer decoding example built on ONNX Runtime.
add_executable(capi_test main.cpp)

# Include paths are attached to capi_test only, so they no longer leak onto
# every other target declared in this directory.
target_include_directories(capi_test PRIVATE
  ${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/session/
  ${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/providers/tensorrt/
  ${KALDINATIVEIO}
)

target_link_libraries(capi_test PRIVATE
  onnxruntime
  kaldi-native-fbank-core
  kaldi_native_io_core
)
|
||||
|
||||
57
sherpa-onnx/csrc/fbank_features.h
Normal file
57
sherpa-onnx/csrc/fbank_features.h
Normal file
@@ -0,0 +1,57 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "kaldi_native_io/csrc/kaldi-io.h"
|
||||
#include "kaldi_native_io/csrc/wave-reader.h"
|
||||
#include "kaldi-native-fbank/csrc/online-feature.h"
|
||||
|
||||
|
||||
/**
 * Read a wave file through kaldi_native_io and return its sample matrix.
 *
 * @param filename  Path (or kaldi rxfilename) handed to kaldiio::Input.
 * @param log       When true, print the file name and the matrix shape.
 * @return A copy of the matrix returned by WaveData::Data().
 *
 * Terminates the process with EXIT_FAILURE when the wave data cannot be read.
 * Declared inline because the definition lives in a header.
 */
inline kaldiio::Matrix<float> readWav(std::string filename, bool log = false){
    if (log)
        std::cout << "reading " << filename << std::endl;

    bool binary = true;
    kaldiio::Input ki(filename, &binary);
    kaldiio::WaveHolder wh;

    if (!wh.Read(ki.Stream())) {
        // End the line so the message is flushed before the process exits.
        std::cerr << "Failed to read " << filename << std::endl;
        exit(EXIT_FAILURE);
    }

    auto &wave_data = wh.Value();
    auto &d = wave_data.Data();

    if (log)
        std::cout << "wav shape: " << "(" << d.NumRows() << "," << d.NumCols() << ")" << std::endl;

    // `d` refers to storage owned by `wh`; returning by value copies it out.
    return d;
}
|
||||
|
||||
|
||||
/**
 * Feed the first row of `samples` to `fbank` and collect every ready frame.
 *
 * @param fbank    Online fbank computer; it receives the waveform and keeps
 *                 its state after the call.
 * @param opts     Options used to build `fbank`; supplies the sampling rate
 *                 and the number of mel bins per frame.
 * @param samples  Wave matrix; only row 0 is used. Each sample is divided by
 *                 32768 before being passed on.
 * @param log      When true, print the feature shape and up to the first 20
 *                 feature values.
 * @return Frames flattened row-major: NumFramesReady() * num_bins floats.
 */
inline std::vector<float> ComputeFeatures(knf::OnlineFbank &fbank, knf::FbankOptions opts, kaldiio::Matrix<float> samples, bool log = false){
    const int numSamples = samples.NumCols();

    if (numSamples > 0) {
        // Scale the whole channel once and hand it over in a single call
        // instead of issuing one AcceptWaveform call per sample.
        const float *channel = samples.Row(0).Data();
        std::vector<float> scaled(numSamples);
        for (int i = 0; i < numSamples; i++)
            scaled[i] = channel[i] / 32768;

        fbank.AcceptWaveform(opts.frame_opts.samp_freq, scaled.data(), numSamples);
    }

    const int32_t num_frames = fbank.NumFramesReady();
    const int32_t num_bins = opts.mel_opts.num_bins;

    std::vector<float> features;
    if (num_frames > 0 && num_bins > 0)
        features.reserve(static_cast<size_t>(num_frames) * num_bins);

    for (int32_t i = 0; i != num_frames; ++i) {
        const float *frame = fbank.GetFrame(i);
        features.insert(features.end(), frame, frame + num_bins);
    }

    if (log){
        std::cout << "done feature extraction" << std::endl;
        std::cout << "extracted fbank shape " << "(" << num_frames << "," << num_bins << ")" << std::endl;

        // Short inputs can yield fewer than 20 values; never read past the end.
        const size_t preview = features.size() < 20 ? features.size() : 20;
        for (size_t i = 0; i < preview; i++)
            std::cout << features[i] << std::endl;
    }

    return features;
}
|
||||
97
sherpa-onnx/csrc/main.cpp
Normal file
97
sherpa-onnx/csrc/main.cpp
Normal file
@@ -0,0 +1,97 @@
|
||||
#include <math.h>
#include <time.h>

#include <algorithm>
#include <chrono>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "fbank_features.h"
#include "rnnt_beam_search.h"

#include "kaldi-native-fbank/csrc/online-feature.h"
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
char* filename = argv[1];
|
||||
std::string search_method = argv[2];
|
||||
int num_active_paths = atoi(argv[3]);
|
||||
|
||||
// General parameters
|
||||
int numberOfThreads = 16;
|
||||
|
||||
// Initialize fbanks
|
||||
knf::FbankOptions opts;
|
||||
opts.frame_opts.dither = 0;
|
||||
opts.frame_opts.samp_freq = 16000;
|
||||
opts.frame_opts.frame_shift_ms = 10.0f;
|
||||
opts.frame_opts.frame_length_ms = 25.0f;
|
||||
opts.mel_opts.num_bins = 80;
|
||||
opts.frame_opts.window_type = "povey";
|
||||
opts.frame_opts.snip_edges = false;
|
||||
knf::OnlineFbank fbank(opts);
|
||||
|
||||
// set session opts
|
||||
// https://onnxruntime.ai/docs/performance/tune-performance.html
|
||||
session_options.SetIntraOpNumThreads(numberOfThreads);
|
||||
session_options.SetInterOpNumThreads(numberOfThreads);
|
||||
session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
||||
session_options.SetLogSeverityLevel(4);
|
||||
session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
|
||||
|
||||
api.CreateTensorRTProviderOptions(&tensorrt_options);
|
||||
std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)> rel_trt_options(tensorrt_options, api.ReleaseTensorRTProviderOptions);
|
||||
api.SessionOptionsAppendExecutionProvider_TensorRT_V2(static_cast<OrtSessionOptions*>(session_options), rel_trt_options.get());
|
||||
|
||||
// Define model
|
||||
auto model = get_model(
|
||||
"/mnt/local4/sr/k2_sherpa/models/exp_en2/encoder_simp.onnx",
|
||||
"/mnt/local4/sr/k2_sherpa/models/exp_en2/decoder_simp.onnx",
|
||||
"/mnt/local4/sr/k2_sherpa/models/exp_en2/joiner_simp.onnx",
|
||||
"/mnt/local4/sr/k2_sherpa/models/exp_en2/joiner_encoder_proj_simp.onnx",
|
||||
"/mnt/local4/sr/k2_sherpa/models/exp_en2/joiner_decoder_proj_simp.onnx",
|
||||
"/mnt/local4/sr/k2_sherpa/models/exp_en2/enUS_tokens.txt"
|
||||
);
|
||||
|
||||
std::vector<std::string> filename_list {
|
||||
"/mnt/local4/sr/k2_sherpa/test_wavs/cnn_15sec.wav",
|
||||
//"/mnt/local4/sr/k2_sherpa/test_wavs/1089-134686-0001.wav"
|
||||
};
|
||||
|
||||
for (auto filename : filename_list){
|
||||
std::cout << filename << std::endl;
|
||||
auto samples = readWav(filename, true);
|
||||
int numSamples = samples.NumCols();
|
||||
|
||||
auto features = ComputeFeatures(fbank, opts, samples);
|
||||
|
||||
auto tic = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// # === Encoder Out === #
|
||||
int num_frames = features.size() / opts.mel_opts.num_bins;
|
||||
auto encoder_out = model.encoder_forward(features,
|
||||
std::vector<int64_t> {num_frames},
|
||||
std::vector<int64_t> {1, num_frames, 80},
|
||||
std::vector<int64_t> {1},
|
||||
memory_info);
|
||||
|
||||
// # === Search === #
|
||||
std::vector<std::vector<int32_t>> hyps;
|
||||
if (search_method == "greedy")
|
||||
hyps = GreedySearch(&model, &encoder_out);
|
||||
else{
|
||||
std::cout << "wrong search method!" << std::endl;
|
||||
exit(0);
|
||||
}
|
||||
auto results = hyps2result(model.tokens_map, hyps);
|
||||
|
||||
// # === Print Elapsed Time === #
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - tic);
|
||||
std::cout << "Elapsed: " << float(elapsed.count()) / 1000 << " seconds" << std::endl;
|
||||
std::cout << "rtf: " << float(elapsed.count()) / 1000 / (numSamples / 16000) << std::endl;
|
||||
|
||||
print_hyps(hyps);
|
||||
std::cout << results[0] << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
253
sherpa-onnx/csrc/models.h
Normal file
253
sherpa-onnx/csrc/models.h
Normal file
@@ -0,0 +1,253 @@
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "utils_onnx.h"
|
||||
|
||||
|
||||
struct Model
|
||||
{
|
||||
public:
|
||||
const char* encoder_path;
|
||||
const char* decoder_path;
|
||||
const char* joiner_path;
|
||||
const char* joiner_encoder_proj_path;
|
||||
const char* joiner_decoder_proj_path;
|
||||
const char* tokens_path;
|
||||
|
||||
Ort::Session encoder = load_model(encoder_path);
|
||||
Ort::Session decoder = load_model(decoder_path);
|
||||
Ort::Session joiner = load_model(joiner_path);
|
||||
Ort::Session joiner_encoder_proj = load_model(joiner_encoder_proj_path);
|
||||
Ort::Session joiner_decoder_proj = load_model(joiner_decoder_proj_path);
|
||||
std::map<int, std::string> tokens_map = get_token_map(tokens_path);
|
||||
|
||||
int32_t blank_id;
|
||||
int32_t unk_id;
|
||||
int32_t context_size;
|
||||
|
||||
std::vector<Ort::Value> encoder_forward(std::vector<float> in_vector,
|
||||
std::vector<int64_t> in_vector_length,
|
||||
std::vector<int64_t> feature_dims,
|
||||
std::vector<int64_t> feature_length_dims,
|
||||
Ort::MemoryInfo &memory_info){
|
||||
std::vector<Ort::Value> encoder_inputTensors;
|
||||
encoder_inputTensors.push_back(Ort::Value::CreateTensor<float>(memory_info, in_vector.data(), in_vector.size(), feature_dims.data(), feature_dims.size()));
|
||||
encoder_inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(memory_info, in_vector_length.data(), in_vector_length.size(), feature_length_dims.data(), feature_length_dims.size()));
|
||||
|
||||
std::vector<const char*> encoder_inputNames = {encoder.GetInputName(0, allocator), encoder.GetInputName(1, allocator)};
|
||||
std::vector<const char*> encoder_outputNames = {encoder.GetOutputName(0, allocator)};
|
||||
|
||||
auto out = encoder.Run(Ort::RunOptions{nullptr},
|
||||
encoder_inputNames.data(),
|
||||
encoder_inputTensors.data(),
|
||||
encoder_inputTensors.size(),
|
||||
encoder_outputNames.data(),
|
||||
encoder_outputNames.size());
|
||||
return out;
|
||||
}
|
||||
|
||||
std::vector<Ort::Value> decoder_forward(std::vector<int64_t> in_vector,
|
||||
std::vector<int64_t> dims,
|
||||
Ort::MemoryInfo &memory_info){
|
||||
std::vector<Ort::Value> inputTensors;
|
||||
inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(memory_info, in_vector.data(), in_vector.size(), dims.data(), dims.size()));
|
||||
|
||||
std::vector<const char*> inputNames {decoder.GetInputName(0, allocator)};
|
||||
std::vector<const char*> outputNames {decoder.GetOutputName(0, allocator)};
|
||||
|
||||
auto out = decoder.Run(Ort::RunOptions{nullptr},
|
||||
inputNames.data(),
|
||||
inputTensors.data(),
|
||||
inputTensors.size(),
|
||||
outputNames.data(),
|
||||
outputNames.size());
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::vector<Ort::Value> joiner_forward(std::vector<float> projected_encoder_out,
|
||||
std::vector<float> decoder_out,
|
||||
std::vector<int64_t> projected_encoder_out_dims,
|
||||
std::vector<int64_t> decoder_out_dims,
|
||||
Ort::MemoryInfo &memory_info){
|
||||
std::vector<Ort::Value> inputTensors;
|
||||
inputTensors.push_back(Ort::Value::CreateTensor<float>(memory_info, projected_encoder_out.data(), projected_encoder_out.size(), projected_encoder_out_dims.data(), projected_encoder_out_dims.size()));
|
||||
inputTensors.push_back(Ort::Value::CreateTensor<float>(memory_info, decoder_out.data(), decoder_out.size(), decoder_out_dims.data(), decoder_out_dims.size()));
|
||||
std::vector<const char*> inputNames = {joiner.GetInputName(0, allocator), joiner.GetInputName(1, allocator)};
|
||||
std::vector<const char*> outputNames = {joiner.GetOutputName(0, allocator)};
|
||||
|
||||
auto out = joiner.Run(Ort::RunOptions{nullptr},
|
||||
inputNames.data(),
|
||||
inputTensors.data(),
|
||||
inputTensors.size(),
|
||||
outputNames.data(),
|
||||
outputNames.size());
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::vector<Ort::Value> joiner_encoder_proj_forward(std::vector<float> in_vector,
|
||||
std::vector<int64_t> dims,
|
||||
Ort::MemoryInfo &memory_info){
|
||||
std::vector<Ort::Value> inputTensors;
|
||||
inputTensors.push_back(Ort::Value::CreateTensor<float>(memory_info, in_vector.data(), in_vector.size(), dims.data(), dims.size()));
|
||||
|
||||
std::vector<const char*> inputNames {joiner_encoder_proj.GetInputName(0, allocator)};
|
||||
std::vector<const char*> outputNames {joiner_encoder_proj.GetOutputName(0, allocator)};
|
||||
|
||||
auto out = joiner_encoder_proj.Run(Ort::RunOptions{nullptr},
|
||||
inputNames.data(),
|
||||
inputTensors.data(),
|
||||
inputTensors.size(),
|
||||
outputNames.data(),
|
||||
outputNames.size());
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::vector<Ort::Value> joiner_decoder_proj_forward(std::vector<float> in_vector,
|
||||
std::vector<int64_t> dims,
|
||||
Ort::MemoryInfo &memory_info){
|
||||
std::vector<Ort::Value> inputTensors;
|
||||
inputTensors.push_back(Ort::Value::CreateTensor<float>(memory_info, in_vector.data(), in_vector.size(), dims.data(), dims.size()));
|
||||
|
||||
std::vector<const char*> inputNames {joiner_decoder_proj.GetInputName(0, allocator)};
|
||||
std::vector<const char*> outputNames {joiner_decoder_proj.GetOutputName(0, allocator)};
|
||||
|
||||
auto out = joiner_decoder_proj.Run(Ort::RunOptions{nullptr},
|
||||
inputNames.data(),
|
||||
inputTensors.data(),
|
||||
inputTensors.size(),
|
||||
outputNames.data(),
|
||||
outputNames.size());
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
Ort::Session load_model(const char* path){
|
||||
struct stat buffer;
|
||||
if (stat(path, &buffer) != 0){
|
||||
std::cout << "File does not exist!: " << path << std::endl;
|
||||
exit(0);
|
||||
}
|
||||
std::cout << "loading " << path << std::endl;
|
||||
Ort::Session onnx_model(env, path, session_options);
|
||||
return onnx_model;
|
||||
}
|
||||
|
||||
void extract_constant_lm_parameters(){
|
||||
/*
|
||||
all_in_one contains these params. We should trace all_in_one and find 'constants_lm' nodes to extract these params
|
||||
For now, these params are set staticaly.
|
||||
in: Ort::Session &all_in_one
|
||||
out: {blank_id, unk_id, context_size}
|
||||
should return std::vector<int32_t>
|
||||
*/
|
||||
blank_id = 0;
|
||||
unk_id = 0;
|
||||
context_size = 2;
|
||||
}
|
||||
|
||||
std::map<int, std::string> get_token_map(const char* token_path){
|
||||
std::ifstream inFile;
|
||||
inFile.open(token_path);
|
||||
if (inFile.fail())
|
||||
std::cerr << "Could not find token file" << std::endl;
|
||||
|
||||
std::map<int, std::string> token_map;
|
||||
|
||||
std::string line;
|
||||
while (std::getline(inFile, line))
|
||||
{
|
||||
int id;
|
||||
std::string token;
|
||||
|
||||
std::istringstream iss(line);
|
||||
iss >> token;
|
||||
iss >> id;
|
||||
|
||||
token_map[id] = token;
|
||||
}
|
||||
|
||||
return token_map;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
/**
 * Build a Model from an experiment directory that contains the *_simp.onnx
 * files, plus a separate token table.
 *
 * @param exp_path     directory holding encoder_simp.onnx, decoder_simp.onnx,
 *                     joiner_simp.onnx, joiner_encoder_proj_simp.onnx and
 *                     joiner_decoder_proj_simp.onnx
 * @param tokens_path  path of the token table (now `const char*`, so string
 *                     literals can be passed)
 *
 * NOTE(review): Model stores the five model paths as raw `const char*`. They
 * point into strings local to this function, so they are only valid while the
 * Model is being constructed here and must not be dereferenced on the
 * returned object.
 */
Model get_model(std::string exp_path, const char* tokens_path){
    // Named strings keep the buffers alive for the whole function body
    // instead of only for the initializer expression.
    const std::string encoder_file = exp_path + "/encoder_simp.onnx";
    const std::string decoder_file = exp_path + "/decoder_simp.onnx";
    const std::string joiner_file = exp_path + "/joiner_simp.onnx";
    const std::string joiner_encoder_proj_file = exp_path + "/joiner_encoder_proj_simp.onnx";
    const std::string joiner_decoder_proj_file = exp_path + "/joiner_decoder_proj_simp.onnx";

    Model model{
        encoder_file.c_str(),
        decoder_file.c_str(),
        joiner_file.c_str(),
        joiner_encoder_proj_file.c_str(),
        joiner_decoder_proj_file.c_str(),
        tokens_path,
    };
    model.extract_constant_lm_parameters();

    return model;
}
|
||||
|
||||
/**
 * Build a Model from six explicit file paths and set its constant
 * parameters via extract_constant_lm_parameters().
 *
 * The parameters are `const char*` so that string literals can be passed
 * without a deprecated literal-to-`char*` conversion; callers holding
 * `char*` are unaffected. Model keeps the pointers as given, without copying
 * the strings.
 */
Model get_model(const char* encoder_path,
                const char* decoder_path,
                const char* joiner_path,
                const char* joiner_encoder_proj_path,
                const char* joiner_decoder_proj_path,
                const char* tokens_path){
    Model model{
        encoder_path,
        decoder_path,
        joiner_path,
        joiner_encoder_proj_path,
        joiner_decoder_proj_path,
        tokens_path,
    };
    model.extract_constant_lm_parameters();

    return model;
}
|
||||
|
||||
|
||||
/**
 * Run every sub-model `numWarmup` times on constant dummy inputs and discard
 * the outputs. Progress is reported on stdout before and after.
 */
void doWarmup(Model *model, int numWarmup = 5){
    std::cout << "Warmup is started" << std::endl;

    // Encoder: 500 frames of 80 values, all ones.
    std::vector<float> encoder_dummy (500 * 80, 1.0);
    for (int round = 0; round < numWarmup; ++round) {
        model->encoder_forward(encoder_dummy,
                               std::vector<int64_t> {500},
                               std::vector<int64_t> {1, 500, 80},
                               std::vector<int64_t> {1},
                               memory_info);
    }

    // Decoder: a single context of two token ids.
    std::vector<int64_t> decoder_dummy {1, 1};
    for (int round = 0; round < numWarmup; ++round) {
        model->decoder_forward(decoder_dummy,
                               std::vector<int64_t> {1, 2},
                               memory_info);
    }

    // Joiner: one 512-wide vector on each side.
    std::vector<float> joiner_dummy_enc (512, 1.0);
    std::vector<float> joiner_dummy_dec (512, 1.0);
    for (int round = 0; round < numWarmup; ++round) {
        model->joiner_forward(joiner_dummy_enc,
                              joiner_dummy_dec,
                              std::vector<int64_t> {1, 1, 1, 512},
                              std::vector<int64_t> {1, 1, 1, 512},
                              memory_info);
    }

    // Joiner encoder projection: 100 rows of 512 values.
    std::vector<float> enc_proj_dummy (100 * 512, 1.0);
    for (int round = 0; round < numWarmup; ++round) {
        model->joiner_encoder_proj_forward(enc_proj_dummy,
                                           std::vector<int64_t> {100, 512},
                                           memory_info);
    }

    // Joiner decoder projection: one row of 512 values.
    std::vector<float> dec_proj_dummy (512, 1.0);
    for (int round = 0; round < numWarmup; ++round) {
        model->joiner_decoder_proj_forward(dec_proj_dummy,
                                           std::vector<int64_t> {1, 512},
                                           memory_info);
    }

    std::cout << "Warmup is done" << std::endl;
}
|
||||
121
sherpa-onnx/csrc/rnnt_beam_search.h
Normal file
121
sherpa-onnx/csrc/rnnt_beam_search.h
Normal file
@@ -0,0 +1,121 @@
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <time.h>
|
||||
|
||||
#include "models.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
/**
 * Copy the floats in the half-open range [start, length) out of `tensor`.
 *
 * Note that, despite its name, `length` is used as the exclusive END offset
 * of the range, not as an element count.
 */
std::vector<float> getEncoderCol(Ort::Value &tensor, int start, int length){
    const float* base = tensor.GetTensorMutableData<float>();
    std::vector<float> column;
    column.assign(base + start, base + length);
    return column;
}
|
||||
|
||||
|
||||
/**
 * Overwrite `decoder_input` with the last decoder_input.size() tokens of the
 * first hypothesis and return a copy of it.
 *
 * Assume batch size = 1: only hyps[0] is read, and it must hold at least
 * decoder_input.size() tokens.
 */
std::vector<int64_t> BuildDecoderInput(const std::vector<std::vector<int32_t>> &hyps,
                                       std::vector<int64_t> &decoder_input) {
    const std::vector<int32_t> &tokens = hyps[0];
    const int32_t window = decoder_input.size();
    const int32_t tail_begin = static_cast<int32_t>(tokens.size()) - window;

    // The trailing `window` tokens become the next decoder context.
    std::copy(tokens.begin() + tail_begin, tokens.end(), decoder_input.begin());

    return decoder_input;
}
|
||||
|
||||
|
||||
std::vector<std::vector<int32_t>> GreedySearch(
|
||||
Model *model, // NOLINT
|
||||
std::vector<Ort::Value> *encoder_out){
|
||||
Ort::Value &encoder_out_tensor = encoder_out->at(0);
|
||||
int encoder_out_dim1 = encoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[1];
|
||||
int encoder_out_dim2 = encoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[2];
|
||||
auto encoder_out_vector = ortVal2Vector(encoder_out_tensor, encoder_out_dim1 * encoder_out_dim2);
|
||||
|
||||
// # === Greedy Search === #
|
||||
int32_t batch_size = 1;
|
||||
std::vector<int32_t> blanks(model->context_size, model->blank_id);
|
||||
std::vector<std::vector<int32_t>> hyps(batch_size, blanks);
|
||||
std::vector<int64_t> decoder_input(model->context_size, model->blank_id);
|
||||
|
||||
auto decoder_out = model->decoder_forward(decoder_input,
|
||||
std::vector<int64_t> {batch_size, model->context_size},
|
||||
memory_info);
|
||||
|
||||
Ort::Value &decoder_out_tensor = decoder_out[0];
|
||||
int decoder_out_dim = decoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[2];
|
||||
auto decoder_out_vector = ortVal2Vector(decoder_out_tensor, decoder_out_dim);
|
||||
|
||||
decoder_out = model->joiner_decoder_proj_forward(decoder_out_vector,
|
||||
std::vector<int64_t> {1, decoder_out_dim},
|
||||
memory_info);
|
||||
Ort::Value &projected_decoder_out_tensor = decoder_out[0];
|
||||
auto projected_decoder_out_dim = projected_decoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[1];
|
||||
auto projected_decoder_out_vector = ortVal2Vector(projected_decoder_out_tensor, projected_decoder_out_dim);
|
||||
|
||||
auto projected_encoder_out = model->joiner_encoder_proj_forward(encoder_out_vector,
|
||||
std::vector<int64_t> {encoder_out_dim1, encoder_out_dim2},
|
||||
memory_info);
|
||||
|
||||
Ort::Value &projected_encoder_out_tensor = projected_encoder_out[0];
|
||||
int projected_encoder_out_dim1 = projected_encoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[0];
|
||||
int projected_encoder_out_dim2 = projected_encoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[1];
|
||||
auto projected_encoder_out_vector = ortVal2Vector(projected_encoder_out_tensor, projected_encoder_out_dim1 * projected_encoder_out_dim2);
|
||||
|
||||
int32_t offset = 0;
|
||||
for (int i=0; i< projected_encoder_out_dim1; i++){
|
||||
int32_t cur_batch_size = 1;
|
||||
int32_t start = offset;
|
||||
int32_t end = start + cur_batch_size;
|
||||
offset = end;
|
||||
|
||||
auto cur_encoder_out = getEncoderCol(projected_encoder_out_tensor, start * projected_encoder_out_dim2, end * projected_encoder_out_dim2);
|
||||
|
||||
auto logits = model->joiner_forward(cur_encoder_out,
|
||||
projected_decoder_out_vector,
|
||||
std::vector<int64_t> {1, 1, 1, projected_encoder_out_dim2},
|
||||
std::vector<int64_t> {1, 1, 1, projected_decoder_out_dim},
|
||||
memory_info);
|
||||
|
||||
Ort::Value &logits_tensor = logits[0];
|
||||
int logits_dim = logits_tensor.GetTensorTypeAndShapeInfo().GetShape()[3];
|
||||
auto logits_vector = ortVal2Vector(logits_tensor, logits_dim);
|
||||
|
||||
int max_indices = static_cast<int>(std::distance(logits_vector.begin(), std::max_element(logits_vector.begin(), logits_vector.end())));
|
||||
bool emitted = false;
|
||||
|
||||
for (int32_t k = 0; k != cur_batch_size; ++k) {
|
||||
auto index = max_indices;
|
||||
if (index != model->blank_id && index != model->unk_id) {
|
||||
emitted = true;
|
||||
hyps[k].push_back(index);
|
||||
}
|
||||
}
|
||||
|
||||
if (emitted) {
|
||||
decoder_input = BuildDecoderInput(hyps, decoder_input);
|
||||
|
||||
decoder_out = model->decoder_forward(decoder_input,
|
||||
std::vector<int64_t> {batch_size, model->context_size},
|
||||
memory_info);
|
||||
|
||||
decoder_out_dim = decoder_out[0].GetTensorTypeAndShapeInfo().GetShape()[2];
|
||||
decoder_out_vector = ortVal2Vector(decoder_out[0], decoder_out_dim);
|
||||
|
||||
decoder_out = model->joiner_decoder_proj_forward(decoder_out_vector,
|
||||
std::vector<int64_t> {1, decoder_out_dim},
|
||||
memory_info);
|
||||
|
||||
projected_decoder_out_dim = decoder_out[0].GetTensorTypeAndShapeInfo().GetShape()[1];
|
||||
projected_decoder_out_vector = ortVal2Vector(decoder_out[0], projected_decoder_out_dim);
|
||||
}
|
||||
}
|
||||
|
||||
return hyps;
|
||||
}
|
||||
|
||||
39
sherpa-onnx/csrc/utils.h
Normal file
39
sherpa-onnx/csrc/utils.h
Normal file
@@ -0,0 +1,39 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
|
||||
/**
 * Write every element of `vector` to the file `saveFileName`, one value per
 * line, using the stream's default float formatting.
 */
void vector2file(std::vector<float> vector, std::string saveFileName){
    std::ofstream out(saveFileName);
    for (const float value : vector)
        out << value << '\n';
}
|
||||
|
||||
|
||||
/**
 * Turn token-id hypotheses into text.
 *
 * @param token_map     id -> token table
 * @param hyps          hypotheses; the first `context_size` ids of each one
 *                      are skipped
 * @param context_size  number of leading ids to skip
 * @return one string per hypothesis; empty when a hypothesis holds no tokens
 *         beyond the context
 *
 * Tokens are concatenated. A token other than the first that starts a new
 * word gets a space in front of it. A word start is marked either by an ASCII
 * '_' or by the three UTF-8 bytes E2 96 81 (U+2581) -- presumably the marker
 * the token files actually use, which would explain why the '_' check alone
 * never matched; TODO confirm against a real tokens file. Ids missing from
 * `token_map` and empty tokens are skipped.
 */
std::vector<std::string> hyps2result(std::map<int, std::string> token_map, std::vector<std::vector<int32_t>> hyps, int context_size = 2){
    // UTF-8 encoding of U+2581 (LOWER ONE EIGHTH BLOCK).
    static const char kWordMark[] = "\xE2\x96\x81";

    std::vector<std::string> results;
    results.reserve(hyps.size());

    const size_t first = context_size;
    for (size_t k = 0; k < hyps.size(); k++){
        const std::vector<int32_t> &hyp = hyps[k];
        std::string result;

        // Starting the loop at `first` also covers hypotheses in which
        // nothing was emitted; indexing hyp[context_size] up front read past
        // the end for those.
        for (size_t i = first; i < hyp.size(); i++){
            const auto entry = token_map.find(hyp[i]);
            if (entry == token_map.end() || entry->second.empty())
                continue;
            const std::string &token = entry->second;

            const bool starts_word = token[0] == '_' || token.compare(0, 3, kWordMark) == 0;
            if (i != first && starts_word)
                result += " ";
            result += token;
        }
        results.push_back(result);
    }
    return results;
}
|
||||
|
||||
|
||||
/**
 * Print the token ids of the first hypothesis, skipping its first
 * `context_size` entries. Each id is followed by '-', and the line is closed
 * with '|'.
 */
void print_hyps(std::vector<std::vector<int32_t>> hyps, int context_size = 2){
    std::cout << "Hyps:" << std::endl;

    const std::vector<int32_t> &first_hyp = hyps[0];
    size_t pos = context_size;
    while (pos < first_hyp.size()) {
        std::cout << first_hyp[pos] << "-";
        ++pos;
    }

    std::cout << "|" << std::endl;
}
|
||||
77
sherpa-onnx/csrc/utils_onnx.h
Normal file
77
sherpa-onnx/csrc/utils_onnx.h
Normal file
@@ -0,0 +1,77 @@
|
||||
#include <iostream>
|
||||
#include <onnxruntime_cxx_api.h>
|
||||
|
||||
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
|
||||
const auto& api = Ort::GetApi();
|
||||
OrtTensorRTProviderOptionsV2* tensorrt_options;
|
||||
Ort::SessionOptions session_options;
|
||||
Ort::AllocatorWithDefaultOptions allocator;
|
||||
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
|
||||
|
||||
|
||||
/**
 * Copy the first `tensor_length` floats of `tensor` into a new vector.
 */
std::vector<float> ortVal2Vector(Ort::Value &tensor, int tensor_length){
    const float* first = tensor.GetTensorMutableData<float>();
    const float* last = first + tensor_length;
    return std::vector<float>(first, last);
}
|
||||
|
||||
|
||||
/**
 * Print the first `num` float values of the first tensor in
 * `output_tensors`, one "[index] = value" line each.
 */
void print_onnx_forward_output(std::vector<Ort::Value> &output_tensors, int num){
    const float* values = output_tensors.front().GetTensorMutableData<float>();
    int idx = 0;
    while (idx < num) {
        printf("[%d] = %f\n", idx, values[idx]);
        ++idx;
    }
}
|
||||
|
||||
|
||||
/**
 * Print the shape of the first tensor in `tensor` as "(d0,d1,...)" followed
 * by a newline.
 */
void print_shape_of_ort_val(std::vector<Ort::Value> &tensor){
    const auto dims = tensor.front().GetTensorTypeAndShapeInfo().GetShape();

    std::cout << "(";
    bool need_comma = false;
    for (const auto dim : dims) {
        // The separator goes in front of every dimension except the first.
        if (need_comma)
            std::cout << ",";
        std::cout << dim;
        need_comma = true;
    }
    std::cout << ")" << std::endl;
}
|
||||
|
||||
|
||||
/**
 * Print a summary of `session`: the number of inputs, the name of output 0,
 * and the name, element type and shape of every input.
 *
 * @param session  session to inspect
 * @param title    label shown in the banner line
 *
 * The name strings obtained from the session are returned to the allocator
 * after printing instead of being leaked.
 */
void print_model_info(Ort::Session &session, std::string title){
    std::cout << "=== Printing '" << title << "' model ===" << std::endl;
    Ort::AllocatorWithDefaultOptions allocator;

    // print number of model input nodes
    size_t num_input_nodes = session.GetInputCount();
    printf("Number of inputs = %zu\n", num_input_nodes);

    char* output_name = session.GetOutputName(0, allocator);
    printf("output name: %s\n", output_name);
    allocator.Free(output_name);

    // iterate over all input nodes
    for (size_t i = 0; i < num_input_nodes; i++) {
        // print input node names
        char* input_name = session.GetInputName(i, allocator);
        printf("Input %zu : name=%s\n", i, input_name);
        allocator.Free(input_name);

        // print input node types
        Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();

        ONNXTensorElementDataType type = tensor_info.GetElementType();
        // The enum is cast so the argument matches the %d conversion.
        printf("Input %zu : type=%d\n", i, static_cast<int>(type));

        // print input shapes/dims
        std::vector<int64_t> input_node_dims = tensor_info.GetShape();
        printf("Input %zu : num_dims=%zu\n", i, input_node_dims.size());
        for (size_t j = 0; j < input_node_dims.size(); j++)
            // %jd expects intmax_t; %lld with an explicit cast is correct
            // whatever int64_t is a typedef for.
            printf("Input %zu : dim %zu=%lld\n", i, j, static_cast<long long>(input_node_dims[j]));
    }
    std::cout << "=======================================" << std::endl;
}
|
||||
Reference in New Issue
Block a user