diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5c352b2b..a8ec5099 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,6 +38,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
 
+include(cmake/kaldi_native_io.cmake)
 include(cmake/kaldi-native-fbank.cmake)
 
 add_subdirectory(sherpa-onnx)
diff --git a/cmake/kaldi_native_io.cmake b/cmake/kaldi_native_io.cmake
new file mode 100644
index 00000000..406f2efb
--- /dev/null
+++ b/cmake/kaldi_native_io.cmake
@@ -0,0 +1,27 @@
+if(DEFINED ENV{KALDI_NATIVE_IO_INSTALL_PREFIX})
+  message(STATUS "Using environment variable KALDI_NATIVE_IO_INSTALL_PREFIX: $ENV{KALDI_NATIVE_IO_INSTALL_PREFIX}")
+  set(KALDI_NATIVE_IO_CMAKE_PREFIX_PATH $ENV{KALDI_NATIVE_IO_INSTALL_PREFIX})
+else()
+  # PYTHON_EXECUTABLE is set by cmake/pybind11.cmake
+  message(STATUS "Python executable: ${PYTHON_EXECUTABLE}")
+
+  execute_process(
+    COMMAND "${PYTHON_EXECUTABLE}" -c "import kaldi_native_io; print(kaldi_native_io.cmake_prefix_path)"
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    OUTPUT_VARIABLE KALDI_NATIVE_IO_CMAKE_PREFIX_PATH
+  )
+endif()
+
+message(STATUS "KALDI_NATIVE_IO_CMAKE_PREFIX_PATH: ${KALDI_NATIVE_IO_CMAKE_PREFIX_PATH}")
+list(APPEND CMAKE_PREFIX_PATH "${KALDI_NATIVE_IO_CMAKE_PREFIX_PATH}")
+
+find_package(kaldi_native_io REQUIRED)
+
+message(STATUS "KALDI_NATIVE_IO_FOUND: ${KALDI_NATIVE_IO_FOUND}")
+message(STATUS "KALDI_NATIVE_IO_VERSION: ${KALDI_NATIVE_IO_VERSION}")
+message(STATUS "KALDI_NATIVE_IO_INCLUDE_DIRS: ${KALDI_NATIVE_IO_INCLUDE_DIRS}")
+message(STATUS "KALDI_NATIVE_IO_CXX_FLAGS: ${KALDI_NATIVE_IO_CXX_FLAGS}")
+message(STATUS "KALDI_NATIVE_IO_LIBRARIES: ${KALDI_NATIVE_IO_LIBRARIES}")
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${KALDI_NATIVE_IO_CXX_FLAGS}")
+message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
\ No newline at end of file
diff --git a/sherpa-onnx/csrc/CMakeLists.txt b/sherpa-onnx/csrc/CMakeLists.txt
index 
36f4c57c..31320ada 100644
--- a/sherpa-onnx/csrc/CMakeLists.txt
+++ b/sherpa-onnx/csrc/CMakeLists.txt
@@ -1,2 +1,13 @@
 add_executable(online-fbank-test online-fbank-test.cc)
 target_link_libraries(online-fbank-test kaldi-native-fbank-core)
+
+include_directories(
+  ${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/session/
+  ${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/providers/tensorrt/
+)
+
+include_directories(
+  ${KALDINATIVEIO}
+)
+add_executable(capi_test main.cpp)
+target_link_libraries(capi_test onnxruntime kaldi-native-fbank-core kaldi_native_io_core)
diff --git a/sherpa-onnx/csrc/fbank_features.h b/sherpa-onnx/csrc/fbank_features.h
new file mode 100644
index 00000000..d0caab06
--- /dev/null
+++ b/sherpa-onnx/csrc/fbank_features.h
@@ -0,0 +1,57 @@
+#include <iostream>
+
+#include "kaldi_native_io/csrc/kaldi-io.h"
+#include "kaldi_native_io/csrc/wave-reader.h"
+#include "kaldi-native-fbank/csrc/online-feature.h"
+
+
+// Read a wave file via kaldi_native_io; returns the raw sample matrix
+// (rows = channels, cols = samples). Exits the process on read failure.
+kaldiio::Matrix<float> readWav(std::string filename, bool log = false){
+    if (log)
+        std::cout << "reading " << filename << std::endl;
+
+    bool binary = true;
+    kaldiio::Input ki(filename, &binary);
+    kaldiio::WaveHolder wh;
+
+    if (!wh.Read(ki.Stream())) {
+        std::cerr << "Failed to read " << filename;
+        exit(EXIT_FAILURE);
+    }
+
+    auto &wave_data = wh.Value();
+    auto &d = wave_data.Data();
+
+    if (log)
+        std::cout << "wav shape: " << "(" << d.NumRows() << "," << d.NumCols() << ")" << std::endl;
+
+    return d;
+}
+
+
+// Feed channel 0 of `samples` (16-bit range, normalized to [-1,1)) into the
+// online fbank extractor one sample at a time and return all ready frames
+// flattened row-major as (num_frames * num_bins) floats.
+std::vector<float> ComputeFeatures(knf::OnlineFbank &fbank, knf::FbankOptions opts, kaldiio::Matrix<float> samples, bool log = false){
+    int numSamples = samples.NumCols();
+
+    for (int i = 0; i < numSamples; i++)
+    {
+        float currentSample = samples.Row(0).Data()[i] / 32768;
+        fbank.AcceptWaveform(opts.frame_opts.samp_freq, &currentSample, 1);
+    }
+
+    std::vector<float> features;
+    int32_t num_frames = fbank.NumFramesReady();
+    for (int32_t i = 0; i != num_frames; ++i) {
+        const float *frame = fbank.GetFrame(i);
+        for (int32_t k = 0; k != opts.mel_opts.num_bins; ++k) {
+            features.push_back(frame[k]);
+        }
+    }
+    if (log){
+        std::cout << "done feature extraction" << std::endl;
+        std::cout << "extracted fbank shape " << "(" << num_frames << "," << opts.mel_opts.num_bins << ")" << std::endl;
+
+        for (int i=0; i< 20; i++)
+            std::cout << features.at(i) << std::endl;
+    }
+
+    return features;
+}
\ No newline at end of file
diff --git a/sherpa-onnx/csrc/main.cpp b/sherpa-onnx/csrc/main.cpp
new file mode 100644
index 00000000..db868e16
--- /dev/null
+++ b/sherpa-onnx/csrc/main.cpp
@@ -0,0 +1,97 @@
+#include <chrono>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "fbank_features.h"
+#include "rnnt_beam_search.h"
+
+#include "kaldi-native-fbank/csrc/online-feature.h"
+
+
+int main(int argc, char* argv[]) {
+    char* filename = argv[1];
+    std::string search_method = argv[2];
+    int num_active_paths = atoi(argv[3]);
+
+    // General parameters
+    int numberOfThreads = 16;
+
+    // Initialize fbanks
+    knf::FbankOptions opts;
+    opts.frame_opts.dither = 0;
+    opts.frame_opts.samp_freq = 16000;
+    opts.frame_opts.frame_shift_ms = 10.0f;
+    opts.frame_opts.frame_length_ms = 25.0f;
+    opts.mel_opts.num_bins = 80;
+    opts.frame_opts.window_type = "povey";
+    opts.frame_opts.snip_edges = false;
+    knf::OnlineFbank fbank(opts);
+
+    // set session opts
+    // https://onnxruntime.ai/docs/performance/tune-performance.html
+    session_options.SetIntraOpNumThreads(numberOfThreads);
+    session_options.SetInterOpNumThreads(numberOfThreads);
+    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
+    session_options.SetLogSeverityLevel(4);
+    session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
+
+    api.CreateTensorRTProviderOptions(&tensorrt_options);
+    std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)> rel_trt_options(tensorrt_options, api.ReleaseTensorRTProviderOptions);
+    api.SessionOptionsAppendExecutionProvider_TensorRT_V2(static_cast<OrtSessionOptions*>(session_options), rel_trt_options.get());
+
+    // Define model
+    auto model = get_model(
"/mnt/local4/sr/k2_sherpa/models/exp_en2/encoder_simp.onnx", + "/mnt/local4/sr/k2_sherpa/models/exp_en2/decoder_simp.onnx", + "/mnt/local4/sr/k2_sherpa/models/exp_en2/joiner_simp.onnx", + "/mnt/local4/sr/k2_sherpa/models/exp_en2/joiner_encoder_proj_simp.onnx", + "/mnt/local4/sr/k2_sherpa/models/exp_en2/joiner_decoder_proj_simp.onnx", + "/mnt/local4/sr/k2_sherpa/models/exp_en2/enUS_tokens.txt" + ); + + std::vector filename_list { + "/mnt/local4/sr/k2_sherpa/test_wavs/cnn_15sec.wav", + //"/mnt/local4/sr/k2_sherpa/test_wavs/1089-134686-0001.wav" + }; + + for (auto filename : filename_list){ + std::cout << filename << std::endl; + auto samples = readWav(filename, true); + int numSamples = samples.NumCols(); + + auto features = ComputeFeatures(fbank, opts, samples); + + auto tic = std::chrono::high_resolution_clock::now(); + + // # === Encoder Out === # + int num_frames = features.size() / opts.mel_opts.num_bins; + auto encoder_out = model.encoder_forward(features, + std::vector {num_frames}, + std::vector {1, num_frames, 80}, + std::vector {1}, + memory_info); + + // # === Search === # + std::vector> hyps; + if (search_method == "greedy") + hyps = GreedySearch(&model, &encoder_out); + else{ + std::cout << "wrong search method!" 
<< std::endl; + exit(0); + } + auto results = hyps2result(model.tokens_map, hyps); + + // # === Print Elapsed Time === # + auto elapsed = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - tic); + std::cout << "Elapsed: " << float(elapsed.count()) / 1000 << " seconds" << std::endl; + std::cout << "rtf: " << float(elapsed.count()) / 1000 / (numSamples / 16000) << std::endl; + + print_hyps(hyps); + std::cout << results[0] << std::endl; + } + + return 0; +} diff --git a/sherpa-onnx/csrc/models.h b/sherpa-onnx/csrc/models.h new file mode 100644 index 00000000..b18d168a --- /dev/null +++ b/sherpa-onnx/csrc/models.h @@ -0,0 +1,253 @@ +#include +#include +#include +#include +#include + +#include "utils_onnx.h" + + +struct Model +{ + public: + const char* encoder_path; + const char* decoder_path; + const char* joiner_path; + const char* joiner_encoder_proj_path; + const char* joiner_decoder_proj_path; + const char* tokens_path; + + Ort::Session encoder = load_model(encoder_path); + Ort::Session decoder = load_model(decoder_path); + Ort::Session joiner = load_model(joiner_path); + Ort::Session joiner_encoder_proj = load_model(joiner_encoder_proj_path); + Ort::Session joiner_decoder_proj = load_model(joiner_decoder_proj_path); + std::map tokens_map = get_token_map(tokens_path); + + int32_t blank_id; + int32_t unk_id; + int32_t context_size; + + std::vector encoder_forward(std::vector in_vector, + std::vector in_vector_length, + std::vector feature_dims, + std::vector feature_length_dims, + Ort::MemoryInfo &memory_info){ + std::vector encoder_inputTensors; + encoder_inputTensors.push_back(Ort::Value::CreateTensor(memory_info, in_vector.data(), in_vector.size(), feature_dims.data(), feature_dims.size())); + encoder_inputTensors.push_back(Ort::Value::CreateTensor(memory_info, in_vector_length.data(), in_vector_length.size(), feature_length_dims.data(), feature_length_dims.size())); + + std::vector encoder_inputNames = {encoder.GetInputName(0, allocator), 
encoder.GetInputName(1, allocator)};
+            std::vector<const char*> encoder_outputNames = {encoder.GetOutputName(0, allocator)};
+
+            auto out = encoder.Run(Ort::RunOptions{nullptr},
+                                   encoder_inputNames.data(),
+                                   encoder_inputTensors.data(),
+                                   encoder_inputTensors.size(),
+                                   encoder_outputNames.data(),
+                                   encoder_outputNames.size());
+            return out;
+        }
+
+        std::vector<Ort::Value> decoder_forward(std::vector<int64_t> in_vector,
+                                                std::vector<int64_t> dims,
+                                                Ort::MemoryInfo &memory_info){
+            std::vector<Ort::Value> inputTensors;
+            inputTensors.push_back(Ort::Value::CreateTensor<int64_t>(memory_info, in_vector.data(), in_vector.size(), dims.data(), dims.size()));
+
+            std::vector<const char*> inputNames {decoder.GetInputName(0, allocator)};
+            std::vector<const char*> outputNames {decoder.GetOutputName(0, allocator)};
+
+            auto out = decoder.Run(Ort::RunOptions{nullptr},
+                                   inputNames.data(),
+                                   inputTensors.data(),
+                                   inputTensors.size(),
+                                   outputNames.data(),
+                                   outputNames.size());
+
+            return out;
+        }
+
+        std::vector<Ort::Value> joiner_forward(std::vector<float> projected_encoder_out,
+                                               std::vector<float> decoder_out,
+                                               std::vector<int64_t> projected_encoder_out_dims,
+                                               std::vector<int64_t> decoder_out_dims,
+                                               Ort::MemoryInfo &memory_info){
+            std::vector<Ort::Value> inputTensors;
+            inputTensors.push_back(Ort::Value::CreateTensor<float>(memory_info, projected_encoder_out.data(), projected_encoder_out.size(), projected_encoder_out_dims.data(), projected_encoder_out_dims.size()));
+            inputTensors.push_back(Ort::Value::CreateTensor<float>(memory_info, decoder_out.data(), decoder_out.size(), decoder_out_dims.data(), decoder_out_dims.size()));
+            std::vector<const char*> inputNames = {joiner.GetInputName(0, allocator), joiner.GetInputName(1, allocator)};
+            std::vector<const char*> outputNames = {joiner.GetOutputName(0, allocator)};
+
+            auto out = joiner.Run(Ort::RunOptions{nullptr},
+                                  inputNames.data(),
+                                  inputTensors.data(),
+                                  inputTensors.size(),
+                                  outputNames.data(),
+                                  outputNames.size());
+
+            return out;
+        }
+
+        std::vector<Ort::Value> joiner_encoder_proj_forward(std::vector<float> in_vector,
+                                                            std::vector<int64_t> dims,
+                                                            Ort::MemoryInfo &memory_info){
+            std::vector<Ort::Value> inputTensors;
+            inputTensors.push_back(Ort::Value::CreateTensor<float>(memory_info, in_vector.data(), in_vector.size(), dims.data(), dims.size()));
+
+            std::vector<const char*> inputNames {joiner_encoder_proj.GetInputName(0, allocator)};
+            std::vector<const char*> outputNames {joiner_encoder_proj.GetOutputName(0, allocator)};
+
+            auto out = joiner_encoder_proj.Run(Ort::RunOptions{nullptr},
+                                               inputNames.data(),
+                                               inputTensors.data(),
+                                               inputTensors.size(),
+                                               outputNames.data(),
+                                               outputNames.size());
+
+            return out;
+        }
+
+        std::vector<Ort::Value> joiner_decoder_proj_forward(std::vector<float> in_vector,
+                                                            std::vector<int64_t> dims,
+                                                            Ort::MemoryInfo &memory_info){
+            std::vector<Ort::Value> inputTensors;
+            inputTensors.push_back(Ort::Value::CreateTensor<float>(memory_info, in_vector.data(), in_vector.size(), dims.data(), dims.size()));
+
+            std::vector<const char*> inputNames {joiner_decoder_proj.GetInputName(0, allocator)};
+            std::vector<const char*> outputNames {joiner_decoder_proj.GetOutputName(0, allocator)};
+
+            auto out = joiner_decoder_proj.Run(Ort::RunOptions{nullptr},
+                                               inputNames.data(),
+                                               inputTensors.data(),
+                                               inputTensors.size(),
+                                               outputNames.data(),
+                                               outputNames.size());
+
+            return out;
+        }
+
+        Ort::Session load_model(const char* path){
+            struct stat buffer;
+            if (stat(path, &buffer) != 0){
+                std::cout << "File does not exist!: " << path << std::endl;
+                exit(0);
+            }
+            std::cout << "loading " << path << std::endl;
+            Ort::Session onnx_model(env, path, session_options);
+            return onnx_model;
+        }
+
+        void extract_constant_lm_parameters(){
+            /*
+            all_in_one contains these params. We should trace all_in_one and find 'constants_lm' nodes to extract these params
+            For now, these params are set staticaly.
+
+            in: Ort::Session &all_in_one
+            out: {blank_id, unk_id, context_size}
+            should return std::vector<int64_t>
+            */
+            blank_id = 0;
+            unk_id = 0;
+            context_size = 2;
+        }
+
+        std::map<int, std::string> get_token_map(const char* token_path){
+            std::ifstream inFile;
+            inFile.open(token_path);
+            if (inFile.fail())
+                std::cerr << "Could not find token file" << std::endl;
+
+            std::map<int, std::string> token_map;
+
+            std::string line;
+            while (std::getline(inFile, line))
+            {
+                int id;
+                std::string token;
+
+                std::istringstream iss(line);
+                iss >> token;
+                iss >> id;
+
+                token_map[id] = token;
+            }
+
+            return token_map;
+        }
+
+};
+
+
+Model get_model(std::string exp_path, char* tokens_path){
+    Model model{
+        (exp_path + "/encoder_simp.onnx").c_str(),
+        (exp_path + "/decoder_simp.onnx").c_str(),
+        (exp_path + "/joiner_simp.onnx").c_str(),
+        (exp_path + "/joiner_encoder_proj_simp.onnx").c_str(),
+        (exp_path + "/joiner_decoder_proj_simp.onnx").c_str(),
+        tokens_path,
+    };
+    model.extract_constant_lm_parameters();
+
+    return model;
+}
+
+Model get_model(char* encoder_path,
+                char* decoder_path,
+                char* joiner_path,
+                char* joiner_encoder_proj_path,
+                char* joiner_decoder_proj_path,
+                char* tokens_path){
+    Model model{
+        encoder_path,
+        decoder_path,
+        joiner_path,
+        joiner_encoder_proj_path,
+        joiner_decoder_proj_path,
+        tokens_path,
+    };
+    model.extract_constant_lm_parameters();
+
+    return model;
+}
+
+
+void doWarmup(Model *model, int numWarmup = 5){
+    std::cout << "Warmup is started" << std::endl;
+
+    std::vector<float> encoder_warmup_sample (500 * 80, 1.0);
+    for (int i=0; i<numWarmup; i++)
+        model->encoder_forward(encoder_warmup_sample,
+                               std::vector<int64_t> {500},
+                               std::vector<int64_t> {1, 500, 80},
+                               std::vector<int64_t> {1},
+                               memory_info);
+
+    std::vector<int64_t> decoder_warmup_sample {1, 1};
+    for (int i=0; i<numWarmup; i++)
+        model->decoder_forward(decoder_warmup_sample,
+                               std::vector<int64_t> {1, 2},
+                               memory_info);
+
+    std::vector<float> joiner_warmup_sample1 (512, 1.0);
+    std::vector<float> joiner_warmup_sample2 (512, 1.0);
+    for (int i=0; i<numWarmup; i++)
+        model->joiner_forward(joiner_warmup_sample1,
+                              joiner_warmup_sample2,
+                              std::vector<int64_t> {1,
1, 1, 512},
+                              std::vector<int64_t> {1, 1, 1, 512},
+                              memory_info);
+
+    std::vector<float> joiner_encoder_proj_warmup_sample (100 * 512, 1.0);
+    for (int i=0; i<numWarmup; i++)
+        model->joiner_encoder_proj_forward(joiner_encoder_proj_warmup_sample,
+                                           std::vector<int64_t> {100, 512},
+                                           memory_info);
+
+    std::vector<float> joiner_decoder_proj_warmup_sample (512, 1.0);
+    for (int i=0; i<numWarmup; i++)
+        model->joiner_decoder_proj_forward(joiner_decoder_proj_warmup_sample,
+                                           std::vector<int64_t> {1, 512},
+                                           memory_info);
+    std::cout << "Warmup is done" << std::endl;
+}
diff --git a/sherpa-onnx/csrc/rnnt_beam_search.h b/sherpa-onnx/csrc/rnnt_beam_search.h
new file mode 100644
index 00000000..c027680f
--- /dev/null
+++ b/sherpa-onnx/csrc/rnnt_beam_search.h
@@ -0,0 +1,121 @@
+#include <algorithm>
+#include <cstdint>
+#include <iostream>
+#include <vector>
+
+#include "models.h"
+#include "utils.h"
+
+
+// Copy the half-open float range [start, length) out of a tensor's buffer.
+// NOTE(review): the second argument is used as an END index by callers, not a
+// length — the parameter name is misleading but the behavior is kept as-is.
+std::vector<float> getEncoderCol(Ort::Value &tensor, int start, int length){
+    float* floatarr = tensor.GetTensorMutableData<float>();
+    std::vector<float> vector {floatarr + start, floatarr + length};
+    return vector;
+}
+
+
+/**
+ * Assume batch size = 1
+ */
+std::vector<int64_t> BuildDecoderInput(const std::vector<std::vector<int64_t>> &hyps,
+                                       std::vector<int64_t> &decoder_input) {
+
+    int32_t context_size = decoder_input.size();
+    int32_t hyps_length = hyps[0].size();
+    for (int i=0; i < context_size; i++)
+        decoder_input[i] = hyps[0][hyps_length-context_size+i];
+
+    return decoder_input;
+}
+
+
+std::vector<std::vector<int64_t>> GreedySearch(
+    Model *model,  // NOLINT
+    std::vector<Ort::Value> *encoder_out){
+    Ort::Value &encoder_out_tensor = encoder_out->at(0);
+    int encoder_out_dim1 = encoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[1];
+    int encoder_out_dim2 = encoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[2];
+    auto encoder_out_vector = ortVal2Vector(encoder_out_tensor, encoder_out_dim1 * encoder_out_dim2);
+
+    // # === Greedy Search === #
+    int32_t batch_size = 1;
+    std::vector<int64_t> blanks(model->context_size, model->blank_id);
+    std::vector<std::vector<int64_t>> hyps(batch_size, blanks);
+    std::vector<int64_t> decoder_input(model->context_size, model->blank_id);
+
+    auto decoder_out = model->decoder_forward(decoder_input,
+                                              std::vector<int64_t> {batch_size, model->context_size},
+                                              memory_info);
+
+    Ort::Value &decoder_out_tensor = decoder_out[0];
+    int decoder_out_dim = decoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[2];
+    auto decoder_out_vector = ortVal2Vector(decoder_out_tensor, decoder_out_dim);
+
+    decoder_out = model->joiner_decoder_proj_forward(decoder_out_vector,
+                                                     std::vector<int64_t> {1, decoder_out_dim},
+                                                     memory_info);
+    Ort::Value &projected_decoder_out_tensor = decoder_out[0];
+    auto projected_decoder_out_dim = projected_decoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[1];
+    auto projected_decoder_out_vector = ortVal2Vector(projected_decoder_out_tensor, projected_decoder_out_dim);
+
+    auto projected_encoder_out = model->joiner_encoder_proj_forward(encoder_out_vector,
+                                                                    std::vector<int64_t> {encoder_out_dim1, encoder_out_dim2},
+                                                                    memory_info);
+
+    Ort::Value &projected_encoder_out_tensor = projected_encoder_out[0];
+    int projected_encoder_out_dim1 = projected_encoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[0];
+    int projected_encoder_out_dim2 = projected_encoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[1];
+    auto projected_encoder_out_vector = ortVal2Vector(projected_encoder_out_tensor, projected_encoder_out_dim1 * projected_encoder_out_dim2);
+
+    int32_t offset = 0;
+    for (int i=0; i< projected_encoder_out_dim1; i++){
+        int32_t cur_batch_size = 1;
+        int32_t start = offset;
+        int32_t end = start + cur_batch_size;
+        offset = end;
+
+        auto cur_encoder_out = getEncoderCol(projected_encoder_out_tensor, start * projected_encoder_out_dim2, end * projected_encoder_out_dim2);
+
+        auto logits = model->joiner_forward(cur_encoder_out,
+                                            projected_decoder_out_vector,
+                                            std::vector<int64_t> {1, 1, 1, projected_encoder_out_dim2},
+                                            std::vector<int64_t> {1, 1, 1, projected_decoder_out_dim},
+                                            memory_info);
+
+        Ort::Value &logits_tensor = logits[0];
+        int logits_dim = logits_tensor.GetTensorTypeAndShapeInfo().GetShape()[3];
+        auto logits_vector = ortVal2Vector(logits_tensor, logits_dim);
+
+        int max_indices = static_cast<int>(std::distance(logits_vector.begin(), std::max_element(logits_vector.begin(), logits_vector.end())));
+        bool emitted = false;
+
+        for (int32_t k = 0; k != cur_batch_size; ++k) {
+            auto index = max_indices;
+            if (index != model->blank_id && index != model->unk_id) {
+                emitted = true;
+                hyps[k].push_back(index);
+            }
+        }
+
+        if (emitted) {
+            decoder_input = BuildDecoderInput(hyps, decoder_input);
+
+            decoder_out = model->decoder_forward(decoder_input,
+                                                 std::vector<int64_t> {batch_size, model->context_size},
+                                                 memory_info);
+
+            decoder_out_dim = decoder_out[0].GetTensorTypeAndShapeInfo().GetShape()[2];
+            decoder_out_vector = ortVal2Vector(decoder_out[0], decoder_out_dim);
+
+            decoder_out = model->joiner_decoder_proj_forward(decoder_out_vector,
+                                                             std::vector<int64_t> {1, decoder_out_dim},
+                                                             memory_info);
+
+            projected_decoder_out_dim = decoder_out[0].GetTensorTypeAndShapeInfo().GetShape()[1];
+            projected_decoder_out_vector = ortVal2Vector(decoder_out[0], projected_decoder_out_dim);
+        }
+    }
+
+    return hyps;
+}
+
diff --git a/sherpa-onnx/csrc/utils.h b/sherpa-onnx/csrc/utils.h
new file mode 100644
index 00000000..17fdbbc0
--- /dev/null
+++ b/sherpa-onnx/csrc/utils.h
@@ -0,0 +1,39 @@
+#include <fstream>
+#include <map>
+
+
+void vector2file(std::vector<float> vector, std::string saveFileName){
+    std::ofstream f(saveFileName);
+    for(std::vector<float>::const_iterator i = vector.begin(); i != vector.end(); ++i) {
+        f << *i << '\n';
+    }
+}
+
+
+std::vector<std::string> hyps2result(std::map<int, std::string> token_map, std::vector<std::vector<int64_t>> hyps, int context_size = 2){
+    std::vector<std::string> results;
+
+    for (int k=0; k < hyps.size(); k++){
+        std::string result = token_map[hyps[k][context_size]];
+
+        for (int i=context_size+1; i < hyps[k].size(); i++){
+            std::string token = token_map[hyps[k][i]];
+
+            // TODO: recognising '_' is not working
+            if (token.at(0) == '_')
+                result += " " + token;
+            else
+                result += token;
+        }
+        results.push_back(result);
+    }
+    return results;
+}
+
+
+void 
print_hyps(std::vector<std::vector<int64_t>> hyps, int context_size = 2){
+    std::cout << "Hyps:" << std::endl;
+    for (int i=context_size; i<hyps[0].size(); i++)
+        std::cout << hyps[0][i] << " ";
+    std::cout << std::endl;
+}
\ No newline at end of file
diff --git a/sherpa-onnx/csrc/utils_onnx.h b/sherpa-onnx/csrc/utils_onnx.h
new file mode 100644
index 00000000..aaaaaaaa
--- /dev/null
+++ b/sherpa-onnx/csrc/utils_onnx.h
@@ -0,0 +1,86 @@
+#include <iostream>
+#include <onnxruntime_cxx_api.h>
+
+Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
+const auto& api = Ort::GetApi();
+OrtTensorRTProviderOptionsV2* tensorrt_options;
+Ort::SessionOptions session_options;
+Ort::AllocatorWithDefaultOptions allocator;
+auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+
+
+std::vector<float> ortVal2Vector(Ort::Value &tensor, int tensor_length){
+    /**
+     * convert ort tensor to vector
+     */
+    float* floatarr = tensor.GetTensorMutableData<float>();
+    std::vector<float> vector {floatarr, floatarr + tensor_length};
+    return vector;
+}
+
+
+void print_onnx_forward_output(std::vector<Ort::Value> &output_tensors, int num){
+    float* floatarr = output_tensors.front().GetTensorMutableData<float>();
+    for (int i = 0; i < num; i++)
+        printf("[%d] = %f\n", i, floatarr[i]);
+}
+
+
+void print_shape_of_ort_val(std::vector<Ort::Value> &tensor){
+    auto out_shape = tensor.front().GetTensorTypeAndShapeInfo().GetShape();
+    auto out_size = out_shape.size();
+    std::cout << "(";
+    for (int i=0; i<out_size; i++)
+        std::cout << out_shape[i] << ",";
+    std::cout << ")" << std::endl;
+}
+
+
+void print_model_info(Ort::Session &session){
+    size_t num_input_nodes = session.GetInputCount();
+    std::vector<char*> input_node_names(num_input_nodes);
+    std::vector<int64_t> input_node_dims;
+
+    printf("Number of inputs = %zu\n", num_input_nodes);
+
+    char* output_name = session.GetOutputName(0, allocator);
+    printf("output name: %s\n", output_name);
+
+    // iterate over all input nodes
+    for (int i = 0; i < num_input_nodes; i++) {
+        // print input node names
+        char* input_name = session.GetInputName(i, allocator);
+        printf("Input %d : name=%s\n", i, input_name);
+        input_node_names[i] = input_name;
+
+        // print input node types
+        Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
+        auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
+
+        ONNXTensorElementDataType type = tensor_info.GetElementType();
+        printf("Input %d : type=%d\n", i, type);
+
+        // print input shapes/dims
+        input_node_dims = tensor_info.GetShape();
+        printf("Input %d : num_dims=%zu\n", i, input_node_dims.size());
+        for (size_t j = 0; j < input_node_dims.size(); j++)
+            printf("Input %d : dim %zu=%jd\n", i, j, input_node_dims[j]);
+    }
+    std::cout << "=======================================" << std::endl;
+}