code refactoring and add CI (#11)

2022-10-12 11:27:05 +08:00
parent d9b84d5526
commit 77ccd625b8
9 changed files with 267 additions and 121 deletions
--- a/.github/workflows/test-linux.yaml
+++ b/.github/workflows/test-linux.yaml
@@ -0,0 +1,85 @@
 # Linux CI for sherpa-onnx: fetch a pretrained English model, configure and
 # build the project with CMake, then run the resulting binary on test waves.
 name: test-linux
 on:
  # Run only when this workflow, the CMake build files, or files directly
  # under sherpa-onnx/csrc change on (or are proposed for) master.
  push:
    branches: [master]
    paths:
      - '.github/workflows/test-linux.yaml'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
  pull_request:
    branches: [master]
    paths:
      - '.github/workflows/test-linux.yaml'
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
 # The job never writes to the repository, so a read-only token is enough.
 permissions:
  contents: read
 # A newer run for the same ref cancels the run that is still in progress.
 concurrency:
  group: test-linux-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  test-linux:
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
    runs-on: ${{ matrix.os }}
    steps:
      # fetch-depth 0 asks actions/checkout for the full history.
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      # Git LFS is enabled before cloning the model repository.
      - name: Download pretrained model and test-data (English)
        shell: bash
        run: |
          git lfs install
          git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
      # Out-of-source Release configuration in ./build.
      - name: Configure Cmake
        shell: bash
        run: |
          mkdir build
          cd build
          cmake -D CMAKE_BUILD_TYPE=Release ..
      # VERBOSE=1 prints the full compiler command lines in the CI log.
      - name: Build sherpa-onnx for ubuntu
        run: |
          cd build
          make VERBOSE=1 -j3
      # Decode every bundled test wave with the freshly built binary.
      # Argument order expected by ./build/bin/sherpa-onnx, as used below:
      #   encoder, decoder, joiner, joiner_encoder_proj, joiner_decoder_proj,
      #   tokens.txt, wave file.
      - name: Run tests for ubuntu (English)
        shell: bash
        run: |
          # A loop keeps each argument list attached to an explicit
          # sherpa-onnx invocation; previously the third argument list had no
          # command in front of it, so the shell tried to execute encoder.onnx.
          repo=./icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
          for wav in 1089-134686-0001 1221-135766-0001 1221-135766-0002; do
            time ./build/bin/sherpa-onnx \
              "$repo/exp/onnx/encoder.onnx" \
              "$repo/exp/onnx/decoder.onnx" \
              "$repo/exp/onnx/joiner.onnx" \
              "$repo/exp/onnx/joiner_encoder_proj.onnx" \
              "$repo/exp/onnx/joiner_decoder_proj.onnx" \
              "$repo/data/lang_bpe_500/tokens.txt" \
              "$repo/test_wavs/$wav.wav"
          done
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
 build
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,7 +38,8 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
-include(cmake/kaldi_native_io.cmake)
+include(kaldi_native_io)
-include(cmake/kaldi-native-fbank.cmake)
+include(kaldi-native-fbank)
 include(onnxruntime)
 add_subdirectory(sherpa-onnx)
--- a/cmake/kaldi_native_io.cmake
+++ b/cmake/kaldi_native_io.cmake
@@ -1,27 +1,39 @@
-if(DEFINED ENV{KALDI_NATIVE_IO_INSTALL_PREFIX})
+function(download_kaldi_native_io)
-  message(STATUS "Using environment variable KALDI_NATIVE_IO_INSTALL_PREFIX: $ENV{KALDI_NATIVE_IO_INSTALL_PREFIX}")
+  if(CMAKE_VERSION VERSION_LESS 3.11)
-  set(KALDI_NATIVE_IO_CMAKE_PREFIX_PATH $ENV{KALDI_NATIVE_IO_INSTALL_PREFIX})
+    # FetchContent is available since 3.11,
-else()
+    # we've copied it to ${CMAKE_SOURCE_DIR}/cmake/Modules
-  # PYTHON_EXECUTABLE is set by cmake/pybind11.cmake
+    # so that it can be used in lower CMake versions.
-  message(STATUS "Python executable: ${PYTHON_EXECUTABLE}")
+    message(STATUS "Use FetchContent provided by sherpa-onnx")
-
+    list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
  execute_process(
    COMMAND "${PYTHON_EXECUTABLE}" -c "import kaldi_native_io; print(kaldi_native_io.cmake_prefix_path)"
    OUTPUT_STRIP_TRAILING_WHITESPACE
    OUTPUT_VARIABLE KALDI_NATIVE_IO_CMAKE_PREFIX_PATH
  )
  endif()
-message(STATUS "KALDI_NATIVE_IO_CMAKE_PREFIX_PATH: ${KALDI_NATIVE_IO_CMAKE_PREFIX_PATH}")
+  include(FetchContent)
 list(APPEND CMAKE_PREFIX_PATH "${KALDI_NATIVE_IO_CMAKE_PREFIX_PATH}")
-find_package(kaldi_native_io REQUIRED)
+  set(kaldi_native_io_URL  "https://github.com/csukuangfj/kaldi_native_io/archive/refs/tags/v1.15.1.tar.gz")
  set(kaldi_native_io_HASH "SHA256=97377e1d61e99d8fc1d6037a418d3037522dfa46337e06162e24b1d97f3d70a6")
-message(STATUS "KALDI_NATIVE_IO_FOUND: ${KALDI_NATIVE_IO_FOUND}")
+  set(KALDI_NATIVE_IO_BUILD_TESTS OFF CACHE BOOL "" FORCE)
-message(STATUS "KALDI_NATIVE_IO_VERSION: ${KALDI_NATIVE_IO_VERSION}")
+  set(KALDI_NATIVE_IO_BUILD_PYTHON OFF CACHE BOOL "" FORCE)
 message(STATUS "KALDI_NATIVE_IO_INCLUDE_DIRS: ${KALDI_NATIVE_IO_INCLUDE_DIRS}")
 message(STATUS "KALDI_NATIVE_IO_CXX_FLAGS: ${KALDI_NATIVE_IO_CXX_FLAGS}")
 message(STATUS "KALDI_NATIVE_IO_LIBRARIES: ${KALDI_NATIVE_IO_LIBRARIES}")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${KALDI_NATIVE_IO_CXX_FLAGS}")
+  FetchContent_Declare(kaldi_native_io
-message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
+    URL               ${kaldi_native_io_URL}
    URL_HASH          ${kaldi_native_io_HASH}
  )
  FetchContent_GetProperties(kaldi_native_io)
  if(NOT kaldi_native_io_POPULATED)
    message(STATUS "Downloading kaldi_native_io ${kaldi_native_io_URL}")
    FetchContent_Populate(kaldi_native_io)
  endif()
  message(STATUS "kaldi_native_io is downloaded to ${kaldi_native_io_SOURCE_DIR}")
  message(STATUS "kaldi_native_io's binary dir is ${kaldi_native_io_BINARY_DIR}")
  add_subdirectory(${kaldi_native_io_SOURCE_DIR} ${kaldi_native_io_BINARY_DIR} EXCLUDE_FROM_ALL)
  target_include_directories(kaldi_native_io_core
    PUBLIC
      ${kaldi_native_io_SOURCE_DIR}/
  )
 endfunction()
 download_kaldi_native_io()
--- a/cmake/onnxruntime.cmake
+++ b/cmake/onnxruntime.cmake
@@ -0,0 +1,55 @@
 # Download the prebuilt onnxruntime 1.12.1 archive with FetchContent and
 # expose it as the imported shared-library target `onnxruntime`, with the
 # archive's include directory attached as an interface include directory.
 #
 # Takes no arguments. Configuration stops with FATAL_ERROR when the platform
 # is not (UNIX AND NOT APPLE) or when the library cannot be located inside
 # the downloaded archive.
 function(download_onnxruntime)
  if(CMAKE_VERSION VERSION_LESS 3.11)
    # FetchContent is available since 3.11,
    # we've copied it to ${CMAKE_SOURCE_DIR}/cmake/Modules
    # so that it can be used in lower CMake versions.
    message(STATUS "Use FetchContent provided by sherpa-onnx")
    list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
  endif()
  include(FetchContent)
  if(UNIX AND NOT APPLE)
    # If you don't have access to the internet, you can first download
    # onnxruntime to some directory and then point onnxruntime_URL at that
    # copy instead, e.g. "file:///path/to/onnxruntime-linux-x64-1.12.1.tgz".
    set(onnxruntime_URL  "https://github.com/microsoft/onnxruntime/releases/download/v1.12.1/onnxruntime-linux-x64-1.12.1.tgz")
    set(onnxruntime_HASH "SHA256=8f6eb9e2da9cf74e7905bf3fc687ef52e34cc566af7af2f92dafe5a5d106aa3d")
    # After downloading, it contains:
    #  ./lib/libonnxruntime.so.1.12.1
    #  ./lib/libonnxruntime.so, which is a symlink to lib/libonnxruntime.so.1.12.1
    #
    # ./include
    #    It contains all the needed header files
  else()
    message(FATAL_ERROR "Only support Linux at present. Will support other OSes later")
  endif()
  FetchContent_Declare(onnxruntime
    URL               ${onnxruntime_URL}
    URL_HASH          ${onnxruntime_HASH}
  )
  FetchContent_GetProperties(onnxruntime)
  if(NOT onnxruntime_POPULATED)
    message(STATUS "Downloading onnxruntime ${onnxruntime_URL}")
    FetchContent_Populate(onnxruntime)
  endif()
  message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")
  # Search only the downloaded archive so that the library always matches the
  # headers in ${onnxruntime_SOURCE_DIR}/include rather than a system copy.
  find_library(location_onnxruntime onnxruntime
    PATHS
    "${onnxruntime_SOURCE_DIR}/lib"
    NO_DEFAULT_PATH
  )
  if(NOT location_onnxruntime)
    message(FATAL_ERROR "Could not find onnxruntime in ${onnxruntime_SOURCE_DIR}/lib")
  endif()
  message(STATUS "location_onnxruntime: ${location_onnxruntime}")
  add_library(onnxruntime SHARED IMPORTED)
  set_target_properties(onnxruntime PROPERTIES
    IMPORTED_LOCATION "${location_onnxruntime}"
    INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include"
  )
 endfunction()
 download_onnxruntime()
--- a/sherpa-onnx/csrc/CMakeLists.txt
+++ b/sherpa-onnx/csrc/CMakeLists.txt
@@ -1,13 +1,8 @@
-add_executable(online-fbank-test online-fbank-test.cc)
+include_directories(${CMAKE_SOURCE_DIR})
 target_link_libraries(online-fbank-test kaldi-native-fbank-core)
 include_directories( 
    ${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/session/
    ${ONNXRUNTIME_ROOTDIR}/include/onnxruntime/core/providers/tensorrt/
 )
 include_directories(
    ${KALDINATIVEIO}
 )
 add_executable(sherpa-onnx main.cpp)
-target_link_libraries(sherpa-onnx onnxruntime kaldi-native-fbank-core kaldi_native_io_core)
+
 target_link_libraries(sherpa-onnx
  onnxruntime
  kaldi-native-fbank-core
  kaldi_native_io_core
 )
--- a/sherpa-onnx/csrc/main.cpp
+++ b/sherpa-onnx/csrc/main.cpp
@@ -1,16 +1,15 @@
 #include <vector>
 #include <iostream>
 #include <algorithm>
 #include <time.h>
 #include <math.h>
 #include <fstream>
 #include <iostream>
 #include <math.h>
 #include <time.h>
 #include <vector>
-#include "fbank_features.h"
+#include "sherpa-onnx/csrc/fbank_features.h"
-#include "rnnt_beam_search.h"
+#include "sherpa-onnx/csrc/rnnt_beam_search.h"
 #include "kaldi-native-fbank/csrc/online-feature.h"
 int main(int argc, char *argv[]) {
  char *encoder_path = argv[1];
  char *decoder_path = argv[2];
@@ -39,27 +38,24 @@ int main(int argc, char* argv[]) {
  // https://onnxruntime.ai/docs/performance/tune-performance.html
  session_options.SetIntraOpNumThreads(numberOfThreads);
  session_options.SetInterOpNumThreads(numberOfThreads);
-    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
+  session_options.SetGraphOptimizationLevel(
      GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
  session_options.SetLogSeverityLevel(4);
  session_options.SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL);
  api.CreateTensorRTProviderOptions(&tensorrt_options);
-    std::unique_ptr<OrtTensorRTProviderOptionsV2, decltype(api.ReleaseTensorRTProviderOptions)> rel_trt_options(tensorrt_options, api.ReleaseTensorRTProviderOptions);
+  std::unique_ptr<OrtTensorRTProviderOptionsV2,
-    api.SessionOptionsAppendExecutionProvider_TensorRT_V2(static_cast<OrtSessionOptions*>(session_options), rel_trt_options.get());
+                  decltype(api.ReleaseTensorRTProviderOptions)>
      rel_trt_options(tensorrt_options, api.ReleaseTensorRTProviderOptions);
  api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
      static_cast<OrtSessionOptions *>(session_options), rel_trt_options.get());
  // Define model
-    auto model = get_model(
+  auto model =
-        encoder_path,
+      get_model(encoder_path, decoder_path, joiner_path,
-        decoder_path,
+                joiner_encoder_proj_path, joiner_decoder_proj_path, token_path);
        joiner_path,
        joiner_encoder_proj_path,
        joiner_decoder_proj_path,
        token_path
    );
-    std::vector<std::string> filename_list {
+  std::vector<std::string> filename_list{filename};
        filename
    };
  for (auto filename : filename_list) {
    std::cout << filename << std::endl;
@@ -72,11 +68,10 @@ int main(int argc, char* argv[]) {
    // # === Encoder Out === #
    int num_frames = features.size() / opts.mel_opts.num_bins;
-        auto encoder_out = model.encoder_forward(features,
+    auto encoder_out =
-                                std::vector<int64_t> {num_frames},
+        model.encoder_forward(features, std::vector<int64_t>{num_frames},
                              std::vector<int64_t>{1, num_frames, 80},
-                                std::vector<int64_t> {1},
+                              std::vector<int64_t>{1}, memory_info);
                                memory_info);
    // # === Search === #
    std::vector<std::vector<int32_t>> hyps;
@@ -89,9 +84,12 @@ int main(int argc, char* argv[]) {
    auto results = hyps2result(model.tokens_map, hyps);
    // # === Print Elapsed Time === #
-        auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - tic);
+    auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
-        std::cout << "Elapsed: " << float(elapsed.count()) / 1000 << " seconds" << std::endl;
+        std::chrono::high_resolution_clock::now() - tic);
-        std::cout << "rtf: " << float(elapsed.count()) / 1000 / (numSamples / 16000)  << std::endl;
+    std::cout << "Elapsed: " << float(elapsed.count()) / 1000 << " seconds"
              << std::endl;
    std::cout << "rtf: " << float(elapsed.count()) / 1000 / (numSamples / 16000)
              << std::endl;
    print_hyps(hyps);
    std::cout << results[0] << std::endl;
--- a/sherpa-onnx/csrc/rnnt_beam_search.h
+++ b/sherpa-onnx/csrc/rnnt_beam_search.h
@@ -61,7 +61,6 @@ std::vector<std::vector<int32_t>> GreedySearch(
    auto projected_encoder_out = model->joiner_encoder_proj_forward(encoder_out_vector, 
                                        std::vector<int64_t> {encoder_out_dim1, encoder_out_dim2}, 
                                        memory_info);
    Ort::Value &projected_encoder_out_tensor = projected_encoder_out[0];
    int projected_encoder_out_dim1 = projected_encoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[0];
    int projected_encoder_out_dim2 = projected_encoder_out_tensor.GetTensorTypeAndShapeInfo().GetShape()[1];
@@ -78,12 +77,12 @@ std::vector<std::vector<int32_t>> GreedySearch(
        auto logits = model->joiner_forward(cur_encoder_out,
                                            projected_decoder_out_vector,
-                                            std::vector<int64_t> {1, 1, 1, projected_encoder_out_dim2},
+                                            std::vector<int64_t> {1, projected_encoder_out_dim2},
-                                            std::vector<int64_t> {1, 1, 1, projected_decoder_out_dim},
+                                            std::vector<int64_t> {1, projected_decoder_out_dim},
                                            memory_info);
        Ort::Value &logits_tensor = logits[0];
-        int logits_dim = logits_tensor.GetTensorTypeAndShapeInfo().GetShape()[3];
+        int logits_dim = logits_tensor.GetTensorTypeAndShapeInfo().GetShape()[1];
        auto logits_vector = ortVal2Vector(logits_tensor, logits_dim);
        int max_indices = static_cast<int>(std::distance(logits_vector.begin(), std::max_element(logits_vector.begin(), logits_vector.end())));
--- a/sherpa-onnx/csrc/utils_onnx.h
+++ b/sherpa-onnx/csrc/utils_onnx.h
@@ -1,5 +1,5 @@
 #include <iostream>
-#include <onnxruntime_cxx_api.h>
+#include "onnxruntime_cxx_api.h"
 Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
 const auto& api = Ort::GetApi();