Support building GPU-capable sherpa-onnx on Linux aarch64. (#1500)

Thanks to @Peakyxh for providing pre-built onnxruntime libraries with CUDA support for Linux aarch64. Tested on a Jetson Nano B01.
2024-11-01 11:16:28 +08:00
parent a3c89aa0d8
commit 9ab89c33bc
41 changed files with 537 additions and 291 deletions
--- a/sherpa-onnx/csrc/online-nemo-ctc-model.cc
+++ b/sherpa-onnx/csrc/online-nemo-ctc-model.cc
@@ -102,7 +102,7 @@ class OnlineNeMoCtcModel::Impl {

  int32_t ChunkShift() const { return chunk_shift_; }

-  OrtAllocator *Allocator() const { return allocator_; }
+  OrtAllocator *Allocator() { return allocator_; }

  // Return a vector containing 3 tensors
  // - cache_last_channel
@@ -119,7 +119,7 @@ class OnlineNeMoCtcModel::Impl {
  }

  std::vector<Ort::Value> StackStates(
-      std::vector<std::vector<Ort::Value>> states) const {
+      std::vector<std::vector<Ort::Value>> states) {
    int32_t batch_size = static_cast<int32_t>(states.size());
    if (batch_size == 1) {
      return std::move(states[0]);
@@ -157,6 +157,8 @@ class OnlineNeMoCtcModel::Impl {
      std::vector<Ort::Value> states) const {
    assert(states.size() == 3);

+    auto allocator = const_cast<Impl *>(this)->allocator_;
+
    std::vector<std::vector<Ort::Value>> ans;

    auto shape = states[0].GetTensorTypeAndShapeInfo().GetShape();
@@ -171,9 +173,9 @@ class OnlineNeMoCtcModel::Impl {
    for (int32_t i = 0; i != 3; ++i) {
      std::vector<Ort::Value> v;
      if (i == 2) {
-        v = Unbind<int64_t>(allocator_, &states[i], 0);
+        v = Unbind<int64_t>(allocator, &states[i], 0);
      } else {
-        v = Unbind(allocator_, &states[i], 0);
+        v = Unbind(allocator, &states[i], 0);
      }

      assert(v.size() == batch_size);