Support building GPU-capable sherpa-onnx on Linux aarch64. (#1500)

Thanks to @Peakyxh for providing pre-built onnxruntime libraries with CUDA support for Linux aarch64. Tested on Jetson nano b01
2024-11-01 11:16:28 +08:00
parent a3c89aa0d8
commit 9ab89c33bc
41 changed files with 537 additions and 291 deletions
--- a/sherpa-onnx/csrc/onnx-utils.cc
+++ b/sherpa-onnx/csrc/onnx-utils.cc
@@ -21,6 +21,36 @@

 namespace sherpa_onnx {

+static std::string GetInputName(Ort::Session *sess, size_t index,
+                                OrtAllocator *allocator) {
+// Note(fangjun): We only tested 1.17.1 and 1.11.0
+// For other versions, we may need to change it
+#if ORT_API_VERSION >= 17
+  auto v = sess->GetInputNameAllocated(index, allocator);
+  return v.get();
+#else
+  auto v = sess->GetInputName(index, allocator);
+  std::string ans = v;
+  allocator->Free(allocator, v);
+  return ans;
+#endif
+}
+
+static std::string GetOutputName(Ort::Session *sess, size_t index,
+                                 OrtAllocator *allocator) {
+// Note(fangjun): We only tested 1.17.1 and 1.11.0
+// For other versions, we may need to change it
+#if ORT_API_VERSION >= 17
+  auto v = sess->GetOutputNameAllocated(index, allocator);
+  return v.get();
+#else
+  auto v = sess->GetOutputName(index, allocator);
+  std::string ans = v;
+  allocator->Free(allocator, v);
+  return ans;
+#endif
+}
+
 void GetInputNames(Ort::Session *sess, std::vector<std::string> *input_names,
                   std::vector<const char *> *input_names_ptr) {
  Ort::AllocatorWithDefaultOptions allocator;
@@ -28,8 +58,7 @@ void GetInputNames(Ort::Session *sess, std::vector<std::string> *input_names,
  input_names->resize(node_count);
  input_names_ptr->resize(node_count);
  for (size_t i = 0; i != node_count; ++i) {
-    auto tmp = sess->GetInputNameAllocated(i, allocator);
-    (*input_names)[i] = tmp.get();
+    (*input_names)[i] = GetInputName(sess, i, allocator);
    (*input_names_ptr)[i] = (*input_names)[i].c_str();
  }
 }
@@ -41,8 +70,7 @@ void GetOutputNames(Ort::Session *sess, std::vector<std::string> *output_names,
  output_names->resize(node_count);
  output_names_ptr->resize(node_count);
  for (size_t i = 0; i != node_count; ++i) {
-    auto tmp = sess->GetOutputNameAllocated(i, allocator);
-    (*output_names)[i] = tmp.get();
+    (*output_names)[i] = GetOutputName(sess, i, allocator);
    (*output_names_ptr)[i] = (*output_names)[i].c_str();
  }
 }
@@ -78,12 +106,24 @@ Ort::Value GetEncoderOutFrame(OrtAllocator *allocator, Ort::Value *encoder_out,

 void PrintModelMetadata(std::ostream &os, const Ort::ModelMetadata &meta_data) {
  Ort::AllocatorWithDefaultOptions allocator;
+#if ORT_API_VERSION >= 17
  std::vector<Ort::AllocatedStringPtr> v =
      meta_data.GetCustomMetadataMapKeysAllocated(allocator);
  for (const auto &key : v) {
    auto p = meta_data.LookupCustomMetadataMapAllocated(key.get(), allocator);
    os << key.get() << "=" << p.get() << "\n";
  }
+#else
+  int64_t num_keys = 0;
+  char **keys = meta_data.GetCustomMetadataMapKeys(allocator, num_keys);
+  for (int32_t i = 0; i < num_keys; ++i) {
+    auto v = LookupCustomModelMetaData(meta_data, keys[i], allocator);
+    os << keys[i] << "=" << v << "\n";
+    allocator.Free(keys[i]);
+  }
+
+  allocator.Free(keys);
+#endif
 }

 Ort::Value Clone(OrtAllocator *allocator, const Ort::Value *v) {
@@ -361,4 +401,20 @@ std::vector<Ort::Value> Convert(std::vector<CopyableOrtValue> values) {
  return ans;
 }

+std::string LookupCustomModelMetaData(const Ort::ModelMetadata &meta_data,
+                                      const char *key,
+                                      OrtAllocator *allocator) {
+// Note(fangjun): We only tested 1.17.1 and 1.11.0
+// For other versions, we may need to change it
+#if ORT_API_VERSION >= 17
+  auto v = meta_data.LookupCustomMetadataMapAllocated(key, allocator);
+  return v.get();
+#else
+  auto v = meta_data.LookupCustomMetadataMap(key, allocator);
+  std::string ans = v;
+  allocator->Free(allocator, v);
+  return ans;
+#endif
+}
+
 }  // namespace sherpa_onnx