Add C API for spoken language identification. (#695)

2024-03-25 15:16:47 +08:00
parent 0d258dd150
commit ab7cff2513
18 changed files with 366 additions and 70 deletions
--- a/.github/scripts/test-c-api.sh
+++ b/.github/scripts/test-c-api.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Smoke test for the spoken language identification C API example.
+#
+# It downloads the whisper tiny model, runs the executable named by SLID_EXE
+# and cleans up the model directory afterwards.
+#
+# Required environment:
+#   SLID_EXE  command used to launch the example executable under test
+
+set -e
+
+log() {
+  # This function is from espnet
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+# Fail fast when SLID_EXE is unset or empty. Otherwise the bare $SLID_EXE
+# line below expands to nothing and the script succeeds without running
+# any test.
+: "${SLID_EXE:?SLID_EXE must be set to the executable under test}"
+
+echo "SLID_EXE is $SLID_EXE"
+echo "PATH: $PATH"
+
+
+log "------------------------------------------------------------"
+log "Download whisper tiny for spoken language identification    "
+log "------------------------------------------------------------"
+
+rm -rf sherpa-onnx-whisper-tiny*
+# -f makes curl exit non-zero on an HTTP error instead of saving the error page
+curl -fLS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
+tar xvf sherpa-onnx-whisper-tiny.tar.bz2
+rm sherpa-onnx-whisper-tiny.tar.bz2
+
+$SLID_EXE
+
+rm -rf sherpa-onnx-whisper-tiny*
--- a/.github/scripts/test-spoken-language-identification.sh
+++ b/.github/scripts/test-spoken-language-identification.sh
@@ -28,32 +28,32 @@ ar-arabic.wav
 bg-bulgarian.wav
 cs-czech.wav
 da-danish.wav
-de-german.wav
-el-greek.wav
-en-english.wav
-es-spanish.wav
-fa-persian.wav
-fi-finnish.wav
-fr-french.wav
-hi-hindi.wav
-hr-croatian.wav
-id-indonesian.wav
-it-italian.wav
-ja-japanese.wav
-ko-korean.wav
-nl-dutch.wav
-no-norwegian.wav
-po-polish.wav
-pt-portuguese.wav
-ro-romanian.wav
-ru-russian.wav
-sk-slovak.wav
-sv-swedish.wav
-ta-tamil.wav
-tl-tagalog.wav
-tr-turkish.wav
-uk-ukrainian.wav
-zh-chinese.wav
+# de-german.wav
+# el-greek.wav
+# en-english.wav
+# es-spanish.wav
+# fa-persian.wav
+# fi-finnish.wav
+# fr-french.wav
+# hi-hindi.wav
+# hr-croatian.wav
+# id-indonesian.wav
+# it-italian.wav
+# ja-japanese.wav
+# ko-korean.wav
+# nl-dutch.wav
+# no-norwegian.wav
+# po-polish.wav
+# pt-portuguese.wav
+# ro-romanian.wav
+# ru-russian.wav
+# sk-slovak.wav
+# sv-swedish.wav
+# ta-tamil.wav
+# tl-tagalog.wav
+# tr-turkish.wav
+# uk-ukrainian.wav
+# zh-chinese.wav
 )

 for wav in ${waves[@]}; do
--- a/.github/workflows/android.yaml
+++ b/.github/workflows/android.yaml
@@ -113,6 +113,7 @@ jobs:
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

+            rm -rf huggingface
            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface

            cd huggingface
--- a/.github/workflows/build-xcframework.yaml
+++ b/.github/workflows/build-xcframework.yaml
@@ -90,6 +90,7 @@ jobs:
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

+            rm -rf huggingface
            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface

            cd huggingface
--- a/.github/workflows/linux.yaml
+++ b/.github/workflows/linux.yaml
@@ -123,8 +123,15 @@ jobs:
          name: release-${{ matrix.build_type }}-${{ matrix.shared_lib }}
          path: build/bin/*

-      - name: Test spoken language identification
-        if: matrix.build_type != 'Debug'
+      - name: Test spoken language identification (C API)
+        shell: bash
+        run: |
+          export PATH=$PWD/build/bin:$PATH
+          export SLID_EXE=spoken-language-identification-c-api
+
+          .github/scripts/test-c-api.sh
+
+      - name: Test spoken language identification (C++ API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
@@ -243,6 +250,7 @@ jobs:
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

+            rm -rf huggingface
            GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface

            cd huggingface
--- a/.github/workflows/macos.yaml
+++ b/.github/workflows/macos.yaml
@@ -102,8 +102,15 @@ jobs:
          otool -L build/bin/sherpa-onnx
          otool -l build/bin/sherpa-onnx

-      - name: Test spoken language identification
-        if: matrix.build_type != 'Debug'
+      - name: Test spoken language identification (C API)
+        shell: bash
+        run: |
+          export PATH=$PWD/build/bin:$PATH
+          export SLID_EXE=spoken-language-identification-c-api
+
+          .github/scripts/test-c-api.sh
+
+      - name: Test spoken language identification (C++ API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin:$PATH
--- a/.github/workflows/windows-x64.yaml
+++ b/.github/workflows/windows-x64.yaml
@@ -68,7 +68,15 @@ jobs:

          ls -lh ./bin/Release/sherpa-onnx.exe

-      - name: Test spoken language identification
+      - name: Test spoken language identification (C API)
+        shell: bash
+        run: |
+          export PATH=$PWD/build/bin/Release:$PATH
+          export SLID_EXE=spoken-language-identification-c-api.exe
+
+          .github/scripts/test-c-api.sh
+
+      - name: Test spoken language identification (C++ API)
        shell: bash
        run: |
          export PATH=$PWD/build/bin/Release:$PATH
--- a/.github/workflows/windows-x86.yaml
+++ b/.github/workflows/windows-x86.yaml
@@ -69,6 +69,14 @@ jobs:

          ls -lh ./bin/Release/sherpa-onnx.exe

+      - name: Test spoken language identification (C API)
+        shell: bash
+        run: |
+          export PATH=$PWD/build/bin/Release:$PATH
+          export SLID_EXE=spoken-language-identification-c-api.exe
+
+          .github/scripts/test-c-api.sh
+
      # - name: Test spoken language identification
      #   shell: bash
      #   run: |
--- a/.gitignore
+++ b/.gitignore
@@ -85,3 +85,4 @@ log
 vits-piper-*
 vits-coqui-*
 vits-mms-*
+*.tar.bz2
--- a/c-api-examples/CMakeLists.txt
+++ b/c-api-examples/CMakeLists.txt
@@ -7,8 +7,11 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
 add_executable(offline-tts-c-api offline-tts-c-api.c)
 target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)

+add_executable(spoken-language-identification-c-api spoken-language-identification-c-api.c)
+target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api)
+
 if(SHERPA_ONNX_HAS_ALSA)
  add_subdirectory(./asr-microphone-example)
-else()
+elseif((UNIX AND NOT APPLE) OR LINUX)
  message(WARNING "Not include ./asr-microphone-example since alsa is not available")
 endif()
--- a/c-api-examples/Makefile
+++ b/c-api-examples/Makefile
@@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd)
 CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
 LDFLAGS := -L ../build/lib
 LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
-LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lcargs
+LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
 LDFLAGS += -framework Foundation
 LDFLAGS += -lc++
 LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
--- a/c-api-examples/decode-file-c-api.c
+++ b/c-api-examples/decode-file-c-api.c
@@ -169,55 +169,56 @@ int32_t main(int32_t argc, char *argv[]) {
  int32_t segment_id = 0;

  const char *wav_filename = argv[context.index];
-  FILE *fp = fopen(wav_filename, "rb");
-  if (!fp) {
-    fprintf(stderr, "Failed to open %s\n", wav_filename);
+  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+  if (wave == NULL) {
+    fprintf(stderr, "Failed to read %s\n", wav_filename);
    return -1;
  }
-
-  // Assume the wave header occupies 44 bytes.
-  fseek(fp, 44, SEEK_SET);
-
  // simulate streaming

 #define N 3200  // 0.2 s. Sample rate is fixed to 16 kHz

  int16_t buffer[N];
  float samples[N];
+  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
+          wave->sample_rate, wave->num_samples,
+          (float)wave->num_samples / wave->sample_rate);

-  while (!feof(fp)) {
-    size_t n = fread((void *)buffer, sizeof(int16_t), N, fp);
-    if (n > 0) {
-      for (size_t i = 0; i != n; ++i) {
-        samples[i] = buffer[i] / 32768.;
-      }
-      AcceptWaveform(stream, 16000, samples, n);
-      while (IsOnlineStreamReady(recognizer, stream)) {
-        DecodeOnlineStream(recognizer, stream);
-      }
+  int32_t k = 0;
+  while (k < wave->num_samples) {
+    int32_t start = k;
+    int32_t end =
+        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
+    k += N;

-      const SherpaOnnxOnlineRecognizerResult *r =
-          GetOnlineStreamResult(recognizer, stream);
-
-      if (strlen(r->text)) {
-        SherpaOnnxPrint(display, segment_id, r->text);
-      }
-
-      if (IsEndpoint(recognizer, stream)) {
-        if (strlen(r->text)) {
-          ++segment_id;
-        }
-        Reset(recognizer, stream);
-      }
-
-      DestroyOnlineRecognizerResult(r);
+    AcceptWaveform(stream, wave->sample_rate, wave->samples + start,
+                   end - start);
+    while (IsOnlineStreamReady(recognizer, stream)) {
+      DecodeOnlineStream(recognizer, stream);
    }
+
+    const SherpaOnnxOnlineRecognizerResult *r =
+        GetOnlineStreamResult(recognizer, stream);
+
+    if (strlen(r->text)) {
+      SherpaOnnxPrint(display, segment_id, r->text);
+    }
+
+    if (IsEndpoint(recognizer, stream)) {
+      if (strlen(r->text)) {
+        ++segment_id;
+      }
+      Reset(recognizer, stream);
+    }
+
+    DestroyOnlineRecognizerResult(r);
  }
-  fclose(fp);

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
-  AcceptWaveform(stream, 16000, tail_paddings, 4800);
+  AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800);
+
+  SherpaOnnxFreeWave(wave);

  InputFinished(stream);
  while (IsOnlineStreamReady(recognizer, stream)) {
--- a/c-api-examples/spoken-language-identification-c-api.c
+++ b/c-api-examples/spoken-language-identification-c-api.c
@@ -0,0 +1,71 @@
+
+// We assume you have pre-downloaded the whisper multi-lingual models
+// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+// An example command to download the "tiny" whisper model is given below:
+//
+// clang-format off
+//
+// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
+// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
+// rm sherpa-onnx-whisper-tiny.tar.bz2
+//
+// clang-format on
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+// Identify the language spoken in one test wave of the whisper tiny model
+// and print the result to stderr. Returns 0 on success, -1 on failure.
+int32_t main() {
+  SherpaOnnxSpokenLanguageIdentificationConfig config;
+
+  // Start from an all-zero config; only the fields set below are non-zero.
+  memset(&config, 0, sizeof(config));
+
+  config.whisper.encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
+  config.whisper.decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";
+  config.num_threads = 1;
+  config.debug = 1;
+  config.provider = "cpu";
+
+  const SherpaOnnxSpokenLanguageIdentification *slid =
+      SherpaOnnxCreateSpokenLanguageIdentification(&config);
+  if (!slid) {
+    fprintf(stderr, "Failed to create spoken language identifier\n");
+    return -1;
+  }
+
+  // You can find more test waves from
+  // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs
+  const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
+  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+  if (wave == NULL) {
+    fprintf(stderr, "Failed to read %s\n", wav_filename);
+    // Release the identifier created above before bailing out.
+    SherpaOnnxDestroySpokenLanguageIdentification(slid);
+    return -1;
+  }
+
+  SherpaOnnxOfflineStream *stream =
+      SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid);
+
+  AcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
+                        wave->num_samples);
+
+  const SherpaOnnxSpokenLanguageIdentificationResult *result =
+      SherpaOnnxSpokenLanguageIdentificationCompute(slid, stream);
+
+  fprintf(stderr, "wav_filename: %s\n", wav_filename);
+  fprintf(stderr, "Detected language: %s\n", result->lang);
+
+  // Release resources in reverse order of creation.
+  SherpaOnnxDestroySpokenLanguageIdentificationResult(result);
+  DestroyOfflineStream(stream);
+  SherpaOnnxFreeWave(wave);
+  SherpaOnnxDestroySpokenLanguageIdentification(slid);
+
+  return 0;
+}
--- a/dotnet-examples/offline-decode-files/run-hotwords.sh
+++ b/dotnet-examples/offline-decode-files/run-hotwords.sh
@@ -3,7 +3,7 @@
 set -ex

 if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
-  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
  tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
  rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
 fi
--- a/dotnet-examples/offline-decode-files/run-zipformer.sh
+++ b/dotnet-examples/offline-decode-files/run-zipformer.sh
@@ -3,7 +3,7 @@
 set -ex

 if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then
-  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
  tar xvf sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
  rm sherpa-onnx-zipformer-en-2023-04-01.tar.bz2
 fi
--- a/dotnet-examples/online-decode-files/run-transducer.sh
+++ b/dotnet-examples/online-decode-files/run-transducer.sh
@@ -6,7 +6,7 @@

 set -ex
 if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
-  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
 fi

--- a/sherpa-onnx/c-api/c-api.cc
+++ b/sherpa-onnx/c-api/c-api.cc
@@ -6,6 +6,7 @@

 #include <algorithm>
 #include <memory>
+#include <string>
 #include <utility>
 #include <vector>

@@ -16,7 +17,9 @@
 #include "sherpa-onnx/csrc/offline-recognizer.h"
 #include "sherpa-onnx/csrc/offline-tts.h"
 #include "sherpa-onnx/csrc/online-recognizer.h"
+#include "sherpa-onnx/csrc/spoken-language-identification.h"
 #include "sherpa-onnx/csrc/voice-activity-detector.h"
+#include "sherpa-onnx/csrc/wave-reader.h"
 #include "sherpa-onnx/csrc/wave-writer.h"

 struct SherpaOnnxOnlineRecognizer {
@@ -859,3 +862,129 @@ int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
                            int32_t sample_rate, const char *filename) {
  return sherpa_onnx::WriteWave(filename, sample_rate, samples, n);
 }
+
+// Read a wave file into a heap-allocated SherpaOnnxWave.
+//
+// Returns nullptr when filename is NULL or when sherpa_onnx::ReadWave()
+// reports a failure through is_ok. A non-null result has to be released
+// with SherpaOnnxFreeWave().
+const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename) {
+  if (!filename) {
+    // Report the error here rather than handing a null pointer to ReadWave().
+    SHERPA_ONNX_LOGE("filename is NULL");
+    return nullptr;
+  }
+
+  int32_t sample_rate = -1;
+  bool is_ok = false;
+  std::vector<float> samples =
+      sherpa_onnx::ReadWave(filename, &sample_rate, &is_ok);
+  if (!is_ok) {
+    return nullptr;
+  }
+
+  // Copy the samples into an array owned by the returned struct.
+  float *c_samples = new float[samples.size()];
+  std::copy(samples.begin(), samples.end(), c_samples);
+
+  SherpaOnnxWave *wave = new SherpaOnnxWave;
+  wave->samples = c_samples;
+  wave->sample_rate = sample_rate;
+  wave->num_samples = static_cast<int32_t>(samples.size());
+  return wave;
+}
+
+// Free a wave returned by SherpaOnnxReadWave(). Passing NULL is a no-op.
+void SherpaOnnxFreeWave(const SherpaOnnxWave *wave) {
+  if (wave) {
+    delete[] wave->samples;
+    delete wave;
+  }
+}
+
+// Definition of the opaque C handle; it owns the C++ implementation object.
+struct SherpaOnnxSpokenLanguageIdentification {
+  std::unique_ptr<sherpa_onnx::SpokenLanguageIdentification> impl;
+};
+
+// Convert the C config into the C++ config and create the identifier.
+//
+// Returns nullptr when config is NULL or when the converted config fails
+// Validate().
+const SherpaOnnxSpokenLanguageIdentification *
+SherpaOnnxCreateSpokenLanguageIdentification(
+    const SherpaOnnxSpokenLanguageIdentificationConfig *config) {
+  if (!config) {
+    SHERPA_ONNX_LOGE("config is NULL");
+    return nullptr;
+  }
+
+  sherpa_onnx::SpokenLanguageIdentificationConfig slid_config;
+  slid_config.whisper.encoder = SHERPA_ONNX_OR(config->whisper.encoder, "");
+  slid_config.whisper.decoder = SHERPA_ONNX_OR(config->whisper.decoder, "");
+  slid_config.whisper.tail_paddings =
+      SHERPA_ONNX_OR(config->whisper.tail_paddings, -1);
+  slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
+  slid_config.debug = config->debug;
+  slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
+
+  if (slid_config.debug) {
+    SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str());
+  }
+
+  if (!slid_config.Validate()) {
+    SHERPA_ONNX_LOGE("Errors in config");
+    return nullptr;
+  }
+
+  SherpaOnnxSpokenLanguageIdentification *slid =
+      new SherpaOnnxSpokenLanguageIdentification;
+  slid->impl =
+      std::make_unique<sherpa_onnx::SpokenLanguageIdentification>(slid_config);
+
+  return slid;
+}
+
+// Free an identifier created by
+// SherpaOnnxCreateSpokenLanguageIdentification(). Passing NULL is a no-op.
+void SherpaOnnxDestroySpokenLanguageIdentification(
+    const SherpaOnnxSpokenLanguageIdentification *slid) {
+  delete slid;
+}
+
+// Create an offline stream from the identifier. slid must not be NULL.
+// The caller owns the returned stream.
+SherpaOnnxOfflineStream *
+SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
+    const SherpaOnnxSpokenLanguageIdentification *slid) {
+  SherpaOnnxOfflineStream *stream =
+      new SherpaOnnxOfflineStream(slid->impl->CreateStream());
+  return stream;
+}
+
+// Run language identification on the stream and return the language as a
+// NUL-terminated copy. slid and s must not be NULL. The result has to be
+// released with SherpaOnnxDestroySpokenLanguageIdentificationResult().
+const SherpaOnnxSpokenLanguageIdentificationResult *
+SherpaOnnxSpokenLanguageIdentificationCompute(
+    const SherpaOnnxSpokenLanguageIdentification *slid,
+    const SherpaOnnxOfflineStream *s) {
+  std::string lang = slid->impl->Compute(s->impl.get());
+  char *c_lang = new char[lang.size() + 1];
+  std::copy(lang.begin(), lang.end(), c_lang);
+  c_lang[lang.size()] = '\0';
+  SherpaOnnxSpokenLanguageIdentificationResult *r =
+      new SherpaOnnxSpokenLanguageIdentificationResult;
+  r->lang = c_lang;
+  return r;
+}
+
+// Free a result returned by SherpaOnnxSpokenLanguageIdentificationCompute().
+// Passing NULL is a no-op.
+void SherpaOnnxDestroySpokenLanguageIdentificationResult(
+    const SherpaOnnxSpokenLanguageIdentificationResult *r) {
+  if (r) {
+    delete[] r->lang;
+    delete r;
+  }
+}
--- a/sherpa-onnx/c-api/c-api.h
+++ b/sherpa-onnx/c-api/c-api.h
@@ -820,6 +820,76 @@ SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
                                            int32_t sample_rate,
                                            const char *filename);

+SHERPA_ONNX_API typedef struct SherpaOnnxWave {
+  // samples normalized to the range [-1, 1]
+  const float *samples;
+  int32_t sample_rate;
+  int32_t num_samples;
+} SherpaOnnxWave;
+
+// Return a NULL pointer on error. It supports only standard WAVE files.
+// Each sample should be 16-bit. It supports only a single channel.
+//
+// If the returned pointer is not NULL, the user has to invoke
+// SherpaOnnxFreeWave() to free the returned pointer to avoid memory leak.
+SHERPA_ONNX_API const SherpaOnnxWave *SherpaOnnxReadWave(const char *filename);
+
+SHERPA_ONNX_API void SherpaOnnxFreeWave(const SherpaOnnxWave *wave);
+
+// Spoken language identification
+
+SHERPA_ONNX_API typedef struct
+    SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
+  const char *encoder;
+  const char *decoder;
+  int32_t tail_paddings;
+} SherpaOnnxSpokenLanguageIdentificationWhisperConfig;
+
+SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationConfig {
+  SherpaOnnxSpokenLanguageIdentificationWhisperConfig whisper;
+  int32_t num_threads;
+  int32_t debug;
+  const char *provider;
+} SherpaOnnxSpokenLanguageIdentificationConfig;
+
+SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentification
+    SherpaOnnxSpokenLanguageIdentification;
+
+// Create an instance of SpokenLanguageIdentification.
+// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentification()
+// to free the returned pointer to avoid memory leak.
+SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentification *
+SherpaOnnxCreateSpokenLanguageIdentification(
+    const SherpaOnnxSpokenLanguageIdentificationConfig *config);
+
+SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentification(
+    const SherpaOnnxSpokenLanguageIdentification *slid);
+
+// The user has to invoke DestroyOfflineStream()
+// to free the returned pointer to avoid memory leak
+SHERPA_ONNX_API SherpaOnnxOfflineStream *
+SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(
+    const SherpaOnnxSpokenLanguageIdentification *slid);
+
+SHERPA_ONNX_API typedef struct SherpaOnnxSpokenLanguageIdentificationResult {
+  // en for English
+  // de for German
+  // zh for Chinese
+  // es for Spanish
+  // ...
+  const char *lang;
+} SherpaOnnxSpokenLanguageIdentificationResult;
+
+// The user has to invoke SherpaOnnxDestroySpokenLanguageIdentificationResult()
+// to free the returned pointer to avoid memory leak
+SHERPA_ONNX_API const SherpaOnnxSpokenLanguageIdentificationResult *
+SherpaOnnxSpokenLanguageIdentificationCompute(
+    const SherpaOnnxSpokenLanguageIdentification *slid,
+    const SherpaOnnxOfflineStream *s);
+
+SHERPA_ONNX_API void SherpaOnnxDestroySpokenLanguageIdentificationResult(
+    const SherpaOnnxSpokenLanguageIdentificationResult *r);
+
 #if defined(__GNUC__)
 #pragma GCC diagnostic pop
 #endif