diff --git a/.github/workflows/android.yaml b/.github/workflows/android.yaml index 760b5759..452e2cb3 100644 --- a/.github/workflows/android.yaml +++ b/.github/workflows/android.yaml @@ -57,6 +57,7 @@ jobs: ./build-android-arm64-v8a.sh mkdir -p jniLibs/arm64-v8a/ cp -v ./build-android-arm64-v8a/install/lib/*.so ./jniLibs/arm64-v8a/ + rm -rf ./build-android-arm64-v8a/ - name: build android armv7-eabi shell: bash @@ -65,6 +66,7 @@ jobs: ./build-android-armv7-eabi.sh mkdir -p ./jniLibs/armeabi-v7a/ cp -v ./build-android-armv7-eabi/install/lib/*.so ./jniLibs/armeabi-v7a/ + rm -rf ./build-android-armv7-eabi - name: build android x86_64 shell: bash @@ -73,6 +75,7 @@ jobs: ./build-android-x86-64.sh mkdir -p ./jniLibs/x86_64 cp -v ./build-android-x86-64/install/lib/*.so ./jniLibs/x86_64 + rm -rf ./build-android-x86-64 - name: build android x86 shell: bash @@ -81,6 +84,7 @@ jobs: ./build-android-x86.sh mkdir -p ./jniLibs/x86 cp -v ./build-android-x86/install/lib/*.so ./jniLibs/x86 + rm -rf ./build-android-x86 - name: Copy files shell: bash @@ -112,6 +116,8 @@ jobs: command: | git config --global user.email "csukuangfj@gmail.com" git config --global user.name "Fangjun Kuang" + du -h -d1 . 
+ ls -lh rm -rf huggingface GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface diff --git a/.github/workflows/apk-kws.yaml b/.github/workflows/apk-kws.yaml index 7c85e6af..e5be8f7f 100644 --- a/.github/workflows/apk-kws.yaml +++ b/.github/workflows/apk-kws.yaml @@ -44,6 +44,23 @@ jobs: echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" ls -lh ${ANDROID_NDK_LATEST_HOME} + - name: Setup build tool version variable + shell: bash + run: | + echo "---" + ls -lh /usr/local/lib/android/ + echo "---" + + ls -lh /usr/local/lib/android/sdk + echo "---" + + ls -lh /usr/local/lib/android/sdk/build-tools + echo "---" + + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) + echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV + echo "Last build tool version is: $BUILD_TOOL_VERSION" + - name: build APK shell: bash run: | @@ -59,13 +76,77 @@ jobs: run: | ls -lh ./apks/ - - uses: actions/upload-artifact@v4 - with: - path: ./apks/*.apk - - name: Release APK - uses: svenstaro/upload-release-action@v2 + # https://github.com/marketplace/actions/sign-android-release + - uses: r0adkll/sign-android-release@v1 + name: Sign app APK with: - file_glob: true - file: apks/*.apk - overwrite: true + releaseDirectory: ./apks + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} + env: + BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} + + - name: Display APK after signing + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Rename APK after signing + shell: bash + run: | + cd apks + rm -fv signingKey.jks + rm -fv *.apk.idsig + rm -fv *-aligned.apk + + all_apks=$(ls -1 *-signed.apk) + echo "----" + echo $all_apks + echo "----" + for apk in ${all_apks[@]}; do + n=$(echo $apk | sed -e s/-signed//) + mv -v $apk $n + done + + cd .. + + ls -lh ./apks/ + du -h -d1 . 
+ + - name: Display APK after rename + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + mkdir -p kws + cp -v ../apks/*.apk ./kws/ + git status + git lfs track "*.apk" + git add . + git commit -m "add more apks" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index 3a3ae903..37719b61 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -106,6 +106,14 @@ jobs: make -j4 ls -lh lib + - name: Run java test (Spoken language identification) + shell: bash + run: | + cd ./java-api-examples + ./run-spoken-language-identification-whisper.sh + # Delete model files to save space + rm -rf sherpa-onnx-whisper-* + - name: Run java test (Streaming ASR) shell: bash run: | diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index e4eb5e27..fa282963 100644 --- a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -200,7 +200,7 @@ class MainActivity : AppCompatActivity() { val config = OnlineRecognizerConfig( featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), modelConfig = getModelConfig(type = type)!!, - lmConfig = getOnlineLMConfig(type = type), + // lmConfig = 
getOnlineLMConfig(type = type), endpointConfig = getEndpointConfig(), enableEndpoint = true, ) diff --git a/java-api-examples/.gitignore b/java-api-examples/.gitignore index 6091eb46..91c35d7a 100644 --- a/java-api-examples/.gitignore +++ b/java-api-examples/.gitignore @@ -1,4 +1,3 @@ lib hs_err* -!run-streaming*.sh -!run-non-streaming*.sh +!run-*.sh diff --git a/java-api-examples/README.md b/java-api-examples/README.md index c653dbb4..e40de7b6 100755 --- a/java-api-examples/README.md +++ b/java-api-examples/README.md @@ -29,3 +29,9 @@ This directory contains examples for the JAVA API of sherpa-onnx. ./run-non-streaming-tts-coqui-de.sh ./run-non-streaming-tts-vits-zh.sh ``` + +## Spoken language identification + +```bash +./run-spoken-language-identification-whisper.sh +``` diff --git a/java-api-examples/SpokenLanguageIdentificationWhisper.java b/java-api-examples/SpokenLanguageIdentificationWhisper.java new file mode 100644 index 00000000..f2e690b8 --- /dev/null +++ b/java-api-examples/SpokenLanguageIdentificationWhisper.java @@ -0,0 +1,61 @@ +// Copyright 2024 Xiaomi Corporation + +// This file shows how to use a multilingual whisper model for +// spoken language identification. +// +// Note that it needs a multilingual whisper model. For instance, +// tiny works, but tiny.en doesn't. 
+import com.k2fsa.sherpa.onnx.*;
+
+/** Prints the language spoken in each test wave file, using a multilingual whisper model. */
+public class SpokenLanguageIdentificationWhisper {
+  public static void main(String[] args) {
+    // The model and the test wave files can be downloaded from
+    // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+    String[] waveFiles = {
+      "./spoken-language-identification-test-wavs/en-english.wav",
+      "./spoken-language-identification-test-wavs/de-german.wav",
+      "./spoken-language-identification-test-wavs/zh-chinese.wav",
+      "./spoken-language-identification-test-wavs/es-spanish.wav",
+      "./spoken-language-identification-test-wavs/fa-persian.wav",
+      "./spoken-language-identification-test-wavs/ko-korean.wav",
+      "./spoken-language-identification-test-wavs/ja-japanese.wav",
+      "./spoken-language-identification-test-wavs/ru-russian.wav",
+      "./spoken-language-identification-test-wavs/uk-ukrainian.wav",
+    };
+
+    SpokenLanguageIdentificationWhisperConfig whisperConfig =
+        SpokenLanguageIdentificationWhisperConfig.builder()
+            .setEncoder("./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx")
+            .setDecoder("./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx")
+            .build();
+
+    SpokenLanguageIdentificationConfig slidConfig =
+        SpokenLanguageIdentificationConfig.builder()
+            .setWhisper(whisperConfig)
+            .setNumThreads(1)
+            .setDebug(true)
+            .build();
+
+    SpokenLanguageIdentification identifier = new SpokenLanguageIdentification(slidConfig);
+    for (String waveFile : waveFiles) {
+      identify(identifier, waveFile);
+    }
+    System.out.println("---");
+
+    identifier.release();
+  }
+
+  /** Runs identification on one wave file, prints the result, and releases the stream. */
+  private static void identify(SpokenLanguageIdentification slid, String filename) {
+    WaveReader reader = new WaveReader(filename);
+    OfflineStream stream = slid.createStream();
+    stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
+
+    String lang = slid.compute(stream);
+    System.out.println("---");
+    System.out.printf("filename: %s\n", filename);
+    System.out.printf("lang: %s\n", lang);
+    stream.release();
+  }
+}
diff --git a/java-api-examples/run-spoken-language-identification-whisper.sh
b/java-api-examples/run-spoken-language-identification-whisper.sh
new file mode 100755
index 00000000..48523212
--- /dev/null
+++ b/java-api-examples/run-spoken-language-identification-whisper.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+#
+# Builds the JNI library and the Java API jar if they are missing, downloads
+# the whisper model and the test wave files if they are missing, and then
+# runs SpokenLanguageIdentificationWhisper.java.
+
+set -ex
+
+if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
+  mkdir -p ../build
+  pushd ../build
+  cmake \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    -DSHERPA_ONNX_ENABLE_JNI=ON \
+    ..
+
+  make -j4
+  ls -lh lib
+  popd
+fi
+
+if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
+  pushd ../sherpa-onnx/java-api
+  make
+  popd
+fi
+
+# Note that it needs a multilingual whisper model. For example, tiny works while tiny.en does not.
+# https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
+if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
+  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
+  rm sherpa-onnx-whisper-tiny.tar.bz2
+fi
+
+if [ ! -f ./spoken-language-identification-test-wavs/en-english.wav ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/spoken-language-identification-test-wavs.tar.bz2
+  tar xvf spoken-language-identification-test-wavs.tar.bz2
+  rm spoken-language-identification-test-wavs.tar.bz2
+fi
+
+java \
+  -Djava.library.path="$PWD/../build/lib" \
+  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
+  ./SpokenLanguageIdentificationWhisper.java
diff --git a/sherpa-onnx/java-api/Makefile b/sherpa-onnx/java-api/Makefile
index 2e02f4c4..67cedde4 100644
--- a/sherpa-onnx/java-api/Makefile
+++ b/sherpa-onnx/java-api/Makefile
@@ -36,6 +36,10 @@ java_files += OfflineTtsConfig.java
 java_files += GeneratedAudio.java
 java_files += OfflineTts.java
 
+java_files += SpokenLanguageIdentificationWhisperConfig.java
+java_files += SpokenLanguageIdentificationConfig.java
+java_files += SpokenLanguageIdentification.java
+
 class_files := $(java_files:%.java=%.class)
 
 java_files := $(addprefix src/$(package_dir)/,$(java_files))
diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.java
new file mode 100644
index 00000000..379f3853
--- /dev/null
+++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentification.java
@@ -0,0 +1,78 @@
+// Copyright 2024 Xiaomi Corporation
+
+package com.k2fsa.sherpa.onnx;
+
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * Spoken language identification backed by the native {@code sherpa-onnx-jni} library.
+ *
+ * <p>Each instance holds a native handle; call {@link #release()} when done with it.
+ */
+public class SpokenLanguageIdentification {
+  static {
+    System.loadLibrary("sherpa-onnx-jni");
+  }
+
+  // ISO 639 language code -> display name in the default locale
+  private final Map<String, String> localeMap;
+  private long ptr = 0; // native handle returned by newFromFile(); 0 after release()
+
+  /**
+   * Creates the native object from {@code config}.
+   *
+   * @param config settings passed to the native library
+   */
+  public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config) {
+    ptr = newFromFile(config);
+
+    String[] languages = Locale.getISOLanguages();
+    localeMap = new HashMap<>(languages.length);
+    for (String language : languages) {
+      Locale locale = new Locale(language);
+      localeMap.put(language, locale.getDisplayName());
+    }
+  }
+
+  /**
+   * Identifies the language of the audio in {@code stream}.
+   *
+   * @param stream a stream obtained from {@link #createStream()}
+   * @return the display name of the detected language, or the string returned by the native
+   *     library unchanged when it is not a known ISO 639 language code
+   */
+  public String compute(OfflineStream stream) {
+    String lang = compute(ptr, stream.getPtr());
+    return localeMap.getOrDefault(lang, lang);
+  }
+
+  /** Returns a new stream created by the native object. */
+  public OfflineStream createStream() {
+    long p = createStream(ptr);
+    return new OfflineStream(p);
+  }
+
+  @Override
+  protected void finalize() throws Throwable {
+    release();
+  }
+
+  /** Deletes the native object. Calling it again is a no-op; prefer calling it explicitly. */
+  public void release() {
+    if (this.ptr == 0) {
+      return;
+    }
+    delete(this.ptr);
+    this.ptr = 0;
+  }
+
+  private native void delete(long ptr);
+
+  private native long newFromFile(SpokenLanguageIdentificationConfig config);
+
+  private native long createStream(long ptr);
+
+  private native String compute(long ptr, long streamPtr);
+}
diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentificationConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentificationConfig.java
new file mode 100644
index 00000000..4043bcf4
--- /dev/null
+++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentificationConfig.java
@@ -0,0 +1,59 @@
+// Copyright 2024 Xiaomi Corporation
+
+package com.k2fsa.sherpa.onnx;
+
+/** Settings for {@link SpokenLanguageIdentification}, created via {@link #builder()}. */
+public class SpokenLanguageIdentificationConfig {
+  private final SpokenLanguageIdentificationWhisperConfig whisper;
+  private final int numThreads;
+  private final boolean debug;
+  private final String provider;
+
+  private SpokenLanguageIdentificationConfig(Builder builder) {
+    this.whisper = builder.whisper;
+    this.numThreads = builder.numThreads;
+    this.debug = builder.debug;
+    this.provider = builder.provider;
+  }
+
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  public SpokenLanguageIdentificationWhisperConfig getWhisper() {
+    return whisper;
+  }
+
+  /** Builder. Defaults: default whisper config, 1 thread, debug enabled, provider "cpu". */
+  public static class Builder {
+    private SpokenLanguageIdentificationWhisperConfig whisper =
+        SpokenLanguageIdentificationWhisperConfig.builder().build();
+    private int numThreads = 1;
+    private boolean debug = true;
+    private String provider = "cpu";
+
+    public SpokenLanguageIdentificationConfig build() {
+      return new SpokenLanguageIdentificationConfig(this);
+    }
+
+    public Builder setWhisper(SpokenLanguageIdentificationWhisperConfig whisper) {
+      this.whisper = whisper;
+      return this;
+    }
+
+    public Builder setNumThreads(int numThreads) {
+      this.numThreads = numThreads;
+      return this;
+    }
+
+    public Builder setDebug(boolean debug) {
+      this.debug = debug;
+      return this;
+    }
+
+    public Builder setProvider(String provider) {
+      this.provider = provider;
+      return this;
+    }
+  }
+}
diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentificationWhisperConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentificationWhisperConfig.java
new file mode 100644
index 00000000..5475e722
--- /dev/null
+++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpokenLanguageIdentificationWhisperConfig.java
@@ -0,0 +1,58 @@
+// Copyright 2024 Xiaomi Corporation
+
+package com.k2fsa.sherpa.onnx;
+
+/** Whisper model settings for spoken language identification, created via {@link #builder()}. */
+public class SpokenLanguageIdentificationWhisperConfig {
+  private final String encoder;
+  private final String decoder;
+  private final int tailPaddings;
+
+  private SpokenLanguageIdentificationWhisperConfig(Builder builder) {
+    this.encoder = builder.encoder;
+    this.decoder = builder.decoder;
+    this.tailPaddings = builder.tailPaddings;
+  }
+
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  public String getEncoder() {
+    return encoder;
+  }
+
+  public String getDecoder() {
+    return decoder;
+  }
+
+  public int getTailPaddings() {
+    return tailPaddings;
+  }
+
+  /** Builder. Defaults: empty encoder and decoder, 1000 tail paddings. */
+  public static class Builder {
+    private String encoder = "";
+    private String decoder = "";
+    private int tailPaddings = 1000; // number of frames to pad
+
+    public SpokenLanguageIdentificationWhisperConfig build() {
+      return new SpokenLanguageIdentificationWhisperConfig(this);
+    }
+
+    public Builder setEncoder(String encoder) {
+      this.encoder = encoder;
+      return this;
+    }
+
+    public Builder setDecoder(String decoder) {
+      this.decoder = decoder;
+      return this;
+    }
+
+    public Builder setTailPaddings(int tailPaddings) {
+      this.tailPaddings = tailPaddings;
+      return this;
+    }
+  }
+}