diff --git a/.github/workflows/export-ced-to-onnx.yaml b/.github/workflows/export-ced-to-onnx.yaml new file mode 100644 index 00000000..506abe51 --- /dev/null +++ b/.github/workflows/export-ced-to-onnx.yaml @@ -0,0 +1,78 @@ +name: export-ced-to-onnx + +on: + workflow_dispatch: + +concurrency: + group: export-ced-to-onnx-${{ github.ref }} + cancel-in-progress: true + +jobs: + export-ced-to-onnx: + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' + name: export ced + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.8"] + + steps: + - uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Run + shell: bash + run: | + cd scripts/ced + ./run.sh + + - name: Release + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + file: ./*.tar.bz2 + overwrite: true + repo_name: k2-fsa/sherpa-onnx + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + tag: audio-tagging-models + + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + models=( + tiny + mini + small + base + ) + + for m in ${models[@]}; do + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + d=sherpa-onnx-ced-$m-audio-tagging-2024-04-19 + git clone https://huggingface.co/k2-fsa/$d huggingface + mv -v $d/* huggingface + cd huggingface + git lfs track "*.onnx" + git status + git add . + git status + git commit -m "first commit" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/$d main + cd .. + done diff --git a/android/SherpaOnnx/app/src/main/res/values/strings.xml b/android/SherpaOnnx/app/src/main/res/values/strings.xml index 801b18f4..0c3c70a2 100644 --- a/android/SherpaOnnx/app/src/main/res/values/strings.xml +++ b/android/SherpaOnnx/app/src/main/res/values/strings.xml @@ -1,5 +1,5 @@ - ASR with Next-gen Kaldi + ASR Click the Start button to play speech-to-text with Next-gen Kaldi. \n \n\n\n diff --git a/android/SherpaOnnx2Pass/app/src/main/res/values/strings.xml b/android/SherpaOnnx2Pass/app/src/main/res/values/strings.xml index cc2a4050..942912b0 100644 --- a/android/SherpaOnnx2Pass/app/src/main/res/values/strings.xml +++ b/android/SherpaOnnx2Pass/app/src/main/res/values/strings.xml @@ -1,5 +1,5 @@ - ASR with Next-gen Kaldi + ASR2pass Click the Start button to play speech-to-text with Next-gen Kaldi. \n \n\n\n diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/AudioTagging.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/AudioTagging.kt index 43730291..df897dbd 100644 --- a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/AudioTagging.kt +++ b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/AudioTagging.kt @@ -1,16 +1,16 @@ package com.k2fsa.sherpa.onnx import android.content.res.AssetManager -import android.util.Log -private val TAG = "sherpa-onnx" +const val TAG = "sherpa-onnx" data class OfflineZipformerAudioTaggingModelConfig( - var model: String, + var model: String = "", ) data class AudioTaggingModelConfig( - var zipformer: OfflineZipformerAudioTaggingModelConfig, + var zipformer: OfflineZipformerAudioTaggingModelConfig = OfflineZipformerAudioTaggingModelConfig(), + var ced: String = "", var numThreads: Int = 1, var debug: Boolean = false, var provider: String = "cpu", @@ -103,7 +103,7 @@ class AudioTagging( // // See also // https://k2-fsa.github.io/sherpa/onnx/audio-tagging/ -fun getAudioTaggingConfig(type: Int, numThreads: Int=1): AudioTaggingConfig? { +fun getAudioTaggingConfig(type: Int, numThreads: Int = 1): AudioTaggingConfig? { when (type) { 0 -> { val modelDir = "sherpa-onnx-zipformer-small-audio-tagging-2024-04-15" @@ -123,7 +123,7 @@ fun getAudioTaggingConfig(type: Int, numThreads: Int=1): AudioTaggingConfig? { return AudioTaggingConfig( model = AudioTaggingModelConfig( zipformer = OfflineZipformerAudioTaggingModelConfig(model = "$modelDir/model.int8.onnx"), - numThreads = 1, + numThreads = numThreads, debug = true, ), labels = "$modelDir/class_labels_indices.csv", @@ -131,6 +131,57 @@ fun getAudioTaggingConfig(type: Int, numThreads: Int=1): AudioTaggingConfig? { ) } + 2 -> { + val modelDir = "sherpa-onnx-ced-tiny-audio-tagging-2024-04-19" + return AudioTaggingConfig( + model = AudioTaggingModelConfig( + ced = "$modelDir/model.int8.onnx", + numThreads = numThreads, + debug = true, + ), + labels = "$modelDir/class_labels_indices.csv", + topK = 3, + ) + } + + 3 -> { + val modelDir = "sherpa-onnx-ced-mini-audio-tagging-2024-04-19" + return AudioTaggingConfig( + model = AudioTaggingModelConfig( + ced = "$modelDir/model.int8.onnx", + numThreads = numThreads, + debug = true, + ), + labels = "$modelDir/class_labels_indices.csv", + topK = 3, + ) + } + + 4 -> { + val modelDir = "sherpa-onnx-ced-small-audio-tagging-2024-04-19" + return AudioTaggingConfig( + model = AudioTaggingModelConfig( + ced = "$modelDir/model.int8.onnx", + numThreads = numThreads, + debug = true, + ), + labels = "$modelDir/class_labels_indices.csv", + topK = 3, + ) + } + + 5 -> { + val modelDir = "sherpa-onnx-ced-base-audio-tagging-2024-04-19" + return AudioTaggingConfig( + model = AudioTaggingModelConfig( + ced = "$modelDir/model.int8.onnx", + numThreads = numThreads, + debug = true, + ), + labels = "$modelDir/class_labels_indices.csv", + topK = 3, + ) + } } return null diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Home.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Home.kt index 0a84964a..b2239cee 100644 --- a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Home.kt +++ b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Home.kt @@ -3,24 +3,15 @@ package com.k2fsa.sherpa.onnx.audio.tagging import android.Manifest - import android.app.Activity import android.content.pm.PackageManager import android.media.AudioFormat import android.media.AudioRecord -import androidx.compose.foundation.lazy.items import android.media.MediaRecorder import android.util.Log import androidx.compose.foundation.ExperimentalFoundationApi -import androidx.compose.foundation.background import androidx.compose.foundation.layout.Arrangement import androidx.compose.foundation.layout.Box -import androidx.compose.material3.CenterAlignedTopAppBar -import androidx.compose.runtime.Composable -import androidx.compose.material3.Scaffold -import androidx.compose.material3.TopAppBarDefaults -import androidx.compose.material3.MaterialTheme -import androidx.compose.material3.Text import androidx.compose.foundation.layout.Column import androidx.compose.foundation.layout.PaddingValues import androidx.compose.foundation.layout.Row @@ -30,10 +21,17 @@ import androidx.compose.foundation.layout.fillMaxWidth import androidx.compose.foundation.layout.height import androidx.compose.foundation.layout.padding import androidx.compose.foundation.lazy.LazyColumn +import androidx.compose.foundation.lazy.items import androidx.compose.material3.Button +import androidx.compose.material3.CenterAlignedTopAppBar import androidx.compose.material3.ExperimentalMaterial3Api +import androidx.compose.material3.MaterialTheme +import androidx.compose.material3.Scaffold import androidx.compose.material3.Slider import androidx.compose.material3.Surface +import androidx.compose.material3.Text +import androidx.compose.material3.TopAppBarDefaults +import androidx.compose.runtime.Composable import androidx.compose.runtime.getValue import androidx.compose.runtime.mutableStateListOf import androidx.compose.runtime.mutableStateOf @@ -41,7 +39,6 @@ import androidx.compose.runtime.remember import androidx.compose.runtime.setValue import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier -import androidx.compose.ui.graphics.Color import androidx.compose.ui.platform.LocalContext import androidx.compose.ui.text.font.FontWeight import androidx.compose.ui.text.style.TextAlign @@ -49,6 +46,7 @@ import androidx.compose.ui.unit.dp import androidx.compose.ui.unit.sp import androidx.core.app.ActivityCompat import com.k2fsa.sherpa.onnx.AudioEvent +import com.k2fsa.sherpa.onnx.Tagger import kotlin.concurrent.thread diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/MainActivity.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/MainActivity.kt index e76cff4c..cb45f100 100644 --- a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/MainActivity.kt +++ b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/MainActivity.kt @@ -13,6 +13,7 @@ import androidx.compose.material3.Surface import androidx.compose.runtime.Composable import androidx.compose.ui.Modifier import androidx.core.app.ActivityCompat +import com.k2fsa.sherpa.onnx.Tagger import com.k2fsa.sherpa.onnx.audio.tagging.ui.theme.SherpaOnnxAudioTaggingTheme const val TAG = "sherpa-onnx" diff --git a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Tagger.kt b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Tagger.kt index 4be860ef..c714094a 100644 --- a/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Tagger.kt +++ b/android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/Tagger.kt @@ -1,10 +1,8 @@ -package com.k2fsa.sherpa.onnx.audio.tagging +package com.k2fsa.sherpa.onnx import android.content.res.AssetManager import android.util.Log -import com.k2fsa.sherpa.onnx.AudioTagging -import com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation.TAG -import com.k2fsa.sherpa.onnx.getAudioTaggingConfig + object Tagger { private var _tagger: AudioTagging? = null @@ -12,6 +10,7 @@ object Tagger { get() { return _tagger!! } + fun initTagger(assetManager: AssetManager? = null, numThreads: Int = 1) { synchronized(this) { if (_tagger != null) { @@ -19,7 +18,7 @@ object Tagger { } Log.i(TAG, "Initializing audio tagger") - val config = getAudioTaggingConfig(type = 0, numThreads=numThreads)!! + val config = getAudioTaggingConfig(type = 0, numThreads = numThreads)!! _tagger = AudioTagging(assetManager, config) } } diff --git a/android/SherpaOnnxAudioTaggingWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/wear/os/presentation/HomeScreen.kt b/android/SherpaOnnxAudioTaggingWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/wear/os/presentation/HomeScreen.kt index 9af2c571..a4f1ba88 100644 --- a/android/SherpaOnnxAudioTaggingWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/wear/os/presentation/HomeScreen.kt +++ b/android/SherpaOnnxAudioTaggingWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/wear/os/presentation/HomeScreen.kt @@ -33,7 +33,7 @@ import androidx.wear.compose.material.Button import androidx.wear.compose.material.MaterialTheme import androidx.wear.compose.material.Text import com.k2fsa.sherpa.onnx.AudioEvent -import com.k2fsa.sherpa.onnx.audio.tagging.Tagger +import com.k2fsa.sherpa.onnx.Tagger import com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation.theme.SherpaOnnxAudioTaggingWearOsTheme import kotlin.concurrent.thread diff --git a/android/SherpaOnnxAudioTaggingWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/wear/os/presentation/MainActivity.kt b/android/SherpaOnnxAudioTaggingWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/wear/os/presentation/MainActivity.kt index 06542e60..fd8b1f71 100644 --- a/android/SherpaOnnxAudioTaggingWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/wear/os/presentation/MainActivity.kt +++ b/android/SherpaOnnxAudioTaggingWearOs/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/wear/os/presentation/MainActivity.kt @@ -17,7 +17,7 @@ import androidx.activity.compose.setContent import androidx.compose.runtime.Composable import androidx.core.app.ActivityCompat import androidx.core.splashscreen.SplashScreen.Companion.installSplashScreen -import com.k2fsa.sherpa.onnx.audio.tagging.Tagger +import com.k2fsa.sherpa.onnx.Tagger const val TAG = "sherpa-onnx" private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 diff --git a/android/SherpaOnnxAudioTaggingWearOs/app/src/main/res/values/strings.xml b/android/SherpaOnnxAudioTaggingWearOs/app/src/main/res/values/strings.xml index 28dfe9bc..5a6464b5 100644 --- a/android/SherpaOnnxAudioTaggingWearOs/app/src/main/res/values/strings.xml +++ b/android/SherpaOnnxAudioTaggingWearOs/app/src/main/res/values/strings.xml @@ -1,5 +1,5 @@ - AudioTagging + Audio Tagging