Refactor the JNI interface to make it more modular and maintainable (#802)
This commit is contained in:
174
.github/workflows/apk-asr.yaml
vendored
Normal file
174
.github/workflows/apk-asr.yaml
vendored
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
name: apk-asr
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
tags:
|
||||||
|
- '*'
|
||||||
|
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: apk-asr-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
apk_asr:
|
||||||
|
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
name: apk for asr ${{ matrix.index }}/${{ matrix.total }}
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
total: ["1"]
|
||||||
|
index: ["0"]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
# https://github.com/actions/setup-java
|
||||||
|
- uses: actions/setup-java@v4
|
||||||
|
with:
|
||||||
|
distribution: 'temurin' # See 'Supported distributions' for available options
|
||||||
|
java-version: '21'
|
||||||
|
|
||||||
|
- name: ccache
|
||||||
|
uses: hendrikmuhs/ccache-action@v1.2
|
||||||
|
with:
|
||||||
|
key: ${{ matrix.os }}-android
|
||||||
|
|
||||||
|
- name: Display NDK HOME
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
|
||||||
|
ls -lh ${ANDROID_NDK_LATEST_HOME}
|
||||||
|
|
||||||
|
- name: Install Python dependencies
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
python3 -m pip install --upgrade pip jinja2
|
||||||
|
|
||||||
|
- name: Setup build tool version variable
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
echo "---"
|
||||||
|
ls -lh /usr/local/lib/android/
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
ls -lh /usr/local/lib/android/sdk
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
ls -lh /usr/local/lib/android/sdk/build-tools
|
||||||
|
echo "---"
|
||||||
|
|
||||||
|
BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
|
||||||
|
echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
|
||||||
|
echo "Last build tool version is: $BUILD_TOOL_VERSION"
|
||||||
|
|
||||||
|
- name: Generate build script
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd scripts/apk
|
||||||
|
|
||||||
|
total=${{ matrix.total }}
|
||||||
|
index=${{ matrix.index }}
|
||||||
|
|
||||||
|
./generate-asr-apk-script.py --total $total --index $index
|
||||||
|
|
||||||
|
chmod +x build-apk-asr.sh
|
||||||
|
mv -v ./build-apk-asr.sh ../..
|
||||||
|
|
||||||
|
- name: build APK
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
|
||||||
|
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
|
||||||
|
cmake --version
|
||||||
|
|
||||||
|
export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
|
||||||
|
./build-apk-asr.sh
|
||||||
|
|
||||||
|
- name: Display APK
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
ls -lh ./apks/
|
||||||
|
du -h -d1 .
|
||||||
|
|
||||||
|
# https://github.com/marketplace/actions/sign-android-release
|
||||||
|
- uses: r0adkll/sign-android-release@v1
|
||||||
|
name: Sign app APK
|
||||||
|
with:
|
||||||
|
releaseDirectory: ./apks
|
||||||
|
signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
|
||||||
|
alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
|
||||||
|
keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
|
||||||
|
env:
|
||||||
|
BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}
|
||||||
|
|
||||||
|
- name: Display APK after signing
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
ls -lh ./apks/
|
||||||
|
du -h -d1 .
|
||||||
|
|
||||||
|
- name: Rename APK after signing
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd apks
|
||||||
|
rm -fv signingKey.jks
|
||||||
|
rm -fv *.apk.idsig
|
||||||
|
rm -fv *-aligned.apk
|
||||||
|
|
||||||
|
all_apks=$(ls -1 *-signed.apk)
|
||||||
|
echo "----"
|
||||||
|
echo $all_apks
|
||||||
|
echo "----"
|
||||||
|
for apk in ${all_apks[@]}; do
|
||||||
|
n=$(echo $apk | sed -e s/-signed//)
|
||||||
|
mv -v $apk $n
|
||||||
|
done
|
||||||
|
|
||||||
|
cd ..
|
||||||
|
|
||||||
|
ls -lh ./apks/
|
||||||
|
du -h -d1 .
|
||||||
|
|
||||||
|
- name: Display APK after rename
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
ls -lh ./apks/
|
||||||
|
du -h -d1 .
|
||||||
|
|
||||||
|
- name: Publish to huggingface
|
||||||
|
env:
|
||||||
|
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||||
|
uses: nick-fields/retry@v3
|
||||||
|
with:
|
||||||
|
max_attempts: 20
|
||||||
|
timeout_seconds: 200
|
||||||
|
shell: bash
|
||||||
|
command: |
|
||||||
|
git config --global user.email "csukuangfj@gmail.com"
|
||||||
|
git config --global user.name "Fangjun Kuang"
|
||||||
|
|
||||||
|
rm -rf huggingface
|
||||||
|
export GIT_LFS_SKIP_SMUDGE=1
|
||||||
|
|
||||||
|
git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
|
||||||
|
cd huggingface
|
||||||
|
git fetch
|
||||||
|
git pull
|
||||||
|
git merge -m "merge remote" --ff origin main
|
||||||
|
|
||||||
|
mkdir -p asr
|
||||||
|
cp -v ../apks/*.apk ./asr/
|
||||||
|
git status
|
||||||
|
git lfs track "*.apk"
|
||||||
|
git add .
|
||||||
|
git commit -m "add more apks"
|
||||||
|
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -95,3 +95,4 @@ sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
|
|||||||
spoken-language-identification-test-wavs
|
spoken-language-identification-test-wavs
|
||||||
my-release-key*
|
my-release-key*
|
||||||
vits-zh-hf-fanchen-C
|
vits-zh-hf-fanchen-C
|
||||||
|
sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
tools:targetApi="31">
|
tools:targetApi="31">
|
||||||
<activity
|
<activity
|
||||||
android:name=".MainActivity"
|
android:name=".MainActivity"
|
||||||
|
android:label="ASR: Next-gen Kaldi"
|
||||||
android:exported="true">
|
android:exported="true">
|
||||||
<intent-filter>
|
<intent-filter>
|
||||||
<action android:name="android.intent.action.MAIN" />
|
<action android:name="android.intent.action.MAIN" />
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt
|
||||||
@@ -12,16 +12,19 @@ import android.widget.Button
|
|||||||
import android.widget.TextView
|
import android.widget.TextView
|
||||||
import androidx.appcompat.app.AppCompatActivity
|
import androidx.appcompat.app.AppCompatActivity
|
||||||
import androidx.core.app.ActivityCompat
|
import androidx.core.app.ActivityCompat
|
||||||
import com.k2fsa.sherpa.onnx.*
|
|
||||||
import kotlin.concurrent.thread
|
import kotlin.concurrent.thread
|
||||||
|
|
||||||
private const val TAG = "sherpa-onnx"
|
private const val TAG = "sherpa-onnx"
|
||||||
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
|
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
|
||||||
|
|
||||||
|
// To enable microphone in android emulator, use
|
||||||
|
//
|
||||||
|
// adb emu avd hostmicon
|
||||||
|
|
||||||
class MainActivity : AppCompatActivity() {
|
class MainActivity : AppCompatActivity() {
|
||||||
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
||||||
|
|
||||||
private lateinit var model: SherpaOnnx
|
private lateinit var recognizer: OnlineRecognizer
|
||||||
private var audioRecord: AudioRecord? = null
|
private var audioRecord: AudioRecord? = null
|
||||||
private lateinit var recordButton: Button
|
private lateinit var recordButton: Button
|
||||||
private lateinit var textView: TextView
|
private lateinit var textView: TextView
|
||||||
@@ -87,7 +90,6 @@ class MainActivity : AppCompatActivity() {
|
|||||||
audioRecord!!.startRecording()
|
audioRecord!!.startRecording()
|
||||||
recordButton.setText(R.string.stop)
|
recordButton.setText(R.string.stop)
|
||||||
isRecording = true
|
isRecording = true
|
||||||
model.reset(true)
|
|
||||||
textView.text = ""
|
textView.text = ""
|
||||||
lastText = ""
|
lastText = ""
|
||||||
idx = 0
|
idx = 0
|
||||||
@@ -108,6 +110,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
|
|
||||||
private fun processSamples() {
|
private fun processSamples() {
|
||||||
Log.i(TAG, "processing samples")
|
Log.i(TAG, "processing samples")
|
||||||
|
val stream = recognizer.createStream()
|
||||||
|
|
||||||
val interval = 0.1 // i.e., 100 ms
|
val interval = 0.1 // i.e., 100 ms
|
||||||
val bufferSize = (interval * sampleRateInHz).toInt() // in samples
|
val bufferSize = (interval * sampleRateInHz).toInt() // in samples
|
||||||
@@ -117,29 +120,41 @@ class MainActivity : AppCompatActivity() {
|
|||||||
val ret = audioRecord?.read(buffer, 0, buffer.size)
|
val ret = audioRecord?.read(buffer, 0, buffer.size)
|
||||||
if (ret != null && ret > 0) {
|
if (ret != null && ret > 0) {
|
||||||
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
|
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
|
||||||
model.acceptWaveform(samples, sampleRate=sampleRateInHz)
|
stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
|
||||||
while (model.isReady()) {
|
while (recognizer.isReady(stream)) {
|
||||||
model.decode()
|
recognizer.decode(stream)
|
||||||
}
|
}
|
||||||
|
|
||||||
val isEndpoint = model.isEndpoint()
|
val isEndpoint = recognizer.isEndpoint(stream)
|
||||||
val text = model.text
|
var text = recognizer.getResult(stream).text
|
||||||
|
|
||||||
var textToDisplay = lastText;
|
// For streaming paraformer, we need to manually add some
|
||||||
|
// paddings so that it has enough right context to
|
||||||
|
// recognize the last word of this segment
|
||||||
|
if (isEndpoint && recognizer.config.modelConfig.paraformer.encoder.isNotBlank()) {
|
||||||
|
val tailPaddings = FloatArray((0.8 * sampleRateInHz).toInt())
|
||||||
|
stream.acceptWaveform(tailPaddings, sampleRate = sampleRateInHz)
|
||||||
|
while (recognizer.isReady(stream)) {
|
||||||
|
recognizer.decode(stream)
|
||||||
|
}
|
||||||
|
text = recognizer.getResult(stream).text
|
||||||
|
}
|
||||||
|
|
||||||
if(text.isNotBlank()) {
|
var textToDisplay = lastText
|
||||||
if (lastText.isBlank()) {
|
|
||||||
textToDisplay = "${idx}: ${text}"
|
if (text.isNotBlank()) {
|
||||||
|
textToDisplay = if (lastText.isBlank()) {
|
||||||
|
"${idx}: $text"
|
||||||
} else {
|
} else {
|
||||||
textToDisplay = "${lastText}\n${idx}: ${text}"
|
"${lastText}\n${idx}: $text"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isEndpoint) {
|
if (isEndpoint) {
|
||||||
model.reset()
|
recognizer.reset(stream)
|
||||||
if (text.isNotBlank()) {
|
if (text.isNotBlank()) {
|
||||||
lastText = "${lastText}\n${idx}: ${text}"
|
lastText = "${lastText}\n${idx}: $text"
|
||||||
textToDisplay = lastText;
|
textToDisplay = lastText
|
||||||
idx += 1
|
idx += 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -149,6 +164,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
stream.release()
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun initMicrophone(): Boolean {
|
private fun initMicrophone(): Boolean {
|
||||||
@@ -180,7 +196,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
// for a list of available models
|
// for a list of available models
|
||||||
val type = 0
|
val type = 0
|
||||||
println("Select model type ${type}")
|
Log.i(TAG, "Select model type $type")
|
||||||
val config = OnlineRecognizerConfig(
|
val config = OnlineRecognizerConfig(
|
||||||
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
||||||
modelConfig = getModelConfig(type = type)!!,
|
modelConfig = getModelConfig(type = type)!!,
|
||||||
@@ -189,7 +205,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
enableEndpoint = true,
|
enableEndpoint = true,
|
||||||
)
|
)
|
||||||
|
|
||||||
model = SherpaOnnx(
|
recognizer = OnlineRecognizer(
|
||||||
assetManager = application.assets,
|
assetManager = application.assets,
|
||||||
config = config,
|
config = config,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation
|
|
||||||
package com.k2fsa.sherpa.onnx
|
|
||||||
|
|
||||||
import android.content.res.AssetManager
|
|
||||||
|
|
||||||
class WaveReader {
|
|
||||||
companion object {
|
|
||||||
// Read a mono wave file asset
|
|
||||||
// The returned array has two entries:
|
|
||||||
// - the first entry contains a 1-D float array
|
|
||||||
// - the second entry is the sample rate
|
|
||||||
external fun readWaveFromAsset(
|
|
||||||
assetManager: AssetManager,
|
|
||||||
filename: String,
|
|
||||||
): Array<Any>
|
|
||||||
|
|
||||||
// Read a mono wave file from disk
|
|
||||||
// The returned array has two entries:
|
|
||||||
// - the first entry contains a 1-D float array
|
|
||||||
// - the second entry is the sample rate
|
|
||||||
external fun readWaveFromFile(
|
|
||||||
filename: String,
|
|
||||||
): Array<Any>
|
|
||||||
|
|
||||||
init {
|
|
||||||
System.loadLibrary("sherpa-onnx-jni")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/WaveReader.kt
|
||||||
@@ -16,6 +16,7 @@
|
|||||||
tools:targetApi="31">
|
tools:targetApi="31">
|
||||||
<activity
|
<activity
|
||||||
android:name=".MainActivity"
|
android:name=".MainActivity"
|
||||||
|
android:label="2pass ASR: Next-gen Kaldi"
|
||||||
android:exported="true">
|
android:exported="true">
|
||||||
<intent-filter>
|
<intent-filter>
|
||||||
<action android:name="android.intent.action.MAIN" />
|
<action android:name="android.intent.action.MAIN" />
|
||||||
@@ -29,4 +30,4 @@
|
|||||||
</activity>
|
</activity>
|
||||||
</application>
|
</application>
|
||||||
|
|
||||||
</manifest>
|
</manifest>
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt
|
||||||
@@ -17,11 +17,13 @@ import kotlin.concurrent.thread
|
|||||||
private const val TAG = "sherpa-onnx"
|
private const val TAG = "sherpa-onnx"
|
||||||
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
|
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
|
||||||
|
|
||||||
|
// adb emu avd hostmicon
|
||||||
|
// to enable microphone inside the emulator
|
||||||
class MainActivity : AppCompatActivity() {
|
class MainActivity : AppCompatActivity() {
|
||||||
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
||||||
|
|
||||||
private lateinit var onlineRecognizer: SherpaOnnx
|
private lateinit var onlineRecognizer: OnlineRecognizer
|
||||||
private lateinit var offlineRecognizer: SherpaOnnxOffline
|
private lateinit var offlineRecognizer: OfflineRecognizer
|
||||||
private var audioRecord: AudioRecord? = null
|
private var audioRecord: AudioRecord? = null
|
||||||
private lateinit var recordButton: Button
|
private lateinit var recordButton: Button
|
||||||
private lateinit var textView: TextView
|
private lateinit var textView: TextView
|
||||||
@@ -93,7 +95,6 @@ class MainActivity : AppCompatActivity() {
|
|||||||
audioRecord!!.startRecording()
|
audioRecord!!.startRecording()
|
||||||
recordButton.setText(R.string.stop)
|
recordButton.setText(R.string.stop)
|
||||||
isRecording = true
|
isRecording = true
|
||||||
onlineRecognizer.reset(true)
|
|
||||||
samplesBuffer.clear()
|
samplesBuffer.clear()
|
||||||
textView.text = ""
|
textView.text = ""
|
||||||
lastText = ""
|
lastText = ""
|
||||||
@@ -115,6 +116,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
|
|
||||||
private fun processSamples() {
|
private fun processSamples() {
|
||||||
Log.i(TAG, "processing samples")
|
Log.i(TAG, "processing samples")
|
||||||
|
val stream = onlineRecognizer.createStream()
|
||||||
|
|
||||||
val interval = 0.1 // i.e., 100 ms
|
val interval = 0.1 // i.e., 100 ms
|
||||||
val bufferSize = (interval * sampleRateInHz).toInt() // in samples
|
val bufferSize = (interval * sampleRateInHz).toInt() // in samples
|
||||||
@@ -126,29 +128,29 @@ class MainActivity : AppCompatActivity() {
|
|||||||
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
|
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
|
||||||
samplesBuffer.add(samples)
|
samplesBuffer.add(samples)
|
||||||
|
|
||||||
onlineRecognizer.acceptWaveform(samples, sampleRate = sampleRateInHz)
|
stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
|
||||||
while (onlineRecognizer.isReady()) {
|
while (onlineRecognizer.isReady(stream)) {
|
||||||
onlineRecognizer.decode()
|
onlineRecognizer.decode(stream)
|
||||||
}
|
}
|
||||||
val isEndpoint = onlineRecognizer.isEndpoint()
|
val isEndpoint = onlineRecognizer.isEndpoint(stream)
|
||||||
var textToDisplay = lastText
|
var textToDisplay = lastText
|
||||||
|
|
||||||
var text = onlineRecognizer.text
|
var text = onlineRecognizer.getResult(stream).text
|
||||||
if (text.isNotBlank()) {
|
if (text.isNotBlank()) {
|
||||||
if (lastText.isBlank()) {
|
textToDisplay = if (lastText.isBlank()) {
|
||||||
// textView.text = "${idx}: ${text}"
|
// textView.text = "${idx}: ${text}"
|
||||||
textToDisplay = "${idx}: ${text}"
|
"${idx}: $text"
|
||||||
} else {
|
} else {
|
||||||
textToDisplay = "${lastText}\n${idx}: ${text}"
|
"${lastText}\n${idx}: $text"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isEndpoint) {
|
if (isEndpoint) {
|
||||||
onlineRecognizer.reset()
|
onlineRecognizer.reset(stream)
|
||||||
|
|
||||||
if (text.isNotBlank()) {
|
if (text.isNotBlank()) {
|
||||||
text = runSecondPass()
|
text = runSecondPass()
|
||||||
lastText = "${lastText}\n${idx}: ${text}"
|
lastText = "${lastText}\n${idx}: $text"
|
||||||
idx += 1
|
idx += 1
|
||||||
} else {
|
} else {
|
||||||
samplesBuffer.clear()
|
samplesBuffer.clear()
|
||||||
@@ -160,6 +162,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
stream.release()
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun initMicrophone(): Boolean {
|
private fun initMicrophone(): Boolean {
|
||||||
@@ -190,8 +193,8 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// Please change getModelConfig() to add new models
|
// Please change getModelConfig() to add new models
|
||||||
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
// for a list of available models
|
// for a list of available models
|
||||||
val firstType = 1
|
val firstType = 9
|
||||||
println("Select model type ${firstType} for the first pass")
|
Log.i(TAG, "Select model type $firstType for the first pass")
|
||||||
val config = OnlineRecognizerConfig(
|
val config = OnlineRecognizerConfig(
|
||||||
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
||||||
modelConfig = getModelConfig(type = firstType)!!,
|
modelConfig = getModelConfig(type = firstType)!!,
|
||||||
@@ -199,7 +202,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
enableEndpoint = true,
|
enableEndpoint = true,
|
||||||
)
|
)
|
||||||
|
|
||||||
onlineRecognizer = SherpaOnnx(
|
onlineRecognizer = OnlineRecognizer(
|
||||||
assetManager = application.assets,
|
assetManager = application.assets,
|
||||||
config = config,
|
config = config,
|
||||||
)
|
)
|
||||||
@@ -209,15 +212,15 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// Please change getOfflineModelConfig() to add new models
|
// Please change getOfflineModelConfig() to add new models
|
||||||
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
// for a list of available models
|
// for a list of available models
|
||||||
val secondType = 1
|
val secondType = 0
|
||||||
println("Select model type ${secondType} for the second pass")
|
Log.i(TAG, "Select model type $secondType for the second pass")
|
||||||
|
|
||||||
val config = OfflineRecognizerConfig(
|
val config = OfflineRecognizerConfig(
|
||||||
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
||||||
modelConfig = getOfflineModelConfig(type = secondType)!!,
|
modelConfig = getOfflineModelConfig(type = secondType)!!,
|
||||||
)
|
)
|
||||||
|
|
||||||
offlineRecognizer = SherpaOnnxOffline(
|
offlineRecognizer = OfflineRecognizer(
|
||||||
assetManager = application.assets,
|
assetManager = application.assets,
|
||||||
config = config,
|
config = config,
|
||||||
)
|
)
|
||||||
@@ -244,8 +247,15 @@ class MainActivity : AppCompatActivity() {
|
|||||||
val n = maxOf(0, samples.size - 8000)
|
val n = maxOf(0, samples.size - 8000)
|
||||||
|
|
||||||
samplesBuffer.clear()
|
samplesBuffer.clear()
|
||||||
samplesBuffer.add(samples.sliceArray(n..samples.size-1))
|
samplesBuffer.add(samples.sliceArray(n until samples.size))
|
||||||
|
|
||||||
return offlineRecognizer.decode(samples.sliceArray(0..n), sampleRateInHz)
|
val stream = offlineRecognizer.createStream()
|
||||||
|
stream.acceptWaveform(samples.sliceArray(0..n), sampleRateInHz)
|
||||||
|
offlineRecognizer.decode(stream)
|
||||||
|
val result = offlineRecognizer.getResult(stream)
|
||||||
|
|
||||||
|
stream.release()
|
||||||
|
|
||||||
|
return result.text
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineRecognizer.kt
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt
|
||||||
@@ -1,404 +0,0 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
|
||||||
|
|
||||||
import android.content.res.AssetManager
|
|
||||||
|
|
||||||
data class EndpointRule(
|
|
||||||
var mustContainNonSilence: Boolean,
|
|
||||||
var minTrailingSilence: Float,
|
|
||||||
var minUtteranceLength: Float,
|
|
||||||
)
|
|
||||||
|
|
||||||
data class EndpointConfig(
|
|
||||||
var rule1: EndpointRule = EndpointRule(false, 2.0f, 0.0f),
|
|
||||||
var rule2: EndpointRule = EndpointRule(true, 1.2f, 0.0f),
|
|
||||||
var rule3: EndpointRule = EndpointRule(false, 0.0f, 20.0f)
|
|
||||||
)
|
|
||||||
|
|
||||||
data class OnlineTransducerModelConfig(
|
|
||||||
var encoder: String = "",
|
|
||||||
var decoder: String = "",
|
|
||||||
var joiner: String = "",
|
|
||||||
)
|
|
||||||
|
|
||||||
data class OnlineParaformerModelConfig(
|
|
||||||
var encoder: String = "",
|
|
||||||
var decoder: String = "",
|
|
||||||
)
|
|
||||||
|
|
||||||
data class OnlineZipformer2CtcModelConfig(
|
|
||||||
var model: String = "",
|
|
||||||
)
|
|
||||||
|
|
||||||
data class OnlineModelConfig(
|
|
||||||
var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(),
|
|
||||||
var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(),
|
|
||||||
var zipformer2Ctc: OnlineZipformer2CtcModelConfig = OnlineZipformer2CtcModelConfig(),
|
|
||||||
var tokens: String,
|
|
||||||
var numThreads: Int = 1,
|
|
||||||
var debug: Boolean = false,
|
|
||||||
var provider: String = "cpu",
|
|
||||||
var modelType: String = "",
|
|
||||||
)
|
|
||||||
|
|
||||||
data class OnlineLMConfig(
|
|
||||||
var model: String = "",
|
|
||||||
var scale: Float = 0.5f,
|
|
||||||
)
|
|
||||||
|
|
||||||
data class FeatureConfig(
|
|
||||||
var sampleRate: Int = 16000,
|
|
||||||
var featureDim: Int = 80,
|
|
||||||
)
|
|
||||||
|
|
||||||
data class OnlineRecognizerConfig(
|
|
||||||
var featConfig: FeatureConfig = FeatureConfig(),
|
|
||||||
var modelConfig: OnlineModelConfig,
|
|
||||||
var lmConfig: OnlineLMConfig = OnlineLMConfig(),
|
|
||||||
var endpointConfig: EndpointConfig = EndpointConfig(),
|
|
||||||
var enableEndpoint: Boolean = true,
|
|
||||||
var decodingMethod: String = "greedy_search",
|
|
||||||
var maxActivePaths: Int = 4,
|
|
||||||
var hotwordsFile: String = "",
|
|
||||||
var hotwordsScore: Float = 1.5f,
|
|
||||||
)
|
|
||||||
|
|
||||||
data class OfflineTransducerModelConfig(
|
|
||||||
var encoder: String = "",
|
|
||||||
var decoder: String = "",
|
|
||||||
var joiner: String = "",
|
|
||||||
)
|
|
||||||
|
|
||||||
data class OfflineParaformerModelConfig(
|
|
||||||
var model: String = "",
|
|
||||||
)
|
|
||||||
|
|
||||||
data class OfflineWhisperModelConfig(
|
|
||||||
var encoder: String = "",
|
|
||||||
var decoder: String = "",
|
|
||||||
var language: String = "en", // Used with multilingual model
|
|
||||||
var task: String = "transcribe", // transcribe or translate
|
|
||||||
var tailPaddings: Int = 1000, // Padding added at the end of the samples
|
|
||||||
)
|
|
||||||
|
|
||||||
data class OfflineModelConfig(
|
|
||||||
var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(),
|
|
||||||
var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(),
|
|
||||||
var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(),
|
|
||||||
var numThreads: Int = 1,
|
|
||||||
var debug: Boolean = false,
|
|
||||||
var provider: String = "cpu",
|
|
||||||
var modelType: String = "",
|
|
||||||
var tokens: String,
|
|
||||||
)
|
|
||||||
|
|
||||||
data class OfflineRecognizerConfig(
|
|
||||||
var featConfig: FeatureConfig = FeatureConfig(),
|
|
||||||
var modelConfig: OfflineModelConfig,
|
|
||||||
// var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it
|
|
||||||
var decodingMethod: String = "greedy_search",
|
|
||||||
var maxActivePaths: Int = 4,
|
|
||||||
var hotwordsFile: String = "",
|
|
||||||
var hotwordsScore: Float = 1.5f,
|
|
||||||
)
|
|
||||||
|
|
||||||
class SherpaOnnx(
|
|
||||||
assetManager: AssetManager? = null,
|
|
||||||
var config: OnlineRecognizerConfig,
|
|
||||||
) {
|
|
||||||
private val ptr: Long
|
|
||||||
|
|
||||||
init {
|
|
||||||
if (assetManager != null) {
|
|
||||||
ptr = new(assetManager, config)
|
|
||||||
} else {
|
|
||||||
ptr = newFromFile(config)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected fun finalize() {
|
|
||||||
delete(ptr)
|
|
||||||
}
|
|
||||||
|
|
||||||
fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
|
|
||||||
acceptWaveform(ptr, samples, sampleRate)
|
|
||||||
|
|
||||||
fun inputFinished() = inputFinished(ptr)
|
|
||||||
fun reset(recreate: Boolean = false, hotwords: String = "") = reset(ptr, recreate, hotwords)
|
|
||||||
fun decode() = decode(ptr)
|
|
||||||
fun isEndpoint(): Boolean = isEndpoint(ptr)
|
|
||||||
fun isReady(): Boolean = isReady(ptr)
|
|
||||||
|
|
||||||
val text: String
|
|
||||||
get() = getText(ptr)
|
|
||||||
|
|
||||||
val tokens: Array<String>
|
|
||||||
get() = getTokens(ptr)
|
|
||||||
|
|
||||||
private external fun delete(ptr: Long)
|
|
||||||
|
|
||||||
private external fun new(
|
|
||||||
assetManager: AssetManager,
|
|
||||||
config: OnlineRecognizerConfig,
|
|
||||||
): Long
|
|
||||||
|
|
||||||
private external fun newFromFile(
|
|
||||||
config: OnlineRecognizerConfig,
|
|
||||||
): Long
|
|
||||||
|
|
||||||
private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
|
|
||||||
private external fun inputFinished(ptr: Long)
|
|
||||||
private external fun getText(ptr: Long): String
|
|
||||||
private external fun reset(ptr: Long, recreate: Boolean, hotwords: String)
|
|
||||||
private external fun decode(ptr: Long)
|
|
||||||
private external fun isEndpoint(ptr: Long): Boolean
|
|
||||||
private external fun isReady(ptr: Long): Boolean
|
|
||||||
private external fun getTokens(ptr: Long): Array<String>
|
|
||||||
|
|
||||||
companion object {
|
|
||||||
init {
|
|
||||||
System.loadLibrary("sherpa-onnx-jni")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class SherpaOnnxOffline(
|
|
||||||
assetManager: AssetManager? = null,
|
|
||||||
var config: OfflineRecognizerConfig,
|
|
||||||
) {
|
|
||||||
private val ptr: Long
|
|
||||||
|
|
||||||
init {
|
|
||||||
if (assetManager != null) {
|
|
||||||
ptr = new(assetManager, config)
|
|
||||||
} else {
|
|
||||||
ptr = newFromFile(config)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected fun finalize() {
|
|
||||||
delete(ptr)
|
|
||||||
}
|
|
||||||
|
|
||||||
fun decode(samples: FloatArray, sampleRate: Int) = decode(ptr, samples, sampleRate)
|
|
||||||
|
|
||||||
private external fun delete(ptr: Long)
|
|
||||||
|
|
||||||
private external fun new(
|
|
||||||
assetManager: AssetManager,
|
|
||||||
config: OfflineRecognizerConfig,
|
|
||||||
): Long
|
|
||||||
|
|
||||||
private external fun newFromFile(
|
|
||||||
config: OfflineRecognizerConfig,
|
|
||||||
): Long
|
|
||||||
|
|
||||||
private external fun decode(ptr: Long, samples: FloatArray, sampleRate: Int): String
|
|
||||||
|
|
||||||
companion object {
|
|
||||||
init {
|
|
||||||
System.loadLibrary("sherpa-onnx-jni")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fun getFeatureConfig(sampleRate: Int, featureDim: Int): FeatureConfig {
|
|
||||||
return FeatureConfig(sampleRate = sampleRate, featureDim = featureDim)
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
Please see
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
|
||||||
for a list of pre-trained models.
|
|
||||||
|
|
||||||
We only add a few here. Please change the following code
|
|
||||||
to add your own. (It should be straightforward to add a new model
|
|
||||||
by following the code)
|
|
||||||
|
|
||||||
@param type
|
|
||||||
0 - csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23 (Chinese)
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23
|
|
||||||
encoder/joiner int8, decoder float32
|
|
||||||
|
|
||||||
1 - csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17 (English)
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-20m-2023-02-17-english
|
|
||||||
encoder/joiner int8, decoder fp32
|
|
||||||
|
|
||||||
*/
|
|
||||||
fun getModelConfig(type: Int): OnlineModelConfig? {
|
|
||||||
when (type) {
|
|
||||||
0 -> {
|
|
||||||
val modelDir = "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"
|
|
||||||
return OnlineModelConfig(
|
|
||||||
transducer = OnlineTransducerModelConfig(
|
|
||||||
encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
|
|
||||||
decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
|
|
||||||
joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
|
|
||||||
),
|
|
||||||
tokens = "$modelDir/tokens.txt",
|
|
||||||
modelType = "zipformer",
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
1 -> {
|
|
||||||
val modelDir = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
|
|
||||||
return OnlineModelConfig(
|
|
||||||
transducer = OnlineTransducerModelConfig(
|
|
||||||
encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
|
|
||||||
decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
|
|
||||||
joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
|
|
||||||
),
|
|
||||||
tokens = "$modelDir/tokens.txt",
|
|
||||||
modelType = "zipformer",
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
Please see
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
|
||||||
for a list of pre-trained models.
|
|
||||||
|
|
||||||
We only add a few here. Please change the following code
|
|
||||||
to add your own LM model. (It should be straightforward to train a new NN LM model
|
|
||||||
by following the code, https://github.com/k2-fsa/icefall/blob/master/icefall/rnn_lm/train.py)
|
|
||||||
|
|
||||||
@param type
|
|
||||||
0 - sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
|
|
||||||
*/
|
|
||||||
fun getOnlineLMConfig(type: Int): OnlineLMConfig {
    // Only index 0 has a dedicated LM entry; every other index gets a
    // default-constructed config.
    if (type != 0) {
        return OnlineLMConfig()
    }

    val lmDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"
    return OnlineLMConfig(
        model = "$lmDir/with-state-epoch-99-avg-1.int8.onnx",
        scale = 0.5f,
    )
}
|
|
||||||
|
|
||||||
// for English models, use a small value for rule2.minTrailingSilence, e.g., 0.8
|
|
||||||
fun getEndpointConfig(): EndpointConfig {
    // The three rules are built in order and passed on unchanged.
    // NOTE(review): the positional arguments follow EndpointRule's declaration
    // order, which is not visible here; 0.8f is presumably
    // rule2.minTrailingSilence — confirm against EndpointRule.
    val first = EndpointRule(false, 2.4f, 0.0f)
    val second = EndpointRule(true, 0.8f, 0.0f)
    val third = EndpointRule(false, 0.0f, 20.0f)

    return EndpointConfig(
        rule1 = first,
        rule2 = second,
        rule3 = third,
    )
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
Please see
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
|
||||||
for a list of pre-trained models.
|
|
||||||
|
|
||||||
We only add a few here. Please change the following code
|
|
||||||
to add your own. (It should be straightforward to add a new model
|
|
||||||
by following the code)
|
|
||||||
|
|
||||||
@param type
|
|
||||||
|
|
||||||
0 - csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 (Chinese)
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese
|
|
||||||
int8
|
|
||||||
|
|
||||||
1 - icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04 (English)
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english
|
|
||||||
encoder int8, decoder/joiner float32
|
|
||||||
|
|
||||||
2 - sherpa-onnx-whisper-tiny.en
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
|
|
||||||
encoder int8, decoder int8
|
|
||||||
|
|
||||||
3 - sherpa-onnx-whisper-base.en
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
|
|
||||||
encoder int8, decoder int8
|
|
||||||
|
|
||||||
4 - pkufool/icefall-asr-zipformer-wenetspeech-20230615 (Chinese)
|
|
||||||
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-wenetspeech-20230615-chinese
|
|
||||||
encoder/joiner int8, decoder fp32
|
|
||||||
|
|
||||||
*/
|
|
||||||
fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
    // Maps a model index (0..5) to the file layout of one non-streaming model
    // directory. Indices 0-4 cover paraformer, transducer and whisper models;
    // index 5 is a further transducer entry using modelType "zipformer2".
    // Any other index yields null.
    return when (type) {
        0 -> {
            val dir = "sherpa-onnx-paraformer-zh-2023-03-28"
            OfflineModelConfig(
                paraformer = OfflineParaformerModelConfig(
                    model = "$dir/model.int8.onnx",
                ),
                tokens = "$dir/tokens.txt",
                modelType = "paraformer",
            )
        }

        1 -> {
            val dir = "icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04"
            OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$dir/encoder-epoch-30-avg-4.int8.onnx",
                    decoder = "$dir/decoder-epoch-30-avg-4.onnx",
                    joiner = "$dir/joiner-epoch-30-avg-4.onnx",
                ),
                tokens = "$dir/tokens.txt",
                modelType = "zipformer",
            )
        }

        2 -> {
            val dir = "sherpa-onnx-whisper-tiny.en"
            OfflineModelConfig(
                whisper = OfflineWhisperModelConfig(
                    encoder = "$dir/tiny.en-encoder.int8.onnx",
                    decoder = "$dir/tiny.en-decoder.int8.onnx",
                ),
                tokens = "$dir/tiny.en-tokens.txt",
                modelType = "whisper",
            )
        }

        3 -> {
            val dir = "sherpa-onnx-whisper-base.en"
            OfflineModelConfig(
                whisper = OfflineWhisperModelConfig(
                    encoder = "$dir/base.en-encoder.int8.onnx",
                    decoder = "$dir/base.en-decoder.int8.onnx",
                ),
                tokens = "$dir/base.en-tokens.txt",
                modelType = "whisper",
            )
        }

        4 -> {
            val dir = "icefall-asr-zipformer-wenetspeech-20230615"
            OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$dir/encoder-epoch-12-avg-4.int8.onnx",
                    decoder = "$dir/decoder-epoch-12-avg-4.onnx",
                    joiner = "$dir/joiner-epoch-12-avg-4.int8.onnx",
                ),
                tokens = "$dir/tokens.txt",
                modelType = "zipformer",
            )
        }

        5 -> {
            val dir = "sherpa-onnx-zipformer-multi-zh-hans-2023-9-2"
            OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$dir/encoder-epoch-20-avg-1.int8.onnx",
                    decoder = "$dir/decoder-epoch-20-avg-1.onnx",
                    joiner = "$dir/joiner-epoch-20-avg-1.int8.onnx",
                ),
                tokens = "$dir/tokens.txt",
                modelType = "zipformer2",
            )
        }

        else -> null
    }
}
|
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
|
||||||
|
|
||||||
import android.content.res.AssetManager
|
|
||||||
|
|
||||||
/**
 * Native entry points for reading mono wave files.
 *
 * Both readers are declared as external functions in the companion object,
 * whose initializer loads the "sherpa-onnx-jni" library.
 */
class WaveReader {
    companion object {
        // Read a mono wave file asset
        // The returned array has two entries:
        //  - the first entry contains a 1-D float array
        //  - the second entry is the sample rate
        external fun readWaveFromAsset(
            assetManager: AssetManager,
            filename: String,
        ): Array<Any>

        // Read a mono wave file from disk
        // The returned array has two entries:
        //  - the first entry contains a 1-D float array
        //  - the second entry is the sample rate
        external fun readWaveFromFile(
            filename: String,
        ): Array<Any>

        init {
            // Load the native library; the external functions above are
            // presumably implemented there.
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
|
|
||||||
@@ -1,188 +0,0 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
|
||||||
|
|
||||||
import android.content.res.AssetManager
|
|
||||||
|
|
||||||
const val TAG = "sherpa-onnx"
|
|
||||||
|
|
||||||
// Settings for a zipformer audio-tagging model.
// `model` holds the path of the model file (getAudioTaggingConfig sets it to
// "<modelDir>/model.int8.onnx"); it defaults to an empty string.
data class OfflineZipformerAudioTaggingModelConfig(
    var model: String = "",
)
|
|
||||||
|
|
||||||
// Model-level settings for audio tagging.
// getAudioTaggingConfig fills in either `zipformer` or `ced` and leaves the
// other at its default.
data class AudioTaggingModelConfig(
    // Zipformer model settings; the default instance has an empty model path.
    var zipformer: OfflineZipformerAudioTaggingModelConfig = OfflineZipformerAudioTaggingModelConfig(),
    // Path of a CED model file; empty by default.
    var ced: String = "",
    // NOTE(review): presumably the number of threads used by the native runtime — confirm.
    var numThreads: Int = 1,
    // NOTE(review): presumably enables verbose native logging — confirm.
    var debug: Boolean = false,
    // NOTE(review): presumably the execution provider name — confirm.
    var provider: String = "cpu",
)
|
|
||||||
|
|
||||||
// Top-level configuration handed to AudioTagging.
data class AudioTaggingConfig(
    // Model settings (required).
    var model: AudioTaggingModelConfig,
    // Path of the labels file (required); getAudioTaggingConfig uses
    // "<modelDir>/class_labels_indices.csv".
    var labels: String,
    // NOTE(review): presumably the number of top-scoring events to return — confirm.
    var topK: Int = 5,
)
|
|
||||||
|
|
||||||
// One entry of an audio-tagging result. AudioTagging.compute builds it from a
// three-element array returned by native code: [0] name, [1] index, [2] prob.
data class AudioEvent(
    val name: String,
    val index: Int,
    val prob: Float,
)
|
|
||||||
|
|
||||||
/**
 * Kotlin wrapper around the native audio tagger.
 *
 * @param assetManager when non-null the tagger is created through
 *   `newFromAsset`, otherwise through `newFromFile`.
 * @param config configuration forwarded to the native constructor.
 */
class AudioTagging(
    assetManager: AssetManager? = null,
    config: AudioTaggingConfig,
) {
    // Handle returned by the native constructor; reset to 0 once released.
    private var ptr: Long = if (assetManager == null) {
        newFromFile(config)
    } else {
        newFromAsset(assetManager, config)
    }

    // Frees the native object at most once.
    protected fun finalize() {
        if (ptr == 0L) {
            return
        }
        delete(ptr)
        ptr = 0
    }

    // Explicit counterpart of finalize() for callers that do not want to wait
    // for garbage collection.
    fun release() {
        finalize()
    }

    // Wraps a newly created native stream handle.
    fun createStream(): OfflineStream = OfflineStream(createStream(ptr))

    // Runs tagging on `stream` and converts each native result entry, a
    // three-element array of name, index and probability, into an AudioEvent.
    @Suppress("UNCHECKED_CAST")
    fun compute(stream: OfflineStream, topK: Int = -1): ArrayList<AudioEvent> {
        val rawEvents: Array<Any> = compute(ptr, stream.ptr, topK)
        return rawEvents.mapTo(ArrayList<AudioEvent>()) { raw ->
            val fields = raw as Array<Any>
            AudioEvent(
                name = fields[0] as String,
                index = fields[1] as Int,
                prob = fields[2] as Float,
            )
        }
    }

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: AudioTaggingConfig,
    ): Long

    private external fun newFromFile(
        config: AudioTaggingConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun compute(ptr: Long, streamPtr: Long, topK: Int): Array<Any>

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
|
|
||||||
|
|
||||||
// please refer to
|
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
|
|
||||||
// to download more models
|
|
||||||
//
|
|
||||||
// See also
|
|
||||||
// https://k2-fsa.github.io/sherpa/onnx/audio-tagging/
|
|
||||||
fun getAudioTaggingConfig(type: Int, numThreads: Int = 1): AudioTaggingConfig? {
    // Resolve the model directory first; an unknown index yields null.
    val modelDir = when (type) {
        0 -> "sherpa-onnx-zipformer-small-audio-tagging-2024-04-15"
        1 -> "sherpa-onnx-zipformer-audio-tagging-2024-04-09"
        2 -> "sherpa-onnx-ced-tiny-audio-tagging-2024-04-19"
        3 -> "sherpa-onnx-ced-mini-audio-tagging-2024-04-19"
        4 -> "sherpa-onnx-ced-small-audio-tagging-2024-04-19"
        5 -> "sherpa-onnx-ced-base-audio-tagging-2024-04-19"
        else -> return null
    }

    // Indices 0 and 1 are zipformer models; 2..5 are CED models. Both families
    // use the same file name inside their directory.
    val modelFile = "$modelDir/model.int8.onnx"
    val modelConfig = if (type <= 1) {
        AudioTaggingModelConfig(
            zipformer = OfflineZipformerAudioTaggingModelConfig(model = modelFile),
            numThreads = numThreads,
            debug = true,
        )
    } else {
        AudioTaggingModelConfig(
            ced = modelFile,
            numThreads = numThreads,
            debug = true,
        )
    }

    return AudioTaggingConfig(
        model = modelConfig,
        labels = "$modelDir/class_labels_indices.csv",
        topK = 3,
    )
}
|
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../../../sherpa-onnx/kotlin-api/AudioTagging.kt
|
||||||
@@ -46,7 +46,6 @@ import androidx.compose.ui.unit.dp
|
|||||||
import androidx.compose.ui.unit.sp
|
import androidx.compose.ui.unit.sp
|
||||||
import androidx.core.app.ActivityCompat
|
import androidx.core.app.ActivityCompat
|
||||||
import com.k2fsa.sherpa.onnx.AudioEvent
|
import com.k2fsa.sherpa.onnx.AudioEvent
|
||||||
import com.k2fsa.sherpa.onnx.Tagger
|
|
||||||
import kotlin.concurrent.thread
|
import kotlin.concurrent.thread
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -13,13 +13,14 @@ import androidx.compose.material3.Surface
|
|||||||
import androidx.compose.runtime.Composable
|
import androidx.compose.runtime.Composable
|
||||||
import androidx.compose.ui.Modifier
|
import androidx.compose.ui.Modifier
|
||||||
import androidx.core.app.ActivityCompat
|
import androidx.core.app.ActivityCompat
|
||||||
import com.k2fsa.sherpa.onnx.Tagger
|
|
||||||
import com.k2fsa.sherpa.onnx.audio.tagging.ui.theme.SherpaOnnxAudioTaggingTheme
|
import com.k2fsa.sherpa.onnx.audio.tagging.ui.theme.SherpaOnnxAudioTaggingTheme
|
||||||
|
|
||||||
const val TAG = "sherpa-onnx"
|
const val TAG = "sherpa-onnx"
|
||||||
|
|
||||||
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
|
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
|
||||||
|
|
||||||
|
// adb emu avd hostmicon
|
||||||
|
// to enable mic inside the emulator
|
||||||
class MainActivity : ComponentActivity() {
|
class MainActivity : ComponentActivity() {
|
||||||
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
||||||
override fun onCreate(savedInstanceState: Bundle?) {
|
override fun onCreate(savedInstanceState: Bundle?) {
|
||||||
|
|||||||
@@ -1,24 +0,0 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
|
||||||
|
|
||||||
/**
 * Wrapper around a native offline stream handle.
 *
 * @property ptr native handle; reset to 0 after the stream has been released.
 */
class OfflineStream(var ptr: Long) {
    // Forwards the samples and their sample rate to the native stream.
    fun acceptWaveform(samples: FloatArray, sampleRate: Int) {
        acceptWaveform(ptr, samples, sampleRate)
    }

    // Frees the native object at most once.
    protected fun finalize() {
        if (ptr == 0L) {
            return
        }
        delete(ptr)
        ptr = 0
    }

    // Explicit counterpart of finalize().
    fun release() {
        finalize()
    }

    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
    private external fun delete(ptr: Long)

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
|
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt
|
||||||
@@ -1,7 +1,9 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
package com.k2fsa.sherpa.onnx.audio.tagging
|
||||||
|
|
||||||
import android.content.res.AssetManager
|
import android.content.res.AssetManager
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
|
import com.k2fsa.sherpa.onnx.AudioTagging
|
||||||
|
import com.k2fsa.sherpa.onnx.getAudioTaggingConfig
|
||||||
|
|
||||||
|
|
||||||
object Tagger {
|
object Tagger {
|
||||||
@@ -17,7 +19,7 @@ object Tagger {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
Log.i(TAG, "Initializing audio tagger")
|
Log.i("sherpa-onnx", "Initializing audio tagger")
|
||||||
val config = getAudioTaggingConfig(type = 0, numThreads = numThreads)!!
|
val config = getAudioTaggingConfig(type = 0, numThreads = numThreads)!!
|
||||||
_tagger = AudioTagging(assetManager, config)
|
_tagger = AudioTagging(assetManager, config)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ import androidx.wear.compose.material.Button
|
|||||||
import androidx.wear.compose.material.MaterialTheme
|
import androidx.wear.compose.material.MaterialTheme
|
||||||
import androidx.wear.compose.material.Text
|
import androidx.wear.compose.material.Text
|
||||||
import com.k2fsa.sherpa.onnx.AudioEvent
|
import com.k2fsa.sherpa.onnx.AudioEvent
|
||||||
import com.k2fsa.sherpa.onnx.Tagger
|
import com.k2fsa.sherpa.onnx.audio.tagging.Tagger
|
||||||
import com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation.theme.SherpaOnnxAudioTaggingWearOsTheme
|
import com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation.theme.SherpaOnnxAudioTaggingWearOsTheme
|
||||||
import kotlin.concurrent.thread
|
import kotlin.concurrent.thread
|
||||||
|
|
||||||
|
|||||||
@@ -17,11 +17,14 @@ import androidx.activity.compose.setContent
|
|||||||
import androidx.compose.runtime.Composable
|
import androidx.compose.runtime.Composable
|
||||||
import androidx.core.app.ActivityCompat
|
import androidx.core.app.ActivityCompat
|
||||||
import androidx.core.splashscreen.SplashScreen.Companion.installSplashScreen
|
import androidx.core.splashscreen.SplashScreen.Companion.installSplashScreen
|
||||||
import com.k2fsa.sherpa.onnx.Tagger
|
import com.k2fsa.sherpa.onnx.audio.tagging.Tagger
|
||||||
|
|
||||||
const val TAG = "sherpa-onnx"
|
const val TAG = "sherpa-onnx"
|
||||||
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
|
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
|
||||||
|
|
||||||
|
// adb emu avd hostmicon
|
||||||
|
// to enable mic inside the emulator
|
||||||
|
|
||||||
class MainActivity : ComponentActivity() {
|
class MainActivity : ComponentActivity() {
|
||||||
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
||||||
override fun onCreate(savedInstanceState: Bundle?) {
|
override fun onCreate(savedInstanceState: Bundle?) {
|
||||||
|
|||||||
@@ -15,7 +15,8 @@
|
|||||||
android:theme="@style/Theme.SherpaOnnx"
|
android:theme="@style/Theme.SherpaOnnx"
|
||||||
tools:targetApi="31">
|
tools:targetApi="31">
|
||||||
<activity
|
<activity
|
||||||
android:name=".MainActivity"
|
android:name=".kws.MainActivity"
|
||||||
|
android:label="Keyword-spotter"
|
||||||
android:exported="true">
|
android:exported="true">
|
||||||
<intent-filter>
|
<intent-filter>
|
||||||
<action android:name="android.intent.action.MAIN" />
|
<action android:name="android.intent.action.MAIN" />
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/KeywordSpotter.kt
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
package com.k2fsa.sherpa.onnx.kws
|
||||||
|
|
||||||
import android.Manifest
|
import android.Manifest
|
||||||
import android.content.pm.PackageManager
|
import android.content.pm.PackageManager
|
||||||
@@ -14,7 +14,13 @@ import android.widget.TextView
|
|||||||
import android.widget.Toast
|
import android.widget.Toast
|
||||||
import androidx.appcompat.app.AppCompatActivity
|
import androidx.appcompat.app.AppCompatActivity
|
||||||
import androidx.core.app.ActivityCompat
|
import androidx.core.app.ActivityCompat
|
||||||
import com.k2fsa.sherpa.onnx.*
|
import com.k2fsa.sherpa.onnx.KeywordSpotter
|
||||||
|
import com.k2fsa.sherpa.onnx.KeywordSpotterConfig
|
||||||
|
import com.k2fsa.sherpa.onnx.OnlineStream
|
||||||
|
import com.k2fsa.sherpa.onnx.R
|
||||||
|
import com.k2fsa.sherpa.onnx.getFeatureConfig
|
||||||
|
import com.k2fsa.sherpa.onnx.getKeywordsFile
|
||||||
|
import com.k2fsa.sherpa.onnx.getKwsModelConfig
|
||||||
import kotlin.concurrent.thread
|
import kotlin.concurrent.thread
|
||||||
|
|
||||||
private const val TAG = "sherpa-onnx"
|
private const val TAG = "sherpa-onnx"
|
||||||
@@ -23,7 +29,8 @@ private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
|
|||||||
class MainActivity : AppCompatActivity() {
|
class MainActivity : AppCompatActivity() {
|
||||||
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
||||||
|
|
||||||
private lateinit var model: SherpaOnnxKws
|
private lateinit var kws: KeywordSpotter
|
||||||
|
private lateinit var stream: OnlineStream
|
||||||
private var audioRecord: AudioRecord? = null
|
private var audioRecord: AudioRecord? = null
|
||||||
private lateinit var recordButton: Button
|
private lateinit var recordButton: Button
|
||||||
private lateinit var textView: TextView
|
private lateinit var textView: TextView
|
||||||
@@ -87,15 +94,18 @@ class MainActivity : AppCompatActivity() {
|
|||||||
|
|
||||||
Log.i(TAG, keywords)
|
Log.i(TAG, keywords)
|
||||||
keywords = keywords.replace("\n", "/")
|
keywords = keywords.replace("\n", "/")
|
||||||
|
keywords = keywords.trim()
|
||||||
// If keywords is an empty string, it just resets the decoding stream
|
// If keywords is an empty string, it just resets the decoding stream
|
||||||
// always returns true in this case.
|
// always returns true in this case.
|
||||||
// If keywords is not empty, it will create a new decoding stream with
|
// If keywords is not empty, it will create a new decoding stream with
|
||||||
// the given keywords appended to the default keywords.
|
// the given keywords appended to the default keywords.
|
||||||
// Return false if errors occured when adding keywords, true otherwise.
|
// Return false if errors occurred when adding keywords, true otherwise.
|
||||||
val status = model.reset(keywords)
|
stream.release()
|
||||||
if (!status) {
|
stream = kws.createStream(keywords)
|
||||||
Log.i(TAG, "Failed to reset with keywords.")
|
if (stream.ptr == 0L) {
|
||||||
Toast.makeText(this, "Failed to set keywords.", Toast.LENGTH_LONG).show();
|
Log.i(TAG, "Failed to create stream with keywords: $keywords")
|
||||||
|
Toast.makeText(this, "Failed to set keywords to $keywords.", Toast.LENGTH_LONG)
|
||||||
|
.show()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -122,6 +132,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
audioRecord!!.release()
|
audioRecord!!.release()
|
||||||
audioRecord = null
|
audioRecord = null
|
||||||
recordButton.setText(R.string.start)
|
recordButton.setText(R.string.start)
|
||||||
|
stream.release()
|
||||||
Log.i(TAG, "Stopped recording")
|
Log.i(TAG, "Stopped recording")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -137,22 +148,22 @@ class MainActivity : AppCompatActivity() {
|
|||||||
val ret = audioRecord?.read(buffer, 0, buffer.size)
|
val ret = audioRecord?.read(buffer, 0, buffer.size)
|
||||||
if (ret != null && ret > 0) {
|
if (ret != null && ret > 0) {
|
||||||
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
|
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
|
||||||
model.acceptWaveform(samples, sampleRate=sampleRateInHz)
|
stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
|
||||||
while (model.isReady()) {
|
while (kws.isReady(stream)) {
|
||||||
model.decode()
|
kws.decode(stream)
|
||||||
}
|
}
|
||||||
|
|
||||||
val text = model.keyword
|
val text = kws.getResult(stream).keyword
|
||||||
|
|
||||||
var textToDisplay = lastText;
|
var textToDisplay = lastText
|
||||||
|
|
||||||
if(text.isNotBlank()) {
|
if (text.isNotBlank()) {
|
||||||
if (lastText.isBlank()) {
|
if (lastText.isBlank()) {
|
||||||
textToDisplay = "${idx}: ${text}"
|
textToDisplay = "$idx: $text"
|
||||||
} else {
|
} else {
|
||||||
textToDisplay = "${idx}: ${text}\n${lastText}"
|
textToDisplay = "$idx: $text\n$lastText"
|
||||||
}
|
}
|
||||||
lastText = "${idx}: ${text}\n${lastText}"
|
lastText = "$idx: $text\n$lastText"
|
||||||
idx += 1
|
idx += 1
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -188,20 +199,21 @@ class MainActivity : AppCompatActivity() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private fun initModel() {
|
private fun initModel() {
|
||||||
// Please change getModelConfig() to add new models
|
// Please change getKwsModelConfig() to add new models
|
||||||
// See https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
|
// See https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
|
||||||
// for a list of available models
|
// for a list of available models
|
||||||
val type = 0
|
val type = 0
|
||||||
Log.i(TAG, "Select model type ${type}")
|
Log.i(TAG, "Select model type $type")
|
||||||
val config = KeywordSpotterConfig(
|
val config = KeywordSpotterConfig(
|
||||||
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
||||||
modelConfig = getModelConfig(type = type)!!,
|
modelConfig = getKwsModelConfig(type = type)!!,
|
||||||
keywordsFile = getKeywordsFile(type = type)!!,
|
keywordsFile = getKeywordsFile(type = type),
|
||||||
)
|
)
|
||||||
|
|
||||||
model = SherpaOnnxKws(
|
kws = KeywordSpotter(
|
||||||
assetManager = application.assets,
|
assetManager = application.assets,
|
||||||
config = config,
|
config = config,
|
||||||
)
|
)
|
||||||
|
stream = kws.createStream()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt
|
||||||
@@ -1,12 +1,12 @@
|
|||||||
<resources>
|
<resources>
|
||||||
<string name="app_name">KWS with Next-gen Kaldi</string>
|
<string name="app_name">Keyword spotting</string>
|
||||||
<string name="hint">Click the Start button to play keyword spotting with Next-gen Kaldi.
|
<string name="hint">Click the Start button to play keyword spotting with Next-gen Kaldi.
|
||||||
\n
|
\n
|
||||||
\n\n\n
|
\n\n\n
|
||||||
The source code and pre-trained models are publicly available.
|
The source code and pre-trained models are publicly available.
|
||||||
Please see https://github.com/k2-fsa/sherpa-onnx for details.
|
Please see https://github.com/k2-fsa/sherpa-onnx for details.
|
||||||
</string>
|
</string>
|
||||||
<string name="keyword_hint">Input your keywords here, one keyword perline.</string>
|
<string name="keyword_hint">Input your keywords here, one keyword per line.\nTwo example keywords are given below:\n\nn ǐ h ǎo @你好\nd àn g ē d àn g ē @蛋哥蛋哥</string>
|
||||||
<string name="start">Start</string>
|
<string name="start">Start</string>
|
||||||
<string name="stop">Stop</string>
|
<string name="stop">Stop</string>
|
||||||
</resources>
|
</resources>
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ package com.k2fsa.sherpa.onnx.speaker.identification
|
|||||||
|
|
||||||
import androidx.compose.ui.graphics.vector.ImageVector
|
import androidx.compose.ui.graphics.vector.ImageVector
|
||||||
|
|
||||||
data class BarItem (
|
data class BarItem(
|
||||||
val title: String,
|
val title: String,
|
||||||
|
|
||||||
// see https://www.composables.com/icons
|
// see https://www.composables.com/icons
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
package com.k2fsa.sherpa.onnx.speaker.identification
|
package com.k2fsa.sherpa.onnx.speaker.identification
|
||||||
|
|
||||||
sealed class NavRoutes(val route: String) {
|
sealed class NavRoutes(val route: String) {
|
||||||
object Home: NavRoutes("home")
|
object Home : NavRoutes("home")
|
||||||
object Register: NavRoutes("register")
|
object Register : NavRoutes("register")
|
||||||
object View: NavRoutes("view")
|
object View : NavRoutes("view")
|
||||||
object Help: NavRoutes("help")
|
object Help : NavRoutes("help")
|
||||||
}
|
}
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt
|
||||||
@@ -1,188 +0,0 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
|
||||||
|
|
||||||
import android.content.res.AssetManager
|
|
||||||
import android.util.Log
|
|
||||||
|
|
||||||
private val TAG = "sherpa-onnx"
|
|
||||||
// Configuration handed to SpeakerEmbeddingExtractor.
data class SpeakerEmbeddingExtractorConfig(
    // Model file (required). NOTE(review): presumably a path resolved by the
    // native loader — confirm.
    val model: String,
    // NOTE(review): presumably the number of threads used by the native runtime — confirm.
    var numThreads: Int = 1,
    // NOTE(review): presumably enables verbose native logging — confirm.
    var debug: Boolean = false,
    // NOTE(review): presumably the execution provider name — confirm.
    var provider: String = "cpu",
)
|
|
||||||
|
|
||||||
/**
 * Wrapper around a native speaker-embedding stream handle.
 *
 * @property ptr native handle; reset to 0 after the stream has been released.
 */
class SpeakerEmbeddingExtractorStream(var ptr: Long) {
    // Forwards the samples and their sample rate to the native stream.
    fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
        acceptWaveform(ptr, samples, sampleRate)

    // Forwards the end-of-input notification to the native stream.
    fun inputFinished() = inputFinished(ptr)

    // Frees the native object at most once. The guard keeps a 0 handle from
    // reaching native code when release() is followed by garbage-collector
    // finalization or by a second release().
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    private external fun myTest(ptr: Long, v: Array<FloatArray>)

    // Explicit counterpart of finalize().
    fun release() = finalize()
    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)

    private external fun inputFinished(ptr: Long)

    private external fun delete(ptr: Long)

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
|
|
||||||
|
|
||||||
/**
 * Kotlin wrapper around the native speaker-embedding extractor.
 *
 * @param assetManager when non-null the extractor is created through `new`,
 *   otherwise through `newFromFile`.
 * @param config configuration forwarded to the native constructor.
 */
class SpeakerEmbeddingExtractor(
    assetManager: AssetManager? = null,
    config: SpeakerEmbeddingExtractorConfig,
) {
    // Handle returned by the native constructor; reset to 0 once released.
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            new(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    // Frees the native object at most once. The guard keeps a 0 handle from
    // reaching native code when release() is followed by garbage-collector
    // finalization or by a second release().
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    // Explicit counterpart of finalize().
    fun release() = finalize()

    // Wraps a newly created native stream handle.
    fun createStream(): SpeakerEmbeddingExtractorStream {
        val p = createStream(ptr)
        return SpeakerEmbeddingExtractorStream(p)
    }

    // The three calls below forward to the native functions of the same name.
    fun isReady(stream: SpeakerEmbeddingExtractorStream) = isReady(ptr, stream.ptr)
    fun compute(stream: SpeakerEmbeddingExtractorStream) = compute(ptr, stream.ptr)
    fun dim() = dim(ptr)

    private external fun new(
        assetManager: AssetManager,
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun newFromFile(
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun isReady(ptr: Long, streamPtr: Long): Boolean

    private external fun compute(ptr: Long, streamPtr: Long): FloatArray

    private external fun dim(ptr: Long): Int

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
|
|
||||||
|
|
||||||
/**
 * Kotlin wrapper around the native speaker-embedding manager.
 *
 * @property dim value passed to the native constructor.
 *   NOTE(review): presumably the embedding dimension — confirm.
 */
class SpeakerEmbeddingManager(val dim: Int) {
    // Handle returned by the native constructor; reset to 0 once released.
    private var ptr: Long

    init {
        ptr = new(dim)
    }

    // Frees the native object at most once. The guard keeps a 0 handle from
    // reaching native code when release() is followed by garbage-collector
    // finalization or by a second release().
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    // Explicit counterpart of finalize().
    fun release() = finalize()

    // The public functions below forward to the native functions declared
    // further down. The Array<FloatArray> overload of add() maps to addList.
    fun add(name: String, embedding: FloatArray) = add(ptr, name, embedding)
    fun add(name: String, embedding: Array<FloatArray>) = addList(ptr, name, embedding)
    fun remove(name: String) = remove(ptr, name)
    fun search(embedding: FloatArray, threshold: Float) = search(ptr, embedding, threshold)
    fun verify(name: String, embedding: FloatArray, threshold: Float) =
        verify(ptr, name, embedding, threshold)

    fun contains(name: String) = contains(ptr, name)
    fun numSpeakers() = numSpeakers(ptr)

    fun allSpeakerNames() = allSpeakerNames(ptr)

    private external fun new(dim: Int): Long
    private external fun delete(ptr: Long)
    private external fun add(ptr: Long, name: String, embedding: FloatArray): Boolean
    private external fun addList(ptr: Long, name: String, embedding: Array<FloatArray>): Boolean
    private external fun remove(ptr: Long, name: String): Boolean
    private external fun search(ptr: Long, embedding: FloatArray, threshold: Float): String
    private external fun verify(
        ptr: Long,
        name: String,
        embedding: FloatArray,
        threshold: Float
    ): Boolean

    private external fun contains(ptr: Long, name: String): Boolean
    private external fun numSpeakers(ptr: Long): Int

    private external fun allSpeakerNames(ptr: Long): Array<String>

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
|
|
||||||
|
|
||||||
// Please download the model file from
|
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
|
|
||||||
// and put it inside the assets directory.
|
|
||||||
//
|
|
||||||
// Please don't put it in a subdirectory of assets
|
|
||||||
private val modelName = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
|
|
||||||
|
|
||||||
/**
 * Singleton that holds one [SpeakerEmbeddingExtractor] and one
 * [SpeakerEmbeddingManager].
 *
 * Both are created by [initExtractor]; reading [extractor] or [manager]
 * before that call throws because the backing fields are still null.
 */
object SpeakerRecognition {
    // Nullable backing fields; populated by initExtractor().
    var _extractor: SpeakerEmbeddingExtractor? = null
    var _manager: SpeakerEmbeddingManager? = null

    /** The extractor created by [initExtractor]; throws if it has not been created. */
    val extractor: SpeakerEmbeddingExtractor
        get() = _extractor!!

    /** The manager created by [initExtractor]; throws if it has not been created. */
    val manager: SpeakerEmbeddingManager
        get() = _manager!!

    /**
     * Creates the extractor and its manager once; later calls are no-ops.
     *
     * @param assetManager forwarded unchanged to the [SpeakerEmbeddingExtractor] constructor.
     */
    fun initExtractor(assetManager: AssetManager? = null) {
        synchronized(this) {
            if (_extractor == null) {
                Log.i(TAG, "Initializing speaker embedding extractor")

                val extractorConfig = SpeakerEmbeddingExtractorConfig(
                    model = modelName,
                    numThreads = 2,
                    debug = false,
                    provider = "cpu",
                )
                val created = SpeakerEmbeddingExtractor(
                    assetManager = assetManager,
                    config = extractorConfig,
                )
                _extractor = created

                // The manager must use the same embedding size as the extractor.
                _manager = SpeakerEmbeddingManager(dim = created.dim())
            }
        }
    }
}
|
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../../../sherpa-onnx/kotlin-api/Speaker.kt
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
@file:OptIn(ExperimentalMaterial3Api::class, ExperimentalFoundationApi::class)
|
@file:OptIn(ExperimentalMaterial3Api::class)
|
||||||
|
|
||||||
package com.k2fsa.sherpa.onnx.slid
|
package com.k2fsa.sherpa.onnx.slid
|
||||||
|
|
||||||
@@ -9,11 +9,9 @@ import android.media.AudioFormat
|
|||||||
import android.media.AudioRecord
|
import android.media.AudioRecord
|
||||||
import android.media.MediaRecorder
|
import android.media.MediaRecorder
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
import androidx.compose.foundation.ExperimentalFoundationApi
|
|
||||||
import androidx.compose.foundation.layout.Box
|
import androidx.compose.foundation.layout.Box
|
||||||
import androidx.compose.foundation.layout.Column
|
import androidx.compose.foundation.layout.Column
|
||||||
import androidx.compose.foundation.layout.PaddingValues
|
import androidx.compose.foundation.layout.PaddingValues
|
||||||
import androidx.compose.ui.Modifier
|
|
||||||
import androidx.compose.foundation.layout.Spacer
|
import androidx.compose.foundation.layout.Spacer
|
||||||
import androidx.compose.foundation.layout.fillMaxSize
|
import androidx.compose.foundation.layout.fillMaxSize
|
||||||
import androidx.compose.foundation.layout.height
|
import androidx.compose.foundation.layout.height
|
||||||
@@ -31,6 +29,7 @@ import androidx.compose.runtime.mutableStateOf
|
|||||||
import androidx.compose.runtime.remember
|
import androidx.compose.runtime.remember
|
||||||
import androidx.compose.runtime.setValue
|
import androidx.compose.runtime.setValue
|
||||||
import androidx.compose.ui.Alignment
|
import androidx.compose.ui.Alignment
|
||||||
|
import androidx.compose.ui.Modifier
|
||||||
import androidx.compose.ui.platform.LocalContext
|
import androidx.compose.ui.platform.LocalContext
|
||||||
import androidx.compose.ui.text.font.FontWeight
|
import androidx.compose.ui.text.font.FontWeight
|
||||||
import androidx.compose.ui.unit.dp
|
import androidx.compose.ui.unit.dp
|
||||||
@@ -63,13 +62,13 @@ fun Home() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private var audioRecord: AudioRecord? = null
|
private var audioRecord: AudioRecord? = null
|
||||||
private val sampleRateInHz = 16000
|
private const val sampleRateInHz = 16000
|
||||||
|
|
||||||
@Composable
|
@Composable
|
||||||
fun MyApp(padding: PaddingValues) {
|
fun MyApp(padding: PaddingValues) {
|
||||||
val activity = LocalContext.current as Activity
|
val activity = LocalContext.current as Activity
|
||||||
var isStarted by remember { mutableStateOf(false) }
|
var isStarted by remember { mutableStateOf(false) }
|
||||||
var result by remember { mutableStateOf<String>("") }
|
var result by remember { mutableStateOf("") }
|
||||||
|
|
||||||
val onButtonClick: () -> Unit = {
|
val onButtonClick: () -> Unit = {
|
||||||
isStarted = !isStarted
|
isStarted = !isStarted
|
||||||
@@ -114,12 +113,12 @@ fun MyApp(padding: PaddingValues) {
|
|||||||
}
|
}
|
||||||
Log.i(TAG, "Stop recording")
|
Log.i(TAG, "Stop recording")
|
||||||
Log.i(TAG, "Start recognition")
|
Log.i(TAG, "Start recognition")
|
||||||
val samples = Flatten(sampleList)
|
val samples = flatten(sampleList)
|
||||||
val stream = Slid.slid.createStream()
|
val stream = Slid.slid.createStream()
|
||||||
stream.acceptWaveform(samples, sampleRateInHz)
|
stream.acceptWaveform(samples, sampleRateInHz)
|
||||||
val lang = Slid.slid.compute(stream)
|
val lang = Slid.slid.compute(stream)
|
||||||
|
|
||||||
result = Slid.localeMap.get(lang) ?: lang
|
result = Slid.localeMap[lang] ?: lang
|
||||||
|
|
||||||
stream.release()
|
stream.release()
|
||||||
}
|
}
|
||||||
@@ -152,7 +151,7 @@ fun MyApp(padding: PaddingValues) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun Flatten(sampleList: ArrayList<FloatArray>): FloatArray {
|
fun flatten(sampleList: ArrayList<FloatArray>): FloatArray {
|
||||||
var totalSamples = 0
|
var totalSamples = 0
|
||||||
for (a in sampleList) {
|
for (a in sampleList) {
|
||||||
totalSamples += a.size
|
totalSamples += a.size
|
||||||
|
|||||||
@@ -10,12 +10,9 @@ import androidx.activity.compose.setContent
|
|||||||
import androidx.compose.foundation.layout.fillMaxSize
|
import androidx.compose.foundation.layout.fillMaxSize
|
||||||
import androidx.compose.material3.MaterialTheme
|
import androidx.compose.material3.MaterialTheme
|
||||||
import androidx.compose.material3.Surface
|
import androidx.compose.material3.Surface
|
||||||
import androidx.compose.material3.Text
|
|
||||||
import androidx.compose.runtime.Composable
|
import androidx.compose.runtime.Composable
|
||||||
import androidx.compose.ui.Modifier
|
import androidx.compose.ui.Modifier
|
||||||
import androidx.compose.ui.tooling.preview.Preview
|
|
||||||
import androidx.core.app.ActivityCompat
|
import androidx.core.app.ActivityCompat
|
||||||
import com.k2fsa.sherpa.onnx.SpokenLanguageIdentification
|
|
||||||
import com.k2fsa.sherpa.onnx.slid.ui.theme.SherpaOnnxSpokenLanguageIdentificationTheme
|
import com.k2fsa.sherpa.onnx.slid.ui.theme.SherpaOnnxSpokenLanguageIdentificationTheme
|
||||||
|
|
||||||
const val TAG = "sherpa-onnx"
|
const val TAG = "sherpa-onnx"
|
||||||
@@ -32,6 +29,7 @@ class MainActivity : ComponentActivity() {
|
|||||||
ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
|
ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
|
||||||
Slid.initSlid(this.assets)
|
Slid.initSlid(this.assets)
|
||||||
}
|
}
|
||||||
|
|
||||||
@Suppress("DEPRECATION")
|
@Suppress("DEPRECATION")
|
||||||
@Deprecated("Deprecated in Java")
|
@Deprecated("Deprecated in Java")
|
||||||
override fun onRequestPermissionsResult(
|
override fun onRequestPermissionsResult(
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
../../../../../../../../../../SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/OfflineStream.kt
|
../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt
|
||||||
@@ -1,102 +0,0 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
|
||||||
|
|
||||||
import android.content.res.AssetManager
|
|
||||||
import android.util.Log
|
|
||||||
|
|
||||||
private val TAG = "sherpa-onnx"
|
|
||||||
|
|
||||||
/**
 * Whisper model settings for spoken language identification.
 *
 * @property encoder path of the encoder model file.
 * @property decoder path of the decoder model file.
 * @property tailPaddings defaults to -1.
 *   NOTE(review): presumably -1 lets the native side pick its own value - confirm.
 */
data class SpokenLanguageIdentificationWhisperConfig(
    var encoder: String,
    var decoder: String,
    var tailPaddings: Int = -1,
)
|
|
||||||
|
|
||||||
/**
 * Top-level configuration passed to [SpokenLanguageIdentification].
 *
 * @property whisper Whisper model settings.
 * @property numThreads thread count handed to the native side; defaults to 1.
 * @property debug debug flag handed to the native side; defaults to false.
 * @property provider provider name handed to the native side; defaults to "cpu".
 */
data class SpokenLanguageIdentificationConfig(
    var whisper: SpokenLanguageIdentificationWhisperConfig,
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)
|
|
||||||
|
|
||||||
/**
 * Kotlin handle for the native spoken-language-identification object.
 *
 * @param assetManager when non-null the native object is built with
 *   `newFromAsset`; when null it is built with `newFromFile`.
 * @param config configuration forwarded to the native constructor.
 */
class SpokenLanguageIdentification(
    assetManager: AssetManager? = null,
    config: SpokenLanguageIdentificationConfig,
) {
    // Opaque native handle; 0 means "already released".
    private var ptr: Long = when (assetManager) {
        null -> newFromFile(config)
        else -> newFromAsset(assetManager, config)
    }

    /** Frees the native object unless it has already been freed. */
    protected fun finalize() {
        if (ptr == 0L) return
        delete(ptr)
        ptr = 0
    }

    /** Explicitly frees the native object; safe to call more than once. */
    fun release() = finalize()

    /** Wraps a newly created native stream handle in an [OfflineStream]. */
    fun createStream(): OfflineStream = OfflineStream(createStream(ptr))

    /** Runs the native computation on [stream] and returns its String result. */
    fun compute(stream: OfflineStream) = compute(ptr, stream.ptr)

    // JNI entry points implemented in the sherpa-onnx-jni library.
    private external fun newFromAsset(
        assetManager: AssetManager,
        config: SpokenLanguageIdentificationConfig,
    ): Long

    private external fun newFromFile(
        config: SpokenLanguageIdentificationConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun compute(ptr: Long, streamPtr: Long): String

    companion object {
        init {
            // Load the native library before any external function is used.
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
|
|
||||||
// please refer to
|
|
||||||
// https://k2-fsa.github.io/sherpa/onnx/spolken-language-identification/pretrained_models.html#whisper
|
|
||||||
// to download more models
|
|
||||||
/**
 * Builds a ready-made configuration for one of the bundled Whisper models.
 *
 * Supported values of [type]:
 *  - 0: `sherpa-onnx-whisper-tiny`
 *  - 1: `sherpa-onnx-whisper-base`
 *
 * The encoder/decoder file names are derived from the model variant, so the
 * "base" directory is paired with the "base-*" model files.
 *
 * @param type index of the model to use.
 * @param numThreads thread count stored in the returned configuration.
 * @return the configuration, or null when [type] is not supported.
 */
fun getSpokenLanguageIdentificationConfig(
    type: Int,
    numThreads: Int = 1,
): SpokenLanguageIdentificationConfig? {
    val variant = when (type) {
        0 -> "tiny"
        1 -> "base"
        else -> return null
    }

    val modelDir = "sherpa-onnx-whisper-$variant"
    return SpokenLanguageIdentificationConfig(
        whisper = SpokenLanguageIdentificationWhisperConfig(
            encoder = "$modelDir/$variant-encoder.int8.onnx",
            decoder = "$modelDir/$variant-decoder.int8.onnx",
        ),
        // Honour the caller's thread count for every model type.
        numThreads = numThreads,
        debug = true,
    )
}
|
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../../sherpa-onnx/kotlin-api/SpokenLanguageIdentification.kt
|
||||||
@@ -15,10 +15,10 @@ object Slid {
|
|||||||
get() {
|
get() {
|
||||||
return _slid!!
|
return _slid!!
|
||||||
}
|
}
|
||||||
val localeMap : Map<String, String>
|
val localeMap: Map<String, String>
|
||||||
get() {
|
get() {
|
||||||
return _localeMap
|
return _localeMap
|
||||||
}
|
}
|
||||||
|
|
||||||
fun initSlid(assetManager: AssetManager? = null, numThreads: Int = 1) {
|
fun initSlid(assetManager: AssetManager? = null, numThreads: Int = 1) {
|
||||||
synchronized(this) {
|
synchronized(this) {
|
||||||
@@ -31,7 +31,7 @@ object Slid {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (_localeMap.isEmpty()) {
|
if (_localeMap.isEmpty()) {
|
||||||
val allLang = Locale.getISOLanguages();
|
val allLang = Locale.getISOLanguages()
|
||||||
for (lang in allLang) {
|
for (lang in allLang) {
|
||||||
val locale = Locale(lang)
|
val locale = Locale(lang)
|
||||||
_localeMap[lang] = locale.displayName
|
_localeMap[lang] = locale.displayName
|
||||||
|
|||||||
@@ -1,7 +1,11 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
package com.k2fsa.sherpa.onnx
|
||||||
|
|
||||||
import android.content.res.AssetManager
|
import android.content.res.AssetManager
|
||||||
import android.media.*
|
import android.media.AudioAttributes
|
||||||
|
import android.media.AudioFormat
|
||||||
|
import android.media.AudioManager
|
||||||
|
import android.media.AudioTrack
|
||||||
|
import android.media.MediaPlayer
|
||||||
import android.net.Uri
|
import android.net.Uri
|
||||||
import android.os.Bundle
|
import android.os.Bundle
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
@@ -212,7 +216,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (dictDir != null) {
|
if (dictDir != null) {
|
||||||
val newDir = copyDataDir( modelDir!!)
|
val newDir = copyDataDir(modelDir!!)
|
||||||
modelDir = newDir + "/" + modelDir
|
modelDir = newDir + "/" + modelDir
|
||||||
dictDir = modelDir + "/" + "dict"
|
dictDir = modelDir + "/" + "dict"
|
||||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||||
@@ -220,7 +224,9 @@ class MainActivity : AppCompatActivity() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
val config = getOfflineTtsConfig(
|
val config = getOfflineTtsConfig(
|
||||||
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
|
modelDir = modelDir!!,
|
||||||
|
modelName = modelName!!,
|
||||||
|
lexicon = lexicon ?: "",
|
||||||
dataDir = dataDir ?: "",
|
dataDir = dataDir ?: "",
|
||||||
dictDir = dictDir ?: "",
|
dictDir = dictDir ?: "",
|
||||||
ruleFsts = ruleFsts ?: "",
|
ruleFsts = ruleFsts ?: "",
|
||||||
@@ -232,11 +238,11 @@ class MainActivity : AppCompatActivity() {
|
|||||||
|
|
||||||
|
|
||||||
private fun copyDataDir(dataDir: String): String {
|
private fun copyDataDir(dataDir: String): String {
|
||||||
println("data dir is $dataDir")
|
Log.i(TAG, "data dir is $dataDir")
|
||||||
copyAssets(dataDir)
|
copyAssets(dataDir)
|
||||||
|
|
||||||
val newDataDir = application.getExternalFilesDir(null)!!.absolutePath
|
val newDataDir = application.getExternalFilesDir(null)!!.absolutePath
|
||||||
println("newDataDir: $newDataDir")
|
Log.i(TAG, "newDataDir: $newDataDir")
|
||||||
return newDataDir
|
return newDataDir
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -256,7 +262,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (ex: IOException) {
|
} catch (ex: IOException) {
|
||||||
Log.e(TAG, "Failed to copy $path. ${ex.toString()}")
|
Log.e(TAG, "Failed to copy $path. $ex")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -276,7 +282,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
ostream.flush()
|
ostream.flush()
|
||||||
ostream.close()
|
ostream.close()
|
||||||
} catch (ex: Exception) {
|
} catch (ex: Exception) {
|
||||||
Log.e(TAG, "Failed to copy $filename, ${ex.toString()}")
|
Log.e(TAG, "Failed to copy $filename, $ex")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -49,10 +49,10 @@ class OfflineTts(
|
|||||||
private var ptr: Long
|
private var ptr: Long
|
||||||
|
|
||||||
init {
|
init {
|
||||||
if (assetManager != null) {
|
ptr = if (assetManager != null) {
|
||||||
ptr = newFromAsset(assetManager, config)
|
newFromAsset(assetManager, config)
|
||||||
} else {
|
} else {
|
||||||
ptr = newFromFile(config)
|
newFromFile(config)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -65,7 +65,7 @@ class OfflineTts(
|
|||||||
sid: Int = 0,
|
sid: Int = 0,
|
||||||
speed: Float = 1.0f
|
speed: Float = 1.0f
|
||||||
): GeneratedAudio {
|
): GeneratedAudio {
|
||||||
var objArray = generateImpl(ptr, text = text, sid = sid, speed = speed)
|
val objArray = generateImpl(ptr, text = text, sid = sid, speed = speed)
|
||||||
return GeneratedAudio(
|
return GeneratedAudio(
|
||||||
samples = objArray[0] as FloatArray,
|
samples = objArray[0] as FloatArray,
|
||||||
sampleRate = objArray[1] as Int
|
sampleRate = objArray[1] as Int
|
||||||
@@ -78,7 +78,13 @@ class OfflineTts(
|
|||||||
speed: Float = 1.0f,
|
speed: Float = 1.0f,
|
||||||
callback: (samples: FloatArray) -> Unit
|
callback: (samples: FloatArray) -> Unit
|
||||||
): GeneratedAudio {
|
): GeneratedAudio {
|
||||||
var objArray = generateWithCallbackImpl(ptr, text = text, sid = sid, speed = speed, callback=callback)
|
val objArray = generateWithCallbackImpl(
|
||||||
|
ptr,
|
||||||
|
text = text,
|
||||||
|
sid = sid,
|
||||||
|
speed = speed,
|
||||||
|
callback = callback
|
||||||
|
)
|
||||||
return GeneratedAudio(
|
return GeneratedAudio(
|
||||||
samples = objArray[0] as FloatArray,
|
samples = objArray[0] as FloatArray,
|
||||||
sampleRate = objArray[1] as Int
|
sampleRate = objArray[1] as Int
|
||||||
@@ -87,10 +93,10 @@ class OfflineTts(
|
|||||||
|
|
||||||
fun allocate(assetManager: AssetManager? = null) {
|
fun allocate(assetManager: AssetManager? = null) {
|
||||||
if (ptr == 0L) {
|
if (ptr == 0L) {
|
||||||
if (assetManager != null) {
|
ptr = if (assetManager != null) {
|
||||||
ptr = newFromAsset(assetManager, config)
|
newFromAsset(assetManager, config)
|
||||||
} else {
|
} else {
|
||||||
ptr = newFromFile(config)
|
newFromFile(config)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -103,9 +109,14 @@ class OfflineTts(
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected fun finalize() {
|
protected fun finalize() {
|
||||||
delete(ptr)
|
if (ptr != 0L) {
|
||||||
|
delete(ptr)
|
||||||
|
ptr = 0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fun release() = finalize()
|
||||||
|
|
||||||
private external fun newFromAsset(
|
private external fun newFromAsset(
|
||||||
assetManager: AssetManager,
|
assetManager: AssetManager,
|
||||||
config: OfflineTtsConfig,
|
config: OfflineTtsConfig,
|
||||||
@@ -123,14 +134,14 @@ class OfflineTts(
|
|||||||
// - the first entry is an 1-D float array containing audio samples.
|
// - the first entry is an 1-D float array containing audio samples.
|
||||||
// Each sample is normalized to the range [-1, 1]
|
// Each sample is normalized to the range [-1, 1]
|
||||||
// - the second entry is the sample rate
|
// - the second entry is the sample rate
|
||||||
external fun generateImpl(
|
private external fun generateImpl(
|
||||||
ptr: Long,
|
ptr: Long,
|
||||||
text: String,
|
text: String,
|
||||||
sid: Int = 0,
|
sid: Int = 0,
|
||||||
speed: Float = 1.0f
|
speed: Float = 1.0f
|
||||||
): Array<Any>
|
): Array<Any>
|
||||||
|
|
||||||
external fun generateWithCallbackImpl(
|
private external fun generateWithCallbackImpl(
|
||||||
ptr: Long,
|
ptr: Long,
|
||||||
text: String,
|
text: String,
|
||||||
sid: Int = 0,
|
sid: Int = 0,
|
||||||
@@ -156,7 +167,7 @@ fun getOfflineTtsConfig(
|
|||||||
dictDir: String,
|
dictDir: String,
|
||||||
ruleFsts: String,
|
ruleFsts: String,
|
||||||
ruleFars: String
|
ruleFars: String
|
||||||
): OfflineTtsConfig? {
|
): OfflineTtsConfig {
|
||||||
return OfflineTtsConfig(
|
return OfflineTtsConfig(
|
||||||
model = OfflineTtsModelConfig(
|
model = OfflineTtsModelConfig(
|
||||||
vits = OfflineTtsVitsModelConfig(
|
vits = OfflineTtsVitsModelConfig(
|
||||||
|
|||||||
@@ -1,15 +1,18 @@
|
|||||||
package com.k2fsa.sherpa.onnx.tts.engine
|
package com.k2fsa.sherpa.onnx.tts.engine
|
||||||
|
|
||||||
import android.content.Intent
|
import android.content.Intent
|
||||||
import androidx.appcompat.app.AppCompatActivity
|
|
||||||
import android.os.Bundle
|
import android.os.Bundle
|
||||||
import android.speech.tts.TextToSpeech
|
import android.speech.tts.TextToSpeech
|
||||||
|
import androidx.appcompat.app.AppCompatActivity
|
||||||
|
|
||||||
class CheckVoiceData : AppCompatActivity() {
|
class CheckVoiceData : AppCompatActivity() {
|
||||||
override fun onCreate(savedInstanceState: Bundle?) {
|
override fun onCreate(savedInstanceState: Bundle?) {
|
||||||
super.onCreate(savedInstanceState)
|
super.onCreate(savedInstanceState)
|
||||||
val intent = Intent().apply {
|
val intent = Intent().apply {
|
||||||
putStringArrayListExtra(TextToSpeech.Engine.EXTRA_AVAILABLE_VOICES, arrayListOf(TtsEngine.lang))
|
putStringArrayListExtra(
|
||||||
|
TextToSpeech.Engine.EXTRA_AVAILABLE_VOICES,
|
||||||
|
arrayListOf(TtsEngine.lang)
|
||||||
|
)
|
||||||
putStringArrayListExtra(TextToSpeech.Engine.EXTRA_UNAVAILABLE_VOICES, arrayListOf())
|
putStringArrayListExtra(TextToSpeech.Engine.EXTRA_UNAVAILABLE_VOICES, arrayListOf())
|
||||||
}
|
}
|
||||||
setResult(TextToSpeech.Engine.CHECK_VOICE_DATA_PASS, intent)
|
setResult(TextToSpeech.Engine.CHECK_VOICE_DATA_PASS, intent)
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ package com.k2fsa.sherpa.onnx.tts.engine
|
|||||||
|
|
||||||
import android.app.Activity
|
import android.app.Activity
|
||||||
import android.content.Intent
|
import android.content.Intent
|
||||||
import androidx.appcompat.app.AppCompatActivity
|
|
||||||
import android.os.Bundle
|
import android.os.Bundle
|
||||||
import android.speech.tts.TextToSpeech
|
import android.speech.tts.TextToSpeech
|
||||||
|
|
||||||
@@ -12,120 +11,168 @@ fun getSampleText(lang: String): String {
|
|||||||
"ara" -> {
|
"ara" -> {
|
||||||
text = "هذا هو محرك تحويل النص إلى كلام باستخدام الجيل القادم من كالدي"
|
text = "هذا هو محرك تحويل النص إلى كلام باستخدام الجيل القادم من كالدي"
|
||||||
}
|
}
|
||||||
|
|
||||||
"ben" -> {
|
"ben" -> {
|
||||||
text = "এটি একটি টেক্সট-টু-স্পীচ ইঞ্জিন যা পরবর্তী প্রজন্মের কালডি ব্যবহার করে"
|
text = "এটি একটি টেক্সট-টু-স্পীচ ইঞ্জিন যা পরবর্তী প্রজন্মের কালডি ব্যবহার করে"
|
||||||
}
|
}
|
||||||
|
|
||||||
"bul" -> {
|
"bul" -> {
|
||||||
text = "Това е машина за преобразуване на текст в реч, използваща Kaldi от следващо поколение"
|
text =
|
||||||
|
"Това е машина за преобразуване на текст в реч, използваща Kaldi от следващо поколение"
|
||||||
}
|
}
|
||||||
|
|
||||||
"cat" -> {
|
"cat" -> {
|
||||||
text = "Aquest és un motor de text a veu que utilitza Kaldi de nova generació"
|
text = "Aquest és un motor de text a veu que utilitza Kaldi de nova generació"
|
||||||
}
|
}
|
||||||
|
|
||||||
"ces" -> {
|
"ces" -> {
|
||||||
text = "Toto je převodník textu na řeč využívající novou generaci kaldi"
|
text = "Toto je převodník textu na řeč využívající novou generaci kaldi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"dan" -> {
|
"dan" -> {
|
||||||
text = "Dette er en tekst til tale-motor, der bruger næste generation af kaldi"
|
text = "Dette er en tekst til tale-motor, der bruger næste generation af kaldi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"deu" -> {
|
"deu" -> {
|
||||||
text = "Dies ist eine Text-to-Speech-Engine, die Kaldi der nächsten Generation verwendet"
|
text =
|
||||||
|
"Dies ist eine Text-to-Speech-Engine, die Kaldi der nächsten Generation verwendet"
|
||||||
}
|
}
|
||||||
|
|
||||||
"ell" -> {
|
"ell" -> {
|
||||||
text = "Αυτή είναι μια μηχανή κειμένου σε ομιλία που χρησιμοποιεί kaldi επόμενης γενιάς"
|
text = "Αυτή είναι μια μηχανή κειμένου σε ομιλία που χρησιμοποιεί kaldi επόμενης γενιάς"
|
||||||
}
|
}
|
||||||
|
|
||||||
"eng" -> {
|
"eng" -> {
|
||||||
text = "This is a text-to-speech engine using next generation Kaldi"
|
text = "This is a text-to-speech engine using next generation Kaldi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"est" -> {
|
"est" -> {
|
||||||
text = "See on teksti kõneks muutmise mootor, mis kasutab järgmise põlvkonna Kaldi"
|
text = "See on teksti kõneks muutmise mootor, mis kasutab järgmise põlvkonna Kaldi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"fin" -> {
|
"fin" -> {
|
||||||
text = "Tämä on tekstistä puheeksi -moottori, joka käyttää seuraavan sukupolven kaldia"
|
text = "Tämä on tekstistä puheeksi -moottori, joka käyttää seuraavan sukupolven kaldia"
|
||||||
}
|
}
|
||||||
|
|
||||||
"fra" -> {
|
"fra" -> {
|
||||||
text = "Il s'agit d'un moteur de synthèse vocale utilisant Kaldi de nouvelle génération"
|
text = "Il s'agit d'un moteur de synthèse vocale utilisant Kaldi de nouvelle génération"
|
||||||
}
|
}
|
||||||
|
|
||||||
"gle" -> {
|
"gle" -> {
|
||||||
text = "Is inneall téacs-go-hurlabhra é seo a úsáideann Kaldi den chéad ghlúin eile"
|
text = "Is inneall téacs-go-hurlabhra é seo a úsáideann Kaldi den chéad ghlúin eile"
|
||||||
}
|
}
|
||||||
|
|
||||||
"hrv" -> {
|
"hrv" -> {
|
||||||
text = "Ovo je mehanizam za pretvaranje teksta u govor koji koristi Kaldi sljedeće generacije"
|
text =
|
||||||
|
"Ovo je mehanizam za pretvaranje teksta u govor koji koristi Kaldi sljedeće generacije"
|
||||||
}
|
}
|
||||||
|
|
||||||
"hun" -> {
|
"hun" -> {
|
||||||
text = "Ez egy szövegfelolvasó motor a következő generációs kaldi használatával"
|
text = "Ez egy szövegfelolvasó motor a következő generációs kaldi használatával"
|
||||||
}
|
}
|
||||||
|
|
||||||
"isl" -> {
|
"isl" -> {
|
||||||
text = "Þetta er texta í tal vél sem notar næstu kynslóð kaldi"
|
text = "Þetta er texta í tal vél sem notar næstu kynslóð kaldi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"ita" -> {
|
"ita" -> {
|
||||||
text = "Questo è un motore di sintesi vocale che utilizza kaldi di nuova generazione"
|
text = "Questo è un motore di sintesi vocale che utilizza kaldi di nuova generazione"
|
||||||
}
|
}
|
||||||
|
|
||||||
"kat" -> {
|
"kat" -> {
|
||||||
text = "ეს არის ტექსტიდან მეტყველების ძრავა შემდეგი თაობის კალდის გამოყენებით"
|
text = "ეს არის ტექსტიდან მეტყველების ძრავა შემდეგი თაობის კალდის გამოყენებით"
|
||||||
}
|
}
|
||||||
|
|
||||||
"kaz" -> {
|
"kaz" -> {
|
||||||
text = "Бұл келесі буын kaldi көмегімен мәтіннен сөйлеуге арналған қозғалтқыш"
|
text = "Бұл келесі буын kaldi көмегімен мәтіннен сөйлеуге арналған қозғалтқыш"
|
||||||
}
|
}
|
||||||
|
|
||||||
"mlt" -> {
|
"mlt" -> {
|
||||||
text = "Din hija magna text-to-speech li tuża Kaldi tal-ġenerazzjoni li jmiss"
|
text = "Din hija magna text-to-speech li tuża Kaldi tal-ġenerazzjoni li jmiss"
|
||||||
}
|
}
|
||||||
|
|
||||||
"lav" -> {
|
"lav" -> {
|
||||||
text = "Šis ir teksta pārvēršanas runā dzinējs, kas izmanto nākamās paaudzes Kaldi"
|
text = "Šis ir teksta pārvēršanas runā dzinējs, kas izmanto nākamās paaudzes Kaldi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"lit" -> {
|
"lit" -> {
|
||||||
text = "Tai teksto į kalbą variklis, kuriame naudojamas naujos kartos Kaldi"
|
text = "Tai teksto į kalbą variklis, kuriame naudojamas naujos kartos Kaldi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"ltz" -> {
|
"ltz" -> {
|
||||||
text = "Dëst ass en Text-zu-Speech-Motor mat der nächster Generatioun Kaldi"
|
text = "Dëst ass en Text-zu-Speech-Motor mat der nächster Generatioun Kaldi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"nep" -> {
|
"nep" -> {
|
||||||
text = "यो अर्को पुस्ता काल्डी प्रयोग गरेर स्पीच इन्जिनको पाठ हो"
|
text = "यो अर्को पुस्ता काल्डी प्रयोग गरेर स्पीच इन्जिनको पाठ हो"
|
||||||
}
|
}
|
||||||
|
|
||||||
"nld" -> {
|
"nld" -> {
|
||||||
text = "Dit is een tekst-naar-spraak-engine die gebruik maakt van Kaldi van de volgende generatie"
|
text =
|
||||||
|
"Dit is een tekst-naar-spraak-engine die gebruik maakt van Kaldi van de volgende generatie"
|
||||||
}
|
}
|
||||||
|
|
||||||
"nor" -> {
|
"nor" -> {
|
||||||
text = "Dette er en tekst til tale-motor som bruker neste generasjons kaldi"
|
text = "Dette er en tekst til tale-motor som bruker neste generasjons kaldi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"pol" -> {
|
"pol" -> {
|
||||||
text = "Jest to silnik syntezatora mowy wykorzystujący Kaldi nowej generacji"
|
text = "Jest to silnik syntezatora mowy wykorzystujący Kaldi nowej generacji"
|
||||||
}
|
}
|
||||||
|
|
||||||
"por" -> {
|
"por" -> {
|
||||||
text = "Este é um mecanismo de conversão de texto em fala usando Kaldi de próxima geração"
|
text =
|
||||||
|
"Este é um mecanismo de conversão de texto em fala usando Kaldi de próxima geração"
|
||||||
}
|
}
|
||||||
|
|
||||||
"ron" -> {
|
"ron" -> {
|
||||||
text = "Acesta este un motor text to speech care folosește generația următoare de kadi"
|
text = "Acesta este un motor text to speech care folosește generația următoare de kadi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"rus" -> {
|
"rus" -> {
|
||||||
text = "Это движок преобразования текста в речь, использующий Kaldi следующего поколения."
|
text =
|
||||||
|
"Это движок преобразования текста в речь, использующий Kaldi следующего поколения."
|
||||||
}
|
}
|
||||||
|
|
||||||
"slk" -> {
|
"slk" -> {
|
||||||
text = "Toto je nástroj na prevod textu na reč využívajúci kaldi novej generácie"
|
text = "Toto je nástroj na prevod textu na reč využívajúci kaldi novej generácie"
|
||||||
}
|
}
|
||||||
|
|
||||||
"slv" -> {
|
"slv" -> {
|
||||||
text = "To je mehanizem za pretvorbo besedila v govor, ki uporablja Kaldi naslednje generacije"
|
text =
|
||||||
|
"To je mehanizem za pretvorbo besedila v govor, ki uporablja Kaldi naslednje generacije"
|
||||||
}
|
}
|
||||||
|
|
||||||
"spa" -> {
|
"spa" -> {
|
||||||
text = "Este es un motor de texto a voz que utiliza kaldi de próxima generación."
|
text = "Este es un motor de texto a voz que utiliza kaldi de próxima generación."
|
||||||
}
|
}
|
||||||
|
|
||||||
"srp" -> {
|
"srp" -> {
|
||||||
text = "Ово је механизам за претварање текста у говор који користи калди следеће генерације"
|
text =
|
||||||
|
"Ово је механизам за претварање текста у говор који користи калди следеће генерације"
|
||||||
}
|
}
|
||||||
|
|
||||||
"swa" -> {
|
"swa" -> {
|
||||||
text = "Haya ni maandishi kwa injini ya hotuba kwa kutumia kizazi kijacho kaldi"
|
text = "Haya ni maandishi kwa injini ya hotuba kwa kutumia kizazi kijacho kaldi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"swe" -> {
|
"swe" -> {
|
||||||
text = "Detta är en text till tal-motor som använder nästa generations kaldi"
|
text = "Detta är en text till tal-motor som använder nästa generations kaldi"
|
||||||
}
|
}
|
||||||
|
|
||||||
"tur" -> {
|
"tur" -> {
|
||||||
text = "Bu, yeni nesil kaldi'yi kullanan bir metinden konuşmaya motorudur"
|
text = "Bu, yeni nesil kaldi'yi kullanan bir metinden konuşmaya motorudur"
|
||||||
}
|
}
|
||||||
|
|
||||||
"ukr" -> {
|
"ukr" -> {
|
||||||
text = "Це механізм перетворення тексту на мовлення, який використовує kaldi нового покоління"
|
text =
|
||||||
|
"Це механізм перетворення тексту на мовлення, який використовує kaldi нового покоління"
|
||||||
}
|
}
|
||||||
|
|
||||||
"vie" -> {
|
"vie" -> {
|
||||||
text = "Đây là công cụ chuyển văn bản thành giọng nói sử dụng kaldi thế hệ tiếp theo"
|
text = "Đây là công cụ chuyển văn bản thành giọng nói sử dụng kaldi thế hệ tiếp theo"
|
||||||
}
|
}
|
||||||
|
|
||||||
"zho", "cmn" -> {
|
"zho", "cmn" -> {
|
||||||
text = "使用新一代卡尔迪的语音合成引擎"
|
text = "使用新一代卡尔迪的语音合成引擎"
|
||||||
}
|
}
|
||||||
@@ -137,13 +184,13 @@ class GetSampleText : Activity() {
|
|||||||
override fun onCreate(savedInstanceState: Bundle?) {
|
override fun onCreate(savedInstanceState: Bundle?) {
|
||||||
super.onCreate(savedInstanceState)
|
super.onCreate(savedInstanceState)
|
||||||
var result = TextToSpeech.LANG_AVAILABLE
|
var result = TextToSpeech.LANG_AVAILABLE
|
||||||
var text: String = getSampleText(TtsEngine.lang ?: "")
|
val text: String = getSampleText(TtsEngine.lang ?: "")
|
||||||
if (text.isEmpty()) {
|
if (text.isEmpty()) {
|
||||||
result = TextToSpeech.LANG_NOT_SUPPORTED
|
result = TextToSpeech.LANG_NOT_SUPPORTED
|
||||||
}
|
}
|
||||||
|
|
||||||
val intent = Intent().apply{
|
val intent = Intent().apply {
|
||||||
if(result == TextToSpeech.LANG_AVAILABLE) {
|
if (result == TextToSpeech.LANG_AVAILABLE) {
|
||||||
putExtra(TextToSpeech.Engine.EXTRA_SAMPLE_TEXT, text)
|
putExtra(TextToSpeech.Engine.EXTRA_SAMPLE_TEXT, text)
|
||||||
} else {
|
} else {
|
||||||
putExtra("sampleText", text)
|
putExtra("sampleText", text)
|
||||||
|
|||||||
@@ -26,20 +26,16 @@ import androidx.compose.material3.Scaffold
|
|||||||
import androidx.compose.material3.Slider
|
import androidx.compose.material3.Slider
|
||||||
import androidx.compose.material3.Surface
|
import androidx.compose.material3.Surface
|
||||||
import androidx.compose.material3.Text
|
import androidx.compose.material3.Text
|
||||||
import androidx.compose.material3.TextField
|
|
||||||
import androidx.compose.material3.TopAppBar
|
import androidx.compose.material3.TopAppBar
|
||||||
import androidx.compose.runtime.Composable
|
|
||||||
import androidx.compose.runtime.getValue
|
import androidx.compose.runtime.getValue
|
||||||
import androidx.compose.runtime.mutableStateOf
|
import androidx.compose.runtime.mutableStateOf
|
||||||
import androidx.compose.runtime.remember
|
import androidx.compose.runtime.remember
|
||||||
import androidx.compose.runtime.setValue
|
import androidx.compose.runtime.setValue
|
||||||
import androidx.compose.ui.Modifier
|
import androidx.compose.ui.Modifier
|
||||||
import androidx.compose.ui.text.input.KeyboardType
|
import androidx.compose.ui.text.input.KeyboardType
|
||||||
import androidx.compose.ui.tooling.preview.Preview
|
|
||||||
import androidx.compose.ui.unit.dp
|
import androidx.compose.ui.unit.dp
|
||||||
import com.k2fsa.sherpa.onnx.tts.engine.ui.theme.SherpaOnnxTtsEngineTheme
|
import com.k2fsa.sherpa.onnx.tts.engine.ui.theme.SherpaOnnxTtsEngineTheme
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.lang.NumberFormatException
|
|
||||||
|
|
||||||
const val TAG = "sherpa-onnx-tts-engine"
|
const val TAG = "sherpa-onnx-tts-engine"
|
||||||
|
|
||||||
@@ -76,7 +72,7 @@ class MainActivity : ComponentActivity() {
|
|||||||
val testTextContent = getSampleText(TtsEngine.lang ?: "")
|
val testTextContent = getSampleText(TtsEngine.lang ?: "")
|
||||||
|
|
||||||
var testText by remember { mutableStateOf(testTextContent) }
|
var testText by remember { mutableStateOf(testTextContent) }
|
||||||
|
|
||||||
val numSpeakers = TtsEngine.tts!!.numSpeakers()
|
val numSpeakers = TtsEngine.tts!!.numSpeakers()
|
||||||
if (numSpeakers > 1) {
|
if (numSpeakers > 1) {
|
||||||
OutlinedTextField(
|
OutlinedTextField(
|
||||||
@@ -88,7 +84,7 @@ class MainActivity : ComponentActivity() {
|
|||||||
try {
|
try {
|
||||||
TtsEngine.speakerId = it.toString().toInt()
|
TtsEngine.speakerId = it.toString().toInt()
|
||||||
} catch (ex: NumberFormatException) {
|
} catch (ex: NumberFormatException) {
|
||||||
Log.i(TAG, "Invalid input: ${it}")
|
Log.i(TAG, "Invalid input: $it")
|
||||||
TtsEngine.speakerId = 0
|
TtsEngine.speakerId = 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -119,7 +115,7 @@ class MainActivity : ComponentActivity() {
|
|||||||
Button(
|
Button(
|
||||||
modifier = Modifier.padding(20.dp),
|
modifier = Modifier.padding(20.dp),
|
||||||
onClick = {
|
onClick = {
|
||||||
Log.i(TAG, "Clicked, text: ${testText}")
|
Log.i(TAG, "Clicked, text: $testText")
|
||||||
if (testText.isBlank() || testText.isEmpty()) {
|
if (testText.isBlank() || testText.isEmpty()) {
|
||||||
Toast.makeText(
|
Toast.makeText(
|
||||||
applicationContext,
|
applicationContext,
|
||||||
@@ -136,7 +132,7 @@ class MainActivity : ComponentActivity() {
|
|||||||
val filename =
|
val filename =
|
||||||
application.filesDir.absolutePath + "/generated.wav"
|
application.filesDir.absolutePath + "/generated.wav"
|
||||||
val ok =
|
val ok =
|
||||||
audio.samples.size > 0 && audio.save(filename)
|
audio.samples.isNotEmpty() && audio.save(filename)
|
||||||
|
|
||||||
if (ok) {
|
if (ok) {
|
||||||
stopMediaPlayer()
|
stopMediaPlayer()
|
||||||
|
|||||||
@@ -4,8 +4,10 @@ import android.content.Context
|
|||||||
import android.content.res.AssetManager
|
import android.content.res.AssetManager
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
import androidx.compose.runtime.MutableState
|
import androidx.compose.runtime.MutableState
|
||||||
import androidx.compose.runtime.mutableStateOf
|
import androidx.compose.runtime.mutableFloatStateOf
|
||||||
import com.k2fsa.sherpa.onnx.*
|
import androidx.compose.runtime.mutableIntStateOf
|
||||||
|
import com.k2fsa.sherpa.onnx.OfflineTts
|
||||||
|
import com.k2fsa.sherpa.onnx.getOfflineTtsConfig
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import java.io.FileOutputStream
|
import java.io.FileOutputStream
|
||||||
import java.io.IOException
|
import java.io.IOException
|
||||||
@@ -21,8 +23,8 @@ object TtsEngine {
|
|||||||
var lang: String? = null
|
var lang: String? = null
|
||||||
|
|
||||||
|
|
||||||
val speedState: MutableState<Float> = mutableStateOf(1.0F)
|
val speedState: MutableState<Float> = mutableFloatStateOf(1.0F)
|
||||||
val speakerIdState: MutableState<Int> = mutableStateOf(0)
|
val speakerIdState: MutableState<Int> = mutableIntStateOf(0)
|
||||||
|
|
||||||
var speed: Float
|
var speed: Float
|
||||||
get() = speedState.value
|
get() = speedState.value
|
||||||
@@ -113,15 +115,15 @@ object TtsEngine {
|
|||||||
|
|
||||||
if (dataDir != null) {
|
if (dataDir != null) {
|
||||||
val newDir = copyDataDir(context, modelDir!!)
|
val newDir = copyDataDir(context, modelDir!!)
|
||||||
modelDir = newDir + "/" + modelDir
|
modelDir = "$newDir/$modelDir"
|
||||||
dataDir = newDir + "/" + dataDir
|
dataDir = "$newDir/$dataDir"
|
||||||
assets = null
|
assets = null
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dictDir != null) {
|
if (dictDir != null) {
|
||||||
val newDir = copyDataDir(context, modelDir!!)
|
val newDir = copyDataDir(context, modelDir!!)
|
||||||
modelDir = newDir + "/" + modelDir
|
modelDir = "$newDir/$modelDir"
|
||||||
dictDir = modelDir + "/" + "dict"
|
dictDir = "$modelDir/dict"
|
||||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||||
assets = null
|
assets = null
|
||||||
}
|
}
|
||||||
@@ -132,18 +134,18 @@ object TtsEngine {
|
|||||||
dictDir = dictDir ?: "",
|
dictDir = dictDir ?: "",
|
||||||
ruleFsts = ruleFsts ?: "",
|
ruleFsts = ruleFsts ?: "",
|
||||||
ruleFars = ruleFars ?: ""
|
ruleFars = ruleFars ?: ""
|
||||||
)!!
|
)
|
||||||
|
|
||||||
tts = OfflineTts(assetManager = assets, config = config)
|
tts = OfflineTts(assetManager = assets, config = config)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private fun copyDataDir(context: Context, dataDir: String): String {
|
private fun copyDataDir(context: Context, dataDir: String): String {
|
||||||
println("data dir is $dataDir")
|
Log.i(TAG, "data dir is $dataDir")
|
||||||
copyAssets(context, dataDir)
|
copyAssets(context, dataDir)
|
||||||
|
|
||||||
val newDataDir = context.getExternalFilesDir(null)!!.absolutePath
|
val newDataDir = context.getExternalFilesDir(null)!!.absolutePath
|
||||||
println("newDataDir: $newDataDir")
|
Log.i(TAG, "newDataDir: $newDataDir")
|
||||||
return newDataDir
|
return newDataDir
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -158,12 +160,12 @@ object TtsEngine {
|
|||||||
val dir = File(fullPath)
|
val dir = File(fullPath)
|
||||||
dir.mkdirs()
|
dir.mkdirs()
|
||||||
for (asset in assets.iterator()) {
|
for (asset in assets.iterator()) {
|
||||||
val p: String = if (path == "") "" else path + "/"
|
val p: String = if (path == "") "" else "$path/"
|
||||||
copyAssets(context, p + asset)
|
copyAssets(context, p + asset)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (ex: IOException) {
|
} catch (ex: IOException) {
|
||||||
Log.e(TAG, "Failed to copy $path. ${ex.toString()}")
|
Log.e(TAG, "Failed to copy $path. $ex")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -183,7 +185,7 @@ object TtsEngine {
|
|||||||
ostream.flush()
|
ostream.flush()
|
||||||
ostream.close()
|
ostream.close()
|
||||||
} catch (ex: Exception) {
|
} catch (ex: Exception) {
|
||||||
Log.e(TAG, "Failed to copy $filename, ${ex.toString()}")
|
Log.e(TAG, "Failed to copy $filename, $ex")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import android.speech.tts.SynthesisRequest
|
|||||||
import android.speech.tts.TextToSpeech
|
import android.speech.tts.TextToSpeech
|
||||||
import android.speech.tts.TextToSpeechService
|
import android.speech.tts.TextToSpeechService
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
import com.k2fsa.sherpa.onnx.*
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
https://developer.android.com/reference/java/util/Locale#getISO3Language()
|
https://developer.android.com/reference/java/util/Locale#getISO3Language()
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
package com.k2fsa.sherpa.onnx.tts.engine
|
package com.k2fsa.sherpa.onnx.tts.engine
|
||||||
|
|
||||||
import android.app.Application
|
import android.app.Application
|
||||||
import android.os.FileUtils.ProgressListener
|
|
||||||
import android.speech.tts.TextToSpeech
|
import android.speech.tts.TextToSpeech
|
||||||
import android.speech.tts.TextToSpeech.OnInitListener
|
import android.speech.tts.TextToSpeech.OnInitListener
|
||||||
import android.speech.tts.UtteranceProgressListener
|
import android.speech.tts.UtteranceProgressListener
|
||||||
@@ -27,7 +26,7 @@ class TtsViewModel : ViewModel() {
|
|||||||
private val onInitListener = object : OnInitListener {
|
private val onInitListener = object : OnInitListener {
|
||||||
override fun onInit(status: Int) {
|
override fun onInit(status: Int) {
|
||||||
when (status) {
|
when (status) {
|
||||||
TextToSpeech.SUCCESS -> Log.i(TAG, "Init tts succeded")
|
TextToSpeech.SUCCESS -> Log.i(TAG, "Init tts succeeded")
|
||||||
TextToSpeech.ERROR -> Log.i(TAG, "Init tts failed")
|
TextToSpeech.ERROR -> Log.i(TAG, "Init tts failed")
|
||||||
else -> Log.i(TAG, "Unknown status $status")
|
else -> Log.i(TAG, "Unknown status $status")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
android:theme="@style/Theme.SherpaOnnxVad"
|
android:theme="@style/Theme.SherpaOnnxVad"
|
||||||
tools:targetApi="31">
|
tools:targetApi="31">
|
||||||
<activity
|
<activity
|
||||||
android:name=".MainActivity"
|
android:name="com.k2fsa.sherpa.onnx.vad.MainActivity"
|
||||||
android:exported="true">
|
android:exported="true">
|
||||||
<intent-filter>
|
<intent-filter>
|
||||||
<action android:name="android.intent.action.MAIN" />
|
<action android:name="android.intent.action.MAIN" />
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
package com.k2fsa.sherpa.onnx.vad
|
||||||
|
|
||||||
import android.Manifest
|
import android.Manifest
|
||||||
import android.content.pm.PackageManager
|
import android.content.pm.PackageManager
|
||||||
@@ -11,6 +11,9 @@ import android.view.View
|
|||||||
import android.widget.Button
|
import android.widget.Button
|
||||||
import androidx.appcompat.app.AppCompatActivity
|
import androidx.appcompat.app.AppCompatActivity
|
||||||
import androidx.core.app.ActivityCompat
|
import androidx.core.app.ActivityCompat
|
||||||
|
import com.k2fsa.sherpa.onnx.R
|
||||||
|
import com.k2fsa.sherpa.onnx.Vad
|
||||||
|
import com.k2fsa.sherpa.onnx.getVadModelConfig
|
||||||
import kotlin.concurrent.thread
|
import kotlin.concurrent.thread
|
||||||
|
|
||||||
|
|
||||||
@@ -116,7 +119,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
|
|
||||||
private fun initVadModel() {
|
private fun initVadModel() {
|
||||||
val type = 0
|
val type = 0
|
||||||
println("Select VAD model type ${type}")
|
Log.i(TAG, "Select VAD model type ${type}")
|
||||||
val config = getVadModelConfig(type)
|
val config = getVadModelConfig(type)
|
||||||
|
|
||||||
vad = Vad(
|
vad = Vad(
|
||||||
@@ -171,4 +174,4 @@ class MainActivity : AppCompatActivity() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,104 +0,0 @@
|
|||||||
// Copyright (c) 2023 Xiaomi Corporation
|
|
||||||
package com.k2fsa.sherpa.onnx
|
|
||||||
|
|
||||||
import android.content.res.AssetManager
|
|
||||||
|
|
||||||
data class SileroVadModelConfig(
|
|
||||||
var model: String,
|
|
||||||
var threshold: Float = 0.5F,
|
|
||||||
var minSilenceDuration: Float = 0.25F,
|
|
||||||
var minSpeechDuration: Float = 0.25F,
|
|
||||||
var windowSize: Int = 512,
|
|
||||||
)
|
|
||||||
|
|
||||||
data class VadModelConfig(
|
|
||||||
var sileroVadModelConfig: SileroVadModelConfig,
|
|
||||||
var sampleRate: Int = 16000,
|
|
||||||
var numThreads: Int = 1,
|
|
||||||
var provider: String = "cpu",
|
|
||||||
var debug: Boolean = false,
|
|
||||||
)
|
|
||||||
|
|
||||||
class Vad(
|
|
||||||
assetManager: AssetManager? = null,
|
|
||||||
var config: VadModelConfig,
|
|
||||||
) {
|
|
||||||
private val ptr: Long
|
|
||||||
|
|
||||||
init {
|
|
||||||
if (assetManager != null) {
|
|
||||||
ptr = new(assetManager, config)
|
|
||||||
} else {
|
|
||||||
ptr = newFromFile(config)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected fun finalize() {
|
|
||||||
delete(ptr)
|
|
||||||
}
|
|
||||||
|
|
||||||
fun acceptWaveform(samples: FloatArray) = acceptWaveform(ptr, samples)
|
|
||||||
|
|
||||||
fun empty(): Boolean = empty(ptr)
|
|
||||||
fun pop() = pop(ptr)
|
|
||||||
|
|
||||||
// return an array containing
|
|
||||||
// [start: Int, samples: FloatArray]
|
|
||||||
fun front() = front(ptr)
|
|
||||||
|
|
||||||
fun clear() = clear(ptr)
|
|
||||||
|
|
||||||
fun isSpeechDetected(): Boolean = isSpeechDetected(ptr)
|
|
||||||
|
|
||||||
fun reset() = reset(ptr)
|
|
||||||
|
|
||||||
private external fun delete(ptr: Long)
|
|
||||||
|
|
||||||
private external fun new(
|
|
||||||
assetManager: AssetManager,
|
|
||||||
config: VadModelConfig,
|
|
||||||
): Long
|
|
||||||
|
|
||||||
private external fun newFromFile(
|
|
||||||
config: VadModelConfig,
|
|
||||||
): Long
|
|
||||||
|
|
||||||
private external fun acceptWaveform(ptr: Long, samples: FloatArray)
|
|
||||||
private external fun empty(ptr: Long): Boolean
|
|
||||||
private external fun pop(ptr: Long)
|
|
||||||
private external fun clear(ptr: Long)
|
|
||||||
private external fun front(ptr: Long): Array<Any>
|
|
||||||
private external fun isSpeechDetected(ptr: Long): Boolean
|
|
||||||
private external fun reset(ptr: Long)
|
|
||||||
|
|
||||||
companion object {
|
|
||||||
init {
|
|
||||||
System.loadLibrary("sherpa-onnx-jni")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Please visit
|
|
||||||
// https://github.com/snakers4/silero-vad/blob/master/files/silero_vad.onnx
|
|
||||||
// to download silero_vad.onnx
|
|
||||||
// and put it inside the assets/
|
|
||||||
// directory
|
|
||||||
fun getVadModelConfig(type: Int): VadModelConfig? {
|
|
||||||
when (type) {
|
|
||||||
0 -> {
|
|
||||||
return VadModelConfig(
|
|
||||||
sileroVadModelConfig = SileroVadModelConfig(
|
|
||||||
model = "silero_vad.onnx",
|
|
||||||
threshold = 0.5F,
|
|
||||||
minSilenceDuration = 0.25F,
|
|
||||||
minSpeechDuration = 0.25F,
|
|
||||||
windowSize = 512,
|
|
||||||
),
|
|
||||||
sampleRate = 16000,
|
|
||||||
numThreads = 1,
|
|
||||||
provider = "cpu",
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/Vad.kt
|
||||||
@@ -4,7 +4,7 @@
|
|||||||
xmlns:tools="http://schemas.android.com/tools"
|
xmlns:tools="http://schemas.android.com/tools"
|
||||||
android:layout_width="match_parent"
|
android:layout_width="match_parent"
|
||||||
android:layout_height="match_parent"
|
android:layout_height="match_parent"
|
||||||
tools:context=".MainActivity">
|
tools:context="com.k2fsa.sherpa.onnx.vad.MainActivity">
|
||||||
<LinearLayout
|
<LinearLayout
|
||||||
android:layout_width="match_parent"
|
android:layout_width="match_parent"
|
||||||
android:layout_height="match_parent"
|
android:layout_height="match_parent"
|
||||||
@@ -40,4 +40,4 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
</androidx.constraintlayout.widget.ConstraintLayout>
|
</androidx.constraintlayout.widget.ConstraintLayout>
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
android:theme="@style/Theme.SherpaOnnxVadAsr"
|
android:theme="@style/Theme.SherpaOnnxVadAsr"
|
||||||
tools:targetApi="31">
|
tools:targetApi="31">
|
||||||
<activity
|
<activity
|
||||||
android:name=".MainActivity"
|
android:name=".vad.asr.MainActivity"
|
||||||
android:exported="true">
|
android:exported="true">
|
||||||
<intent-filter>
|
<intent-filter>
|
||||||
<action android:name="android.intent.action.MAIN" />
|
<action android:name="android.intent.action.MAIN" />
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
package com.k2fsa.sherpa.onnx.vad.asr
|
||||||
|
|
||||||
import android.Manifest
|
import android.Manifest
|
||||||
import android.content.pm.PackageManager
|
import android.content.pm.PackageManager
|
||||||
@@ -13,6 +13,13 @@ import android.widget.Button
|
|||||||
import android.widget.TextView
|
import android.widget.TextView
|
||||||
import androidx.appcompat.app.AppCompatActivity
|
import androidx.appcompat.app.AppCompatActivity
|
||||||
import androidx.core.app.ActivityCompat
|
import androidx.core.app.ActivityCompat
|
||||||
|
import com.k2fsa.sherpa.onnx.OfflineRecognizer
|
||||||
|
import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
|
||||||
|
import com.k2fsa.sherpa.onnx.R
|
||||||
|
import com.k2fsa.sherpa.onnx.Vad
|
||||||
|
import com.k2fsa.sherpa.onnx.getFeatureConfig
|
||||||
|
import com.k2fsa.sherpa.onnx.getOfflineModelConfig
|
||||||
|
import com.k2fsa.sherpa.onnx.getVadModelConfig
|
||||||
import kotlin.concurrent.thread
|
import kotlin.concurrent.thread
|
||||||
|
|
||||||
|
|
||||||
@@ -40,7 +47,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
|
||||||
|
|
||||||
// Non-streaming ASR
|
// Non-streaming ASR
|
||||||
private lateinit var offlineRecognizer: SherpaOnnxOffline
|
private lateinit var offlineRecognizer: OfflineRecognizer
|
||||||
|
|
||||||
private var idx: Int = 0
|
private var idx: Int = 0
|
||||||
private var lastText: String = ""
|
private var lastText: String = ""
|
||||||
@@ -122,7 +129,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
|
|
||||||
private fun initVadModel() {
|
private fun initVadModel() {
|
||||||
val type = 0
|
val type = 0
|
||||||
println("Select VAD model type ${type}")
|
Log.i(TAG, "Select VAD model type ${type}")
|
||||||
val config = getVadModelConfig(type)
|
val config = getVadModelConfig(type)
|
||||||
|
|
||||||
vad = Vad(
|
vad = Vad(
|
||||||
@@ -194,20 +201,25 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
// for a list of available models
|
// for a list of available models
|
||||||
val secondType = 0
|
val secondType = 0
|
||||||
println("Select model type ${secondType} for the second pass")
|
Log.i(TAG, "Select model type ${secondType} for the second pass")
|
||||||
|
|
||||||
val config = OfflineRecognizerConfig(
|
val config = OfflineRecognizerConfig(
|
||||||
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
|
||||||
modelConfig = getOfflineModelConfig(type = secondType)!!,
|
modelConfig = getOfflineModelConfig(type = secondType)!!,
|
||||||
)
|
)
|
||||||
|
|
||||||
offlineRecognizer = SherpaOnnxOffline(
|
offlineRecognizer = OfflineRecognizer(
|
||||||
assetManager = application.assets,
|
assetManager = application.assets,
|
||||||
config = config,
|
config = config,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun runSecondPass(samples: FloatArray): String {
|
private fun runSecondPass(samples: FloatArray): String {
|
||||||
return offlineRecognizer.decode(samples, sampleRateInHz)
|
val stream = offlineRecognizer.createStream()
|
||||||
|
stream.acceptWaveform(samples, sampleRateInHz)
|
||||||
|
offlineRecognizer.decode(stream)
|
||||||
|
val result = offlineRecognizer.getResult(stream)
|
||||||
|
stream.release()
|
||||||
|
return result.text
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineRecognizer.kt
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt
|
||||||
@@ -1 +0,0 @@
|
|||||||
../../../../../../../../../SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt
|
|
||||||
@@ -1 +1 @@
|
|||||||
../../../../../../../../../SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt
|
../../../../../../../../../../sherpa-onnx/kotlin-api/Vad.kt
|
||||||
@@ -4,7 +4,7 @@
|
|||||||
xmlns:tools="http://schemas.android.com/tools"
|
xmlns:tools="http://schemas.android.com/tools"
|
||||||
android:layout_width="match_parent"
|
android:layout_width="match_parent"
|
||||||
android:layout_height="match_parent"
|
android:layout_height="match_parent"
|
||||||
tools:context=".MainActivity">
|
tools:context=".vad.asr.MainActivity">
|
||||||
|
|
||||||
<LinearLayout
|
<LinearLayout
|
||||||
android:layout_width="match_parent"
|
android:layout_width="match_parent"
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
<resources>
|
<resources>
|
||||||
<string name="app_name">VAD-ASR</string>
|
<string name="app_name">VAD+ASR</string>
|
||||||
<string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi.
|
<string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi.
|
||||||
\n
|
\n
|
||||||
\n\n\n
|
\n\n\n
|
||||||
|
|||||||
@@ -59,7 +59,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
|
|||||||
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
|
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
|
||||||
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
|
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
|
||||||
|
|
||||||
|
if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
|
||||||
|
SHERPA_ONNX_ENABLE_TTS=ON
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
|
||||||
|
SHERPA_ONNX_ENABLE_BINARY=OFF
|
||||||
|
fi
|
||||||
|
|
||||||
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
|
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
|
||||||
|
-DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
|
||||||
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
|
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
|
||||||
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
|
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
|
||||||
-DBUILD_ESPEAK_NG_EXE=OFF \
|
-DBUILD_ESPEAK_NG_EXE=OFF \
|
||||||
|
|||||||
@@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
|
|||||||
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
|
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
|
||||||
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
|
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
|
||||||
|
|
||||||
|
if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
|
||||||
|
SHERPA_ONNX_ENABLE_TTS=ON
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
|
||||||
|
SHERPA_ONNX_ENABLE_BINARY=OFF
|
||||||
|
fi
|
||||||
|
|
||||||
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
|
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
|
||||||
|
-DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
|
||||||
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
|
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
|
||||||
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
|
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
|
||||||
-DBUILD_ESPEAK_NG_EXE=OFF \
|
-DBUILD_ESPEAK_NG_EXE=OFF \
|
||||||
|
|||||||
@@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
|
|||||||
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
|
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
|
||||||
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
|
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
|
||||||
|
|
||||||
|
if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
|
||||||
|
SHERPA_ONNX_ENABLE_TTS=ON
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
|
||||||
|
SHERPA_ONNX_ENABLE_BINARY=OFF
|
||||||
|
fi
|
||||||
|
|
||||||
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
|
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
|
||||||
|
-DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
|
||||||
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
|
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
|
||||||
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
|
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
|
||||||
-DBUILD_ESPEAK_NG_EXE=OFF \
|
-DBUILD_ESPEAK_NG_EXE=OFF \
|
||||||
|
|||||||
@@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
|
|||||||
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
|
echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
|
||||||
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
|
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
|
||||||
|
|
||||||
|
if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
|
||||||
|
SHERPA_ONNX_ENABLE_TTS=ON
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
|
||||||
|
SHERPA_ONNX_ENABLE_BINARY=OFF
|
||||||
|
fi
|
||||||
|
|
||||||
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
|
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
|
||||||
|
-DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
|
||||||
|
-DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
|
||||||
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
|
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
|
||||||
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
|
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
|
||||||
-DBUILD_ESPEAK_NG_EXE=OFF \
|
-DBUILD_ESPEAK_NG_EXE=OFF \
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
../android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/AudioTagging.kt
|
../sherpa-onnx/kotlin-api/AudioTagging.kt
|
||||||
1
kotlin-api-examples/FeatureConfig.kt
Symbolic link
1
kotlin-api-examples/FeatureConfig.kt
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../sherpa-onnx/kotlin-api/FeatureConfig.kt
|
||||||
@@ -1,245 +0,0 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
|
||||||
|
|
||||||
import android.content.res.AssetManager
|
|
||||||
|
|
||||||
fun callback(samples: FloatArray): Unit {
|
|
||||||
println("callback got called with ${samples.size} samples");
|
|
||||||
}
|
|
||||||
|
|
||||||
fun main() {
|
|
||||||
testSpokenLanguageIdentifcation()
|
|
||||||
testAudioTagging()
|
|
||||||
testSpeakerRecognition()
|
|
||||||
testTts()
|
|
||||||
testAsr("transducer")
|
|
||||||
testAsr("zipformer2-ctc")
|
|
||||||
}
|
|
||||||
|
|
||||||
fun testSpokenLanguageIdentifcation() {
|
|
||||||
val config = SpokenLanguageIdentificationConfig(
|
|
||||||
whisper = SpokenLanguageIdentificationWhisperConfig(
|
|
||||||
encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx",
|
|
||||||
decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx",
|
|
||||||
tailPaddings = 33,
|
|
||||||
),
|
|
||||||
numThreads=1,
|
|
||||||
debug=true,
|
|
||||||
provider="cpu",
|
|
||||||
)
|
|
||||||
val slid = SpokenLanguageIdentification(assetManager=null, config=config)
|
|
||||||
|
|
||||||
val testFiles = arrayOf(
|
|
||||||
"./spoken-language-identification-test-wavs/ar-arabic.wav",
|
|
||||||
"./spoken-language-identification-test-wavs/bg-bulgarian.wav",
|
|
||||||
"./spoken-language-identification-test-wavs/de-german.wav",
|
|
||||||
)
|
|
||||||
|
|
||||||
for (waveFilename in testFiles) {
|
|
||||||
val objArray = WaveReader.readWaveFromFile(
|
|
||||||
filename = waveFilename,
|
|
||||||
)
|
|
||||||
val samples: FloatArray = objArray[0] as FloatArray
|
|
||||||
val sampleRate: Int = objArray[1] as Int
|
|
||||||
|
|
||||||
val stream = slid.createStream()
|
|
||||||
stream.acceptWaveform(samples, sampleRate = sampleRate)
|
|
||||||
val lang = slid.compute(stream)
|
|
||||||
stream.release()
|
|
||||||
println(waveFilename)
|
|
||||||
println(lang)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fun testAudioTagging() {
|
|
||||||
val config = AudioTaggingConfig(
|
|
||||||
model=AudioTaggingModelConfig(
|
|
||||||
zipformer=OfflineZipformerAudioTaggingModelConfig(
|
|
||||||
model="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx",
|
|
||||||
),
|
|
||||||
numThreads=1,
|
|
||||||
debug=true,
|
|
||||||
provider="cpu",
|
|
||||||
),
|
|
||||||
labels="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv",
|
|
||||||
topK=5,
|
|
||||||
)
|
|
||||||
val tagger = AudioTagging(assetManager=null, config=config)
|
|
||||||
|
|
||||||
val testFiles = arrayOf(
|
|
||||||
"./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav",
|
|
||||||
"./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/2.wav",
|
|
||||||
"./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/3.wav",
|
|
||||||
"./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/4.wav",
|
|
||||||
)
|
|
||||||
println("----------")
|
|
||||||
for (waveFilename in testFiles) {
|
|
||||||
val stream = tagger.createStream()
|
|
||||||
|
|
||||||
val objArray = WaveReader.readWaveFromFile(
|
|
||||||
filename = waveFilename,
|
|
||||||
)
|
|
||||||
val samples: FloatArray = objArray[0] as FloatArray
|
|
||||||
val sampleRate: Int = objArray[1] as Int
|
|
||||||
|
|
||||||
stream.acceptWaveform(samples, sampleRate = sampleRate)
|
|
||||||
val events = tagger.compute(stream)
|
|
||||||
stream.release()
|
|
||||||
|
|
||||||
println(waveFilename)
|
|
||||||
println(events)
|
|
||||||
println("----------")
|
|
||||||
}
|
|
||||||
|
|
||||||
tagger.release()
|
|
||||||
}
|
|
||||||
|
|
||||||
fun computeEmbedding(extractor: SpeakerEmbeddingExtractor, filename: String): FloatArray {
|
|
||||||
var objArray = WaveReader.readWaveFromFile(
|
|
||||||
filename = filename,
|
|
||||||
)
|
|
||||||
var samples: FloatArray = objArray[0] as FloatArray
|
|
||||||
var sampleRate: Int = objArray[1] as Int
|
|
||||||
|
|
||||||
val stream = extractor.createStream()
|
|
||||||
stream.acceptWaveform(sampleRate = sampleRate, samples=samples)
|
|
||||||
stream.inputFinished()
|
|
||||||
check(extractor.isReady(stream))
|
|
||||||
|
|
||||||
val embedding = extractor.compute(stream)
|
|
||||||
|
|
||||||
stream.release()
|
|
||||||
|
|
||||||
return embedding
|
|
||||||
}
|
|
||||||
|
|
||||||
fun testSpeakerRecognition() {
|
|
||||||
val config = SpeakerEmbeddingExtractorConfig(
|
|
||||||
model="./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx",
|
|
||||||
)
|
|
||||||
val extractor = SpeakerEmbeddingExtractor(config = config)
|
|
||||||
|
|
||||||
val embedding1a = computeEmbedding(extractor, "./speaker1_a_cn_16k.wav")
|
|
||||||
val embedding2a = computeEmbedding(extractor, "./speaker2_a_cn_16k.wav")
|
|
||||||
val embedding1b = computeEmbedding(extractor, "./speaker1_b_cn_16k.wav")
|
|
||||||
|
|
||||||
var manager = SpeakerEmbeddingManager(extractor.dim())
|
|
||||||
var ok = manager.add(name = "speaker1", embedding=embedding1a)
|
|
||||||
check(ok)
|
|
||||||
|
|
||||||
manager.add(name = "speaker2", embedding=embedding2a)
|
|
||||||
check(ok)
|
|
||||||
|
|
||||||
var name = manager.search(embedding=embedding1b, threshold=0.5f)
|
|
||||||
check(name == "speaker1")
|
|
||||||
|
|
||||||
manager.release()
|
|
||||||
|
|
||||||
manager = SpeakerEmbeddingManager(extractor.dim())
|
|
||||||
val embeddingList = mutableListOf(embedding1a, embedding1b)
|
|
||||||
ok = manager.add(name = "s1", embedding=embeddingList.toTypedArray())
|
|
||||||
check(ok)
|
|
||||||
|
|
||||||
name = manager.search(embedding=embedding1b, threshold=0.5f)
|
|
||||||
check(name == "s1")
|
|
||||||
|
|
||||||
name = manager.search(embedding=embedding2a, threshold=0.5f)
|
|
||||||
check(name.length == 0)
|
|
||||||
|
|
||||||
manager.release()
|
|
||||||
}
|
|
||||||
|
|
||||||
fun testTts() {
|
|
||||||
// see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
|
|
||||||
var config = OfflineTtsConfig(
|
|
||||||
model=OfflineTtsModelConfig(
|
|
||||||
vits=OfflineTtsVitsModelConfig(
|
|
||||||
model="./vits-piper-en_US-amy-low/en_US-amy-low.onnx",
|
|
||||||
tokens="./vits-piper-en_US-amy-low/tokens.txt",
|
|
||||||
dataDir="./vits-piper-en_US-amy-low/espeak-ng-data",
|
|
||||||
),
|
|
||||||
numThreads=1,
|
|
||||||
debug=true,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
val tts = OfflineTts(config=config)
|
|
||||||
val audio = tts.generateWithCallback(text="“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”", callback=::callback)
|
|
||||||
audio.save(filename="test-en.wav")
|
|
||||||
}
|
|
||||||
|
|
||||||
fun testAsr(type: String) {
|
|
||||||
var featConfig = FeatureConfig(
|
|
||||||
sampleRate = 16000,
|
|
||||||
featureDim = 80,
|
|
||||||
)
|
|
||||||
|
|
||||||
var waveFilename: String
|
|
||||||
var modelConfig: OnlineModelConfig = when (type) {
|
|
||||||
"transducer" -> {
|
|
||||||
waveFilename = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/test_wavs/0.wav"
|
|
||||||
// please refer to
|
|
||||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
|
||||||
// to download pre-trained models
|
|
||||||
OnlineModelConfig(
|
|
||||||
transducer = OnlineTransducerModelConfig(
|
|
||||||
encoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/encoder-epoch-99-avg-1.onnx",
|
|
||||||
decoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/decoder-epoch-99-avg-1.onnx",
|
|
||||||
joiner = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/joiner-epoch-99-avg-1.onnx",
|
|
||||||
),
|
|
||||||
tokens = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt",
|
|
||||||
numThreads = 1,
|
|
||||||
debug = false,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
"zipformer2-ctc" -> {
|
|
||||||
waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav"
|
|
||||||
OnlineModelConfig(
|
|
||||||
zipformer2Ctc = OnlineZipformer2CtcModelConfig(
|
|
||||||
model = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx",
|
|
||||||
),
|
|
||||||
tokens = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt",
|
|
||||||
numThreads = 1,
|
|
||||||
debug = false,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
else -> throw IllegalArgumentException(type)
|
|
||||||
}
|
|
||||||
|
|
||||||
var endpointConfig = EndpointConfig()
|
|
||||||
|
|
||||||
var lmConfig = OnlineLMConfig()
|
|
||||||
|
|
||||||
var config = OnlineRecognizerConfig(
|
|
||||||
modelConfig = modelConfig,
|
|
||||||
lmConfig = lmConfig,
|
|
||||||
featConfig = featConfig,
|
|
||||||
endpointConfig = endpointConfig,
|
|
||||||
enableEndpoint = true,
|
|
||||||
decodingMethod = "greedy_search",
|
|
||||||
maxActivePaths = 4,
|
|
||||||
)
|
|
||||||
|
|
||||||
var model = SherpaOnnx(
|
|
||||||
config = config,
|
|
||||||
)
|
|
||||||
|
|
||||||
var objArray = WaveReader.readWaveFromFile(
|
|
||||||
filename = waveFilename,
|
|
||||||
)
|
|
||||||
var samples: FloatArray = objArray[0] as FloatArray
|
|
||||||
var sampleRate: Int = objArray[1] as Int
|
|
||||||
|
|
||||||
model.acceptWaveform(samples, sampleRate = sampleRate)
|
|
||||||
while (model.isReady()) {
|
|
||||||
model.decode()
|
|
||||||
}
|
|
||||||
|
|
||||||
var tailPaddings = FloatArray((sampleRate * 0.5).toInt()) // 0.5 seconds
|
|
||||||
model.acceptWaveform(tailPaddings, sampleRate = sampleRate)
|
|
||||||
model.inputFinished()
|
|
||||||
while (model.isReady()) {
|
|
||||||
model.decode()
|
|
||||||
}
|
|
||||||
|
|
||||||
println("results: ${model.text}")
|
|
||||||
}
|
|
||||||
1
kotlin-api-examples/OfflineRecognizer.kt
Symbolic link
1
kotlin-api-examples/OfflineRecognizer.kt
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../sherpa-onnx/kotlin-api/OfflineRecognizer.kt
|
||||||
@@ -1 +1 @@
|
|||||||
../android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/OfflineStream.kt
|
../sherpa-onnx/kotlin-api/OfflineStream.kt
|
||||||
1
kotlin-api-examples/OnlineRecognizer.kt
Symbolic link
1
kotlin-api-examples/OnlineRecognizer.kt
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../sherpa-onnx/kotlin-api/OnlineRecognizer.kt
|
||||||
1
kotlin-api-examples/OnlineStream.kt
Symbolic link
1
kotlin-api-examples/OnlineStream.kt
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../sherpa-onnx/kotlin-api/OnlineStream.kt
|
||||||
@@ -1 +0,0 @@
|
|||||||
../android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
../android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt
|
|
||||||
@@ -1 +1 @@
|
|||||||
../android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/Speaker.kt
|
../sherpa-onnx/kotlin-api/Speaker.kt
|
||||||
@@ -1 +1 @@
|
|||||||
../android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/SpokenLanguageIdentification.kt
|
../sherpa-onnx/kotlin-api/SpokenLanguageIdentification.kt
|
||||||
@@ -1 +1 @@
|
|||||||
../android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt
|
../sherpa-onnx/kotlin-api/Vad.kt
|
||||||
@@ -1 +1 @@
|
|||||||
../android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt
|
../sherpa-onnx/kotlin-api/WaveReader.kt
|
||||||
@@ -44,9 +44,23 @@ function testSpeakerEmbeddingExtractor() {
|
|||||||
if [ ! -f ./speaker2_a_cn_16k.wav ]; then
|
if [ ! -f ./speaker2_a_cn_16k.wav ]; then
|
||||||
curl -SL -O https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker2_a_cn_16k.wav
|
curl -SL -O https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker2_a_cn_16k.wav
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
out_filename=test_speaker_id.jar
|
||||||
|
kotlinc-jvm -include-runtime -d $out_filename \
|
||||||
|
test_speaker_id.kt \
|
||||||
|
OnlineStream.kt \
|
||||||
|
Speaker.kt \
|
||||||
|
WaveReader.kt \
|
||||||
|
faked-asset-manager.kt \
|
||||||
|
faked-log.kt
|
||||||
|
|
||||||
|
ls -lh $out_filename
|
||||||
|
|
||||||
|
java -Djava.library.path=../build/lib -jar $out_filename
|
||||||
}
|
}
|
||||||
|
|
||||||
function testAsr() {
|
|
||||||
|
function testOnlineAsr() {
|
||||||
if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then
|
if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then
|
||||||
git lfs install
|
git lfs install
|
||||||
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21
|
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21
|
||||||
@@ -57,6 +71,20 @@ function testAsr() {
|
|||||||
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
||||||
rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
out_filename=test_online_asr.jar
|
||||||
|
kotlinc-jvm -include-runtime -d $out_filename \
|
||||||
|
test_online_asr.kt \
|
||||||
|
FeatureConfig.kt \
|
||||||
|
OnlineRecognizer.kt \
|
||||||
|
OnlineStream.kt \
|
||||||
|
WaveReader.kt \
|
||||||
|
faked-asset-manager.kt \
|
||||||
|
faked-log.kt
|
||||||
|
|
||||||
|
ls -lh $out_filename
|
||||||
|
|
||||||
|
java -Djava.library.path=../build/lib -jar $out_filename
|
||||||
}
|
}
|
||||||
|
|
||||||
function testTts() {
|
function testTts() {
|
||||||
@@ -65,16 +93,42 @@ function testTts() {
|
|||||||
tar xf vits-piper-en_US-amy-low.tar.bz2
|
tar xf vits-piper-en_US-amy-low.tar.bz2
|
||||||
rm vits-piper-en_US-amy-low.tar.bz2
|
rm vits-piper-en_US-amy-low.tar.bz2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
out_filename=test_tts.jar
|
||||||
|
kotlinc-jvm -include-runtime -d $out_filename \
|
||||||
|
test_tts.kt \
|
||||||
|
Tts.kt \
|
||||||
|
faked-asset-manager.kt \
|
||||||
|
faked-log.kt
|
||||||
|
|
||||||
|
ls -lh $out_filename
|
||||||
|
|
||||||
|
java -Djava.library.path=../build/lib -jar $out_filename
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function testAudioTagging() {
|
function testAudioTagging() {
|
||||||
if [ ! -d sherpa-onnx-zipformer-audio-tagging-2024-04-09 ]; then
|
if [ ! -d sherpa-onnx-zipformer-audio-tagging-2024-04-09 ]; then
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
out_filename=test_audio_tagging.jar
|
||||||
|
kotlinc-jvm -include-runtime -d $out_filename \
|
||||||
|
test_audio_tagging.kt \
|
||||||
|
AudioTagging.kt \
|
||||||
|
OfflineStream.kt \
|
||||||
|
WaveReader.kt \
|
||||||
|
faked-asset-manager.kt \
|
||||||
|
faked-log.kt
|
||||||
|
|
||||||
|
ls -lh $out_filename
|
||||||
|
|
||||||
|
java -Djava.library.path=../build/lib -jar $out_filename
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function testSpokenLanguageIdentification() {
|
function testSpokenLanguageIdentification() {
|
||||||
if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
|
if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
|
||||||
@@ -87,50 +141,44 @@ function testSpokenLanguageIdentification() {
|
|||||||
tar xvf spoken-language-identification-test-wavs.tar.bz2
|
tar xvf spoken-language-identification-test-wavs.tar.bz2
|
||||||
rm spoken-language-identification-test-wavs.tar.bz2
|
rm spoken-language-identification-test-wavs.tar.bz2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
out_filename=test_language_id.jar
|
||||||
|
kotlinc-jvm -include-runtime -d $out_filename \
|
||||||
|
test_language_id.kt \
|
||||||
|
SpokenLanguageIdentification.kt \
|
||||||
|
OfflineStream.kt \
|
||||||
|
WaveReader.kt \
|
||||||
|
faked-asset-manager.kt \
|
||||||
|
faked-log.kt
|
||||||
|
|
||||||
|
ls -lh $out_filename
|
||||||
|
|
||||||
|
java -Djava.library.path=../build/lib -jar $out_filename
|
||||||
}
|
}
|
||||||
|
|
||||||
function test() {
|
function testOfflineAsr() {
|
||||||
testSpokenLanguageIdentification
|
|
||||||
testAudioTagging
|
|
||||||
testSpeakerEmbeddingExtractor
|
|
||||||
testAsr
|
|
||||||
testTts
|
|
||||||
}
|
|
||||||
|
|
||||||
test
|
|
||||||
|
|
||||||
kotlinc-jvm -include-runtime -d main.jar \
|
|
||||||
AudioTagging.kt \
|
|
||||||
Main.kt \
|
|
||||||
OfflineStream.kt \
|
|
||||||
SherpaOnnx.kt \
|
|
||||||
Speaker.kt \
|
|
||||||
SpokenLanguageIdentification.kt \
|
|
||||||
Tts.kt \
|
|
||||||
WaveReader.kt \
|
|
||||||
faked-asset-manager.kt \
|
|
||||||
faked-log.kt
|
|
||||||
|
|
||||||
ls -lh main.jar
|
|
||||||
|
|
||||||
java -Djava.library.path=../build/lib -jar main.jar
|
|
||||||
|
|
||||||
function testTwoPass() {
|
|
||||||
if [ ! -f ./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/encoder-epoch-99-avg-1.int8.onnx ]; then
|
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
|
|
||||||
tar xvf sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
|
|
||||||
rm sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then
|
if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
rm sherpa-onnx-whisper-tiny.en.tar.bz2
|
rm sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
kotlinc-jvm -include-runtime -d 2pass.jar test-2pass.kt WaveReader.kt SherpaOnnx2Pass.kt faked-asset-manager.kt
|
out_filename=test_offline_asr.jar
|
||||||
ls -lh 2pass.jar
|
kotlinc-jvm -include-runtime -d $out_filename \
|
||||||
java -Djava.library.path=../build/lib -jar 2pass.jar
|
test_offline_asr.kt \
|
||||||
|
FeatureConfig.kt \
|
||||||
|
OfflineRecognizer.kt \
|
||||||
|
OfflineStream.kt \
|
||||||
|
WaveReader.kt \
|
||||||
|
faked-asset-manager.kt
|
||||||
|
|
||||||
|
ls -lh $out_filename
|
||||||
|
java -Djava.library.path=../build/lib -jar $out_filename
|
||||||
}
|
}
|
||||||
|
|
||||||
testTwoPass
|
testSpeakerEmbeddingExtractor
|
||||||
|
testOnlineAsr
|
||||||
|
testTts
|
||||||
|
testAudioTagging
|
||||||
|
testSpokenLanguageIdentification
|
||||||
|
testOfflineAsr
|
||||||
|
|||||||
@@ -1,49 +0,0 @@
|
|||||||
package com.k2fsa.sherpa.onnx
|
|
||||||
|
|
||||||
fun main() {
|
|
||||||
test2Pass()
|
|
||||||
}
|
|
||||||
|
|
||||||
fun test2Pass() {
|
|
||||||
val firstPass = createFirstPass()
|
|
||||||
val secondPass = createSecondPass()
|
|
||||||
|
|
||||||
val waveFilename = "./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/test_wavs/0.wav"
|
|
||||||
|
|
||||||
var objArray = WaveReader.readWaveFromFile(
|
|
||||||
filename = waveFilename,
|
|
||||||
)
|
|
||||||
var samples: FloatArray = objArray[0] as FloatArray
|
|
||||||
var sampleRate: Int = objArray[1] as Int
|
|
||||||
|
|
||||||
firstPass.acceptWaveform(samples, sampleRate = sampleRate)
|
|
||||||
while (firstPass.isReady()) {
|
|
||||||
firstPass.decode()
|
|
||||||
}
|
|
||||||
|
|
||||||
var text = firstPass.text
|
|
||||||
println("First pass text: $text")
|
|
||||||
|
|
||||||
text = secondPass.decode(samples, sampleRate)
|
|
||||||
println("Second pass text: $text")
|
|
||||||
}
|
|
||||||
|
|
||||||
fun createFirstPass(): SherpaOnnx {
|
|
||||||
val config = OnlineRecognizerConfig(
|
|
||||||
featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
|
|
||||||
modelConfig = getModelConfig(type = 1)!!,
|
|
||||||
endpointConfig = getEndpointConfig(),
|
|
||||||
enableEndpoint = true,
|
|
||||||
)
|
|
||||||
|
|
||||||
return SherpaOnnx(config = config)
|
|
||||||
}
|
|
||||||
|
|
||||||
fun createSecondPass(): SherpaOnnxOffline {
|
|
||||||
val config = OfflineRecognizerConfig(
|
|
||||||
featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
|
|
||||||
modelConfig = getOfflineModelConfig(type = 2)!!,
|
|
||||||
)
|
|
||||||
|
|
||||||
return SherpaOnnxOffline(config = config)
|
|
||||||
}
|
|
||||||
49
kotlin-api-examples/test_audio_tagging.kt
Normal file
49
kotlin-api-examples/test_audio_tagging.kt
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
package com.k2fsa.sherpa.onnx
|
||||||
|
|
||||||
|
/** Entry point of the audio tagging example. */
fun main() = testAudioTagging()
|
||||||
|
|
||||||
|
/**
 * Runs the zipformer audio tagging model over the bundled test waves and
 * prints the tagging result of each file, separated by a dashed line.
 *
 * All model, label, and wave paths are relative to the working directory.
 */
fun testAudioTagging() {
  val zipformerConfig = OfflineZipformerAudioTaggingModelConfig(
    model = "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx",
  )
  val modelConfig = AudioTaggingModelConfig(
    zipformer = zipformerConfig,
    numThreads = 1,
    debug = true,
    provider = "cpu",
  )
  val tagger = AudioTagging(
    config = AudioTaggingConfig(
      model = modelConfig,
      labels = "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv",
      topK = 5,
    ),
  )

  val waveFiles = listOf(
    "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav",
    "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/2.wav",
    "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/3.wav",
    "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/4.wav",
  )
  val separator = "----------"

  println(separator)
  waveFiles.forEach { path ->
    // One stream per file; it is released as soon as its result is computed.
    val stream = tagger.createStream()

    // readWaveFromFile yields [samples, sampleRate].
    val wave = WaveReader.readWaveFromFile(filename = path)
    val audio = wave[0] as FloatArray
    val rate = wave[1] as Int

    stream.acceptWaveform(audio, sampleRate = rate)
    val detected = tagger.compute(stream)
    stream.release()

    println(path)
    println(detected)
    println(separator)
  }

  tagger.release()
}
|
||||||
|
|
||||||
43
kotlin-api-examples/test_language_id.kt
Normal file
43
kotlin-api-examples/test_language_id.kt
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
package com.k2fsa.sherpa.onnx
|
||||||
|
|
||||||
|
/** Entry point of the spoken language identification example. */
fun main() = testSpokenLanguageIdentifcation()
|
||||||
|
|
||||||
|
/**
 * Identifies the spoken language of each bundled test wave with the whisper
 * tiny model and prints the file name followed by the computed result.
 *
 * All model and wave paths are relative to the working directory.
 */
fun testSpokenLanguageIdentifcation() {
  val whisperConfig = SpokenLanguageIdentificationWhisperConfig(
    encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx",
    decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx",
    tailPaddings = 33,
  )
  val slid = SpokenLanguageIdentification(
    config = SpokenLanguageIdentificationConfig(
      whisper = whisperConfig,
      numThreads = 1,
      debug = true,
      provider = "cpu",
    ),
  )

  val waveFiles = listOf(
    "./spoken-language-identification-test-wavs/ar-arabic.wav",
    "./spoken-language-identification-test-wavs/bg-bulgarian.wav",
    "./spoken-language-identification-test-wavs/de-german.wav",
  )

  waveFiles.forEach { path ->
    // readWaveFromFile yields [samples, sampleRate].
    val wave = WaveReader.readWaveFromFile(filename = path)
    val audio = wave[0] as FloatArray
    val rate = wave[1] as Int

    // One stream per file; it is released right after the result is computed.
    val stream = slid.createStream()
    stream.acceptWaveform(audio, sampleRate = rate)
    val detected = slid.compute(stream)
    stream.release()

    println(path)
    println(detected)
  }

  slid.release()
}
|
||||||
|
|
||||||
32
kotlin-api-examples/test_offline_asr.kt
Normal file
32
kotlin-api-examples/test_offline_asr.kt
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
package com.k2fsa.sherpa.onnx
|
||||||
|
|
||||||
|
/**
 * Entry point of the offline (non-streaming) ASR example: decodes one test
 * wave with the recognizer from [createOfflineRecognizer] and prints the result.
 */
fun main() {
  val recognizer = createOfflineRecognizer()

  // readWaveFromFile yields [samples, sampleRate].
  val wave = WaveReader.readWaveFromFile(
    filename = "./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/test_wavs/0.wav",
  )
  val audio = wave[0] as FloatArray
  val rate = wave[1] as Int

  val stream = recognizer.createStream()
  stream.acceptWaveform(audio, sampleRate = rate)
  recognizer.decode(stream)
  println(recognizer.getResult(stream))

  // Release the stream before the recognizer that created it.
  stream.release()
  recognizer.release()
}
|
||||||
|
|
||||||
|
/**
 * Builds the offline recognizer used by this example.
 *
 * Uses 16 kHz / 80-dim features and the predefined offline model config
 * selected by `type = 2`. The `!!` throws a NullPointerException if
 * `getOfflineModelConfig` returns null for that type.
 */
fun createOfflineRecognizer(): OfflineRecognizer =
  OfflineRecognizer(
    config = OfflineRecognizerConfig(
      featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
      modelConfig = getOfflineModelConfig(type = 2)!!,
    ),
  )
|
||||||
87
kotlin-api-examples/test_online_asr.kt
Normal file
87
kotlin-api-examples/test_online_asr.kt
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
package com.k2fsa.sherpa.onnx
|
||||||
|
|
||||||
|
/** Entry point of the streaming ASR example: runs every supported model type in turn. */
fun main() {
  for (modelType in listOf("transducer", "zipformer2-ctc")) {
    testOnlineAsr(modelType)
  }
}
|
||||||
|
|
||||||
|
/**
 * Decodes one test wave with a streaming recognizer and prints the text.
 *
 * @param type model family to test: `"transducer"` or `"zipformer2-ctc"`.
 * @throws IllegalArgumentException if [type] is neither of the above.
 */
fun testOnlineAsr(type: String) {
  val featConfig = FeatureConfig(
    sampleRate = 16000,
    featureDim = 80,
  )

  // Each model type comes with its own test wave and model files.
  // Please refer to
  // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  // to download pre-trained models.
  val (waveFilename, modelConfig) = when (type) {
    "transducer" -> Pair(
      "./sherpa-onnx-streaming-zipformer-en-2023-02-21/test_wavs/0.wav",
      OnlineModelConfig(
        transducer = OnlineTransducerModelConfig(
          encoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/encoder-epoch-99-avg-1.onnx",
          decoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/decoder-epoch-99-avg-1.onnx",
          joiner = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/joiner-epoch-99-avg-1.onnx",
        ),
        tokens = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt",
        numThreads = 1,
        debug = false,
      ),
    )

    "zipformer2-ctc" -> Pair(
      "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav",
      OnlineModelConfig(
        zipformer2Ctc = OnlineZipformer2CtcModelConfig(
          model = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx",
        ),
        tokens = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt",
        numThreads = 1,
        debug = false,
      ),
    )

    else -> throw IllegalArgumentException(type)
  }

  val recognizer = OnlineRecognizer(
    config = OnlineRecognizerConfig(
      modelConfig = modelConfig,
      lmConfig = OnlineLMConfig(),
      featConfig = featConfig,
      endpointConfig = EndpointConfig(),
      enableEndpoint = true,
      decodingMethod = "greedy_search",
      maxActivePaths = 4,
    ),
  )

  // readWaveFromFile yields [samples, sampleRate].
  val wave = WaveReader.readWaveFromFile(filename = waveFilename)
  val audio = wave[0] as FloatArray
  val rate = wave[1] as Int

  val stream = recognizer.createStream()

  // Decode for as long as the recognizer reports the stream as ready.
  fun decodeWhileReady() {
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream)
    }
  }

  stream.acceptWaveform(audio, sampleRate = rate)
  decodeWhileReady()

  // Append 0.5 seconds of zero samples, then signal the end of input.
  val tailPaddings = FloatArray((rate * 0.5).toInt())
  stream.acceptWaveform(tailPaddings, sampleRate = rate)
  stream.inputFinished()
  decodeWhileReady()

  println("results: ${recognizer.getResult(stream).text}")

  stream.release()
  recognizer.release()
}
|
||||||
62
kotlin-api-examples/test_speaker_id.kt
Normal file
62
kotlin-api-examples/test_speaker_id.kt
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
package com.k2fsa.sherpa.onnx
|
||||||
|
|
||||||
|
/** Entry point of the speaker identification example. */
fun main() = testSpeakerRecognition()
|
||||||
|
|
||||||
|
/**
 * Exercises speaker enrolment and lookup with the 3D-Speaker embedding model.
 *
 * Two scenarios are verified with `check`, which throws IllegalStateException
 * on failure:
 *  1. Two speakers are enrolled with one embedding each; a second recording
 *     of speaker 1 must be identified as "speaker1".
 *  2. One speaker is enrolled with two embeddings; a recording of that speaker
 *     must match "s1", and a recording of a different speaker must return an
 *     empty name.
 *
 * The model and wave paths are relative to the working directory.
 */
fun testSpeakerRecognition() {
  val config = SpeakerEmbeddingExtractorConfig(
    model = "./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx",
  )
  val extractor = SpeakerEmbeddingExtractor(config = config)

  val embedding1a = computeEmbedding(extractor, "./speaker1_a_cn_16k.wav")
  val embedding2a = computeEmbedding(extractor, "./speaker2_a_cn_16k.wav")
  val embedding1b = computeEmbedding(extractor, "./speaker1_b_cn_16k.wav")

  // Scenario 1: one embedding per speaker.
  var manager = SpeakerEmbeddingManager(extractor.dim())
  var ok = manager.add(name = "speaker1", embedding = embedding1a)
  check(ok)

  // Store the result of this call; otherwise the check below would only
  // re-test the outcome of the previous add and could never fail here.
  ok = manager.add(name = "speaker2", embedding = embedding2a)
  check(ok)

  var name = manager.search(embedding = embedding1b, threshold = 0.5f)
  check(name == "speaker1")

  manager.release()

  // Scenario 2: several embeddings registered under a single name.
  manager = SpeakerEmbeddingManager(extractor.dim())
  val embeddingList = mutableListOf(embedding1a, embedding1b)
  ok = manager.add(name = "s1", embedding = embeddingList.toTypedArray())
  check(ok)

  name = manager.search(embedding = embedding1b, threshold = 0.5f)
  check(name == "s1")

  // Speaker 2 is not enrolled in this manager, so the search must find nothing.
  name = manager.search(embedding = embedding2a, threshold = 0.5f)
  check(name.isEmpty())

  manager.release()
  extractor.release()
  println("Speaker ID test done!")
}
|
||||||
|
|
||||||
|
/**
 * Computes the speaker embedding of a single wave file.
 *
 * @param extractor extractor used to create the stream and compute the embedding.
 * @param filename path of the wave file to read.
 * @return the embedding computed by [extractor] for the whole file.
 * @throws IllegalStateException if the extractor does not report the stream
 *   as ready after all input has been fed.
 */
fun computeEmbedding(extractor: SpeakerEmbeddingExtractor, filename: String): FloatArray {
  // readWaveFromFile yields [samples, sampleRate].
  val wave = WaveReader.readWaveFromFile(filename = filename)
  val audio = wave[0] as FloatArray
  val rate = wave[1] as Int

  val stream = extractor.createStream()
  stream.acceptWaveform(sampleRate = rate, samples = audio)
  stream.inputFinished()
  check(extractor.isReady(stream))

  // Compute first, then release the stream before handing back the result.
  return extractor.compute(stream).also { stream.release() }
}
|
||||||
30
kotlin-api-examples/test_tts.kt
Normal file
30
kotlin-api-examples/test_tts.kt
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
package com.k2fsa.sherpa.onnx
|
||||||
|
|
||||||
|
/** Entry point of the text-to-speech example. */
fun main() = testTts()
|
||||||
|
|
||||||
|
/**
 * Synthesizes a fixed English sentence with the VITS piper "amy-low" model,
 * reporting progress through [callback], and saves the audio to test-en.wav.
 *
 * Models are listed at https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
 * and this one can be downloaded from
 * https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
 */
fun testTts() {
  val vitsConfig = OfflineTtsVitsModelConfig(
    model = "./vits-piper-en_US-amy-low/en_US-amy-low.onnx",
    tokens = "./vits-piper-en_US-amy-low/tokens.txt",
    dataDir = "./vits-piper-en_US-amy-low/espeak-ng-data",
  )
  val tts = OfflineTts(
    config = OfflineTtsConfig(
      model = OfflineTtsModelConfig(
        vits = vitsConfig,
        numThreads = 1,
        debug = true,
      ),
    ),
  )

  val sentence = "“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”"
  val audio = tts.generateWithCallback(text = sentence, callback = ::callback)
  audio.save(filename = "test-en.wav")

  tts.release()
  println("Saved to test-en.wav")
}
|
||||||
|
|
||||||
|
/**
 * Progress callback passed to `generateWithCallback`; prints how many samples
 * it received.
 *
 * @param samples samples handed to the callback by the TTS engine.
 */
fun callback(samples: FloatArray) {
  println("callback got called with ${samples.size} samples")
}
|
||||||
91
scripts/apk/build-apk-asr.sh.in
Normal file
91
scripts/apk/build-apk-asr.sh.in
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
#
|
||||||
|
# Auto generated! Please DO NOT EDIT!
|
||||||
|
|
||||||
|
# Please set the environment variable ANDROID_NDK
|
||||||
|
# before running this script
|
||||||
|
|
||||||
|
# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
|
||||||
|
# and some other files like the file "build/cmake/android.toolchain.cmake"
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
log() {
  # Print the arguments as a timestamped message, prefixed with the base name
  # of the calling file, the line of the call, and the calling function.
  # Adapted from espnet.
  local caller_file=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
|
||||||
|
|
||||||
|
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
|
||||||
|
|
||||||
|
log "Building streaming ASR APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
|
||||||
|
|
||||||
|
export SHERPA_ONNX_ENABLE_TTS=OFF
|
||||||
|
|
||||||
|
log "====================arm64-v8a================="
|
||||||
|
./build-android-arm64-v8a.sh
|
||||||
|
log "====================armv7-eabi================"
|
||||||
|
./build-android-armv7-eabi.sh
|
||||||
|
log "====================x86-64===================="
|
||||||
|
./build-android-x86-64.sh
|
||||||
|
log "====================x86===================="
|
||||||
|
./build-android-x86.sh
|
||||||
|
|
||||||
|
mkdir -p apks
|
||||||
|
|
||||||
|
{% for model in model_list %}
|
||||||
|
pushd ./android/SherpaOnnx/app/src/main/assets/
|
||||||
|
model_name={{ model.model_name }}
|
||||||
|
type={{ model.idx }}
|
||||||
|
lang={{ model.lang }}
|
||||||
|
short_name={{ model.short_name }}
|
||||||
|
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
|
||||||
|
tar xvf ${model_name}.tar.bz2
|
||||||
|
|
||||||
|
{{ model.cmd }}
|
||||||
|
|
||||||
|
rm -rf *.tar.bz2
|
||||||
|
ls -lh $model_name
|
||||||
|
|
||||||
|
popd
|
||||||
|
# Now we are at the project root directory
|
||||||
|
|
||||||
|
git checkout .
|
||||||
|
pushd android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx
|
||||||
|
sed -i.bak s/"type = 0/type = $type/" ./MainActivity.kt
|
||||||
|
git diff
|
||||||
|
popd
|
||||||
|
|
||||||
|
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
|
||||||
|
log "------------------------------------------------------------"
|
||||||
|
log "build ASR apk for $arch"
|
||||||
|
log "------------------------------------------------------------"
|
||||||
|
src_arch=$arch
|
||||||
|
if [ $arch == "armeabi-v7a" ]; then
|
||||||
|
src_arch=armv7-eabi
|
||||||
|
elif [ $arch == "x86_64" ]; then
|
||||||
|
src_arch=x86-64
|
||||||
|
fi
|
||||||
|
|
||||||
|
ls -lh ./build-android-$src_arch/install/lib/*.so
|
||||||
|
|
||||||
|
cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnx/app/src/main/jniLibs/$arch/
|
||||||
|
|
||||||
|
pushd ./android/SherpaOnnx
|
||||||
|
sed -i.bak s/2048/9012/g ./gradle.properties
|
||||||
|
git diff ./gradle.properties
|
||||||
|
./gradlew assembleRelease
|
||||||
|
popd
|
||||||
|
|
||||||
|
mv android/SherpaOnnx/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-asr-$lang-$short_name.apk
|
||||||
|
ls -lh apks
|
||||||
|
rm -v ./android/SherpaOnnx/app/src/main/jniLibs/$arch/*.so
|
||||||
|
done
|
||||||
|
|
||||||
|
rm -rf ./android/SherpaOnnx/app/src/main/assets/$model_name
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
git checkout .
|
||||||
|
|
||||||
|
ls -lh apks/
|
||||||
@@ -29,6 +29,8 @@ log "====================x86-64===================="
|
|||||||
log "====================x86===================="
|
log "====================x86===================="
|
||||||
./build-android-x86.sh
|
./build-android-x86.sh
|
||||||
|
|
||||||
|
export SHERPA_ONNX_ENABLE_TTS=OFF
|
||||||
|
|
||||||
mkdir -p apks
|
mkdir -p apks
|
||||||
|
|
||||||
{% for model in model_list %}
|
{% for model in model_list %}
|
||||||
|
|||||||
@@ -29,6 +29,8 @@ log "====================x86-64===================="
|
|||||||
log "====================x86===================="
|
log "====================x86===================="
|
||||||
./build-android-x86.sh
|
./build-android-x86.sh
|
||||||
|
|
||||||
|
export SHERPA_ONNX_ENABLE_TTS=OFF
|
||||||
|
|
||||||
mkdir -p apks
|
mkdir -p apks
|
||||||
|
|
||||||
{% for model in model_list %}
|
{% for model in model_list %}
|
||||||
|
|||||||
@@ -29,6 +29,8 @@ log "====================x86-64===================="
|
|||||||
log "====================x86===================="
|
log "====================x86===================="
|
||||||
./build-android-x86.sh
|
./build-android-x86.sh
|
||||||
|
|
||||||
|
export SHERPA_ONNX_ENABLE_TTS=OFF
|
||||||
|
|
||||||
mkdir -p apks
|
mkdir -p apks
|
||||||
|
|
||||||
{% for model in model_list %}
|
{% for model in model_list %}
|
||||||
|
|||||||
@@ -29,6 +29,8 @@ log "====================x86-64===================="
|
|||||||
log "====================x86===================="
|
log "====================x86===================="
|
||||||
./build-android-x86.sh
|
./build-android-x86.sh
|
||||||
|
|
||||||
|
export SHERPA_ONNX_ENABLE_TTS=OFF
|
||||||
|
|
||||||
mkdir -p apks
|
mkdir -p apks
|
||||||
|
|
||||||
{% for model in model_list %}
|
{% for model in model_list %}
|
||||||
|
|||||||
@@ -29,6 +29,8 @@ log "====================x86-64===================="
|
|||||||
log "====================x86===================="
|
log "====================x86===================="
|
||||||
./build-android-x86.sh
|
./build-android-x86.sh
|
||||||
|
|
||||||
|
export SHERPA_ONNX_ENABLE_TTS=ON
|
||||||
|
|
||||||
mkdir -p apks
|
mkdir -p apks
|
||||||
|
|
||||||
{% for tts_model in tts_model_list %}
|
{% for tts_model in tts_model_list %}
|
||||||
|
|||||||
@@ -29,6 +29,8 @@ log "====================x86-64===================="
|
|||||||
log "====================x86===================="
|
log "====================x86===================="
|
||||||
./build-android-x86.sh
|
./build-android-x86.sh
|
||||||
|
|
||||||
|
export SHERPA_ONNX_ENABLE_TTS=ON
|
||||||
|
|
||||||
mkdir -p apks
|
mkdir -p apks
|
||||||
|
|
||||||
{% for tts_model in tts_model_list %}
|
{% for tts_model in tts_model_list %}
|
||||||
|
|||||||
117
scripts/apk/generate-asr-apk-script.py
Executable file
117
scripts/apk/generate-asr-apk-script.py
Executable file
@@ -0,0 +1,117 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
import jinja2
|
||||||
|
|
||||||
|
|
||||||
|
def get_args():
    """Parse the command-line options that describe how work is sharded.

    Returns:
      The namespace produced by ``argparse`` with two integer attributes:
      ``total`` (number of runners, default 1) and ``index`` (index of the
      current runner, default 0).
    """
    parser = argparse.ArgumentParser()

    # (flag, default value, help text) for every integer option.
    int_options = (
        ("--total", 1, "Number of runners"),
        ("--index", 0, "Index of the current runner"),
    )
    for flag, default_value, description in int_options:
        parser.add_argument(
            flag,
            type=int,
            default=default_value,
            help=description,
        )

    return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Model:
    """Description of one ASR model that is substituted into the APK template.

    Attributes:
      model_name: Name of the model archive. We will download
        https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
      idx: The type of the model, e.g., 0, 1, 2. It is hardcoded in the
        Kotlin code.
      lang: Language tag, e.g., zh, en, zh_en.
      short_name: Short model family name, e.g., whisper, paraformer,
        zipformer.
      cmd: Commands used to remove extra files from the model directory.
    """

    model_name: str
    idx: int
    lang: str
    short_name: str = ""
    cmd: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
def get_models():
    """Return the list of ASR models for which APKs are generated.

    Returns:
      A list of ``Model`` objects. Each ``cmd`` is a snippet that is pasted
      into the generated build script to delete files that should not be
      shipped inside the APK.
    """
    # Runs inside the extracted model directory and deletes the listed model
    # files, helper scripts, and test wavs.
    bilingual_zh_en_cleanup = """
pushd $model_name
rm -v decoder-epoch-99-avg-1.int8.onnx
rm -v encoder-epoch-99-avg-1.onnx
rm -v joiner-epoch-99-avg-1.onnx

rm -v *.sh
rm -v .gitattributes
rm -v *state*
rm -rfv test_wavs

ls -lh

popd
"""

    return [
        Model(
            model_name="sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
            idx=8,
            lang="bilingual_zh_en",
            short_name="zipformer",
            cmd=bilingual_zh_en_cleanup,
        ),
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Render ``./build-apk-asr.sh`` from ``./build-apk-asr.sh.in``.

    The models returned by ``get_models()`` are divided evenly among
    ``--total`` runners. Models left over after the even split are handed
    out one each to the runners with the smallest indexes. Only the models
    assigned to runner ``--index`` are passed to the template as
    ``model_list``.

    Raises:
      ValueError: If there are fewer models than runners.
    """
    args = get_args()
    index, total = args.index, args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()
    num_models = len(all_model_list)

    # Quotient: models every runner gets; remainder: models still unassigned.
    num_per_runner, remaining = divmod(num_models, total)
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner
    print(f"{index}/{total}: {start}-{end}/{num_models}")

    # Slicing copies, so appending below does not touch all_model_list.
    selected = all_model_list[start:end]
    if index < remaining:
        extra = total * num_per_runner + index
        selected.append(all_model_list[extra])
        print(f"{extra}/{num_models}")

    context = {"model_list": selected}

    for filename in ("./build-apk-asr.sh",):
        with open(f"{filename}.in") as fin:
            template_text = fin.read()

        # Render before opening the output so that a template error does not
        # truncate an existing script.
        rendered = jinja2.Environment().from_string(template_text).render(**context)

        with open(filename, "w") as fout:
            print(rendered, file=fout)
|
||||||
|
|
||||||
|
|
||||||
|
# Generate the build script only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user