Refactor the JNI interface to make it more modular and maintainable (#802)

This commit is contained in:
Fangjun Kuang
2024-04-24 09:48:42 +08:00
committed by GitHub
parent dc5af04830
commit 9b67a476e6
116 changed files with 3502 additions and 3316 deletions

View File

@@ -15,7 +15,8 @@
android:theme="@style/Theme.SherpaOnnx"
tools:targetApi="31">
<activity
android:name=".MainActivity"
android:name=".kws.MainActivity"
android:label="Keyword-spotter"
android:exported="true">
<intent-filter>
<action android:name="android.intent.action.MAIN" />

View File

@@ -0,0 +1 @@
../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt

View File

@@ -0,0 +1 @@
../../../../../../../../../../sherpa-onnx/kotlin-api/KeywordSpotter.kt

View File

@@ -1,4 +1,4 @@
package com.k2fsa.sherpa.onnx
package com.k2fsa.sherpa.onnx.kws
import android.Manifest
import android.content.pm.PackageManager
@@ -14,7 +14,13 @@ import android.widget.TextView
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.*
import com.k2fsa.sherpa.onnx.KeywordSpotter
import com.k2fsa.sherpa.onnx.KeywordSpotterConfig
import com.k2fsa.sherpa.onnx.OnlineStream
import com.k2fsa.sherpa.onnx.R
import com.k2fsa.sherpa.onnx.getFeatureConfig
import com.k2fsa.sherpa.onnx.getKeywordsFile
import com.k2fsa.sherpa.onnx.getKwsModelConfig
import kotlin.concurrent.thread
private const val TAG = "sherpa-onnx"
@@ -23,7 +29,8 @@ private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
class MainActivity : AppCompatActivity() {
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
private lateinit var model: SherpaOnnxKws
private lateinit var kws: KeywordSpotter
private lateinit var stream: OnlineStream
private var audioRecord: AudioRecord? = null
private lateinit var recordButton: Button
private lateinit var textView: TextView
@@ -87,15 +94,18 @@ class MainActivity : AppCompatActivity() {
Log.i(TAG, keywords)
keywords = keywords.replace("\n", "/")
keywords = keywords.trim()
// If keywords is an empty string, it just resets the decoding stream
// always returns true in this case.
// If keywords is not empty, it will create a new decoding stream with
// the given keywords appended to the default keywords.
// Return false if errors occured when adding keywords, true otherwise.
val status = model.reset(keywords)
if (!status) {
Log.i(TAG, "Failed to reset with keywords.")
Toast.makeText(this, "Failed to set keywords.", Toast.LENGTH_LONG).show();
// Return false if errors occurred when adding keywords, true otherwise.
stream.release()
stream = kws.createStream(keywords)
if (stream.ptr == 0L) {
Log.i(TAG, "Failed to create stream with keywords: $keywords")
Toast.makeText(this, "Failed to set keywords to $keywords.", Toast.LENGTH_LONG)
.show()
return
}
@@ -122,6 +132,7 @@ class MainActivity : AppCompatActivity() {
audioRecord!!.release()
audioRecord = null
recordButton.setText(R.string.start)
stream.release()
Log.i(TAG, "Stopped recording")
}
}
@@ -137,22 +148,22 @@ class MainActivity : AppCompatActivity() {
val ret = audioRecord?.read(buffer, 0, buffer.size)
if (ret != null && ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
model.acceptWaveform(samples, sampleRate=sampleRateInHz)
while (model.isReady()) {
model.decode()
stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
while (kws.isReady(stream)) {
kws.decode(stream)
}
val text = model.keyword
val text = kws.getResult(stream).keyword
var textToDisplay = lastText;
var textToDisplay = lastText
if(text.isNotBlank()) {
if (text.isNotBlank()) {
if (lastText.isBlank()) {
textToDisplay = "${idx}: ${text}"
textToDisplay = "$idx: $text"
} else {
textToDisplay = "${idx}: ${text}\n${lastText}"
textToDisplay = "$idx: $text\n$lastText"
}
lastText = "${idx}: ${text}\n${lastText}"
lastText = "$idx: $text\n$lastText"
idx += 1
}
@@ -188,20 +199,21 @@ class MainActivity : AppCompatActivity() {
}
private fun initModel() {
// Please change getModelConfig() to add new models
// Please change getKwsModelConfig() to add new models
// See https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
// for a list of available models
val type = 0
Log.i(TAG, "Select model type ${type}")
Log.i(TAG, "Select model type $type")
val config = KeywordSpotterConfig(
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
modelConfig = getModelConfig(type = type)!!,
keywordsFile = getKeywordsFile(type = type)!!,
modelConfig = getKwsModelConfig(type = type)!!,
keywordsFile = getKeywordsFile(type = type),
)
model = SherpaOnnxKws(
kws = KeywordSpotter(
assetManager = application.assets,
config = config,
)
stream = kws.createStream()
}
}
}

View File

@@ -0,0 +1 @@
../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt

View File

@@ -0,0 +1 @@
../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt

View File

@@ -1,162 +0,0 @@
// Copyright (c) 2024 Xiaomi Corporation
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
/**
 * Locations of the three model files that make up a transducer model.
 *
 * Every field defaults to an empty string. In this file the values are
 * filled in by [getModelConfig] with paths relative to a model directory.
 *
 * NOTE(review): instances are handed to native code through
 * [KeywordSpotterConfig]; presumably the field names must stay in sync with
 * the JNI side -- confirm before renaming anything.
 */
data class OnlineTransducerModelConfig(
// Path of the encoder model file.
var encoder: String = "",
// Path of the decoder model file.
var decoder: String = "",
// Path of the joiner model file.
var joiner: String = "",
)
/**
 * Model-level settings for a streaming model.
 *
 * [tokens] is the only field without a default and must always be supplied.
 *
 * NOTE(review): the exact meaning of [provider] and [modelType] is defined by
 * the native library and is not visible in this file; the only values used
 * here are "cpu" (default provider) and "zipformer2" (see [getModelConfig]).
 */
data class OnlineModelConfig(
// Transducer model files; defaults to a config with empty paths.
var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(),
// Path of the tokens file (required).
var tokens: String,
// Number of threads; defaults to 1.
var numThreads: Int = 1,
// Debug switch; off by default.
var debug: Boolean = false,
// Execution provider name; defaults to "cpu".
var provider: String = "cpu",
// Model type hint; empty by default.
var modelType: String = "",
)
/**
 * Audio feature settings.
 *
 * Defaults to a sample rate of 16000 and a feature dimension of 80.
 * See [getFeatureConfig] for a small factory that fills in both values.
 */
data class FeatureConfig(
// Sample rate of the input audio; 16000 by default.
var sampleRate: Int = 16000,
// Feature dimension; 80 by default.
var featureDim: Int = 80,
)
/**
 * Complete configuration passed to [SherpaOnnxKws].
 *
 * [modelConfig] is the only field without a default and must always be
 * supplied.
 *
 * NOTE(review): the semantics of the tuning fields ([maxActivePaths],
 * [keywordsScore], [keywordsThreshold], [numTrailingBlanks]) are defined by
 * the native library and cannot be confirmed from this file.
 */
data class KeywordSpotterConfig(
// Feature settings; defaults to FeatureConfig().
var featConfig: FeatureConfig = FeatureConfig(),
// Model settings (required).
var modelConfig: OnlineModelConfig,
// Defaults to 4.
var maxActivePaths: Int = 4,
// Path of the keywords file; defaults to "keywords.txt".
var keywordsFile: String = "keywords.txt",
// Defaults to 1.5.
var keywordsScore: Float = 1.5f,
// Defaults to 0.25.
var keywordsThreshold: Float = 0.25f,
// Defaults to 2.
var numTrailingBlanks: Int = 2,
)
/**
 * Kotlin facade over the keyword spotter implemented in the
 * `sherpa-onnx-jni` native library.
 *
 * The native library is loaded when this class is first used. Construction
 * obtains a native handle, and every public method forwards to the matching
 * `external` function with that handle.
 *
 * @param assetManager when non-null the native object is created through
 *   `new(assetManager, config)`; when null it is created through
 *   `newFromFile(config)`.
 * @property config the configuration handed to the native constructor.
 */
class SherpaOnnxKws(
    assetManager: AssetManager? = null,
    var config: KeywordSpotterConfig,
) {
    // Handle of the native object; passed back to the native side on every call.
    private val ptr: Long =
        if (assetManager == null) {
            newFromFile(config)
        } else {
            new(assetManager, config)
        }

    /** Hands the native handle back to `delete` when this object is finalized. */
    protected fun finalize() {
        delete(ptr)
    }

    /** Forwards [samples] recorded at [sampleRate] to the native object. */
    fun acceptWaveform(samples: FloatArray, sampleRate: Int) {
        acceptWaveform(ptr, samples, sampleRate)
    }

    /** Forwards to the native `inputFinished`. */
    fun inputFinished() {
        inputFinished(ptr)
    }

    /** Forwards to the native `decode`. */
    fun decode() {
        decode(ptr)
    }

    /** Returns what the native `isReady` reports for this object. */
    fun isReady(): Boolean {
        return isReady(ptr)
    }

    /**
     * Forwards [keywords] to the native `reset` and returns its result.
     *
     * NOTE(review): the meaning of the returned flag is defined on the native
     * side and is not visible in this class.
     */
    fun reset(keywords: String): Boolean {
        return reset(ptr, keywords)
    }

    /** The keyword currently reported by the native object. */
    val keyword: String
        get() {
            return getKeyword(ptr)
        }

    // ---- native entry points (bound by name; do not rename) ----

    private external fun new(
        assetManager: AssetManager,
        config: KeywordSpotterConfig,
    ): Long

    private external fun newFromFile(
        config: KeywordSpotterConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)

    private external fun inputFinished(ptr: Long)

    private external fun decode(ptr: Long)

    private external fun isReady(ptr: Long): Boolean

    private external fun reset(ptr: Long, keywords: String): Boolean

    private external fun getKeyword(ptr: Long): String

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
/**
 * Builds a [FeatureConfig] from the given values.
 *
 * @param sampleRate sample rate to store in the config.
 * @param featureDim feature dimension to store in the config.
 * @return a new [FeatureConfig] holding exactly these two values.
 */
fun getFeatureConfig(sampleRate: Int, featureDim: Int): FeatureConfig =
    FeatureConfig(
        sampleRate = sampleRate,
        featureDim = featureDim,
    )
/**
 * Returns the model configuration for one of the bundled keyword-spotting
 * models, or `null` when [type] is not recognised.
 *
 * Please see
 * https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
 * for a list of pre-trained models.
 *
 * Only a few models are listed here. To add your own, add another entry to
 * the `when` below (and the matching entry in `getKeywordsFile`).
 *
 * @param type
 * 0 - sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 (Chinese)
 *     https://www.modelscope.cn/models/pkufool/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/summary
 * 1 - sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01 (English)
 *     https://www.modelscope.cn/models/pkufool/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/summary
 * @return the configuration for the selected model, or `null` for any other
 *   [type].
 */
fun getModelConfig(type: Int): OnlineModelConfig? {
    // Both supported models share the same file names and differ only in
    // the directory they live in.
    val modelDir = when (type) {
        0 -> "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01"
        1 -> "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"
        else -> return null
    }

    val transducerFiles = OnlineTransducerModelConfig(
        encoder = "$modelDir/encoder-epoch-12-avg-2-chunk-16-left-64.onnx",
        decoder = "$modelDir/decoder-epoch-12-avg-2-chunk-16-left-64.onnx",
        joiner = "$modelDir/joiner-epoch-12-avg-2-chunk-16-left-64.onnx",
    )

    return OnlineModelConfig(
        transducer = transducerFiles,
        tokens = "$modelDir/tokens.txt",
        modelType = "zipformer2",
    )
}
/**
 * Returns the path of the default keywords file for a bundled model.
 *
 * Caution: the type numbers and model directories must stay the same as
 * those used by `getModelConfig`.
 *
 * @param type model selector; 0 and 1 are recognised.
 * @return `<modelDir>/keywords.txt` for a recognised [type], or an empty
 *   string for any other value.
 */
fun getKeywordsFile(type: Int): String {
    val modelDir = when (type) {
        0 -> "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01"
        1 -> "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01"
        else -> return ""
    }
    return "$modelDir/keywords.txt"
}

View File

@@ -1,29 +0,0 @@
// Copyright (c) 2023 Xiaomi Corporation
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
/**
 * Entry points for reading wave files through the `sherpa-onnx-jni` native
 * library.
 *
 * Both readers are declared `external` inside the companion object, and the
 * companion's initializer loads the native library.
 */
class WaveReader {
companion object {
// Reads a mono wave file stored as an asset.
// The returned array has two entries:
// - the first entry contains a 1-D float array
// - the second entry is the sample rate
// Callers have to cast the entries themselves because the array is
// typed as Array<Any>.
external fun readWaveFromAsset(
assetManager: AssetManager,
filename: String,
): Array<Any>
// Reads a mono wave file from disk.
// The returned array has two entries:
// - the first entry contains a 1-D float array
// - the second entry is the sample rate
// Callers have to cast the entries themselves because the array is
// typed as Array<Any>.
external fun readWaveFromFile(
filename: String,
): Array<Any>
// Load the native library that implements the two functions above.
init {
System.loadLibrary("sherpa-onnx-jni")
}
}
}

View File

@@ -1,12 +1,12 @@
<resources>
<string name="app_name">KWS with Next-gen Kaldi</string>
<string name="app_name">Keyword spotting</string>
<string name="hint">Click the Start button to play keyword spotting with Next-gen Kaldi.
\n
\n\n\n
The source code and pre-trained models are publicly available.
Please see https://github.com/k2-fsa/sherpa-onnx for details.
</string>
<string name="keyword_hint">Input your keywords here, one keyword perline.</string>
<string name="keyword_hint">Input your keywords here, one keyword per line.\nTwo example keywords are given below:\n\nn ǐ h ǎo @你好\nd àn g ē d àn g ē @蛋哥蛋哥</string>
<string name="start">Start</string>
<string name="stop">Stop</string>
</resources>