Add two-pass speech recognition Android/iOS demo (#304)
@@ -0,0 +1,24 @@
|
||||
package com.k2fsa.sherpa.onnx
|
||||
|
||||
import androidx.test.platform.app.InstrumentationRegistry
|
||||
import androidx.test.ext.junit.runners.AndroidJUnit4
|
||||
|
||||
import org.junit.Test
|
||||
import org.junit.runner.RunWith
|
||||
|
||||
import org.junit.Assert.*
|
||||
|
||||
/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val targetContext = InstrumentationRegistry.getInstrumentation().targetContext
        val actualPackage = targetContext.packageName
        assertEquals("com.k2fsa.sherpa.onnx", actualPackage)
    }
}
|
||||
1
android/SherpaOnnx2Pass/app/src/main/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
*.so
|
||||
32
android/SherpaOnnx2Pass/app/src/main/AndroidManifest.xml
Normal file
@@ -0,0 +1,32 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:tools="http://schemas.android.com/tools">
|
||||
|
||||
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
||||
|
||||
<application
|
||||
android:allowBackup="true"
|
||||
android:dataExtractionRules="@xml/data_extraction_rules"
|
||||
android:fullBackupContent="@xml/backup_rules"
|
||||
android:icon="@mipmap/ic_launcher"
|
||||
android:label="@string/app_name"
|
||||
android:roundIcon="@mipmap/ic_launcher_round"
|
||||
android:supportsRtl="true"
|
||||
android:theme="@style/Theme.SherpaOnnx2Pass"
|
||||
tools:targetApi="31">
|
||||
<activity
|
||||
android:name=".MainActivity"
|
||||
android:exported="true">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN" />
|
||||
|
||||
<category android:name="android.intent.category.LAUNCHER" />
|
||||
</intent-filter>
|
||||
|
||||
<meta-data
|
||||
android:name="android.app.lib_name"
|
||||
android:value="" />
|
||||
</activity>
|
||||
</application>
|
||||
|
||||
</manifest>
|
||||
@@ -0,0 +1,251 @@
|
||||
package com.k2fsa.sherpa.onnx
|
||||
|
||||
import android.Manifest
|
||||
import android.content.pm.PackageManager
|
||||
import android.media.AudioFormat
|
||||
import android.media.AudioRecord
|
||||
import android.media.MediaRecorder
|
||||
import android.os.Bundle
|
||||
import android.text.method.ScrollingMovementMethod
|
||||
import android.util.Log
|
||||
import android.widget.Button
|
||||
import android.widget.TextView
|
||||
import androidx.appcompat.app.AppCompatActivity
|
||||
import androidx.core.app.ActivityCompat
|
||||
import kotlin.concurrent.thread
|
||||
|
||||
private const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200

/**
 * Two-pass speech recognition demo.
 *
 * While recording, a streaming (first-pass) recognizer shows partial results
 * in real time. When the streaming recognizer detects an endpoint, the audio
 * collected since the previous endpoint is re-decoded by a non-streaming
 * (second-pass) recognizer, whose result replaces the partial text for that
 * segment.
 */
class MainActivity : AppCompatActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    private lateinit var onlineRecognizer: SherpaOnnx // first pass, streaming
    private lateinit var offlineRecognizer: SherpaOnnxOffline // second pass, non-streaming
    private var audioRecord: AudioRecord? = null
    private lateinit var recordButton: Button
    private lateinit var textView: TextView
    private var recordingThread: Thread? = null

    private val audioSource = MediaRecorder.AudioSource.MIC
    private val sampleRateInHz = 16000
    private val channelConfig = AudioFormat.CHANNEL_IN_MONO

    // Audio accumulated since the last endpoint; input for the second pass.
    private var samplesBuffer = arrayListOf<FloatArray>()

    // Note: We don't use AudioFormat.ENCODING_PCM_FLOAT
    // since the AudioRecord.read(float[]) needs API level >= 23
    // but we are targeting API level >= 21
    private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
    private var idx: Int = 0 // index of the current utterance segment
    private var lastText: String = "" // finalized text of all previous segments

    @Volatile
    private var isRecording: Boolean = false

    override fun onRequestPermissionsResult(
        requestCode: Int, permissions: Array<String>, grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        // grantResults may be empty if the permission request was interrupted;
        // treat that as a denial instead of throwing IndexOutOfBoundsException.
        val permissionToRecordAccepted = requestCode == REQUEST_RECORD_AUDIO_PERMISSION &&
            grantResults.isNotEmpty() &&
            grantResults[0] == PackageManager.PERMISSION_GRANTED

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            finish()
        }

        Log.i(TAG, "Audio record is permitted")
    }

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)

        Log.i(TAG, "Start to initialize first-pass recognizer")
        initOnlineRecognizer()
        Log.i(TAG, "Finished initializing first-pass recognizer")

        Log.i(TAG, "Start to initialize second-pass recognizer")
        initOfflineRecognizer()
        Log.i(TAG, "Finished initializing second-pass recognizer")

        recordButton = findViewById(R.id.record_button)
        recordButton.setOnClickListener { onclick() }

        textView = findViewById(R.id.my_text)
        textView.movementMethod = ScrollingMovementMethod()
    }

    // Toggles recording on/off when the record button is pressed.
    private fun onclick() {
        if (!isRecording) {
            if (!initMicrophone()) {
                Log.e(TAG, "Failed to initialize microphone")
                return
            }
            Log.i(TAG, "state: ${audioRecord?.state}")
            audioRecord!!.startRecording()
            recordButton.setText(R.string.stop)
            isRecording = true
            // recreate = true: rebuild the decoding stream for a fresh session
            onlineRecognizer.reset(true)
            samplesBuffer.clear()
            textView.text = ""
            lastText = ""
            idx = 0

            recordingThread = thread(true) {
                processSamples()
            }
            Log.i(TAG, "Started recording")
        } else {
            isRecording = false
            audioRecord!!.stop()
            audioRecord!!.release()
            audioRecord = null
            recordButton.setText(R.string.start)
            Log.i(TAG, "Stopped recording")
        }
    }

    // Runs on the recording thread: reads microphone audio, feeds the
    // first-pass recognizer, and triggers the second pass on endpoints.
    private fun processSamples() {
        Log.i(TAG, "processing samples")

        val interval = 0.1 // i.e., 100 ms
        val bufferSize = (interval * sampleRateInHz).toInt() // in samples
        val buffer = ShortArray(bufferSize)

        while (isRecording) {
            val ret = audioRecord?.read(buffer, 0, buffer.size)
            if (ret != null && ret > 0) {
                // Convert 16-bit PCM to float in [-1, 1).
                val samples = FloatArray(ret) { buffer[it] / 32768.0f }
                samplesBuffer.add(samples)

                onlineRecognizer.acceptWaveform(samples, sampleRate = sampleRateInHz)
                while (onlineRecognizer.isReady()) {
                    onlineRecognizer.decode()
                }
                val isEndpoint = onlineRecognizer.isEndpoint()
                var textToDisplay = lastText

                var text = onlineRecognizer.text
                if (text.isNotBlank()) {
                    textToDisplay = if (lastText.isBlank()) {
                        "${idx}: ${text}"
                    } else {
                        "${lastText}\n${idx}: ${text}"
                    }
                }

                if (isEndpoint) {
                    onlineRecognizer.reset()

                    if (text.isNotBlank()) {
                        // Replace the first-pass partial result with the
                        // (usually more accurate) second-pass result.
                        text = runSecondPass()
                        lastText = "${lastText}\n${idx}: ${text}"
                        idx += 1
                    } else {
                        samplesBuffer.clear()
                    }
                }

                runOnUiThread {
                    textView.text = textToDisplay.lowercase()
                }
            }
        }
    }

    // Returns false (and requests permission) when RECORD_AUDIO is not granted.
    private fun initMicrophone(): Boolean {
        if (ActivityCompat.checkSelfPermission(
                this, Manifest.permission.RECORD_AUDIO
            ) != PackageManager.PERMISSION_GRANTED
        ) {
            ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
            return false
        }

        val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
        Log.i(
            TAG, "buffer size in milliseconds: ${numBytes * 1000.0f / sampleRateInHz}"
        )

        audioRecord = AudioRecord(
            audioSource,
            sampleRateInHz,
            channelConfig,
            audioFormat,
            numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
        )
        return true
    }

    private fun initOnlineRecognizer() {
        // Please change getModelConfig() to add new models
        // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
        // for a list of available models
        val firstType = 1
        println("Select model type ${firstType} for the first pass")
        val config = OnlineRecognizerConfig(
            featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
            modelConfig = getModelConfig(type = firstType)!!,
            endpointConfig = getEndpointConfig(),
            enableEndpoint = true,
        )

        onlineRecognizer = SherpaOnnx(
            assetManager = application.assets,
            config = config,
        )
    }

    private fun initOfflineRecognizer() {
        // Please change getOfflineModelConfig() to add new models
        // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
        // for a list of available models
        val secondType = 1
        println("Select model type ${secondType} for the second pass")

        val config = OfflineRecognizerConfig(
            featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
            modelConfig = getOfflineModelConfig(type = secondType)!!,
        )

        offlineRecognizer = SherpaOnnxOffline(
            assetManager = application.assets,
            config = config,
        )
    }

    // Decodes the audio collected since the last endpoint with the offline
    // recognizer and returns the recognized text. The trailing 0.5 s of audio
    // (8000 samples at 16 kHz; presumably endpoint silence — confirm) is kept
    // in samplesBuffer as lead-in for the next segment instead of being decoded.
    private fun runSecondPass(): String {
        var totalSamples = 0
        for (a in samplesBuffer) {
            totalSamples += a.size
        }

        val samples = FloatArray(totalSamples)
        var offset = 0
        for (a in samplesBuffer) {
            a.copyInto(samples, offset)
            offset += a.size
        }

        val n = maxOf(0, samples.size - 8000)

        samplesBuffer.clear()
        samplesBuffer.add(samples.sliceArray(n until samples.size))

        // Use half-open ranges so the sample at index n is kept for the next
        // segment but not decoded twice (the original 0..n / n..size-1 split
        // duplicated the boundary sample).
        return offlineRecognizer.decode(samples.sliceArray(0 until n), sampleRateInHz)
    }
}
|
||||
@@ -0,0 +1,375 @@
|
||||
package com.k2fsa.sherpa.onnx
|
||||
|
||||
import android.content.res.AssetManager
|
||||
|
||||
/** One endpointing rule; all fields together decide when the rule fires. */
data class EndpointRule(
    var mustContainNonSilence: Boolean,
    var minTrailingSilence: Float,
    var minUtteranceLength: Float,
)

/** Endpoint detection configuration; an endpoint fires when any rule matches. */
data class EndpointConfig(
    var rule1: EndpointRule = EndpointRule(false, 2.0f, 0.0f),
    var rule2: EndpointRule = EndpointRule(true, 1.2f, 0.0f),
    var rule3: EndpointRule = EndpointRule(false, 0.0f, 20.0f)
)

/** Paths to the three model files of a streaming transducer. */
data class OnlineTransducerModelConfig(
    var encoder: String = "",
    var decoder: String = "",
    var joiner: String = "",
)

/** Paths to the two model files of a streaming paraformer. */
data class OnlineParaformerModelConfig(
    var encoder: String = "",
    var decoder: String = "",
)

/**
 * Model configuration for the streaming (online) recognizer.
 * Exactly one of [transducer]/[paraformer] is expected to be populated.
 */
data class OnlineModelConfig(
    var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(),
    var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(),
    var tokens: String,
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
    var modelType: String = "",
)

/** Optional neural language model used for rescoring during online decoding. */
data class OnlineLMConfig(
    var model: String = "",
    var scale: Float = 0.5f,
)

/** Feature extraction configuration (fbank). */
data class FeatureConfig(
    var sampleRate: Int = 16000,
    var featureDim: Int = 80,
)

/** Top-level configuration for the streaming (online) recognizer. */
data class OnlineRecognizerConfig(
    var featConfig: FeatureConfig = FeatureConfig(),
    var modelConfig: OnlineModelConfig,
    var lmConfig: OnlineLMConfig = OnlineLMConfig(),
    var endpointConfig: EndpointConfig = EndpointConfig(),
    var enableEndpoint: Boolean = true,
    var decodingMethod: String = "greedy_search",
    var maxActivePaths: Int = 4,
)

/** Paths to the three model files of a non-streaming transducer. */
data class OfflineTransducerModelConfig(
    var encoder: String = "",
    var decoder: String = "",
    var joiner: String = "",
)

/** Path to the single model file of a non-streaming paraformer. */
data class OfflineParaformerModelConfig(
    var model: String = "",
)

/** Paths to the encoder/decoder model files of a Whisper model. */
data class OfflineWhisperModelConfig(
    var encoder: String = "",
    var decoder: String = "",
)

/**
 * Model configuration for the non-streaming (offline) recognizer.
 * Exactly one of [transducer]/[paraformer]/[whisper] is expected to be populated.
 */
data class OfflineModelConfig(
    var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(),
    var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(),
    var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(),
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
    var modelType: String = "",
    var tokens: String,
)

/** Top-level configuration for the non-streaming (offline) recognizer. */
data class OfflineRecognizerConfig(
    var featConfig: FeatureConfig = FeatureConfig(),
    var modelConfig: OfflineModelConfig,
    // var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it
    var decodingMethod: String = "greedy_search",
    var maxActivePaths: Int = 4,
)
|
||||
|
||||
/**
 * JNI wrapper around the streaming (online) recognizer in libsherpa-onnx-jni.
 *
 * When [assetManager] is non-null, model files in [config] are loaded from the
 * APK assets; otherwise they are read from the file system.
 */
class SherpaOnnx(
    assetManager: AssetManager? = null,
    var config: OnlineRecognizerConfig,
) {
    // Opaque handle to the native recognizer object.
    private val ptr: Long =
        if (assetManager != null) {
            new(assetManager, config)
        } else {
            newFromFile(config)
        }

    // Release the native object when this wrapper is garbage collected.
    protected fun finalize() {
        delete(ptr)
    }

    /** Feeds audio samples (floats in [-1, 1]) to the recognizer. */
    fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
        acceptWaveform(ptr, samples, sampleRate)

    /** Signals that no more audio will be provided. */
    fun inputFinished() = inputFinished(ptr)

    /** Resets the decoding stream; [recreate] rebuilds it from scratch. */
    fun reset(recreate: Boolean = false) = reset(ptr, recreate = recreate)

    /** Decodes whatever feature frames are currently available. */
    fun decode() = decode(ptr)

    /** True when an endpoint has been detected in the current stream. */
    fun isEndpoint(): Boolean = isEndpoint(ptr)

    /** True when enough frames are buffered for another [decode] call. */
    fun isReady(): Boolean = isReady(ptr)

    /** Current partial recognition result. */
    val text: String
        get() = getText(ptr)

    private external fun delete(ptr: Long)

    private external fun new(
        assetManager: AssetManager,
        config: OnlineRecognizerConfig,
    ): Long

    private external fun newFromFile(
        config: OnlineRecognizerConfig,
    ): Long

    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
    private external fun inputFinished(ptr: Long)
    private external fun getText(ptr: Long): String
    private external fun reset(ptr: Long, recreate: Boolean)
    private external fun decode(ptr: Long)
    private external fun isEndpoint(ptr: Long): Boolean
    private external fun isReady(ptr: Long): Boolean

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
|
||||
|
||||
/**
 * JNI wrapper around the non-streaming (offline) recognizer in
 * libsherpa-onnx-jni.
 *
 * When [assetManager] is non-null, model files in [config] are loaded from the
 * APK assets; otherwise they are read from the file system.
 */
class SherpaOnnxOffline(
    assetManager: AssetManager? = null,
    var config: OfflineRecognizerConfig,
) {
    // Opaque handle to the native recognizer object.
    private val ptr: Long =
        if (assetManager != null) {
            new(assetManager, config)
        } else {
            newFromFile(config)
        }

    // Release the native object when this wrapper is garbage collected.
    protected fun finalize() {
        delete(ptr)
    }

    /** Decodes a complete utterance (floats in [-1, 1]) and returns its text. */
    fun decode(samples: FloatArray, sampleRate: Int) = decode(ptr, samples, sampleRate)

    private external fun delete(ptr: Long)

    private external fun new(
        assetManager: AssetManager,
        config: OfflineRecognizerConfig,
    ): Long

    private external fun newFromFile(
        config: OfflineRecognizerConfig,
    ): Long

    private external fun decode(ptr: Long, samples: FloatArray, sampleRate: Int): String

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
|
||||
|
||||
/** Builds a [FeatureConfig] for the given sample rate and feature dimension. */
fun getFeatureConfig(sampleRate: Int, featureDim: Int): FeatureConfig =
    FeatureConfig(sampleRate = sampleRate, featureDim = featureDim)
|
||||
|
||||
/*
Please see
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models.

We only add a few here. Please change the following code
to add your own. (It should be straightforward to add a new model
by following the code)

@param type
  0 - csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23 (Chinese)
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23
      encoder/joiner int8, decoder float32

  1 - csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17 (English)
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-20m-2023-02-17-english
      encoder/joiner int8, decoder fp32
*/
fun getModelConfig(type: Int): OnlineModelConfig? {
    // Returns null for unknown types so callers can detect misconfiguration.
    return when (type) {
        0 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"
            OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
            )
        }

        1 -> {
            val modelDir = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
            OnlineModelConfig(
                transducer = OnlineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
            )
        }

        else -> null
    }
}
|
||||
|
||||
/*
Please see
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models.

We only add a few here. Please change the following code
to add your own LM model. (It should be straightforward to train a new NN LM model
by following the code, https://github.com/k2-fsa/icefall/blob/master/icefall/rnn_lm/train.py)

@param type
  0 - sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
*/
fun getOnlineLMConfig(type: Int): OnlineLMConfig {
    if (type == 0) {
        val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"
        return OnlineLMConfig(
            model = "$modelDir/with-state-epoch-99-avg-1.int8.onnx",
            scale = 0.5f,
        )
    }
    // Unknown type: fall back to the default (empty) LM config.
    return OnlineLMConfig()
}
|
||||
|
||||
// for English models, use a small value for rule2.minTrailingSilence, e.g., 0.8
/** Endpoint rules used by the first-pass (streaming) recognizer. */
fun getEndpointConfig(): EndpointConfig =
    EndpointConfig(
        rule1 = EndpointRule(false, 2.4f, 0.0f),
        rule2 = EndpointRule(true, 0.8f, 0.0f),
        rule3 = EndpointRule(false, 0.0f, 20.0f)
    )
|
||||
|
||||
/*
Please see
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models.

We only add a few here. Please change the following code
to add your own. (It should be straightforward to add a new model
by following the code)

@param type

  0 - csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 (Chinese)
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese
      int8

  1 - icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04 (English)
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english
      encoder int8, decoder/joiner float32

  2 - sherpa-onnx-whisper-tiny.en
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
      encoder int8, decoder int8

  3 - sherpa-onnx-whisper-base.en
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/base.en.html#base-en
      encoder int8, decoder int8

  4 - pkufool/icefall-asr-zipformer-wenetspeech-20230615 (Chinese)
      https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-wenetspeech-20230615-chinese
      encoder/joiner int8, decoder fp32

*/
fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
    // Returns null for unknown types so callers can detect misconfiguration.
    return when (type) {
        0 -> {
            val modelDir = "sherpa-onnx-paraformer-zh-2023-03-28"
            OfflineModelConfig(
                paraformer = OfflineParaformerModelConfig(
                    model = "$modelDir/model.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "paraformer",
            )
        }

        1 -> {
            val modelDir = "icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04"
            OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-30-avg-4.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-30-avg-4.onnx",
                    joiner = "$modelDir/joiner-epoch-30-avg-4.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
            )
        }

        2 -> {
            val modelDir = "sherpa-onnx-whisper-tiny.en"
            OfflineModelConfig(
                whisper = OfflineWhisperModelConfig(
                    encoder = "$modelDir/tiny.en-encoder.int8.onnx",
                    decoder = "$modelDir/tiny.en-decoder.int8.onnx",
                ),
                tokens = "$modelDir/tiny.en-tokens.txt",
                modelType = "whisper",
            )
        }

        3 -> {
            val modelDir = "sherpa-onnx-whisper-base.en"
            OfflineModelConfig(
                whisper = OfflineWhisperModelConfig(
                    encoder = "$modelDir/base.en-encoder.int8.onnx",
                    decoder = "$modelDir/base.en-decoder.int8.onnx",
                ),
                tokens = "$modelDir/base.en-tokens.txt",
                modelType = "whisper",
            )
        }

        4 -> {
            val modelDir = "icefall-asr-zipformer-wenetspeech-20230615"
            OfflineModelConfig(
                transducer = OfflineTransducerModelConfig(
                    encoder = "$modelDir/encoder-epoch-12-avg-4.int8.onnx",
                    decoder = "$modelDir/decoder-epoch-12-avg-4.onnx",
                    joiner = "$modelDir/joiner-epoch-12-avg-4.int8.onnx",
                ),
                tokens = "$modelDir/tokens.txt",
                modelType = "zipformer",
            )
        }

        else -> null
    }
}
|
||||
@@ -0,0 +1 @@
|
||||
../../../../../../../../../SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt
|
||||
@@ -0,0 +1,30 @@
|
||||
<vector xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:aapt="http://schemas.android.com/aapt"
|
||||
android:width="108dp"
|
||||
android:height="108dp"
|
||||
android:viewportWidth="108"
|
||||
android:viewportHeight="108">
|
||||
<path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
|
||||
<aapt:attr name="android:fillColor">
|
||||
<gradient
|
||||
android:endX="85.84757"
|
||||
android:endY="92.4963"
|
||||
android:startX="42.9492"
|
||||
android:startY="49.59793"
|
||||
android:type="linear">
|
||||
<item
|
||||
android:color="#44000000"
|
||||
android:offset="0.0" />
|
||||
<item
|
||||
android:color="#00000000"
|
||||
android:offset="1.0" />
|
||||
</gradient>
|
||||
</aapt:attr>
|
||||
</path>
|
||||
<path
|
||||
android:fillColor="#FFFFFF"
|
||||
android:fillType="nonZero"
|
||||
android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
|
||||
android:strokeWidth="1"
|
||||
android:strokeColor="#00000000" />
|
||||
</vector>
|
||||
@@ -0,0 +1,170 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<vector xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
android:width="108dp"
|
||||
android:height="108dp"
|
||||
android:viewportWidth="108"
|
||||
android:viewportHeight="108">
|
||||
<path
|
||||
android:fillColor="#3DDC84"
|
||||
android:pathData="M0,0h108v108h-108z" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M9,0L9,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,0L19,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M29,0L29,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M39,0L39,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M49,0L49,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M59,0L59,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M69,0L69,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M79,0L79,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M89,0L89,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M99,0L99,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,9L108,9"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,19L108,19"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,29L108,29"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,39L108,39"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,49L108,49"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,59L108,59"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,69L108,69"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,79L108,79"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,89L108,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,99L108,99"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,29L89,29"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,39L89,39"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,49L89,49"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,59L89,59"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,69L89,69"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,79L89,79"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M29,19L29,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M39,19L39,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M49,19L49,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M59,19L59,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M69,19L69,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M79,19L79,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
</vector>
|
||||
@@ -0,0 +1,39 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:app="http://schemas.android.com/apk/res-auto"
|
||||
xmlns:tools="http://schemas.android.com/tools"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="match_parent"
|
||||
tools:context=".MainActivity">
|
||||
|
||||
<LinearLayout
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="match_parent"
|
||||
android:gravity="center"
|
||||
android:orientation="vertical">
|
||||
|
||||
<TextView
|
||||
android:id="@+id/my_text"
|
||||
android:layout_width="match_parent"
|
||||
android:layout_height="match_parent"
|
||||
android:layout_weight="2.5"
|
||||
android:padding="24dp"
|
||||
android:scrollbars="vertical"
|
||||
android:singleLine="false"
|
||||
android:text="@string/hint"
|
||||
app:layout_constraintBottom_toBottomOf="parent"
|
||||
app:layout_constraintEnd_toEndOf="parent"
|
||||
app:layout_constraintStart_toStartOf="parent"
|
||||
android:gravity="bottom"
|
||||
app:layout_constraintTop_toTopOf="parent" />
|
||||
|
||||
<Button
|
||||
android:id="@+id/record_button"
|
||||
android:layout_width="wrap_content"
|
||||
android:layout_height="wrap_content"
|
||||
android:layout_weight="0.5"
|
||||
android:text="@string/start" />
|
||||
</LinearLayout>
|
||||
|
||||
|
||||
</androidx.constraintlayout.widget.ConstraintLayout>
|
||||
@@ -0,0 +1,5 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||
<background android:drawable="@drawable/ic_launcher_background" />
|
||||
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
||||
</adaptive-icon>
|
||||
@@ -0,0 +1,5 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||
<background android:drawable="@drawable/ic_launcher_background" />
|
||||
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
||||
</adaptive-icon>
|
||||
|
After Width: | Height: | Size: 1.4 KiB |
|
After Width: | Height: | Size: 2.8 KiB |
|
After Width: | Height: | Size: 982 B |
|
After Width: | Height: | Size: 1.7 KiB |
|
After Width: | Height: | Size: 1.9 KiB |
|
After Width: | Height: | Size: 3.8 KiB |
|
After Width: | Height: | Size: 2.8 KiB |
|
After Width: | Height: | Size: 5.8 KiB |
|
After Width: | Height: | Size: 3.8 KiB |
|
After Width: | Height: | Size: 7.6 KiB |
@@ -0,0 +1,16 @@
|
||||
<resources xmlns:tools="http://schemas.android.com/tools">
|
||||
<!-- Base application theme. -->
|
||||
<style name="Theme.SherpaOnnx2Pass" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
|
||||
<!-- Primary brand color. -->
|
||||
<item name="colorPrimary">@color/purple_200</item>
|
||||
<item name="colorPrimaryVariant">@color/purple_700</item>
|
||||
<item name="colorOnPrimary">@color/black</item>
|
||||
<!-- Secondary brand color. -->
|
||||
<item name="colorSecondary">@color/teal_200</item>
|
||||
<item name="colorSecondaryVariant">@color/teal_200</item>
|
||||
<item name="colorOnSecondary">@color/black</item>
|
||||
<!-- Status bar color. -->
|
||||
<item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
|
||||
<!-- Customize your theme here. -->
|
||||
</style>
|
||||
</resources>
|
||||
10
android/SherpaOnnx2Pass/app/src/main/res/values/colors.xml
Normal file
@@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<resources>
|
||||
<color name="purple_200">#FFBB86FC</color>
|
||||
<color name="purple_500">#FF6200EE</color>
|
||||
<color name="purple_700">#FF3700B3</color>
|
||||
<color name="teal_200">#FF03DAC5</color>
|
||||
<color name="teal_700">#FF018786</color>
|
||||
<color name="black">#FF000000</color>
|
||||
<color name="white">#FFFFFFFF</color>
|
||||
</resources>
|
||||
13
android/SherpaOnnx2Pass/app/src/main/res/values/strings.xml
Normal file
@@ -0,0 +1,13 @@
|
||||
<resources>
|
||||
<string name="app_name">ASR with Next-gen Kaldi</string>
|
||||
<string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi.
|
||||
\n
|
||||
\n\n\n
|
||||
The source code and pre-trained models are publicly available.
|
||||
Please see https://github.com/k2-fsa/sherpa-onnx for details.
|
||||
\n\n
|
||||
Two-pass speech recognition with Next-gen Kaldi.
|
||||
</string>
|
||||
<string name="start">Start</string>
|
||||
<string name="stop">Stop</string>
|
||||
</resources>
|
||||
16
android/SherpaOnnx2Pass/app/src/main/res/values/themes.xml
Normal file
@@ -0,0 +1,16 @@
|
||||
<resources xmlns:tools="http://schemas.android.com/tools">
|
||||
<!-- Base application theme. -->
|
||||
<style name="Theme.SherpaOnnx2Pass" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
|
||||
<!-- Primary brand color. -->
|
||||
<item name="colorPrimary">@color/purple_500</item>
|
||||
<item name="colorPrimaryVariant">@color/purple_700</item>
|
||||
<item name="colorOnPrimary">@color/white</item>
|
||||
<!-- Secondary brand color. -->
|
||||
<item name="colorSecondary">@color/teal_200</item>
|
||||
<item name="colorSecondaryVariant">@color/teal_700</item>
|
||||
<item name="colorOnSecondary">@color/black</item>
|
||||
<!-- Status bar color. -->
|
||||
<item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
|
||||
<!-- Customize your theme here. -->
|
||||
</style>
|
||||
</resources>
|
||||
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="utf-8"?><!--
|
||||
Sample backup rules file; uncomment and customize as necessary.
|
||||
See https://developer.android.com/guide/topics/data/autobackup
|
||||
for details.
|
||||
Note: This file is ignored for devices older that API 31
|
||||
See https://developer.android.com/about/versions/12/backup-restore
|
||||
-->
|
||||
<full-backup-content>
|
||||
<!--
|
||||
<include domain="sharedpref" path="."/>
|
||||
<exclude domain="sharedpref" path="device.xml"/>
|
||||
-->
|
||||
</full-backup-content>
|
||||
@@ -0,0 +1,19 @@
|
||||
<?xml version="1.0" encoding="utf-8"?><!--
|
||||
Sample data extraction rules file; uncomment and customize as necessary.
|
||||
See https://developer.android.com/about/versions/12/backup-restore#xml-changes
|
||||
for details.
|
||||
-->
|
||||
<data-extraction-rules>
|
||||
<cloud-backup>
|
||||
<!-- TODO: Use <include> and <exclude> to control what is backed up.
|
||||
<include .../>
|
||||
<exclude .../>
|
||||
-->
|
||||
</cloud-backup>
|
||||
<!--
|
||||
<device-transfer>
|
||||
<include .../>
|
||||
<exclude .../>
|
||||
</device-transfer>
|
||||
-->
|
||||
</data-extraction-rules>
|
||||
@@ -0,0 +1,17 @@
|
||||
package com.k2fsa.sherpa.onnx
|
||||
|
||||
import org.junit.Test
|
||||
|
||||
import org.junit.Assert.*
|
||||
|
||||
/**
|
||||
* Example local unit test, which will execute on the development machine (host).
|
||||
*
|
||||
* See [testing documentation](http://d.android.com/tools/testing).
|
||||
*/
|
||||
class ExampleUnitTest {
|
||||
@Test
|
||||
fun addition_isCorrect() {
|
||||
assertEquals(4, 2 + 2)
|
||||
}
|
||||
}
|
||||