package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

/**
 * Recognition result for one [OfflineStream], as unpacked by [OfflineRecognizer.getResult].
 *
 * Note: this is a data class holding arrays, so the generated `equals`/`hashCode`
 * compare [tokens] and [timestamps] by reference, not by content.
 *
 * @property text element 0 of the native result array
 * @property tokens element 1 of the native result array
 * @property timestamps element 2 of the native result array
 *   (NOTE(review): presumably one time value per token — confirm against the JNI side)
 */
data class OfflineRecognizerResult(
    val text: String,
    val tokens: Array<String>,
    val timestamps: FloatArray,
)

/** Model file locations for a transducer model (encoder, decoder and joiner). */
data class OfflineTransducerModelConfig(
    var encoder: String = "",
    var decoder: String = "",
    var joiner: String = "",
)

/** Model file location for a Paraformer model. */
data class OfflineParaformerModelConfig(
    var model: String = "",
)

/** Model file location for a NeMo EncDec CTC model. */
data class OfflineNemoEncDecCtcModelConfig(
    var model: String = "",
)

/** Model file locations and decoding options for a Whisper model. */
data class OfflineWhisperModelConfig(
    var encoder: String = "",
    var decoder: String = "",
    var language: String = "en", // Used with multilingual model
    var task: String = "transcribe", // transcribe or translate
    var tailPaddings: Int = 1000, // Padding added at the end of the samples
)

/**
 * Aggregated model configuration handed to the native recognizer.
 *
 * Every model family has an empty default; [tokens] is the only field without a
 * default and must always be supplied.
 */
data class OfflineModelConfig(
    var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(),
    var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(),
    var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(),
    var nemo: OfflineNemoEncDecCtcModelConfig = OfflineNemoEncDecCtcModelConfig(),
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
    var modelType: String = "",
    var tokens: String,
)

/**
 * Top-level configuration for [OfflineRecognizer].
 *
 * [modelConfig] has no default and must always be supplied.
 */
data class OfflineRecognizerConfig(
    var featConfig: FeatureConfig = FeatureConfig(),
    var modelConfig: OfflineModelConfig,
    // var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it
    var decodingMethod: String = "greedy_search",
    var maxActivePaths: Int = 4,
    var hotwordsFile: String = "",
    var hotwordsScore: Float = 1.5f,
)

/**
 * Kotlin wrapper around the native offline recognizer exposed by `sherpa-onnx-jni`.
 *
 * @param assetManager when non-null the native object is created with `newFromAsset`;
 *   when null it is created with `newFromFile`
 * @param config recognizer configuration passed to the native constructor
 */
class OfflineRecognizer(
    assetManager: AssetManager? = null,
    config: OfflineRecognizerConfig,
) {
    // Handle of the native recognizer. It is reset to 0 after the native object has
    // been deleted so that release() followed by GC finalization frees it only once.
    private var ptr: Long =
        if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }

    /** Deletes the native recognizer if it has not been deleted yet. */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0L
        }
    }

    /** Frees the native recognizer eagerly; calling it more than once is harmless. */
    fun release() = finalize()

    /** Creates a new [OfflineStream] wrapping a native stream made by this recognizer. */
    fun createStream(): OfflineStream {
        val p = createStream(ptr)
        return OfflineStream(p)
    }

    /**
     * Fetches the recognition result of [stream].
     *
     * The native call returns an object array laid out as
     * `[text: String, tokens: Array<String>, timestamps: FloatArray]`.
     */
    fun getResult(stream: OfflineStream): OfflineRecognizerResult {
        val objArray = getResult(stream.ptr)

        val text = objArray[0] as String
        val tokens = objArray[1] as Array<String>
        val timestamps = objArray[2] as FloatArray

        return OfflineRecognizerResult(text = text, tokens = tokens, timestamps = timestamps)
    }

    /** Runs the native decoder of this recognizer on [stream]. */
    fun decode(stream: OfflineStream) = decode(ptr, stream.ptr)

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OfflineRecognizerConfig,
    ): Long

    private external fun newFromFile(
        config: OfflineRecognizerConfig,
    ): Long

    private external fun decode(ptr: Long, streamPtr: Long)

    private external fun getResult(streamPtr: Long): Array<Any>

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}

/*
Please see
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models.
We only add a few here. Please change the following code
to add your own.
(It should be straightforward to add a new model by following the code)

@param type
0 - csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 (Chinese)
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese
    int8

1 - icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04 (English)
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english
    encoder int8, decoder/joiner float32

2 - sherpa-onnx-whisper-tiny.en
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
    encoder int8, decoder int8

3 - sherpa-onnx-whisper-base.en
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
    encoder int8, decoder int8

4 - pkufool/icefall-asr-zipformer-wenetspeech-20230615 (Chinese)
    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-wenetspeech-20230615-chinese
    encoder/joiner int8, decoder fp32

5 - sherpa-onnx-zipformer-multi-zh-hans-2023-9-2
    encoder/joiner use the .int8.onnx files, decoder uses the plain .onnx file

6 - sherpa-onnx-nemo-ctc-en-citrinet-512
    model.int8.onnx; modelType is not set for this entry

Any other value of type yields null.
 */
fun getOfflineModelConfig(type: Int): OfflineModelConfig? = when (type) {
    // Paraformer
    0 -> {
        val modelDir = "sherpa-onnx-paraformer-zh-2023-03-28"
        OfflineModelConfig(
            paraformer = OfflineParaformerModelConfig(
                model = "$modelDir/model.int8.onnx",
            ),
            tokens = "$modelDir/tokens.txt",
            modelType = "paraformer",
        )
    }

    // Transducer (zipformer)
    1 -> {
        val modelDir = "icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04"
        OfflineModelConfig(
            transducer = OfflineTransducerModelConfig(
                encoder = "$modelDir/encoder-epoch-30-avg-4.int8.onnx",
                decoder = "$modelDir/decoder-epoch-30-avg-4.onnx",
                joiner = "$modelDir/joiner-epoch-30-avg-4.onnx",
            ),
            tokens = "$modelDir/tokens.txt",
            modelType = "zipformer",
        )
    }

    // Whisper tiny.en
    2 -> {
        val modelDir = "sherpa-onnx-whisper-tiny.en"
        OfflineModelConfig(
            whisper = OfflineWhisperModelConfig(
                encoder = "$modelDir/tiny.en-encoder.int8.onnx",
                decoder = "$modelDir/tiny.en-decoder.int8.onnx",
            ),
            tokens = "$modelDir/tiny.en-tokens.txt",
            modelType = "whisper",
        )
    }

    // Whisper base.en
    3 -> {
        val modelDir = "sherpa-onnx-whisper-base.en"
        OfflineModelConfig(
            whisper = OfflineWhisperModelConfig(
                encoder = "$modelDir/base.en-encoder.int8.onnx",
                decoder = "$modelDir/base.en-decoder.int8.onnx",
            ),
            tokens = "$modelDir/base.en-tokens.txt",
            modelType = "whisper",
        )
    }

    // Transducer (zipformer, wenetspeech)
    4 -> {
        val modelDir = "icefall-asr-zipformer-wenetspeech-20230615"
        OfflineModelConfig(
            transducer = OfflineTransducerModelConfig(
                encoder = "$modelDir/encoder-epoch-12-avg-4.int8.onnx",
                decoder = "$modelDir/decoder-epoch-12-avg-4.onnx",
                joiner = "$modelDir/joiner-epoch-12-avg-4.int8.onnx",
            ),
            tokens = "$modelDir/tokens.txt",
            modelType = "zipformer",
        )
    }

    // Transducer (zipformer2)
    5 -> {
        val modelDir = "sherpa-onnx-zipformer-multi-zh-hans-2023-9-2"
        OfflineModelConfig(
            transducer = OfflineTransducerModelConfig(
                encoder = "$modelDir/encoder-epoch-20-avg-1.int8.onnx",
                decoder = "$modelDir/decoder-epoch-20-avg-1.onnx",
                joiner = "$modelDir/joiner-epoch-20-avg-1.int8.onnx",
            ),
            tokens = "$modelDir/tokens.txt",
            modelType = "zipformer2",
        )
    }

    // NeMo CTC; modelType intentionally left at its default here
    6 -> {
        val modelDir = "sherpa-onnx-nemo-ctc-en-citrinet-512"
        OfflineModelConfig(
            nemo = OfflineNemoEncDecCtcModelConfig(
                model = "$modelDir/model.int8.onnx",
            ),
            tokens = "$modelDir/tokens.txt",
        )
    }

    // Unknown model index
    else -> null
}