/// Runs one non-streaming Paraformer recognition on the bundled test wave
/// and prints the recognizer config, the decoded result and the native
/// recognizer pointer.
///
/// The model, token table and wave file are first passed through
/// `copyAssetFile`; the values it returns are what the recognizer and
/// `readWave` receive (presumably on-disk paths - confirm in utils.dart).
///
/// The native stream and recognizer are released in `finally` blocks so that
/// they are freed even when reading the wave or decoding throws.
Future<void> testNonStreamingParaformerAsr() async {
  const assetDir = 'assets/sherpa-onnx-paraformer-zh-2023-03-28';

  final model = await copyAssetFile(
      src: '$assetDir/model.int8.onnx', dst: 'model.int8.onnx');
  final tokens =
      await copyAssetFile(src: '$assetDir/tokens.txt', dst: 'tokens.txt');
  final testWave =
      await copyAssetFile(src: '$assetDir/test_wavs/0.wav', dst: '0.wav');

  final config = sherpa_onnx.OfflineRecognizerConfig(
    model: sherpa_onnx.OfflineModelConfig(
      paraformer: sherpa_onnx.OfflineParaformerModelConfig(model: model),
      tokens: tokens,
      modelType: 'paraformer',
    ),
  );
  print(config);

  final recognizer = sherpa_onnx.OfflineRecognizer(config);
  try {
    final waveData = sherpa_onnx.readWave(testWave);
    final stream = recognizer.createStream();
    try {
      stream.acceptWaveform(
          samples: waveData.samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      print('result is: $result');

      print('recognizer: ${recognizer.ptr}');
    } finally {
      // Free the stream before the recognizer, as the original did.
      stream.free();
    }
  } finally {
    recognizer.free();
  }
}
/// Immutable pair of integers describing the audio features a recognizer
/// is configured with.
///
/// Both values are optional at construction time: [sampleRate] falls back
/// to 16000 and [featureDim] to 80.
class FeatureConfig {
  /// Sample rate of the input audio (presumably in Hz - confirm).
  final int sampleRate;

  /// Dimension of each feature frame.
  final int featureDim;

  const FeatureConfig({
    this.sampleRate = 16000,
    this.featureDim = 80,
  });

  @override
  String toString() =>
      'FeatureConfig(sampleRate: $sampleRate, featureDim: $featureDim)';
}
'package:ffi/ffi.dart'; + +import './feature_config.dart'; +import './offline_stream.dart'; +import './sherpa_onnx_bindings.dart'; + +class OfflineTransducerModelConfig { + const OfflineTransducerModelConfig({ + this.encoder = '', + this.decoder = '', + this.joiner = '', + }); + + @override + String toString() { + return 'OfflineTransducerModelConfig(encoder: $encoder, decoder: $decoder, joiner: $joiner)'; + } + + final String encoder; + final String decoder; + final String joiner; +} + +class OfflineParaformerModelConfig { + const OfflineParaformerModelConfig({this.model = ''}); + + @override + String toString() { + return 'OfflineParaformerModelConfig(model: $model)'; + } + + final String model; +} + +class OfflineNemoEncDecCtcModelConfig { + const OfflineNemoEncDecCtcModelConfig({this.model = ''}); + + @override + String toString() { + return 'OfflineNemoEncDecCtcModelConfig(model: $model)'; + } + + final String model; +} + +class OfflineWhisperModelConfig { + const OfflineWhisperModelConfig( + {this.encoder = '', + this.decoder = '', + this.language = '', + this.task = '', + this.tailPaddings = -1}); + + @override + String toString() { + return 'OfflineWhisperModelConfig(encoder: $encoder, decoder: $decoder, language: $language, task: $task, tailPaddings: $tailPaddings)'; + } + + final String encoder; + final String decoder; + final String language; + final String task; + final int tailPaddings; +} + +class OfflineTdnnModelConfig { + const OfflineTdnnModelConfig({this.model = ''}); + + @override + String toString() { + return 'OfflineTdnnModelConfig(model: $model)'; + } + + final String model; +} + +class OfflineLMConfig { + const OfflineLMConfig({this.model = '', this.scale = 1.0}); + + @override + String toString() { + return 'OfflineLMConfig(model: $model, scale: $scale)'; + } + + final String model; + final double scale; +} + +class OfflineModelConfig { + const OfflineModelConfig({ + this.transducer = const OfflineTransducerModelConfig(), + this.paraformer 
= const OfflineParaformerModelConfig(), + this.nemoCtc = const OfflineNemoEncDecCtcModelConfig(), + this.whisper = const OfflineWhisperModelConfig(), + this.tdnn = const OfflineTdnnModelConfig(), + required this.tokens, + this.numThreads = 1, + this.debug = true, + this.provider = 'cpu', + this.modelType = '', + }); + + @override + String toString() { + return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType)'; + } + + final OfflineTransducerModelConfig transducer; + final OfflineParaformerModelConfig paraformer; + final OfflineNemoEncDecCtcModelConfig nemoCtc; + final OfflineWhisperModelConfig whisper; + final OfflineTdnnModelConfig tdnn; + + final String tokens; + final int numThreads; + final bool debug; + final String provider; + final String modelType; +} + +class OfflineRecognizerConfig { + const OfflineRecognizerConfig({ + this.feat = const FeatureConfig(), + required this.model, + this.lm = const OfflineLMConfig(), + this.decodingMethod = 'greedy_search', + this.maxActivePaths = 4, + this.hotwordsFile = '', + this.hotwordsScore = 1.5, + }); + + @override + String toString() { + return 'OfflineRecognizerConfig(feat: $feat, model: $model, lm: $lm, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore)'; + } + + final FeatureConfig feat; + final OfflineModelConfig model; + final OfflineLMConfig lm; + final String decodingMethod; + + final int maxActivePaths; + + final String hotwordsFile; + + final double hotwordsScore; +} + +class OfflineRecognizerResult { + OfflineRecognizerResult( + {required this.text, required this.tokens, required this.timestamps}); + + @override + String toString() { + return 'OfflineRecognizerResult(text: $text, tokens: $tokens, timestamps: $timestamps)'; + } + + final String text; + final List 
tokens; + final List timestamps; +} + +class OfflineRecognizer { + OfflineRecognizer._({required this.ptr, required this.config}); + + void free() { + SherpaOnnxBindings.destroyOfflineRecognizer?.call(ptr); + ptr = nullptr; + } + + /// The user is responsible to call the OfflineRecognizer.free() + /// method of the returned instance to avoid memory leak. + factory OfflineRecognizer(OfflineRecognizerConfig config) { + final c = calloc(); + + c.ref.feat.sampleRate = config.feat.sampleRate; + c.ref.feat.featureDim = config.feat.featureDim; + + // transducer + c.ref.model.transducer.encoder = + config.model.transducer.encoder.toNativeUtf8(); + c.ref.model.transducer.decoder = + config.model.transducer.decoder.toNativeUtf8(); + c.ref.model.transducer.joiner = + config.model.transducer.joiner.toNativeUtf8(); + + // paraformer + c.ref.model.paraformer.model = config.model.paraformer.model.toNativeUtf8(); + + // nemoCtc + c.ref.model.nemoCtc.model = config.model.nemoCtc.model.toNativeUtf8(); + + // whisper + c.ref.model.whisper.encoder = config.model.whisper.encoder.toNativeUtf8(); + + c.ref.model.whisper.decoder = config.model.whisper.decoder.toNativeUtf8(); + + c.ref.model.whisper.language = config.model.whisper.language.toNativeUtf8(); + + c.ref.model.whisper.task = config.model.whisper.task.toNativeUtf8(); + + c.ref.model.whisper.tailPaddings = config.model.whisper.tailPaddings; + + c.ref.model.tdnn.model = config.model.tdnn.model.toNativeUtf8(); + + c.ref.model.tokens = config.model.tokens.toNativeUtf8(); + + c.ref.model.numThreads = config.model.numThreads; + c.ref.model.debug = config.model.debug ? 
1 : 0; + c.ref.model.provider = config.model.provider.toNativeUtf8(); + c.ref.model.modelType = config.model.modelType.toNativeUtf8(); + + c.ref.lm.model = config.lm.model.toNativeUtf8(); + c.ref.lm.scale = config.lm.scale; + + c.ref.decodingMethod = config.decodingMethod.toNativeUtf8(); + c.ref.maxActivePaths = config.maxActivePaths; + + c.ref.hotwordsFile = config.hotwordsFile.toNativeUtf8(); + c.ref.hotwordsScore = config.hotwordsScore; + + final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr; + + calloc.free(c.ref.hotwordsFile); + calloc.free(c.ref.decodingMethod); + calloc.free(c.ref.lm.model); + calloc.free(c.ref.model.modelType); + calloc.free(c.ref.model.provider); + calloc.free(c.ref.model.tokens); + calloc.free(c.ref.model.tdnn.model); + calloc.free(c.ref.model.whisper.task); + calloc.free(c.ref.model.whisper.language); + calloc.free(c.ref.model.whisper.decoder); + calloc.free(c.ref.model.whisper.encoder); + calloc.free(c.ref.model.nemoCtc.model); + calloc.free(c.ref.model.paraformer.model); + calloc.free(c.ref.model.transducer.encoder); + calloc.free(c.ref.model.transducer.decoder); + calloc.free(c.ref.model.transducer.joiner); + calloc.free(c); + + return OfflineRecognizer._(ptr: ptr, config: config); + } + + /// The user has to invoke stream.free() on the returned instance + /// to avoid memory leak + OfflineStream createStream() { + final p = SherpaOnnxBindings.createOfflineStream?.call(ptr) ?? nullptr; + return OfflineStream(ptr: p); + } + + void decode(OfflineStream stream) { + SherpaOnnxBindings.decodeOfflineStream?.call(ptr, stream.ptr); + } + + OfflineRecognizerResult getResult(OfflineStream stream) { + final json = + SherpaOnnxBindings.getOfflineStreamResultAsJson?.call(stream.ptr) ?? 
+ nullptr; + if (json == null) { + return OfflineRecognizerResult(text: '', tokens: [], timestamps: []); + } + + final parsedJson = jsonDecode(json.toDartString()); + + SherpaOnnxBindings.destroyOfflineStreamResultJson?.call(json); + + return OfflineRecognizerResult( + text: parsedJson['text'], + tokens: List.from(parsedJson['tokens']), + timestamps: List.from(parsedJson['timestamps'])); + } + + Pointer ptr; + OfflineRecognizerConfig config; +} diff --git a/sherpa-onnx/flutter/lib/src/offline_stream.dart b/sherpa-onnx/flutter/lib/src/offline_stream.dart new file mode 100644 index 00000000..4157886d --- /dev/null +++ b/sherpa-onnx/flutter/lib/src/offline_stream.dart @@ -0,0 +1,37 @@ +// Copyright (c) 2024 Xiaomi Corporation +import 'dart:ffi'; +import 'dart:typed_data'; +import 'package:ffi/ffi.dart'; + +import './sherpa_onnx_bindings.dart'; + +class OfflineStream { + /// The user has to call OfflineStream.free() to avoid memory leak. + OfflineStream({required this.ptr}); + + void free() { + SherpaOnnxBindings.destroyOfflineStream?.call(ptr); + ptr = nullptr; + } + + /// If you have List data, then you can use + /// Float32List.fromList(data) to convert data to Float32List + /// + /// See + /// https://api.flutter.dev/flutter/dart-core/List-class.html + /// and + /// https://api.flutter.dev/flutter/dart-typed_data/Float32List-class.html + void acceptWaveform({required Float32List samples, required int sampleRate}) { + final n = samples.length; + final Pointer p = calloc(n); + + final pList = p.asTypedList(n); + pList.setAll(0, samples); + + SherpaOnnxBindings.acceptWaveformOffline?.call(this.ptr, sampleRate, p, n); + + calloc.free(p); + } + + Pointer ptr; +} diff --git a/sherpa-onnx/flutter/lib/src/online_recognizer.dart b/sherpa-onnx/flutter/lib/src/online_recognizer.dart index 4bfae0b7..8445e9d7 100644 --- a/sherpa-onnx/flutter/lib/src/online_recognizer.dart +++ b/sherpa-onnx/flutter/lib/src/online_recognizer.dart @@ -5,21 +5,10 @@ import 'dart:typed_data'; import 
'package:ffi/ffi.dart'; +import './feature_config.dart'; import './online_stream.dart'; import './sherpa_onnx_bindings.dart'; -class FeatureConfig { - const FeatureConfig({this.sampleRate = 16000, this.featureDim = 80}); - - @override - String toString() { - return 'FeatureConfig(sampleRate: $sampleRate, featureDim: $featureDim)'; - } - - final int sampleRate; - final int featureDim; -} - class OnlineTransducerModelConfig { const OnlineTransducerModelConfig({ this.encoder = '', diff --git a/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart b/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart index b635819a..273b48e5 100644 --- a/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart +++ b/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart @@ -10,6 +10,76 @@ final class SherpaOnnxFeatureConfig extends Struct { external int featureDim; } +final class SherpaOnnxOfflineTransducerModelConfig extends Struct { + external Pointer encoder; + external Pointer decoder; + external Pointer joiner; +} + +final class SherpaOnnxOfflineParaformerModelConfig extends Struct { + external Pointer model; +} + +final class SherpaOnnxOfflineNemoEncDecCtcModelConfig extends Struct { + external Pointer model; +} + +final class SherpaOnnxOfflineWhisperModelConfig extends Struct { + external Pointer encoder; + external Pointer decoder; + external Pointer language; + external Pointer task; + + @Int32() + external int tailPaddings; +} + +final class SherpaOnnxOfflineTdnnModelConfig extends Struct { + external Pointer model; +} + +final class SherpaOnnxOfflineLMConfig extends Struct { + external Pointer model; + + @Float() + external double scale; +} + +final class SherpaOnnxOfflineModelConfig extends Struct { + external SherpaOnnxOfflineTransducerModelConfig transducer; + external SherpaOnnxOfflineParaformerModelConfig paraformer; + external SherpaOnnxOfflineNemoEncDecCtcModelConfig nemoCtc; + external SherpaOnnxOfflineWhisperModelConfig whisper; + external 
SherpaOnnxOfflineTdnnModelConfig tdnn; + + external Pointer tokens; + + @Int32() + external int numThreads; + + @Int32() + external int debug; + + external Pointer provider; + + external Pointer modelType; +} + +final class SherpaOnnxOfflineRecognizerConfig extends Struct { + external SherpaOnnxFeatureConfig feat; + external SherpaOnnxOfflineModelConfig model; + external SherpaOnnxOfflineLMConfig lm; + external Pointer decodingMethod; + + @Int32() + external int maxActivePaths; + + external Pointer hotwordsFile; + + @Float() + external double hotwordsScore; +} + final class SherpaOnnxOnlineTransducerModelConfig extends Struct { external Pointer encoder; external Pointer decoder; @@ -149,10 +219,56 @@ final class SherpaOnnxOnlineStream extends Opaque {} final class SherpaOnnxOnlineRecognizer extends Opaque {} +final class SherpaOnnxOfflineRecognizer extends Opaque {} + +final class SherpaOnnxOfflineStream extends Opaque {} + final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} +typedef CreateOfflineRecognizerNative = Pointer + Function(Pointer); + +typedef CreateOfflineRecognizer = CreateOfflineRecognizerNative; + +typedef DestroyOfflineRecognizerNative = Void Function( + Pointer); + +typedef DestroyOfflineRecognizer = void Function( + Pointer); + +typedef CreateOfflineStreamNative = Pointer Function( + Pointer); + +typedef CreateOfflineStream = CreateOfflineStreamNative; + +typedef DestroyOfflineStreamNative = Void Function( + Pointer); + +typedef DestroyOfflineStream = void Function(Pointer); + +typedef AcceptWaveformOfflineNative = Void Function( + Pointer, Int32, Pointer, Int32); + +typedef AcceptWaveformOffline = void Function( + Pointer, int, Pointer, int); + +typedef DecodeOfflineStreamNative = Void Function( + Pointer, Pointer); + +typedef DecodeOfflineStream = void Function( + Pointer, Pointer); + +typedef GetOfflineStreamResultAsJsonNative = Pointer Function( + Pointer); + 
+typedef GetOfflineStreamResultAsJson = GetOfflineStreamResultAsJsonNative; + +typedef DestroyOfflineStreamResultJsonNative = Void Function(Pointer); + +typedef DestroyOfflineStreamResultJson = void Function(Pointer); + typedef CreateOnlineRecognizerNative = Pointer Function(Pointer); @@ -488,6 +604,15 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer); typedef SherpaOnnxFreeWave = void Function(Pointer); class SherpaOnnxBindings { + static CreateOfflineRecognizer? createOfflineRecognizer; + static DestroyOfflineRecognizer? destroyOfflineRecognizer; + static CreateOfflineStream? createOfflineStream; + static DestroyOfflineStream? destroyOfflineStream; + static AcceptWaveformOffline? acceptWaveformOffline; + static DecodeOfflineStream? decodeOfflineStream; + static GetOfflineStreamResultAsJson? getOfflineStreamResultAsJson; + static DestroyOfflineStreamResultJson? destroyOfflineStreamResultJson; + static CreateOnlineRecognizer? createOnlineRecognizer; static DestroyOnlineRecognizer? destroyOnlineRecognizer; @@ -611,6 +736,46 @@ class SherpaOnnxBindings { static SherpaOnnxFreeWave? 
freeWave; static void init(DynamicLibrary dynamicLibrary) { + createOfflineRecognizer ??= dynamicLibrary + .lookup>( + 'CreateOfflineRecognizer') + .asFunction(); + + destroyOfflineRecognizer ??= dynamicLibrary + .lookup>( + 'DestroyOfflineRecognizer') + .asFunction(); + + createOfflineStream ??= dynamicLibrary + .lookup>( + 'CreateOfflineStream') + .asFunction(); + + destroyOfflineStream ??= dynamicLibrary + .lookup>( + 'DestroyOfflineStream') + .asFunction(); + + acceptWaveformOffline ??= dynamicLibrary + .lookup>( + 'AcceptWaveformOffline') + .asFunction(); + + decodeOfflineStream ??= dynamicLibrary + .lookup>( + 'DecodeOfflineStream') + .asFunction(); + + getOfflineStreamResultAsJson ??= dynamicLibrary + .lookup>( + 'GetOfflineStreamResultAsJson') + .asFunction(); + + destroyOfflineStreamResultJson ??= dynamicLibrary + .lookup>( + 'DestroyOfflineStreamResultJson') + .asFunction(); + createOnlineRecognizer ??= dynamicLibrary .lookup>( 'CreateOnlineRecognizer')