diff --git a/.github/scripts/test-dart.sh b/.github/scripts/test-dart.sh index f579e34f..65da3be4 100755 --- a/.github/scripts/test-dart.sh +++ b/.github/scripts/test-dart.sh @@ -4,6 +4,12 @@ set -ex cd dart-api-examples +pushd speech-enhancement-gtcrn +echo "speech enhancement with gtcrn models" +./run.sh +ls -lh +popd + pushd tts echo '----------matcha tts----------' diff --git a/.github/workflows/test-dart.yaml b/.github/workflows/test-dart.yaml index d9e27e86..3f667bea 100644 --- a/.github/workflows/test-dart.yaml +++ b/.github/workflows/test-dart.yaml @@ -115,6 +115,7 @@ jobs: cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml cp scripts/dart/speaker-diarization-pubspec.yaml dart-api-examples/speaker-diarization/pubspec.yaml + cp scripts/dart/speech-enhancement-gtcrn-pubspec.yaml dart-api-examples/speech-enhancement-gtcrn/pubspec.yaml cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml diff --git a/dart-api-examples/README.md b/dart-api-examples/README.md index 3d66cb04..d57a7ba7 100644 --- a/dart-api-examples/README.md +++ b/dart-api-examples/README.md @@ -19,6 +19,7 @@ https://pub.dev/packages/sherpa_onnx | [./tts](./tts)| Example for text to speech| | [./vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| Example for voice activity detection with non-streaming speech recognition. 
You can use it to generate subtitles.| | [./vad](./vad)| Example for voice activity detection| +| [./speech-enhancement-gtcrn](./speech-enhancement-gtcrn)| Example for speech enhancement/denoising| ## How to create an example in this folder diff --git a/dart-api-examples/speech-enhancement-gtcrn/.gitignore b/dart-api-examples/speech-enhancement-gtcrn/.gitignore new file mode 100644 index 00000000..3a857904 --- /dev/null +++ b/dart-api-examples/speech-enhancement-gtcrn/.gitignore @@ -0,0 +1,3 @@ +# https://dart.dev/guides/libraries/private-files +# Created by `dart pub` +.dart_tool/ diff --git a/dart-api-examples/speech-enhancement-gtcrn/CHANGELOG.md b/dart-api-examples/speech-enhancement-gtcrn/CHANGELOG.md new file mode 100644 index 00000000..effe43c8 --- /dev/null +++ b/dart-api-examples/speech-enhancement-gtcrn/CHANGELOG.md @@ -0,0 +1,3 @@ +## 1.0.0 + +- Initial version. diff --git a/dart-api-examples/speech-enhancement-gtcrn/README.md b/dart-api-examples/speech-enhancement-gtcrn/README.md new file mode 100644 index 00000000..3816eca3 --- /dev/null +++ b/dart-api-examples/speech-enhancement-gtcrn/README.md @@ -0,0 +1,2 @@ +A sample command-line application with an entrypoint in `bin/`, library code +in `lib/`, and example unit test in `test/`. diff --git a/dart-api-examples/speech-enhancement-gtcrn/analysis_options.yaml b/dart-api-examples/speech-enhancement-gtcrn/analysis_options.yaml new file mode 100644 index 00000000..dee8927a --- /dev/null +++ b/dart-api-examples/speech-enhancement-gtcrn/analysis_options.yaml @@ -0,0 +1,30 @@ +# This file configures the static analysis results for your project (errors, +# warnings, and lints). +# +# This enables the 'recommended' set of lints from `package:lints`. +# This set helps identify many issues that may lead to problems when running +# or consuming Dart code, and enforces writing Dart using a single, idiomatic +# style and format. 
+# +# If you want a smaller set of lints you can change this to specify +# 'package:lints/core.yaml'. These are just the most critical lints +# (the recommended set includes the core lints). +# The core lints are also what is used by pub.dev for scoring packages. + +include: package:lints/recommended.yaml + +# Uncomment the following section to specify additional rules. + +# linter: +# rules: +# - camel_case_types + +# analyzer: +# exclude: +# - path/to/excluded/files/** + +# For more information about the core and recommended set of lints, see +# https://dart.dev/go/core-lints + +# For additional information about configuring this file, see +# https://dart.dev/guides/language/analysis-options diff --git a/dart-api-examples/speech-enhancement-gtcrn/bin/init.dart b/dart-api-examples/speech-enhancement-gtcrn/bin/init.dart new file mode 120000 index 00000000..48508cfd --- /dev/null +++ b/dart-api-examples/speech-enhancement-gtcrn/bin/init.dart @@ -0,0 +1 @@ +../../vad/bin/init.dart \ No newline at end of file diff --git a/dart-api-examples/speech-enhancement-gtcrn/bin/speech_enhancement_gtcrn.dart b/dart-api-examples/speech-enhancement-gtcrn/bin/speech_enhancement_gtcrn.dart new file mode 100644 index 00000000..316d08c2 --- /dev/null +++ b/dart-api-examples/speech-enhancement-gtcrn/bin/speech_enhancement_gtcrn.dart @@ -0,0 +1,51 @@ +// Copyright (c) 2025 Xiaomi Corporation +import 'dart:io'; + +import 'package:args/args.dart'; +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; +import './init.dart'; + +void main(List arguments) async { + await initSherpaOnnx(); + + final parser = ArgParser() + ..addOption('model', help: 'Path to gtcrn onnx model') + ..addOption('input-wav', help: 'Path to input.wav') + ..addOption('output-wav', help: 'Path to output.wav'); + + final res = parser.parse(arguments); + if (res['model'] == null || + res['input-wav'] == null || + res['output-wav'] == null) { + print(parser.usage); + exit(1); + } + + final model = res['model'] as 
String; + final inputWav = res['input-wav'] as String; + final outputWav = res['output-wav'] as String; + + final config = sherpa_onnx.OfflineSpeechDenoiserConfig( + model: sherpa_onnx.OfflineSpeechDenoiserModelConfig( + gtcrn: sherpa_onnx.OfflineSpeechDenoiserGtcrnModelConfig(model: model), + numThreads: 1, + debug: true, + provider: 'cpu', + )); + + final sd = sherpa_onnx.OfflineSpeechDenoiser(config); + + final waveData = sherpa_onnx.readWave(inputWav); + + final denoised = + sd.run(samples: waveData.samples, sampleRate: waveData.sampleRate); + + sd.free(); + + sherpa_onnx.writeWave( + filename: outputWav, + samples: denoised.samples, + sampleRate: denoised.sampleRate); + + print('Saved to $outputWav'); +} diff --git a/dart-api-examples/speech-enhancement-gtcrn/pubspec.yaml b/dart-api-examples/speech-enhancement-gtcrn/pubspec.yaml new file mode 100644 index 00000000..0f696fc9 --- /dev/null +++ b/dart-api-examples/speech-enhancement-gtcrn/pubspec.yaml @@ -0,0 +1,20 @@ +name: speech_enhancement_gtcrn + +description: > + This example demonstrates how to use the Dart API for speech enhancement/denoising. + +version: 1.0.0 + +environment: + sdk: ">=3.0.0 <4.0.0" + +# Add regular dependencies here. +dependencies: + sherpa_onnx: ^1.10.46 + # sherpa_onnx: + # path: ../../flutter/sherpa_onnx + path: ^1.9.0 + args: ^2.5.0 + +dev_dependencies: + lints: ^3.0.0 diff --git a/dart-api-examples/speech-enhancement-gtcrn/run.sh b/dart-api-examples/speech-enhancement-gtcrn/run.sh new file mode 100755 index 00000000..96e52a99 --- /dev/null +++ b/dart-api-examples/speech-enhancement-gtcrn/run.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +set -ex + +dart pub get + +if [ ! -f ./gtcrn_simple.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx +fi + +if [ ! 
-f ./inp_16k.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav +fi + + +dart run \ + ./bin/speech_enhancement_gtcrn.dart \ + --model ./gtcrn_simple.onnx \ + --input-wav ./inp_16k.wav \ + --output-wav ./enhanced-16k.wav + +ls -lh *.wav diff --git a/flutter/sherpa_onnx/example/example.md b/flutter/sherpa_onnx/example/example.md index 9255b7ee..265cbd2d 100644 --- a/flutter/sherpa_onnx/example/example.md +++ b/flutter/sherpa_onnx/example/example.md @@ -9,6 +9,8 @@ ## Pure dart-examples +Hint: All of the following functions can be used in Flutter, even if some of them are only provided in pure dart api examples. + | Functions | URL | Supported Platforms| |---|---|---| |Speaker diarization| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-diarization)| macOS, Windows, Linux| @@ -20,3 +22,5 @@ |Speaker identification and verification| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-identification)| macOS, Windows, Linux| |Audio tagging| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/audio-tagging)| macOS, Windows, Linux| |Keyword spotter| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/keyword-spotter)| macOS, Windows, Linux| +|Add punctuations| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/add-punctuations)| macOS, Windows, Linux| +|Speech enhancement/denoising| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speech-enhancement-gtcrn)| macOS, Windows, Linux| diff --git a/flutter/sherpa_onnx/lib/sherpa_onnx.dart b/flutter/sherpa_onnx/lib/sherpa_onnx.dart index 30206fc6..284360e7 100644 --- a/flutter/sherpa_onnx/lib/sherpa_onnx.dart +++ b/flutter/sherpa_onnx/lib/sherpa_onnx.dart @@ -8,6 +8,7 @@ export 'src/keyword_spotter.dart'; export 'src/offline_punctuation.dart'; export 'src/offline_recognizer.dart'; export 
'src/offline_speaker_diarization.dart'; +export 'src/offline_speech_denoiser.dart'; export 'src/offline_stream.dart'; export 'src/online_punctuation.dart'; export 'src/online_recognizer.dart'; diff --git a/flutter/sherpa_onnx/lib/src/offline_speech_denoiser.dart b/flutter/sherpa_onnx/lib/src/offline_speech_denoiser.dart new file mode 100644 index 00000000..f3153de8 --- /dev/null +++ b/flutter/sherpa_onnx/lib/src/offline_speech_denoiser.dart @@ -0,0 +1,169 @@ +// Copyright (c) 2025 Xiaomi Corporation +import 'dart:ffi'; +import 'dart:typed_data'; + +import 'package:ffi/ffi.dart'; +import './sherpa_onnx_bindings.dart'; + +class OfflineSpeechDenoiserGtcrnModelConfig { + const OfflineSpeechDenoiserGtcrnModelConfig({ + this.model = '', + }); + + factory OfflineSpeechDenoiserGtcrnModelConfig.fromJson( + Map json) { + return OfflineSpeechDenoiserGtcrnModelConfig( + model: json['model'] as String? ?? '', + ); + } + + @override + String toString() { + return 'OfflineSpeechDenoiserGtcrnModelConfig(model: $model)'; + } + + Map toJson() => { + 'model': model, + }; + + final String model; +} + +class OfflineSpeechDenoiserModelConfig { + const OfflineSpeechDenoiserModelConfig({ + this.gtcrn = const OfflineSpeechDenoiserGtcrnModelConfig(), + this.numThreads = 1, + this.debug = true, + this.provider = 'cpu', + }); + + factory OfflineSpeechDenoiserModelConfig.fromJson(Map json) { + return OfflineSpeechDenoiserModelConfig( + gtcrn: json['gtcrn'] != null + ? OfflineSpeechDenoiserGtcrnModelConfig.fromJson( + json['gtcrn'] as Map) + : const OfflineSpeechDenoiserGtcrnModelConfig(), + numThreads: json['numThreads'] as int? ?? 1, + debug: json['debug'] as bool? ?? true, + provider: json['provider'] as String? ?? 
'cpu', + ); + } + + @override + String toString() { + return 'OfflineSpeechDenoiserModelConfig(gtcrn: $gtcrn, numThreads: $numThreads, debug: $debug, provider: $provider)'; + } + + Map toJson() => { + 'gtcrn': gtcrn.toJson(), + 'numThreads': numThreads, + 'debug': debug, + 'provider': provider, + }; + + final OfflineSpeechDenoiserGtcrnModelConfig gtcrn; + final int numThreads; + final bool debug; + final String provider; +} + +class OfflineSpeechDenoiserConfig { + const OfflineSpeechDenoiserConfig({ + this.model = const OfflineSpeechDenoiserModelConfig(), + }); + + factory OfflineSpeechDenoiserConfig.fromJson(Map json) { + return OfflineSpeechDenoiserConfig( + model: json['model'] != null + ? OfflineSpeechDenoiserModelConfig.fromJson( + json['model'] as Map) + : const OfflineSpeechDenoiserModelConfig(), + ); + } + + @override + String toString() { + return 'OfflineSpeechDenoiserConfig(model: $model)'; + } + + Map toJson() => { + 'model': model.toJson(), + }; + + final OfflineSpeechDenoiserModelConfig model; +} + +class DenoisedAudio { + DenoisedAudio({ + required this.samples, + required this.sampleRate, + }); + + final Float32List samples; + final int sampleRate; +} + +class OfflineSpeechDenoiser { + OfflineSpeechDenoiser.fromPtr({required this.ptr, required this.config}); + + OfflineSpeechDenoiser._({required this.ptr, required this.config}); + + /// The user is responsible to call the OfflineSpeechDenoiser.free() + /// method of the returned instance to avoid memory leak. + factory OfflineSpeechDenoiser(OfflineSpeechDenoiserConfig config) { + final c = calloc(); + c.ref.model.gtcrn.model = config.model.gtcrn.model.toNativeUtf8(); + + c.ref.model.numThreads = config.model.numThreads; + c.ref.model.debug = config.model.debug ? 1 : 0; + c.ref.model.provider = config.model.provider.toNativeUtf8(); + + final ptr = + SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeechDenoiser?.call(c) ?? 
+ nullptr; + + calloc.free(c.ref.model.provider); + calloc.free(c.ref.model.gtcrn.model); + calloc.free(c); // release the calloc'ed config struct itself; only its string members were freed before + return OfflineSpeechDenoiser._(ptr: ptr, config: config); + } + + void free() { + SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeechDenoiser?.call(ptr); + ptr = nullptr; + } + + DenoisedAudio run({required Float32List samples, required int sampleRate}) { + final n = samples.length; + final Pointer psamples = calloc(n); + + final pList = psamples.asTypedList(n); + pList.setAll(0, samples); + + final p = SherpaOnnxBindings.sherpaOnnxOfflineSpeechDenoiserRun + ?.call(ptr, psamples, n, sampleRate) ?? + nullptr; + + calloc.free(psamples); + + if (p == nullptr) { + return DenoisedAudio(samples: Float32List(0), sampleRate: 0); + } + + final denoisedSamples = p.ref.samples.asTypedList(p.ref.n); + final denoisedSampleRate = p.ref.sampleRate; + final newSamples = Float32List.fromList(denoisedSamples); + + SherpaOnnxBindings.sherpaOnnxDestroyDenoisedAudio?.call(p); + + return DenoisedAudio(samples: newSamples, sampleRate: denoisedSampleRate); + } + + int get sampleRate => + SherpaOnnxBindings.sherpaOnnxOfflineSpeechDenoiserGetSampleRate + ?.call(ptr) ?? 
+ 0; + + Pointer ptr; + OfflineSpeechDenoiserConfig config; +} diff --git a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart index e3d9906f..09550469 100644 --- a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart +++ b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart @@ -2,6 +2,36 @@ import 'dart:ffi'; import 'package:ffi/ffi.dart'; +final class SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig extends Struct { + external Pointer model; +} + +final class SherpaOnnxOfflineSpeechDenoiserModelConfig extends Struct { + external SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig gtcrn; + + @Int32() + external int numThreads; + + @Int32() + external int debug; + + external Pointer provider; +} + +final class SherpaOnnxOfflineSpeechDenoiserConfig extends Struct { + external SherpaOnnxOfflineSpeechDenoiserModelConfig model; +} + +final class SherpaOnnxDenoisedAudio extends Struct { + external Pointer samples; + + @Int32() + external int n; + + @Int32() + external int sampleRate; +} + final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct { external Pointer model; @@ -517,6 +547,41 @@ final class SherpaOnnxOfflineSpeakerDiarization extends Opaque {} final class SherpaOnnxOfflineSpeakerDiarizationResult extends Opaque {} +final class SherpaOnnxOfflineSpeechDenoiser extends Opaque {} + +typedef SherpaOnnxCreateOfflineSpeechDenoiserNative + = Pointer Function( + Pointer); + +typedef SherpaOnnxCreateOfflineSpeechDenoiser + = SherpaOnnxCreateOfflineSpeechDenoiserNative; + +typedef SherpaOnnxDestroyOfflineSpeechDenoiserNative = Void Function( + Pointer); + +typedef SherpaOnnxDestroyOfflineSpeechDenoiser = void Function( + Pointer); + +typedef SherpaOnnxOfflineSpeechDenoiserGetSampleRateNative = Int32 Function( + Pointer); + +typedef SherpaOnnxOfflineSpeechDenoiserGetSampleRate = int Function( + Pointer); + +typedef SherpaOnnxOfflineSpeechDenoiserRunNative + = Pointer Function( + Pointer, Pointer, 
Int32, Int32); + +typedef SherpaOnnxOfflineSpeechDenoiserRun + = Pointer Function( + Pointer, Pointer, int, int); + +typedef SherpaOnnxDestroyDenoisedAudioNative = Void Function( + Pointer); + +typedef SherpaOnnxDestroyDenoisedAudio = void Function( + Pointer); + typedef SherpaOnnxCreateOfflineSpeakerDiarizationNative = Pointer Function( Pointer); @@ -1172,6 +1237,17 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer); typedef SherpaOnnxFreeWave = void Function(Pointer); class SherpaOnnxBindings { + static SherpaOnnxCreateOfflineSpeechDenoiser? + sherpaOnnxCreateOfflineSpeechDenoiser; + + static SherpaOnnxDestroyOfflineSpeechDenoiser? + sherpaOnnxDestroyOfflineSpeechDenoiser; + + static SherpaOnnxOfflineSpeechDenoiserGetSampleRate? + sherpaOnnxOfflineSpeechDenoiserGetSampleRate; + static SherpaOnnxOfflineSpeechDenoiserRun? sherpaOnnxOfflineSpeechDenoiserRun; + static SherpaOnnxDestroyDenoisedAudio? sherpaOnnxDestroyDenoisedAudio; + static SherpaOnnxCreateOfflineSpeakerDiarization? sherpaOnnxCreateOfflineSpeakerDiarization; static SherpaOnnxDestroyOfflineSpeakerDiarization? @@ -1370,6 +1446,33 @@ class SherpaOnnxBindings { static SherpaOnnxFreeWave? 
freeWave; static void init(DynamicLibrary dynamicLibrary) { + sherpaOnnxCreateOfflineSpeechDenoiser ??= dynamicLibrary + .lookup>( + 'SherpaOnnxCreateOfflineSpeechDenoiser') + .asFunction(); + + sherpaOnnxDestroyOfflineSpeechDenoiser ??= dynamicLibrary + .lookup>( + 'SherpaOnnxDestroyOfflineSpeechDenoiser') + .asFunction(); + + sherpaOnnxOfflineSpeechDenoiserGetSampleRate ??= dynamicLibrary + .lookup< + NativeFunction< + SherpaOnnxOfflineSpeechDenoiserGetSampleRateNative>>( + 'SherpaOnnxOfflineSpeechDenoiserGetSampleRate') + .asFunction(); + + sherpaOnnxOfflineSpeechDenoiserRun ??= dynamicLibrary + .lookup>( + 'SherpaOnnxOfflineSpeechDenoiserRun') + .asFunction(); + + sherpaOnnxDestroyDenoisedAudio ??= dynamicLibrary + .lookup>( + 'SherpaOnnxDestroyDenoisedAudio') + .asFunction(); + sherpaOnnxCreateOfflineSpeakerDiarization ??= dynamicLibrary .lookup< NativeFunction< diff --git a/scripts/dart/speech-enhancement-gtcrn-pubspec.yaml b/scripts/dart/speech-enhancement-gtcrn-pubspec.yaml new file mode 100644 index 00000000..b5aee978 --- /dev/null +++ b/scripts/dart/speech-enhancement-gtcrn-pubspec.yaml @@ -0,0 +1,18 @@ +name: speech_enhancement_gtcrn + +description: > + This example demonstrates how to use the Dart API for speech enhancement/denoising. + +version: 1.0.0 + +environment: + sdk: ">=3.0.0 <4.0.0" + +dependencies: + sherpa_onnx: + path: ../../flutter/sherpa_onnx + path: ^1.9.0 + args: ^2.5.0 + +dev_dependencies: + lints: ^3.0.0