Add Dart API for speech enhancement GTCRN models (#1993)

This commit is contained in:
Fangjun Kuang
2025-03-12 12:39:08 +08:00
committed by GitHub
parent c3b009988b
commit fd78a482df
16 changed files with 435 additions and 0 deletions

View File

@@ -19,6 +19,7 @@ https://pub.dev/packages/sherpa_onnx
| [./tts](./tts)| Example for text to speech|
| [./vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| Example for voice activity detection with non-streaming speech recognition. You can use it to generate subtitles.|
| [./vad](./vad)| Example for voice activity detection|
| [./speech-enhancement-gtcrn](./speech-enhancement-gtcrn)| Example for speech enhancement/denoising|
## How to create an example in this folder

View File

@@ -0,0 +1,3 @@
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/

View File

@@ -0,0 +1,3 @@
## 1.0.0
- Initial version.

View File

@@ -0,0 +1,2 @@
A sample command-line application with an entrypoint in `bin/`, library code
in `lib/`, and example unit test in `test/`.

View File

@@ -0,0 +1,30 @@
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.
include: package:lints/recommended.yaml
# Uncomment the following section to specify additional rules.
# linter:
# rules:
# - camel_case_types
# analyzer:
# exclude:
# - path/to/excluded/files/**
# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints
# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options

View File

@@ -0,0 +1 @@
../../vad/bin/init.dart

View File

@@ -0,0 +1,51 @@
// Copyright (c) 2025 Xiaomi Corporation
import 'dart:io';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
/// Entry point: denoises an input wave file with a GTCRN speech-enhancement
/// model and writes the enhanced audio to the given output wave file.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final argParser = ArgParser()
    ..addOption('model', help: 'Path to gtcrn onnx model')
    ..addOption('input-wav', help: 'Path to input.wav')
    ..addOption('output-wav', help: 'Path to output.wav');

  final options = argParser.parse(arguments);

  // All three options are mandatory; show usage and bail out if any is absent.
  final missingAny =
      ['model', 'input-wav', 'output-wav'].any((name) => options[name] == null);
  if (missingAny) {
    print(argParser.usage);
    exit(1);
  }

  final modelPath = options['model'] as String;
  final inputWav = options['input-wav'] as String;
  final outputWav = options['output-wav'] as String;

  final config = sherpa_onnx.OfflineSpeechDenoiserConfig(
    model: sherpa_onnx.OfflineSpeechDenoiserModelConfig(
      gtcrn: sherpa_onnx.OfflineSpeechDenoiserGtcrnModelConfig(model: modelPath),
      numThreads: 1,
      debug: true,
      provider: 'cpu',
    ),
  );

  final denoiser = sherpa_onnx.OfflineSpeechDenoiser(config);

  final wave = sherpa_onnx.readWave(inputWav);
  final denoised =
      denoiser.run(samples: wave.samples, sampleRate: wave.sampleRate);

  // The native denoiser is no longer needed once the audio has been produced.
  denoiser.free();

  sherpa_onnx.writeWave(
    filename: outputWav,
    samples: denoised.samples,
    sampleRate: denoised.sampleRate,
  );

  print('Saved to $outputWav');
}

View File

@@ -0,0 +1,20 @@
name: speech_enhancement_gtcrn
description: >
This example demonstrates how to use the Dart API for speech enhancement/denoising.
version: 1.0.0
environment:
sdk: ">=3.0.0 <4.0.0"
# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.46
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx
path: ^1.9.0
args: ^2.5.0
dev_dependencies:
lints: ^3.0.0

View File

@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Fetch the GTCRN model and a test wave file (if not already present),
# then run the Dart speech-enhancement example on them.
set -ex

dart pub get

# Download a release asset into the current directory unless it already exists.
download_if_missing() {
  local filename=$1
  if [ ! -f "./$filename" ]; then
    curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/$filename"
  fi
}

download_if_missing gtcrn_simple.onnx
download_if_missing inp_16k.wav

dart run \
  ./bin/speech_enhancement_gtcrn.dart \
  --model ./gtcrn_simple.onnx \
  --input-wav ./inp_16k.wav \
  --output-wav ./enhanced-16k.wav

ls -lh *.wav