Add Dart API for streaming ASR (#933)
This commit is contained in:
BIN
sherpa-onnx/flutter/example/assets/streaming-asr.ico
Normal file
BIN
sherpa-onnx/flutter/example/assets/streaming-asr.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 16 KiB |
BIN
sherpa-onnx/flutter/example/assets/vad.ico
Normal file
BIN
sherpa-onnx/flutter/example/assets/vad.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 16 KiB |
@@ -2,9 +2,8 @@
|
||||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||
import 'package:flutter/material.dart';
|
||||
|
||||
import "./speaker_identification_test.dart";
|
||||
import "./vad_test.dart";
|
||||
import './home.dart';
|
||||
import './vad.dart';
|
||||
import './streaming_asr.dart';
|
||||
import './info.dart';
|
||||
|
||||
void main() {
|
||||
@@ -20,7 +19,7 @@ class MyApp extends StatelessWidget {
|
||||
theme: ThemeData(
|
||||
primarySwatch: Colors.blue,
|
||||
),
|
||||
home: const MyHomePage(title: 'Next-gen Kaldi: VAD demo'),
|
||||
home: const MyHomePage(title: 'Next-gen Kaldi Demo'),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -35,7 +34,8 @@ class MyHomePage extends StatefulWidget {
|
||||
class _MyHomePageState extends State<MyHomePage> {
|
||||
int _currentIndex = 0;
|
||||
final List<Widget> _tabs = [
|
||||
HomeScreen(),
|
||||
StreamingAsrScreen(),
|
||||
VadScreen(),
|
||||
InfoScreen(),
|
||||
];
|
||||
@override
|
||||
@@ -52,10 +52,15 @@ class _MyHomePageState extends State<MyHomePage> {
|
||||
_currentIndex = index;
|
||||
});
|
||||
},
|
||||
// https://www.xiconeditor.com/
|
||||
items: [
|
||||
BottomNavigationBarItem(
|
||||
icon: Icon(Icons.home),
|
||||
label: 'Home',
|
||||
icon: new Image.asset("assets/streaming-asr.ico"),
|
||||
label: '',
|
||||
),
|
||||
BottomNavigationBarItem(
|
||||
icon: new Image.asset("assets/vad.ico"),
|
||||
label: '',
|
||||
),
|
||||
BottomNavigationBarItem(
|
||||
icon: Icon(Icons.info),
|
||||
|
||||
259
sherpa-onnx/flutter/example/lib/streaming_asr.dart
Normal file
259
sherpa-onnx/flutter/example/lib/streaming_asr.dart
Normal file
@@ -0,0 +1,259 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
import 'dart:async';
|
||||
|
||||
import 'package:flutter/foundation.dart';
|
||||
import 'package:flutter/material.dart';
|
||||
import 'package:path/path.dart' as p;
|
||||
import 'package:path_provider/path_provider.dart';
|
||||
import 'package:record/record.dart';
|
||||
|
||||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||
|
||||
import './utils.dart';
|
||||
|
||||
import './streaming_transducer_asr_test.dart'; // TODO(fangjun): remove it
|
||||
|
||||
/// Builds the streaming recognizer used by the demo.
///
/// The four files of the bundled bilingual zipformer transducer are handed to
/// [copyAssetFile] one after another; the values it returns become the
/// encoder, decoder, joiner and tokens entries of the model configuration.
///
/// The returned recognizer wraps a native handle, so the caller is expected to
/// call `free()` on it when it is no longer needed.
Future<sherpa_onnx.OnlineRecognizer> createOnlineRecognizer() async {
  const modelDir =
      'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20';

  // Kept strictly sequential, in the same order as before.
  final encoderPath = await copyAssetFile(
    src: '$modelDir/encoder-epoch-99-avg-1.int8.onnx',
    dst: 'encoder.onnx',
  );
  final decoderPath = await copyAssetFile(
    src: '$modelDir/decoder-epoch-99-avg-1.onnx',
    dst: 'decoder.onnx',
  );
  final joinerPath = await copyAssetFile(
    src: '$modelDir/joiner-epoch-99-avg-1.int8.onnx',
    dst: 'joiner.onnx',
  );
  final tokensPath = await copyAssetFile(
    src: '$modelDir/tokens.txt',
    dst: 'tokens.txt',
  );

  return sherpa_onnx.OnlineRecognizer(
    sherpa_onnx.OnlineRecognizerConfig(
      model: sherpa_onnx.OnlineModelConfig(
        transducer: sherpa_onnx.OnlineTransducerModelConfig(
          encoder: encoderPath,
          decoder: decoderPath,
          joiner: joinerPath,
        ),
        tokens: tokensPath,
        modelType: 'zipformer',
      ),
    ),
  );
}
|
||||
|
||||
/// Tab that hosts the real-time speech recognition demo.
///
/// All behaviour lives in [_StreamingAsrScreenState].
class StreamingAsrScreen extends StatefulWidget {
  /// Creates the screen; [key] is forwarded to [StatefulWidget].
  const StreamingAsrScreen({super.key});

  @override
  State<StreamingAsrScreen> createState() {
    return _StreamingAsrScreenState();
  }
}
|
||||
|
||||
/// State for [StreamingAsrScreen].
///
/// Streams 16-bit PCM audio from the microphone into a sherpa-onnx online
/// recognizer and mirrors the recognized text in a read-only text field.
class _StreamingAsrScreenState extends State<StreamingAsrScreen> {
  /// Sample rate requested from the recorder and reported to the recognizer
  /// stream. A single constant keeps the two values from drifting apart.
  static const int _sampleRate = 16000;

  final TextEditingController _controller = TextEditingController();
  final AudioRecorder _audioRecorder = AudioRecorder();

  final String _title = 'Real-time speech recognition';

  /// Text of all finished segments, newest segment first.
  String _last = '';

  /// Number shown in front of the segment currently being recognized.
  int _index = 0;

  /// Whether the native bindings and the recognizer have been set up.
  bool _isInitialized = false;

  sherpa_onnx.OnlineRecognizer? _recognizer;
  sherpa_onnx.OnlineStream? _stream;

  StreamSubscription<RecordState>? _recordSub;

  /// Subscription to the microphone audio. It is stored so that it can be
  /// cancelled before the native stream is released.
  StreamSubscription<List<int>>? _audioSub;

  RecordState _recordState = RecordState.stop;

  @override
  void initState() {
    super.initState();
    _recordSub = _audioRecorder.onStateChanged().listen(_updateRecordState);
  }

  /// Lazily creates the recognizer, then starts streaming microphone audio
  /// into it.
  ///
  /// Returns without recording when the microphone permission is missing or
  /// 16-bit PCM is not supported. Failures, including recognizer
  /// initialisation failures, are logged instead of surfacing as unhandled
  /// asynchronous errors from the tap handler.
  Future<void> _start() async {
    try {
      if (!_isInitialized) {
        sherpa_onnx.initBindings();
        _recognizer = await createOnlineRecognizer();
        _stream = _recognizer?.createStream();

        _isInitialized = true;
      }

      if (!await _audioRecorder.hasPermission()) {
        return;
      }

      const encoder = AudioEncoder.pcm16bits;
      if (!await _isEncoderSupported(encoder)) {
        return;
      }

      final devs = await _audioRecorder.listInputDevices();
      debugPrint(devs.toString());

      const config = RecordConfig(
        encoder: encoder,
        sampleRate: _sampleRate,
        numChannels: 1,
      );

      final audio = await _audioRecorder.startStream(config);

      // Never keep more than one live audio subscription.
      await _audioSub?.cancel();
      _audioSub = audio.listen(
        _onAudioData,
        onDone: () {
          debugPrint('stream stopped.');
        },
      );
    } catch (e) {
      debugPrint('$e');
    }
  }

  /// Feeds one chunk of PCM bytes to the recognizer and refreshes the text
  /// field with the current result.
  void _onAudioData(List<int> data) {
    final recognizer = _recognizer;
    final stream = _stream;
    if (recognizer == null || stream == null) {
      return;
    }

    final samplesFloat32 = convertBytesToFloat32(Uint8List.fromList(data));

    stream.acceptWaveform(samples: samplesFloat32, sampleRate: _sampleRate);
    while (recognizer.isReady(stream)) {
      recognizer.decode(stream);
    }

    final text = recognizer.getResult(stream).text;
    var textToDisplay = _last;
    if (text.isNotEmpty) {
      textToDisplay =
          _last.isEmpty ? '$_index: $text' : '$_index: $text\n$_last';
    }

    if (recognizer.isEndpoint(stream)) {
      recognizer.reset(stream);
      if (text.isNotEmpty) {
        // The segment is finished: freeze it and move to the next number.
        _last = textToDisplay;
        _index += 1;
      }
    }
    debugPrint('text: $textToDisplay');

    _controller.value = TextEditingValue(
      text: textToDisplay,
      selection: TextSelection.collapsed(offset: textToDisplay.length),
    );
  }

  /// Replaces the recognizer stream with a fresh one, stops the recorder and
  /// drops the audio subscription.
  Future<void> _stop() async {
    final sub = _audioSub;
    _audioSub = null;

    // Free and re-create in one synchronous step so that an audio callback
    // can never observe a released stream.
    _stream?.free();
    _stream = _recognizer?.createStream();

    await _audioRecorder.stop();
    await sub?.cancel();
  }

  Future<void> _pause() => _audioRecorder.pause();

  Future<void> _resume() => _audioRecorder.resume();

  void _updateRecordState(RecordState recordState) {
    if (!mounted) {
      return;
    }
    setState(() => _recordState = recordState);
  }

  /// Returns whether [encoder] is supported by the recorder.
  ///
  /// When it is not, the encoders that are supported are logged.
  Future<bool> _isEncoderSupported(AudioEncoder encoder) async {
    final isSupported = await _audioRecorder.isEncoderSupported(encoder);

    if (!isSupported) {
      debugPrint('${encoder.name} is not supported on this platform.');
      debugPrint('Supported encoders are:');

      for (final e in AudioEncoder.values) {
        if (await _audioRecorder.isEncoderSupported(e)) {
          // Log the candidate that passed the check, not the rejected one.
          debugPrint('- ${e.name}');
        }
      }
    }

    return isSupported;
  }

  @override
  Widget build(BuildContext context) {
    return MaterialApp(
      home: Scaffold(
        body: Column(
          mainAxisAlignment: MainAxisAlignment.center,
          children: [
            Text(_title),
            const SizedBox(height: 50),
            TextField(
              maxLines: 5,
              controller: _controller,
              readOnly: true,
            ),
            const SizedBox(height: 50),
            Row(
              mainAxisAlignment: MainAxisAlignment.center,
              children: <Widget>[
                _buildRecordStopControl(),
                const SizedBox(width: 20),
                _buildText(),
              ],
            ),
          ],
        ),
      ),
    );
  }

  @override
  void dispose() {
    _recordSub?.cancel();
    // Stop audio callbacks before the native handles below are released.
    _audioSub?.cancel();
    _audioSub = null;
    _audioRecorder.dispose();
    _controller.dispose();
    _stream?.free();
    _stream = null;
    _recognizer?.free();
    _recognizer = null;
    super.dispose();
  }

  /// Builds the round button that starts or stops recording.
  Widget _buildRecordStopControl() {
    final Icon icon;
    final Color color;

    if (_recordState != RecordState.stop) {
      icon = const Icon(Icons.stop, color: Colors.red, size: 30);
      color = Colors.red.withOpacity(0.1);
    } else {
      final theme = Theme.of(context);
      icon = Icon(Icons.mic, color: theme.primaryColor, size: 30);
      color = theme.primaryColor.withOpacity(0.1);
    }

    return ClipOval(
      child: Material(
        color: color,
        child: InkWell(
          child: SizedBox(width: 56, height: 56, child: icon),
          onTap: () {
            // Read the state at tap time, not at build time.
            if (_recordState != RecordState.stop) {
              unawaited(_stop());
            } else {
              unawaited(_start());
            }
          },
        ),
      ),
    );
  }

  /// Builds the label shown next to the record/stop button.
  Widget _buildText() {
    if (_recordState == RecordState.stop) {
      return const Text("Start");
    } else {
      return const Text("Stop");
    }
  }
}
|
||||
@@ -0,0 +1,61 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
import 'package:path/path.dart';
|
||||
import 'package:path_provider/path_provider.dart';
|
||||
import 'package:flutter/services.dart' show rootBundle;
|
||||
import 'dart:typed_data';
|
||||
import "dart:io";
|
||||
|
||||
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||
import './utils.dart';
|
||||
|
||||
/// Smoke test: decodes the bundled `0.wav` with the streaming zipformer
/// transducer and prints the result.
///
/// The model files and the test wave are first passed through
/// [copyAssetFile]. The native recognizer and stream are released in
/// `finally` blocks, so they are freed even when reading the wave or decoding
/// throws.
Future<void> testStreamingTransducerAsr() async {
  var encoder =
      'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
  var decoder =
      'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
  var joiner =
      'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx';
  var tokens =
      'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';

  var testWave =
      'assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';

  encoder = await copyAssetFile(src: encoder, dst: 'encoder.onnx');
  decoder = await copyAssetFile(src: decoder, dst: 'decoder.onnx');
  joiner = await copyAssetFile(src: joiner, dst: 'joiner.onnx');
  tokens = await copyAssetFile(src: tokens, dst: 'tokens.txt');
  testWave = await copyAssetFile(src: testWave, dst: 'test.wav');

  final transducer = sherpa_onnx.OnlineTransducerModelConfig(
    encoder: encoder,
    decoder: decoder,
    joiner: joiner,
  );

  final modelConfig = sherpa_onnx.OnlineModelConfig(
    transducer: transducer,
    tokens: tokens,
    modelType: 'zipformer',
  );

  final config = sherpa_onnx.OnlineRecognizerConfig(model: modelConfig);
  print(config);

  final recognizer = sherpa_onnx.OnlineRecognizer(config);
  try {
    final waveData = sherpa_onnx.readWave(testWave);
    final stream = recognizer.createStream();
    try {
      stream.acceptWaveform(
          samples: waveData.samples, sampleRate: waveData.sampleRate);
      while (recognizer.isReady(stream)) {
        recognizer.decode(stream);
      }

      final result = recognizer.getResult(stream);
      print('result is: $result');

      print('recognizer: ${recognizer.ptr}');
    } finally {
      stream.free();
    }
  } finally {
    recognizer.free();
  }
}
|
||||
@@ -11,14 +11,14 @@ import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
|
||||
|
||||
import './utils.dart';
|
||||
|
||||
class HomeScreen extends StatefulWidget {
|
||||
const HomeScreen({super.key});
|
||||
class VadScreen extends StatefulWidget {
|
||||
const VadScreen({super.key});
|
||||
|
||||
@override
|
||||
State<HomeScreen> createState() => _HomeScreenState();
|
||||
State<VadScreen> createState() => _VadScreenState();
|
||||
}
|
||||
|
||||
class _HomeScreenState extends State<HomeScreen> {
|
||||
class _VadScreenState extends State<VadScreen> {
|
||||
late final AudioRecorder _audioRecorder;
|
||||
|
||||
bool _printed = false;
|
||||
@@ -73,6 +73,7 @@ flutter:
|
||||
# To add assets to your application, add an assets section, like this:
|
||||
assets:
|
||||
- assets/
|
||||
- assets/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
|
||||
# - assets/sr-data/enroll/
|
||||
# - assets/sr-data/test/
|
||||
# - images/a_dot_ham.jpeg
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
import 'dart:io';
|
||||
import 'dart:ffi';
|
||||
|
||||
export 'src/online_recognizer.dart';
|
||||
export 'src/online_stream.dart';
|
||||
export 'src/speaker_identification.dart';
|
||||
export 'src/vad.dart';
|
||||
|
||||
291
sherpa-onnx/flutter/lib/src/online_recognizer.dart
Normal file
291
sherpa-onnx/flutter/lib/src/online_recognizer.dart
Normal file
@@ -0,0 +1,291 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
import 'dart:convert';
|
||||
import 'dart:ffi';
|
||||
import 'dart:typed_data';
|
||||
|
||||
import 'package:ffi/ffi.dart';
|
||||
|
||||
import './online_stream.dart';
|
||||
import './sherpa_onnx_bindings.dart';
|
||||
|
||||
/// Feature-extraction settings of an online recognizer.
class FeatureConfig {
  /// Creates the settings; [sampleRate] defaults to 16000 and [featureDim]
  /// to 80.
  const FeatureConfig({this.sampleRate = 16000, this.featureDim = 80});

  /// Sample rate of the audio (presumably in Hz; the unit is not stated in
  /// this file).
  final int sampleRate;

  /// Feature dimension.
  final int featureDim;

  @override
  String toString() =>
      'FeatureConfig(sampleRate: $sampleRate, featureDim: $featureDim)';
}
|
||||
|
||||
/// Model files of a streaming transducer.
///
/// Every field defaults to the empty string.
class OnlineTransducerModelConfig {
  /// Creates the configuration from the three model file paths.
  const OnlineTransducerModelConfig({
    this.encoder = '',
    this.decoder = '',
    this.joiner = '',
  });

  /// Encoder model file.
  final String encoder;

  /// Decoder model file.
  final String decoder;

  /// Joiner model file.
  final String joiner;

  @override
  String toString() {
    return 'OnlineTransducerModelConfig('
        'encoder: $encoder, '
        'decoder: $decoder, '
        'joiner: $joiner)';
  }
}
|
||||
|
||||
/// Model files of a streaming paraformer.
///
/// Both fields default to the empty string.
class OnlineParaformerModelConfig {
  /// Creates the configuration from the two model file paths.
  const OnlineParaformerModelConfig({this.encoder = '', this.decoder = ''});

  /// Encoder model file.
  final String encoder;

  /// Decoder model file.
  final String decoder;

  @override
  String toString() =>
      'OnlineParaformerModelConfig(encoder: $encoder, decoder: $decoder)';
}
|
||||
|
||||
/// Model file of a streaming zipformer2 CTC model.
class OnlineZipformer2CtcModelConfig {
  /// Creates the configuration; [model] defaults to the empty string.
  const OnlineZipformer2CtcModelConfig({this.model = ''});

  /// Model file.
  final String model;

  @override
  String toString() => 'OnlineZipformer2CtcModelConfig(model: $model)';
}
|
||||
|
||||
/// Model-level settings of an online recognizer.
///
/// Holds one sub-configuration per supported model family plus the settings
/// shared by all of them. Only [tokens] is required.
class OnlineModelConfig {
  /// Creates the configuration.
  ///
  /// Defaults: empty sub-configurations, one thread, the `cpu` provider,
  /// debug output enabled and an empty [modelType].
  const OnlineModelConfig({
    this.transducer = const OnlineTransducerModelConfig(),
    this.paraformer = const OnlineParaformerModelConfig(),
    this.zipformer2Ctc = const OnlineZipformer2CtcModelConfig(),
    required this.tokens,
    this.numThreads = 1,
    this.provider = 'cpu',
    this.debug = true,
    this.modelType = '',
  });

  /// Transducer model files.
  final OnlineTransducerModelConfig transducer;

  /// Paraformer model files.
  final OnlineParaformerModelConfig paraformer;

  /// Zipformer2 CTC model file.
  final OnlineZipformer2CtcModelConfig zipformer2Ctc;

  /// Tokens file.
  final String tokens;

  /// Number of threads.
  final int numThreads;

  /// Execution provider name.
  final String provider;

  /// Whether the debug flag is set.
  final bool debug;

  /// Model type hint, for example `zipformer`.
  final String modelType;

  @override
  String toString() {
    return 'OnlineModelConfig('
        'transducer: $transducer, '
        'paraformer: $paraformer, '
        'zipformer2Ctc: $zipformer2Ctc, '
        'tokens: $tokens, '
        'numThreads: $numThreads, '
        'provider: $provider, '
        'debug: $debug, '
        'modelType: $modelType)';
  }
}
|
||||
|
||||
/// Settings of the FST-based CTC decoder.
class OnlineCtcFstDecoderConfig {
  /// Creates the settings; [graph] defaults to the empty string and
  /// [maxActive] to 3000.
  const OnlineCtcFstDecoderConfig({this.graph = '', this.maxActive = 3000});

  /// Decoding graph file.
  final String graph;

  /// Value of the decoder's `maxActive` setting.
  final int maxActive;

  @override
  String toString() =>
      'OnlineCtcFstDecoderConfig(graph: $graph, maxActive: $maxActive)';
}
|
||||
|
||||
/// Complete configuration of an online recognizer.
///
/// Only [model] is required; every other setting has a default.
class OnlineRecognizerConfig {
  /// Creates the configuration.
  const OnlineRecognizerConfig({
    this.feat = const FeatureConfig(),
    required this.model,
    this.decodingMethod = 'greedy_search',
    this.maxActivePaths = 4,
    this.enableEndpoint = true,
    this.rule1MinTrailingSilence = 2.4,
    this.rule2MinTrailingSilence = 1.2,
    this.rule3MinUtteranceLength = 20,
    this.hotwordsFile = '',
    this.hotwordsScore = 1.5,
    this.ctcFstDecoderConfig = const OnlineCtcFstDecoderConfig(),
  });

  /// Feature-extraction settings.
  final FeatureConfig feat;

  /// Model files and model-level settings.
  final OnlineModelConfig model;

  /// Name of the decoding method; defaults to `greedy_search`.
  final String decodingMethod;

  /// Value of the `maxActivePaths` setting.
  final int maxActivePaths;

  /// Whether endpoint detection is enabled.
  final bool enableEndpoint;

  /// Endpoint rule 1: minimum trailing silence (presumably in seconds).
  final double rule1MinTrailingSilence;

  /// Endpoint rule 2: minimum trailing silence (presumably in seconds).
  final double rule2MinTrailingSilence;

  /// Endpoint rule 3: minimum utterance length (presumably in seconds).
  final double rule3MinUtteranceLength;

  /// Hotwords file; empty by default.
  final String hotwordsFile;

  /// Score applied to hotwords.
  final double hotwordsScore;

  /// Settings of the FST-based CTC decoder.
  final OnlineCtcFstDecoderConfig ctcFstDecoderConfig;

  @override
  String toString() {
    return 'OnlineRecognizerConfig('
        'feat: $feat, '
        'model: $model, '
        'decodingMethod: $decodingMethod, '
        'maxActivePaths: $maxActivePaths, '
        'enableEndpoint: $enableEndpoint, '
        'rule1MinTrailingSilence: $rule1MinTrailingSilence, '
        'rule2MinTrailingSilence: $rule2MinTrailingSilence, '
        'rule3MinUtteranceLength: $rule3MinUtteranceLength, '
        'hotwordsFile: $hotwordsFile, '
        'hotwordsScore: $hotwordsScore, '
        'ctcFstDecoderConfig: $ctcFstDecoderConfig)';
  }
}
|
||||
|
||||
/// Recognition result of one online stream.
class OnlineRecognizerResult {
  /// Creates a result from its three parts; all of them are required.
  OnlineRecognizerResult({
    required this.text,
    required this.tokens,
    required this.timestamps,
  });

  /// Recognized text.
  final String text;

  /// Recognized tokens.
  final List<String> tokens;

  /// Timestamps reported with the result.
  final List<double> timestamps;

  @override
  String toString() {
    return 'OnlineRecognizerResult('
        'text: $text, '
        'tokens: $tokens, '
        'timestamps: $timestamps)';
  }
}
|
||||
|
||||
/// Dart wrapper around a native sherpa-onnx online (streaming) recognizer.
///
/// Every method forwards to the function pointers stored in
/// [SherpaOnnxBindings]. When a binding has not been initialised the call is
/// skipped and a neutral value is returned (`nullptr`, `false`, or an empty
/// result).
class OnlineRecognizer {
  OnlineRecognizer._({required this.ptr, required this.config});

  /// Creates a native recognizer from [config].
  ///
  /// [ptr] of the returned instance is `nullptr` when the binding is missing
  /// or the native call returns a null handle.
  ///
  /// The user is responsible to call the OnlineRecognizer.free()
  /// method of the returned instance to avoid memory leak.
  factory OnlineRecognizer(OnlineRecognizerConfig config) {
    final c = calloc<SherpaOnnxOnlineRecognizerConfig>();
    c.ref.feat.sampleRate = config.feat.sampleRate;
    c.ref.feat.featureDim = config.feat.featureDim;

    // transducer
    c.ref.model.transducer.encoder =
        config.model.transducer.encoder.toNativeUtf8();
    c.ref.model.transducer.decoder =
        config.model.transducer.decoder.toNativeUtf8();
    c.ref.model.transducer.joiner =
        config.model.transducer.joiner.toNativeUtf8();

    // paraformer
    c.ref.model.paraformer.encoder =
        config.model.paraformer.encoder.toNativeUtf8();
    c.ref.model.paraformer.decoder =
        config.model.paraformer.decoder.toNativeUtf8();

    // zipformer2Ctc
    c.ref.model.zipformer2Ctc.model =
        config.model.zipformer2Ctc.model.toNativeUtf8();

    c.ref.model.tokens = config.model.tokens.toNativeUtf8();
    c.ref.model.numThreads = config.model.numThreads;
    c.ref.model.provider = config.model.provider.toNativeUtf8();
    c.ref.model.debug = config.model.debug ? 1 : 0;
    c.ref.model.modelType = config.model.modelType.toNativeUtf8();

    c.ref.decodingMethod = config.decodingMethod.toNativeUtf8();
    c.ref.maxActivePaths = config.maxActivePaths;
    c.ref.enableEndpoint = config.enableEndpoint ? 1 : 0;
    c.ref.rule1MinTrailingSilence = config.rule1MinTrailingSilence;
    c.ref.rule2MinTrailingSilence = config.rule2MinTrailingSilence;
    c.ref.rule3MinUtteranceLength = config.rule3MinUtteranceLength;
    c.ref.hotwordsFile = config.hotwordsFile.toNativeUtf8();
    c.ref.hotwordsScore = config.hotwordsScore;

    c.ref.ctcFstDecoderConfig.graph =
        config.ctcFstDecoderConfig.graph.toNativeUtf8();
    c.ref.ctcFstDecoderConfig.maxActive = config.ctcFstDecoderConfig.maxActive;

    final ptr = SherpaOnnxBindings.createOnlineRecognizer?.call(c) ?? nullptr;

    // The temporary native strings and the config struct are released right
    // after the call, in reverse order of allocation.
    calloc.free(c.ref.ctcFstDecoderConfig.graph);
    calloc.free(c.ref.hotwordsFile);
    calloc.free(c.ref.decodingMethod);
    calloc.free(c.ref.model.modelType);
    calloc.free(c.ref.model.provider);
    calloc.free(c.ref.model.tokens);
    calloc.free(c.ref.model.zipformer2Ctc.model);
    calloc.free(c.ref.model.paraformer.encoder);
    calloc.free(c.ref.model.paraformer.decoder);

    calloc.free(c.ref.model.transducer.encoder);
    calloc.free(c.ref.model.transducer.decoder);
    calloc.free(c.ref.model.transducer.joiner);
    calloc.free(c);

    return OnlineRecognizer._(ptr: ptr, config: config);
  }

  /// Destroys the native recognizer and clears [ptr].
  ///
  /// Calling it again, or on an instance whose handle is already `nullptr`,
  /// does nothing.
  void free() {
    if (ptr == nullptr) {
      return;
    }
    SherpaOnnxBindings.destroyOnlineRecognizer?.call(ptr);
    ptr = nullptr;
  }

  /// Creates a stream for this recognizer, optionally with per-stream
  /// [hotwords].
  ///
  /// The user has to invoke stream.free() on the returned instance
  /// to avoid memory leak
  OnlineStream createStream({String hotwords = ''}) {
    if (hotwords == '') {
      final p = SherpaOnnxBindings.createOnlineStream?.call(ptr) ?? nullptr;
      return OnlineStream(ptr: p);
    }

    final utf8 = hotwords.toNativeUtf8();
    final p =
        SherpaOnnxBindings.createOnlineStreamWithHotwords?.call(ptr, utf8) ??
            nullptr;
    calloc.free(utf8);
    return OnlineStream(ptr: p);
  }

  /// Returns whether the native side reports [stream] as ready for
  /// [decode].
  bool isReady(OnlineStream stream) {
    final ready =
        SherpaOnnxBindings.isOnlineStreamReady?.call(ptr, stream.ptr) ?? 0;

    return ready == 1;
  }

  /// Returns the current result of [stream].
  ///
  /// An empty result is returned when the binding is missing or the native
  /// call yields a null pointer. The native JSON buffer is always released,
  /// even when decoding it throws.
  OnlineRecognizerResult getResult(OnlineStream stream) {
    final json =
        SherpaOnnxBindings.getOnlineStreamResultAsJson?.call(ptr, stream.ptr) ??
            nullptr;
    // A Pointer is never Dart-null; the "no result" sentinel is nullptr.
    if (json == nullptr) {
      return OnlineRecognizerResult(text: '', tokens: [], timestamps: []);
    }

    try {
      final parsedJson = jsonDecode(json.toDartString());

      final rawTokens = parsedJson['tokens'] as List? ?? const [];
      final rawTimestamps = parsedJson['timestamps'] as List? ?? const [];

      return OnlineRecognizerResult(
        text: parsedJson['text'] as String? ?? '',
        tokens: List<String>.from(rawTokens),
        // JSON numbers without a fractional part decode as int, which
        // List<double>.from would reject.
        timestamps: [for (final t in rawTimestamps) (t as num).toDouble()],
      );
    } finally {
      SherpaOnnxBindings.destroyOnlineStreamResultJson?.call(json);
    }
  }

  /// Forwards [stream] to the native `Reset` function.
  void reset(OnlineStream stream) {
    SherpaOnnxBindings.reset?.call(ptr, stream.ptr);
  }

  /// Forwards [stream] to the native `DecodeOnlineStream` function.
  void decode(OnlineStream stream) {
    SherpaOnnxBindings.decodeOnlineStream?.call(ptr, stream.ptr);
  }

  /// Returns whether the native side reports an endpoint for [stream].
  bool isEndpoint(OnlineStream stream) {
    final yes = SherpaOnnxBindings.isEndpoint?.call(ptr, stream.ptr) ?? 0;

    return yes == 1;
  }

  /// Handle of the native recognizer; `nullptr` after [free].
  Pointer<SherpaOnnxOnlineRecognizer> ptr;

  /// Configuration this recognizer was created from.
  OnlineRecognizerConfig config;
}
|
||||
@@ -2,6 +2,82 @@
|
||||
import 'dart:ffi';
|
||||
import 'package:ffi/ffi.dart';
|
||||
|
||||
/// FFI struct with the feature-extraction settings.
///
/// The field order and the native type annotations define the memory layout,
/// so neither may be changed independently of the native library.
final class SherpaOnnxFeatureConfig extends Struct {
  /// Sample rate, stored as a 32-bit integer.
  @Int32()
  external int sampleRate;

  /// Feature dimension, stored as a 32-bit integer.
  @Int32()
  external int featureDim;
}
|
||||
|
||||
/// FFI struct with the three transducer model files.
///
/// Each field is a pointer to a native UTF-8 string; the field order defines
/// the memory layout.
final class SherpaOnnxOnlineTransducerModelConfig extends Struct {
  /// Encoder model file.
  external Pointer<Utf8> encoder;

  /// Decoder model file.
  external Pointer<Utf8> decoder;

  /// Joiner model file.
  external Pointer<Utf8> joiner;
}
|
||||
|
||||
/// FFI struct with the two paraformer model files.
///
/// Each field is a pointer to a native UTF-8 string; the field order defines
/// the memory layout.
final class SherpaOnnxOnlineParaformerModelConfig extends Struct {
  /// Encoder model file.
  external Pointer<Utf8> encoder;

  /// Decoder model file.
  external Pointer<Utf8> decoder;
}
|
||||
|
||||
/// FFI struct with the zipformer2 CTC model file.
final class SherpaOnnxOnlineZipformer2CtcModelConfig extends Struct {
  /// Model file, as a pointer to a native UTF-8 string.
  external Pointer<Utf8> model;
}
|
||||
|
||||
/// FFI struct with the model-level settings of an online recognizer.
///
/// The nested structs are stored inline. The field order and the native type
/// annotations define the memory layout and must stay as they are.
final class SherpaOnnxOnlineModelConfig extends Struct {
  /// Transducer model files.
  external SherpaOnnxOnlineTransducerModelConfig transducer;

  /// Paraformer model files.
  external SherpaOnnxOnlineParaformerModelConfig paraformer;

  /// Zipformer2 CTC model file.
  external SherpaOnnxOnlineZipformer2CtcModelConfig zipformer2Ctc;

  /// Tokens file.
  external Pointer<Utf8> tokens;

  /// Number of threads, stored as a 32-bit integer.
  @Int32()
  external int numThreads;

  /// Execution provider name.
  external Pointer<Utf8> provider;

  /// Debug flag, stored as a 32-bit integer.
  @Int32()
  external int debug;

  /// Model type hint.
  external Pointer<Utf8> modelType;
}
|
||||
|
||||
/// FFI struct with the settings of the FST-based CTC decoder.
///
/// The field order and the native type annotations define the memory layout.
final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct {
  /// Decoding graph file, as a pointer to a native UTF-8 string.
  external Pointer<Utf8> graph;

  /// `maxActive` setting, stored as a 32-bit integer.
  @Int32()
  external int maxActive;
}
|
||||
|
||||
/// FFI struct with the complete configuration of an online recognizer.
///
/// The nested structs are stored inline. The field order and the native type
/// annotations define the memory layout and must stay as they are.
final class SherpaOnnxOnlineRecognizerConfig extends Struct {
  /// Feature-extraction settings.
  external SherpaOnnxFeatureConfig feat;

  /// Model files and model-level settings.
  external SherpaOnnxOnlineModelConfig model;

  /// Name of the decoding method.
  external Pointer<Utf8> decodingMethod;

  /// `maxActivePaths` setting, stored as a 32-bit integer.
  @Int32()
  external int maxActivePaths;

  /// Endpoint-detection flag, stored as a 32-bit integer.
  @Int32()
  external int enableEndpoint;

  /// Endpoint rule 1 threshold, stored as a 32-bit float.
  @Float()
  external double rule1MinTrailingSilence;

  /// Endpoint rule 2 threshold, stored as a 32-bit float.
  @Float()
  external double rule2MinTrailingSilence;

  /// Endpoint rule 3 threshold, stored as a 32-bit float.
  @Float()
  external double rule3MinUtteranceLength;

  /// Hotwords file.
  external Pointer<Utf8> hotwordsFile;

  /// Hotwords score, stored as a 32-bit float.
  @Float()
  external double hotwordsScore;

  /// Settings of the FST-based CTC decoder.
  external SherpaOnnxOnlineCtcFstDecoderConfig ctcFstDecoderConfig;
}
|
||||
|
||||
final class SherpaOnnxSileroVadModelConfig extends Struct {
|
||||
external Pointer<Utf8> model;
|
||||
|
||||
@@ -71,10 +147,66 @@ final class SherpaOnnxVoiceActivityDetector extends Opaque {}
|
||||
|
||||
/// Opaque native type of an online stream; it has no Dart-visible fields and
/// is only handled through a `Pointer`.
final class SherpaOnnxOnlineStream extends Opaque {}
|
||||
|
||||
/// Opaque native type of an online recognizer; it has no Dart-visible fields
/// and is only handled through a `Pointer`.
final class SherpaOnnxOnlineRecognizer extends Opaque {}
|
||||
|
||||
/// Opaque native type of a speaker embedding extractor; it has no
/// Dart-visible fields.
final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {}
|
||||
|
||||
/// Opaque native type of a speaker embedding manager; it has no Dart-visible
/// fields.
final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {}
|
||||
|
||||
// Function signatures of the online-recognizer part of the native API.
//
// Each native function has a `...Native` typedef written with dart:ffi
// types and a Dart-side typedef. When a signature contains only pointers the
// two are identical, so the Dart-side typedef is an alias of the native one.

/// Native signature of `CreateOnlineRecognizer`.
typedef CreateOnlineRecognizerNative = Pointer<SherpaOnnxOnlineRecognizer>
    Function(Pointer<SherpaOnnxOnlineRecognizerConfig>);

/// Dart signature of `CreateOnlineRecognizer`.
typedef CreateOnlineRecognizer = CreateOnlineRecognizerNative;

/// Native signature of `DestroyOnlineRecognizer`.
typedef DestroyOnlineRecognizerNative = Void Function(
    Pointer<SherpaOnnxOnlineRecognizer>);

/// Dart signature of `DestroyOnlineRecognizer`.
typedef DestroyOnlineRecognizer = void Function(
    Pointer<SherpaOnnxOnlineRecognizer>);

/// Native signature of `CreateOnlineStream`.
typedef CreateOnlineStreamNative = Pointer<SherpaOnnxOnlineStream> Function(
    Pointer<SherpaOnnxOnlineRecognizer>);

/// Dart signature of `CreateOnlineStream`.
typedef CreateOnlineStream = CreateOnlineStreamNative;

/// Native signature of `CreateOnlineStreamWithHotwords`; the second argument
/// is the hotwords string.
typedef CreateOnlineStreamWithHotwordsNative = Pointer<SherpaOnnxOnlineStream>
    Function(Pointer<SherpaOnnxOnlineRecognizer>, Pointer<Utf8>);

/// Dart signature of `CreateOnlineStreamWithHotwords`.
typedef CreateOnlineStreamWithHotwords = CreateOnlineStreamWithHotwordsNative;

/// Native signature of `IsOnlineStreamReady`.
typedef IsOnlineStreamReadyNative = Int32 Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

/// Dart signature of `IsOnlineStreamReady`.
typedef IsOnlineStreamReady = int Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

/// Native signature of `DecodeOnlineStream`.
typedef DecodeOnlineStreamNative = Void Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

/// Dart signature of `DecodeOnlineStream`.
typedef DecodeOnlineStream = void Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

/// Native signature of `GetOnlineStreamResultAsJson`; returns a native UTF-8
/// string.
typedef GetOnlineStreamResultAsJsonNative = Pointer<Utf8> Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

/// Dart signature of `GetOnlineStreamResultAsJson`.
typedef GetOnlineStreamResultAsJson = GetOnlineStreamResultAsJsonNative;

/// Native signature of `Reset`.
typedef ResetNative = Void Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

/// Dart signature of `Reset`.
typedef Reset = void Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

/// Native signature of `IsEndpoint`.
typedef IsEndpointNative = Int32 Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

/// Dart signature of `IsEndpoint`.
typedef IsEndpoint = int Function(
    Pointer<SherpaOnnxOnlineRecognizer>, Pointer<SherpaOnnxOnlineStream>);

/// Native signature of `DestroyOnlineStreamResultJson`.
typedef DestroyOnlineStreamResultJsonNative = Void Function(Pointer<Utf8>);

/// Dart signature of `DestroyOnlineStreamResultJson`.
typedef DestroyOnlineStreamResultJson = void Function(Pointer<Utf8>);

/// Native signature of `SherpaOnnxCreateVoiceActivityDetector`.
// NOTE(review): the meaning of the float argument is not visible in this
// part of the file; confirm against the native header.
typedef SherpaOnnxCreateVoiceActivityDetectorNative
    = Pointer<SherpaOnnxVoiceActivityDetector> Function(
        Pointer<SherpaOnnxVadModelConfig>, Float);
|
||||
@@ -356,6 +488,26 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>);
|
||||
typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>);
|
||||
|
||||
class SherpaOnnxBindings {
|
||||
static CreateOnlineRecognizer? createOnlineRecognizer;
|
||||
|
||||
static DestroyOnlineRecognizer? destroyOnlineRecognizer;
|
||||
|
||||
static CreateOnlineStream? createOnlineStream;
|
||||
|
||||
static CreateOnlineStreamWithHotwords? createOnlineStreamWithHotwords;
|
||||
|
||||
static IsOnlineStreamReady? isOnlineStreamReady;
|
||||
|
||||
static DecodeOnlineStream? decodeOnlineStream;
|
||||
|
||||
static GetOnlineStreamResultAsJson? getOnlineStreamResultAsJson;
|
||||
|
||||
static Reset? reset;
|
||||
|
||||
static IsEndpoint? isEndpoint;
|
||||
|
||||
static DestroyOnlineStreamResultJson? destroyOnlineStreamResultJson;
|
||||
|
||||
static SherpaOnnxCreateVoiceActivityDetector? createVoiceActivityDetector;
|
||||
|
||||
static SherpaOnnxDestroyVoiceActivityDetector? destroyVoiceActivityDetector;
|
||||
@@ -459,6 +611,52 @@ class SherpaOnnxBindings {
|
||||
static SherpaOnnxFreeWave? freeWave;
|
||||
|
||||
static void init(DynamicLibrary dynamicLibrary) {
|
||||
createOnlineRecognizer ??= dynamicLibrary
|
||||
.lookup<NativeFunction<CreateOnlineRecognizerNative>>(
|
||||
'CreateOnlineRecognizer')
|
||||
.asFunction();
|
||||
|
||||
destroyOnlineRecognizer ??= dynamicLibrary
|
||||
.lookup<NativeFunction<DestroyOnlineRecognizerNative>>(
|
||||
'DestroyOnlineRecognizer')
|
||||
.asFunction();
|
||||
|
||||
createOnlineStream ??= dynamicLibrary
|
||||
.lookup<NativeFunction<CreateOnlineStreamNative>>('CreateOnlineStream')
|
||||
.asFunction();
|
||||
|
||||
createOnlineStreamWithHotwords ??= dynamicLibrary
|
||||
.lookup<NativeFunction<CreateOnlineStreamWithHotwordsNative>>(
|
||||
'CreateOnlineStreamWithHotwords')
|
||||
.asFunction();
|
||||
|
||||
isOnlineStreamReady ??= dynamicLibrary
|
||||
.lookup<NativeFunction<IsOnlineStreamReadyNative>>(
|
||||
'IsOnlineStreamReady')
|
||||
.asFunction();
|
||||
|
||||
decodeOnlineStream ??= dynamicLibrary
|
||||
.lookup<NativeFunction<DecodeOnlineStreamNative>>('DecodeOnlineStream')
|
||||
.asFunction();
|
||||
|
||||
getOnlineStreamResultAsJson ??= dynamicLibrary
|
||||
.lookup<NativeFunction<GetOnlineStreamResultAsJsonNative>>(
|
||||
'GetOnlineStreamResultAsJson')
|
||||
.asFunction();
|
||||
|
||||
reset ??= dynamicLibrary
|
||||
.lookup<NativeFunction<ResetNative>>('Reset')
|
||||
.asFunction();
|
||||
|
||||
isEndpoint ??= dynamicLibrary
|
||||
.lookup<NativeFunction<IsEndpointNative>>('IsEndpoint')
|
||||
.asFunction();
|
||||
|
||||
destroyOnlineStreamResultJson ??= dynamicLibrary
|
||||
.lookup<NativeFunction<DestroyOnlineStreamResultJsonNative>>(
|
||||
'DestroyOnlineStreamResultJson')
|
||||
.asFunction();
|
||||
|
||||
createVoiceActivityDetector ??= dynamicLibrary
|
||||
.lookup<NativeFunction<SherpaOnnxCreateVoiceActivityDetectorNative>>(
|
||||
'SherpaOnnxCreateVoiceActivityDetector')
|
||||
|
||||
Reference in New Issue
Block a user