Handle invalid utf8 sequence from Whisper for Dart API. (#1106)
Fixes #1104
This commit is contained in:
@@ -1,3 +1,7 @@
|
|||||||
|
## 1.10.14 (to-be-released)
|
||||||
|
|
||||||
|
* Fix handling of invalid UTF-8 sequences returned by Whisper in the Dart API.
|
||||||
|
|
||||||
## 1.10.13
|
## 1.10.13
|
||||||
|
|
||||||
* Update onnxruntime from 1.17.1 to 1.18.0
|
* Update onnxruntime from 1.17.1 to 1.18.0
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart';
|
|||||||
import './feature_config.dart';
|
import './feature_config.dart';
|
||||||
import './offline_stream.dart';
|
import './offline_stream.dart';
|
||||||
import './sherpa_onnx_bindings.dart';
|
import './sherpa_onnx_bindings.dart';
|
||||||
|
import './utils.dart';
|
||||||
|
|
||||||
class OfflineTransducerModelConfig {
|
class OfflineTransducerModelConfig {
|
||||||
const OfflineTransducerModelConfig({
|
const OfflineTransducerModelConfig({
|
||||||
@@ -287,7 +288,7 @@ class OfflineRecognizer {
|
|||||||
return OfflineRecognizerResult(text: '', tokens: [], timestamps: []);
|
return OfflineRecognizerResult(text: '', tokens: [], timestamps: []);
|
||||||
}
|
}
|
||||||
|
|
||||||
final parsedJson = jsonDecode(json.toDartString());
|
final parsedJson = jsonDecode(toDartString(json));
|
||||||
|
|
||||||
SherpaOnnxBindings.destroyOfflineStreamResultJson?.call(json);
|
SherpaOnnxBindings.destroyOfflineStreamResultJson?.call(json);
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart';
|
|||||||
import './feature_config.dart';
|
import './feature_config.dart';
|
||||||
import './online_stream.dart';
|
import './online_stream.dart';
|
||||||
import './sherpa_onnx_bindings.dart';
|
import './sherpa_onnx_bindings.dart';
|
||||||
|
import './utils.dart';
|
||||||
|
|
||||||
class OnlineTransducerModelConfig {
|
class OnlineTransducerModelConfig {
|
||||||
const OnlineTransducerModelConfig({
|
const OnlineTransducerModelConfig({
|
||||||
@@ -268,7 +269,7 @@ class OnlineRecognizer {
|
|||||||
return OnlineRecognizerResult(text: '', tokens: [], timestamps: []);
|
return OnlineRecognizerResult(text: '', tokens: [], timestamps: []);
|
||||||
}
|
}
|
||||||
|
|
||||||
final parsedJson = jsonDecode(json.toDartString());
|
final parsedJson = jsonDecode(toDartString(json));
|
||||||
|
|
||||||
SherpaOnnxBindings.destroyOnlineStreamResultJson?.call(json);
|
SherpaOnnxBindings.destroyOnlineStreamResultJson?.call(json);
|
||||||
|
|
||||||
|
|||||||
25
flutter/sherpa_onnx/lib/src/utils.dart
Normal file
25
flutter/sherpa_onnx/lib/src/utils.dart
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
import 'dart:convert';
|
||||||
|
import 'dart:ffi';
|
||||||
|
import 'dart:typed_data';
|
||||||
|
|
||||||
|
import 'package:ffi/ffi.dart';
|
||||||
|
|
||||||
|
/// Returns the number of bytes that precede the first zero byte at
/// [codeUnits].
///
/// Copied from
/// https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L52
int _strLen(Pointer<Uint8> codeUnits) {
  var offset = 0;
  while (true) {
    // Stop at the terminator; it is not counted in the result.
    if (codeUnits[offset] == 0) {
      return offset;
    }
    offset++;
  }
}
|
||||||
|
|
||||||
|
/// Decodes the zero-terminated UTF-8 bytes at [s] into a Dart [String].
///
/// Modified from
/// https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L41
///
/// Decoding is done with `allowMalformed: true`, so an invalid UTF-8
/// sequence does not raise a [FormatException]; per the `dart:convert`
/// documentation it is replaced with U+FFFD instead.
String toDartString(Pointer<Utf8> s) {
  final bytes = s.cast<Uint8>();
  // View the native memory up to (not including) the terminator.
  final view = bytes.asTypedList(_strLen(bytes));
  return utf8.decode(view, allowMalformed: true);
}
|
||||||
Reference in New Issue
Block a user