Handle invalid utf8 sequence from Whisper for Dart API. (#1106)

Fixes #1104
This commit is contained in:
Fangjun Kuang
2024-07-10 21:48:23 +08:00
committed by GitHub
parent 08c758520f
commit 5a2603ff5c
4 changed files with 33 additions and 2 deletions

View File

@@ -0,0 +1,25 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:convert';
import 'dart:ffi';
import 'dart:typed_data';
import 'package:ffi/ffi.dart';
/// Returns the number of bytes stored at [codeUnits] before the first zero
/// byte.
///
/// The terminating zero byte itself is not counted. The scan has no upper
/// bound: it keeps reading until it meets a zero byte, so [codeUnits] must
/// point at NUL-terminated memory.
///
/// Adapted from
/// https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L52
int _strLen(Pointer<Uint8> codeUnits) {
  var offset = 0;
  while (true) {
    // The first zero byte marks the end; its offset is the length.
    if (codeUnits[offset] == 0) {
      return offset;
    }
    offset++;
  }
}
/// Converts the NUL-terminated byte string at [s] into a Dart [String].
///
/// Modified from
/// https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L41
/// so that malformed input is tolerated: decoding runs with
/// `allowMalformed: true`, which replaces each invalid UTF-8 sequence with
/// the Unicode replacement character (U+FFFD) instead of raising a
/// [FormatException].
///
/// Returns an empty string when [s] is [nullptr].
String toDartString(Pointer<Utf8> s) {
  // Reading through a null pointer would crash the process instead of
  // raising a Dart error, so handle it before touching native memory.
  if (s == nullptr) {
    return '';
  }

  final codeUnits = s.cast<Uint8>();
  final length = _strLen(codeUnits);

  // asTypedList exposes the native bytes as a list view (terminator
  // excluded) that utf8.decode can consume directly.
  return utf8.decode(codeUnits.asTypedList(length), allowMalformed: true);
}