Add Flush to VAD so that the last segment can be detected. (#1099)
This commit is contained in:
14
.github/workflows/dot-net.yaml
vendored
14
.github/workflows/dot-net.yaml
vendored
@@ -52,11 +52,6 @@ jobs:
|
|||||||
cmake --build . --target install --config Release
|
cmake --build . --target install --config Release
|
||||||
rm -rf install/pkgconfig
|
rm -rf install/pkgconfig
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: windows-${{ matrix.arch }}
|
|
||||||
path: ./build/install/lib/
|
|
||||||
|
|
||||||
- name: Create tar file
|
- name: Create tar file
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
@@ -72,6 +67,11 @@ jobs:
|
|||||||
ls -lh *.tar.bz2
|
ls -lh *.tar.bz2
|
||||||
mv *.tar.bz2 ../
|
mv *.tar.bz2 ../
|
||||||
|
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: windows-${{ matrix.arch }}
|
||||||
|
path: ./*.tar.bz2
|
||||||
|
|
||||||
# https://huggingface.co/docs/hub/spaces-github-actions
|
# https://huggingface.co/docs/hub/spaces-github-actions
|
||||||
- name: Publish to huggingface
|
- name: Publish to huggingface
|
||||||
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
|
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
|
||||||
@@ -88,7 +88,9 @@ jobs:
|
|||||||
|
|
||||||
rm -rf huggingface
|
rm -rf huggingface
|
||||||
export GIT_CLONE_PROTECTION_ACTIVE=false
|
export GIT_CLONE_PROTECTION_ACTIVE=false
|
||||||
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
|
export GIT_LFS_SKIP_SMUDGE=1
|
||||||
|
|
||||||
|
git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
|
||||||
|
|
||||||
cd huggingface
|
cd huggingface
|
||||||
mkdir -p windows-for-dotnet
|
mkdir -p windows-for-dotnet
|
||||||
|
|||||||
@@ -1,3 +1,8 @@
|
|||||||
|
## 1.10.12
|
||||||
|
|
||||||
|
* Add Flush to VAD so that the last speech segment can be detected. See also
|
||||||
|
https://github.com/k2-fsa/sherpa-onnx/discussions/1077#discussioncomment-9979740
|
||||||
|
|
||||||
## 1.10.11
|
## 1.10.11
|
||||||
|
|
||||||
* Support the iOS platform for iOS.
|
* Support the iOS platform for iOS.
|
||||||
|
|||||||
@@ -10,8 +10,8 @@ project(sherpa-onnx)
|
|||||||
# Remember to update
|
# Remember to update
|
||||||
# ./nodejs-addon-examples
|
# ./nodejs-addon-examples
|
||||||
# ./dart-api-examples/
|
# ./dart-api-examples/
|
||||||
# ./sherpa-onnx/flutter/CHANGELOG.md
|
# ./CHANGELOG.md
|
||||||
set(SHERPA_ONNX_VERSION "1.10.11")
|
set(SHERPA_ONNX_VERSION "1.10.12")
|
||||||
|
|
||||||
# Disable warning about
|
# Disable warning about
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -93,6 +93,28 @@ void main(List<String> arguments) async {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vad.flush();
|
||||||
|
while (!vad.isEmpty()) {
|
||||||
|
final stream = recognizer.createStream();
|
||||||
|
final segment = vad.front();
|
||||||
|
stream.acceptWaveform(
|
||||||
|
samples: segment.samples, sampleRate: waveData.sampleRate);
|
||||||
|
recognizer.decode(stream);
|
||||||
|
|
||||||
|
final result = recognizer.getResult(stream);
|
||||||
|
|
||||||
|
final startTime = segment.start * 1.0 / waveData.sampleRate;
|
||||||
|
final duration = segment.samples.length * 1.0 / waveData.sampleRate;
|
||||||
|
final stopTime = startTime + duration;
|
||||||
|
if (result.text != '') {
|
||||||
|
print(
|
||||||
|
'${startTime.toStringAsPrecision(4)} -- ${stopTime.toStringAsPrecision(4)}: ${result.text}');
|
||||||
|
}
|
||||||
|
|
||||||
|
stream.free();
|
||||||
|
vad.pop();
|
||||||
|
}
|
||||||
|
|
||||||
vad.free();
|
vad.free();
|
||||||
recognizer.free();
|
recognizer.free();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.11
|
sherpa_onnx: ^1.10.12
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.11
|
sherpa_onnx: ^1.10.12
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.11
|
sherpa_onnx: ^1.10.12
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -65,6 +65,12 @@ void main(List<String> arguments) async {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vad.flush();
|
||||||
|
while (!vad.isEmpty()) {
|
||||||
|
allSamples.add(vad.front().samples);
|
||||||
|
vad.pop();
|
||||||
|
}
|
||||||
|
|
||||||
vad.free();
|
vad.free();
|
||||||
|
|
||||||
final s = Float32List.fromList(allSamples.expand((x) => x).toList());
|
final s = Float32List.fromList(allSamples.expand((x) => x).toList());
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ environment:
|
|||||||
sdk: ^3.4.0
|
sdk: ^3.4.0
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.11
|
sherpa_onnx: ^1.10.12
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -57,6 +57,26 @@ class VadNonStreamingAsrParaformer
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vad.Flush();
|
||||||
|
|
||||||
|
while (!vad.IsEmpty()) {
|
||||||
|
SpeechSegment segment = vad.Front();
|
||||||
|
float startTime = segment.Start / (float)sampleRate;
|
||||||
|
float duration = segment.Samples.Length / (float)sampleRate;
|
||||||
|
|
||||||
|
OfflineStream stream = recognizer.CreateStream();
|
||||||
|
stream.AcceptWaveform(sampleRate, segment.Samples);
|
||||||
|
recognizer.Decode(stream);
|
||||||
|
String text = stream.Result.Text;
|
||||||
|
|
||||||
|
if (!String.IsNullOrEmpty(text)) {
|
||||||
|
Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
|
||||||
|
String.Format("{0:0.00}", startTime+duration), text);
|
||||||
|
}
|
||||||
|
|
||||||
|
vad.Pop();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ description: >
|
|||||||
|
|
||||||
publish_to: 'none'
|
publish_to: 'none'
|
||||||
|
|
||||||
version: 1.10.11
|
version: 1.10.12
|
||||||
|
|
||||||
topics:
|
topics:
|
||||||
- speech-recognition
|
- speech-recognition
|
||||||
@@ -30,7 +30,7 @@ dependencies:
|
|||||||
record: ^5.1.0
|
record: ^5.1.0
|
||||||
url_launcher: ^6.2.6
|
url_launcher: ^6.2.6
|
||||||
|
|
||||||
sherpa_onnx: ^1.10.11
|
sherpa_onnx: ^1.10.12
|
||||||
# sherpa_onnx:
|
# sherpa_onnx:
|
||||||
# path: ../../flutter/sherpa_onnx
|
# path: ../../flutter/sherpa_onnx
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ dependencies:
|
|||||||
cupertino_icons: ^1.0.6
|
cupertino_icons: ^1.0.6
|
||||||
path_provider: ^2.1.3
|
path_provider: ^2.1.3
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
sherpa_onnx: ^1.10.11
|
sherpa_onnx: ^1.10.12
|
||||||
url_launcher: ^6.2.6
|
url_launcher: ^6.2.6
|
||||||
audioplayers: ^5.0.0
|
audioplayers: ^5.0.0
|
||||||
|
|
||||||
|
|||||||
@@ -491,6 +491,12 @@ typedef SherpaOnnxVoiceActivityDetectorResetNative = Void Function(
|
|||||||
typedef SherpaOnnxVoiceActivityDetectorReset = void Function(
|
typedef SherpaOnnxVoiceActivityDetectorReset = void Function(
|
||||||
Pointer<SherpaOnnxVoiceActivityDetector>);
|
Pointer<SherpaOnnxVoiceActivityDetector>);
|
||||||
|
|
||||||
|
typedef SherpaOnnxVoiceActivityDetectorFlushNative = Void Function(
|
||||||
|
Pointer<SherpaOnnxVoiceActivityDetector>);
|
||||||
|
|
||||||
|
typedef SherpaOnnxVoiceActivityDetectorFlush = void Function(
|
||||||
|
Pointer<SherpaOnnxVoiceActivityDetector>);
|
||||||
|
|
||||||
typedef SherpaOnnxVoiceActivityDetectorFrontNative
|
typedef SherpaOnnxVoiceActivityDetectorFrontNative
|
||||||
= Pointer<SherpaOnnxSpeechSegment> Function(
|
= Pointer<SherpaOnnxSpeechSegment> Function(
|
||||||
Pointer<SherpaOnnxVoiceActivityDetector>);
|
Pointer<SherpaOnnxVoiceActivityDetector>);
|
||||||
@@ -779,6 +785,8 @@ class SherpaOnnxBindings {
|
|||||||
|
|
||||||
static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset;
|
static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset;
|
||||||
|
|
||||||
|
static SherpaOnnxVoiceActivityDetectorFlush? voiceActivityDetectorFlush;
|
||||||
|
|
||||||
static SherpaOnnxCreateCircularBuffer? createCircularBuffer;
|
static SherpaOnnxCreateCircularBuffer? createCircularBuffer;
|
||||||
|
|
||||||
static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer;
|
static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer;
|
||||||
@@ -1036,6 +1044,11 @@ class SherpaOnnxBindings {
|
|||||||
'SherpaOnnxVoiceActivityDetectorReset')
|
'SherpaOnnxVoiceActivityDetectorReset')
|
||||||
.asFunction();
|
.asFunction();
|
||||||
|
|
||||||
|
voiceActivityDetectorFlush ??= dynamicLibrary
|
||||||
|
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFlushNative>>(
|
||||||
|
'SherpaOnnxVoiceActivityDetectorFlush')
|
||||||
|
.asFunction();
|
||||||
|
|
||||||
createCircularBuffer ??= dynamicLibrary
|
createCircularBuffer ??= dynamicLibrary
|
||||||
.lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>(
|
.lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>(
|
||||||
'SherpaOnnxCreateCircularBuffer')
|
'SherpaOnnxCreateCircularBuffer')
|
||||||
|
|||||||
@@ -207,6 +207,10 @@ class VoiceActivityDetector {
|
|||||||
SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr);
|
SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void flush() {
|
||||||
|
SherpaOnnxBindings.voiceActivityDetectorFlush?.call(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
Pointer<SherpaOnnxVoiceActivityDetector> ptr;
|
Pointer<SherpaOnnxVoiceActivityDetector> ptr;
|
||||||
final VadModelConfig config;
|
final VadModelConfig config;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ topics:
|
|||||||
- voice-activity-detection
|
- voice-activity-detection
|
||||||
|
|
||||||
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
|
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
|
||||||
version: 1.10.11
|
version: 1.10.12
|
||||||
|
|
||||||
homepage: https://github.com/k2-fsa/sherpa-onnx
|
homepage: https://github.com/k2-fsa/sherpa-onnx
|
||||||
|
|
||||||
@@ -30,19 +30,19 @@ dependencies:
|
|||||||
flutter:
|
flutter:
|
||||||
sdk: flutter
|
sdk: flutter
|
||||||
|
|
||||||
sherpa_onnx_android: ^1.10.11
|
sherpa_onnx_android: ^1.10.12
|
||||||
# path: ../sherpa_onnx_android
|
# path: ../sherpa_onnx_android
|
||||||
|
|
||||||
sherpa_onnx_macos: ^1.10.11
|
sherpa_onnx_macos: ^1.10.12
|
||||||
# path: ../sherpa_onnx_macos
|
# path: ../sherpa_onnx_macos
|
||||||
|
|
||||||
sherpa_onnx_linux: ^1.10.11
|
sherpa_onnx_linux: ^1.10.12
|
||||||
# path: ../sherpa_onnx_linux
|
# path: ../sherpa_onnx_linux
|
||||||
#
|
#
|
||||||
sherpa_onnx_windows: ^1.10.11
|
sherpa_onnx_windows: ^1.10.12
|
||||||
# path: ../sherpa_onnx_windows
|
# path: ../sherpa_onnx_windows
|
||||||
|
|
||||||
sherpa_onnx_ios: ^1.10.11
|
sherpa_onnx_ios: ^1.10.12
|
||||||
# sherpa_onnx_ios:
|
# sherpa_onnx_ios:
|
||||||
# path: ../sherpa_onnx_ios
|
# path: ../sherpa_onnx_ios
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
|
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
|
||||||
Pod::Spec.new do |s|
|
Pod::Spec.new do |s|
|
||||||
s.name = 'sherpa_onnx_ios'
|
s.name = 'sherpa_onnx_ios'
|
||||||
s.version = '1.10.11'
|
s.version = '1.10.12'
|
||||||
s.summary = 'A new Flutter FFI plugin project.'
|
s.summary = 'A new Flutter FFI plugin project.'
|
||||||
s.description = <<-DESC
|
s.description = <<-DESC
|
||||||
A new Flutter FFI plugin project.
|
A new Flutter FFI plugin project.
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
#
|
#
|
||||||
Pod::Spec.new do |s|
|
Pod::Spec.new do |s|
|
||||||
s.name = 'sherpa_onnx_macos'
|
s.name = 'sherpa_onnx_macos'
|
||||||
s.version = '1.10.11'
|
s.version = '1.10.12'
|
||||||
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
|
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
|
||||||
s.description = <<-DESC
|
s.description = <<-DESC
|
||||||
sherpa-onnx Flutter FFI plugin project.
|
sherpa-onnx Flutter FFI plugin project.
|
||||||
|
|||||||
@@ -98,6 +98,25 @@ public class VadNonStreamingParaformer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vad.flush();
|
||||||
|
while (!vad.empty()) {
|
||||||
|
SpeechSegment segment = vad.front();
|
||||||
|
float startTime = segment.getStart() / 16000.0f;
|
||||||
|
float duration = segment.getSamples().length / 16000.0f;
|
||||||
|
|
||||||
|
OfflineStream stream = recognizer.createStream();
|
||||||
|
stream.acceptWaveform(segment.getSamples(), 16000);
|
||||||
|
recognizer.decode(stream);
|
||||||
|
String text = recognizer.getResult(stream).getText();
|
||||||
|
stream.release();
|
||||||
|
|
||||||
|
if (!text.isEmpty()) {
|
||||||
|
System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
|
||||||
|
}
|
||||||
|
|
||||||
|
vad.pop();
|
||||||
|
}
|
||||||
|
|
||||||
vad.release();
|
vad.release();
|
||||||
recognizer.release();
|
recognizer.release();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,6 +59,16 @@ public class VadRemoveSilence {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vad.flush();
|
||||||
|
while (!vad.empty()) {
|
||||||
|
|
||||||
|
// if you want to get the starting time of this segment, you can use
|
||||||
|
/* float startTime = vad.front().getStart() / 16000.0f; */
|
||||||
|
|
||||||
|
segments.add(vad.front().getSamples());
|
||||||
|
vad.pop();
|
||||||
|
}
|
||||||
|
|
||||||
// get total number of samples
|
// get total number of samples
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for (float[] s : segments) {
|
for (float[] s : segments) {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"sherpa-onnx-node": "^1.10.6"
|
"sherpa-onnx-node": "^1.10.12"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -105,6 +105,12 @@ def main():
|
|||||||
speech_samples.extend(vad.front.samples)
|
speech_samples.extend(vad.front.samples)
|
||||||
vad.pop()
|
vad.pop()
|
||||||
|
|
||||||
|
vad.flush()
|
||||||
|
|
||||||
|
while not vad.empty():
|
||||||
|
speech_samples.extend(vad.front.samples)
|
||||||
|
vad.pop()
|
||||||
|
|
||||||
speech_samples = np.array(speech_samples, dtype=np.float32)
|
speech_samples = np.array(speech_samples, dtype=np.float32)
|
||||||
|
|
||||||
sf.write(args.output, speech_samples, samplerate=sample_rate)
|
sf.write(args.output, speech_samples, samplerate=sample_rate)
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ topics:
|
|||||||
- voice-activity-detection
|
- voice-activity-detection
|
||||||
|
|
||||||
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx.podspec
|
# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx.podspec
|
||||||
version: 1.10.6
|
version: 1.10.12
|
||||||
|
|
||||||
homepage: https://github.com/k2-fsa/sherpa-onnx
|
homepage: https://github.com/k2-fsa/sherpa-onnx
|
||||||
|
|
||||||
|
|||||||
@@ -53,6 +53,11 @@ namespace SherpaOnnx
|
|||||||
SherpaOnnxVoiceActivityDetectorReset(_handle.Handle);
|
SherpaOnnxVoiceActivityDetectorReset(_handle.Handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void Flush()
|
||||||
|
{
|
||||||
|
SherpaOnnxVoiceActivityDetectorFlush(_handle.Handle);
|
||||||
|
}
|
||||||
|
|
||||||
public void Dispose()
|
public void Dispose()
|
||||||
{
|
{
|
||||||
Cleanup();
|
Cleanup();
|
||||||
@@ -106,5 +111,7 @@ namespace SherpaOnnx
|
|||||||
[DllImport(Dll.Filename)]
|
[DllImport(Dll.Filename)]
|
||||||
private static extern void SherpaOnnxVoiceActivityDetectorReset(IntPtr handle);
|
private static extern void SherpaOnnxVoiceActivityDetectorReset(IntPtr handle);
|
||||||
|
|
||||||
|
[DllImport(Dll.Filename)]
|
||||||
|
private static extern void SherpaOnnxVoiceActivityDetectorFlush(IntPtr handle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -856,6 +856,10 @@ func (vad *VoiceActivityDetector) Reset() {
|
|||||||
C.SherpaOnnxVoiceActivityDetectorReset(vad.impl)
|
C.SherpaOnnxVoiceActivityDetectorReset(vad.impl)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (vad *VoiceActivityDetector) Flush() {
|
||||||
|
C.SherpaOnnxVoiceActivityDetectorFlush(vad.impl)
|
||||||
|
}
|
||||||
|
|
||||||
// Spoken language identification
|
// Spoken language identification
|
||||||
|
|
||||||
type SpokenLanguageIdentificationWhisperConfig struct {
|
type SpokenLanguageIdentificationWhisperConfig struct {
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ class CircularBuffer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
reset() {
|
reset() {
|
||||||
return addon.circularBufferReset(this.handle);
|
addon.circularBufferReset(this.handle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,7 +79,11 @@ config = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
reset() {
|
reset() {
|
||||||
return addon.VoiceActivityDetectorResetWrapper(this.handle);
|
addon.VoiceActivityDetectorResetWrapper(this.handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
flush() {
|
||||||
|
addon.VoiceActivityDetectorFlushWrapper(this.handle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -590,6 +590,31 @@ static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) {
|
|||||||
SherpaOnnxVoiceActivityDetectorReset(vad);
|
SherpaOnnxVoiceActivityDetectorReset(vad);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void VoiceActivityDetectorFlushWrapper(const Napi::CallbackInfo &info) {
|
||||||
|
Napi::Env env = info.Env();
|
||||||
|
|
||||||
|
if (info.Length() != 1) {
|
||||||
|
std::ostringstream os;
|
||||||
|
os << "Expect only 1 argument. Given: " << info.Length();
|
||||||
|
|
||||||
|
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!info[0].IsExternal()) {
|
||||||
|
Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.")
|
||||||
|
.ThrowAsJavaScriptException();
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
SherpaOnnxVoiceActivityDetector *vad =
|
||||||
|
info[0].As<Napi::External<SherpaOnnxVoiceActivityDetector>>().Data();
|
||||||
|
|
||||||
|
SherpaOnnxVoiceActivityDetectorFlush(vad);
|
||||||
|
}
|
||||||
|
|
||||||
void InitVad(Napi::Env env, Napi::Object exports) {
|
void InitVad(Napi::Env env, Napi::Object exports) {
|
||||||
exports.Set(Napi::String::New(env, "createCircularBuffer"),
|
exports.Set(Napi::String::New(env, "createCircularBuffer"),
|
||||||
Napi::Function::New(env, CreateCircularBufferWrapper));
|
Napi::Function::New(env, CreateCircularBufferWrapper));
|
||||||
@@ -636,4 +661,7 @@ void InitVad(Napi::Env env, Napi::Object exports) {
|
|||||||
|
|
||||||
exports.Set(Napi::String::New(env, "voiceActivityDetectorReset"),
|
exports.Set(Napi::String::New(env, "voiceActivityDetectorReset"),
|
||||||
Napi::Function::New(env, VoiceActivityDetectorResetWrapper));
|
Napi::Function::New(env, VoiceActivityDetectorResetWrapper));
|
||||||
|
|
||||||
|
exports.Set(Napi::String::New(env, "voiceActivityDetectorFlush"),
|
||||||
|
Napi::Function::New(env, VoiceActivityDetectorFlushWrapper));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -876,6 +876,10 @@ void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) {
|
|||||||
p->impl->Reset();
|
p->impl->Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SherpaOnnxVoiceActivityDetectorFlush(SherpaOnnxVoiceActivityDetector *p) {
|
||||||
|
p->impl->Flush();
|
||||||
|
}
|
||||||
|
|
||||||
#if SHERPA_ONNX_ENABLE_TTS == 1
|
#if SHERPA_ONNX_ENABLE_TTS == 1
|
||||||
struct SherpaOnnxOfflineTts {
|
struct SherpaOnnxOfflineTts {
|
||||||
std::unique_ptr<sherpa_onnx::OfflineTts> impl;
|
std::unique_ptr<sherpa_onnx::OfflineTts> impl;
|
||||||
|
|||||||
@@ -815,6 +815,9 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment(
|
|||||||
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
|
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset(
|
||||||
SherpaOnnxVoiceActivityDetector *p);
|
SherpaOnnxVoiceActivityDetector *p);
|
||||||
|
|
||||||
|
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorFlush(
|
||||||
|
SherpaOnnxVoiceActivityDetector *p);
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// For offline Text-to-Speech (i.e., non-streaming TTS)
|
// For offline Text-to-Speech (i.e., non-streaming TTS)
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|||||||
@@ -118,6 +118,29 @@ class VoiceActivityDetector::Impl {
|
|||||||
start_ = -1;
|
start_ = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Flush() {
|
||||||
|
if (start_ == -1 || buffer_.Size() == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t end = buffer_.Tail() - model_->MinSilenceDurationSamples();
|
||||||
|
if (end <= start_) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<float> s = buffer_.Get(start_, end - start_);
|
||||||
|
|
||||||
|
SpeechSegment segment;
|
||||||
|
|
||||||
|
segment.start = start_;
|
||||||
|
segment.samples = std::move(s);
|
||||||
|
|
||||||
|
segments_.push(std::move(segment));
|
||||||
|
|
||||||
|
buffer_.Pop(end - buffer_.Head());
|
||||||
|
start_ = -1;
|
||||||
|
}
|
||||||
|
|
||||||
bool IsSpeechDetected() const { return start_ != -1; }
|
bool IsSpeechDetected() const { return start_ != -1; }
|
||||||
|
|
||||||
const VadModelConfig &GetConfig() const { return config_; }
|
const VadModelConfig &GetConfig() const { return config_; }
|
||||||
@@ -164,7 +187,9 @@ const SpeechSegment &VoiceActivityDetector::Front() const {
|
|||||||
return impl_->Front();
|
return impl_->Front();
|
||||||
}
|
}
|
||||||
|
|
||||||
void VoiceActivityDetector::Reset() { impl_->Reset(); }
|
void VoiceActivityDetector::Reset() const { impl_->Reset(); }
|
||||||
|
|
||||||
|
void VoiceActivityDetector::Flush() const { impl_->Flush(); }
|
||||||
|
|
||||||
bool VoiceActivityDetector::IsSpeechDetected() const {
|
bool VoiceActivityDetector::IsSpeechDetected() const {
|
||||||
return impl_->IsSpeechDetected();
|
return impl_->IsSpeechDetected();
|
||||||
|
|||||||
@@ -41,7 +41,11 @@ class VoiceActivityDetector {
|
|||||||
|
|
||||||
bool IsSpeechDetected() const;
|
bool IsSpeechDetected() const;
|
||||||
|
|
||||||
void Reset();
|
void Reset() const;
|
||||||
|
|
||||||
|
// At the end of the utterance, you can invoke this method so that
|
||||||
|
// the last speech segment can be detected.
|
||||||
|
void Flush() const;
|
||||||
|
|
||||||
const VadModelConfig &GetConfig() const;
|
const VadModelConfig &GetConfig() const;
|
||||||
|
|
||||||
|
|||||||
@@ -46,6 +46,10 @@ public class Vad {
|
|||||||
reset(this.ptr);
|
reset(this.ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void flush() {
|
||||||
|
flush(this.ptr);
|
||||||
|
}
|
||||||
|
|
||||||
public SpeechSegment front() {
|
public SpeechSegment front() {
|
||||||
Object[] arr = front(this.ptr);
|
Object[] arr = front(this.ptr);
|
||||||
int start = (int) arr[0];
|
int start = (int) arr[0];
|
||||||
@@ -75,4 +79,6 @@ public class Vad {
|
|||||||
private native boolean isSpeechDetected(long ptr);
|
private native boolean isSpeechDetected(long ptr);
|
||||||
|
|
||||||
private native void reset(long ptr);
|
private native void reset(long ptr);
|
||||||
|
|
||||||
|
private native void flush(long ptr);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -173,3 +173,11 @@ JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(JNIEnv * /*env*/,
|
|||||||
auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
|
auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
|
||||||
model->Reset();
|
model->Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SHERPA_ONNX_EXTERN_C
|
||||||
|
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_flush(JNIEnv * /*env*/,
|
||||||
|
jobject /*obj*/,
|
||||||
|
jlong ptr) {
|
||||||
|
auto model = reinterpret_cast<sherpa_onnx::VoiceActivityDetector *>(ptr);
|
||||||
|
model->Flush();
|
||||||
|
}
|
||||||
|
|||||||
@@ -52,6 +52,8 @@ class Vad(
|
|||||||
|
|
||||||
fun reset() = reset(ptr)
|
fun reset() = reset(ptr)
|
||||||
|
|
||||||
|
fun flush() = flush(ptr)
|
||||||
|
|
||||||
private external fun delete(ptr: Long)
|
private external fun delete(ptr: Long)
|
||||||
|
|
||||||
private external fun newFromAsset(
|
private external fun newFromAsset(
|
||||||
@@ -70,6 +72,7 @@ class Vad(
|
|||||||
private external fun front(ptr: Long): Array<Any>
|
private external fun front(ptr: Long): Array<Any>
|
||||||
private external fun isSpeechDetected(ptr: Long): Boolean
|
private external fun isSpeechDetected(ptr: Long): Boolean
|
||||||
private external fun reset(ptr: Long)
|
private external fun reset(ptr: Long)
|
||||||
|
private external fun flush(ptr: Long)
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
init {
|
init {
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ void PybindVoiceActivityDetector(py::module *m) {
|
|||||||
.def("is_speech_detected", &PyClass::IsSpeechDetected,
|
.def("is_speech_detected", &PyClass::IsSpeechDetected,
|
||||||
py::call_guard<py::gil_scoped_release>())
|
py::call_guard<py::gil_scoped_release>())
|
||||||
.def("reset", &PyClass::Reset, py::call_guard<py::gil_scoped_release>())
|
.def("reset", &PyClass::Reset, py::call_guard<py::gil_scoped_release>())
|
||||||
|
.def("flush", &PyClass::Flush, py::call_guard<py::gil_scoped_release>())
|
||||||
.def_property_readonly("front", &PyClass::Front);
|
.def_property_readonly("front", &PyClass::Front);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -633,6 +633,10 @@ class SherpaOnnxVoiceActivityDetectorWrapper {
|
|||||||
func reset() {
|
func reset() {
|
||||||
SherpaOnnxVoiceActivityDetectorReset(vad)
|
SherpaOnnxVoiceActivityDetectorReset(vad)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func flush() {
|
||||||
|
SherpaOnnxVoiceActivityDetectorFlush(vad)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// offline tts
|
// offline tts
|
||||||
|
|||||||
Reference in New Issue
Block a user