diff --git a/.github/workflows/pascal.yaml b/.github/workflows/pascal.yaml
index 2ed21318..ba9a7316 100644
--- a/.github/workflows/pascal.yaml
+++ b/.github/workflows/pascal.yaml
@@ -127,6 +127,21 @@ jobs:
             cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts
           fi
 
+      - name: Run Pascal test (Speaker diarization)
+        shell: bash
+        run: |
+          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
+
+          cd ./pascal-api-examples
+          pushd speaker-diarization
+
+          ./run.sh
+          rm -rfv *.onnx *.wav sherpa-onnx-*
+          ls -lh
+          echo "---"
+
+          popd
+
       - name: Run Pascal test (TTS)
         shell: bash
         run: |
diff --git a/pascal-api-examples/README.md b/pascal-api-examples/README.md
index 5475d825..5e709cd7 100644
--- a/pascal-api-examples/README.md
+++ b/pascal-api-examples/README.md
@@ -9,6 +9,7 @@ https://k2-fsa.github.io/sherpa/onnx/pascal-api/index.html
 |Directory| Description|
 |---------|------------|
 |[read-wav](./read-wav)|It shows how to read a wave file.|
+|[speaker-diarization](./speaker-diarization)|It shows how to use the Pascal API for speaker diarization.|
 |[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.|
 |[non-streaming-asr](./non-streaming-asr)| It shows how to use non-streaming models for speech recognition.|
 |[vad](./vad)| It shows how to use the voice activity detection API.|
diff --git a/pascal-api-examples/speaker-diarization/main.pas b/pascal-api-examples/speaker-diarization/main.pas
new file mode 100644
index 00000000..35d915d0
--- /dev/null
+++ b/pascal-api-examples/speaker-diarization/main.pas
@@ -0,0 +1,110 @@
+{ Copyright (c)  2024  Xiaomi Corporation }
+{
+This file shows how to use the Pascal API from sherpa-onnx
+for speaker diarization.
+
+Usage:
+
+Step 1: Download a speaker segmentation model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available models. The following is an example
+
+  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+Step 2: Download a speaker embedding extractor model
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+for a list of available models. The following is an example
+
+  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+Step 3: Download test wave files
+
+Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
+for a list of available test wave files. The following is an example
+
+  wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+Step 4: Run it
+}
+
+program main;
+
+{$mode delphi}
+
+uses
+  sherpa_onnx,
+  ctypes,
+  SysUtils;
+
+{ Progress callback handed to Sd.Process below: prints the percentage of
+  chunks processed so far and always returns 0. }
+function ProgressCallback(
+  NumProcessedChunks: cint32;
+  NumTotalChunks: cint32): cint32; cdecl;
+var
+  Progress: Single;
+begin
+  { Avoid a division by zero should the total ever be reported as 0. }
+  if NumTotalChunks > 0 then
+    Progress := 100.0 * NumProcessedChunks / NumTotalChunks
+  else
+    Progress := 0.0;
+  WriteLn(Format('Progress: %.3f%%', [Progress]));
+
+  Result := 0;
+end;
+
+var
+  Wave: TSherpaOnnxWave;
+  Config: TSherpaOnnxOfflineSpeakerDiarizationConfig;
+  Sd: TSherpaOnnxOfflineSpeakerDiarization;
+  Segments: TSherpaOnnxOfflineSpeakerDiarizationSegmentArray;
+  I: Integer;
+begin
+  Wave := SherpaOnnxReadWave('./0-four-speakers-zh.wav');
+
+  Config.Segmentation.Pyannote.Model := './sherpa-onnx-pyannote-segmentation-3-0/model.onnx';
+  Config.Embedding.Model := './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';
+
+  {
+    Since we know that there are 4 speakers in ./0-four-speakers-zh.wav, we
+    set NumClusters to 4 here.
+    If you don't have such information, please set NumClusters to -1.
+    In that case, you have to set Config.Clustering.Threshold.
+    A larger threshold leads to fewer clusters, i.e., fewer speakers.
+  }
+  Config.Clustering.NumClusters := 4;
+  Config.Segmentation.Debug := True;
+  Config.Embedding.Debug := True;
+
+  Sd := TSherpaOnnxOfflineSpeakerDiarization.Create(Config);
+  if Sd.GetHandle = nil then
+  begin
+    WriteLn('Please check your config');
+    Halt(1); { non-zero exit status so that callers such as run.sh see the failure }
+  end;
+
+  if Sd.GetSampleRate <> Wave.SampleRate then
+  begin
+    WriteLn(Format('Expected sample rate: %d, given: %d', [Sd.GetSampleRate, Wave.SampleRate]));
+    Halt(1); { non-zero exit status so that callers such as run.sh see the failure }
+  end;
+
+  {
+  // If you don't want to use a callback
+  Segments := Sd.Process(Wave.Samples);
+  }
+  Segments := Sd.Process(Wave.Samples, @ProgressCallback);
+
+  for I := Low(Segments) to High(Segments) do
+  begin
+    WriteLn(Format('%.3f -- %.3f speaker_%d',
+      [Segments[I].Start, Segments[I].Stop, Segments[I].Speaker]));
+  end;
+
+  FreeAndNil(Sd);
+end.
diff --git a/pascal-api-examples/speaker-diarization/run.sh b/pascal-api-examples/speaker-diarization/run.sh
new file mode 100755
index 00000000..866dc63c
--- /dev/null
+++ b/pascal-api-examples/speaker-diarization/run.sh
@@ -0,0 +1,52 @@
+#!/usr/bin/env bash
+
+set -ex
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)
+
+# Run from the script's own directory so the relative paths below resolve.
+cd "$SCRIPT_DIR"
+
+echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
+
+if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
+  mkdir -p ../../build
+  pushd ../../build
+  cmake \
+    -DCMAKE_INSTALL_PREFIX=./install \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    ..
+
+  cmake --build . --target install --config Release
+  popd
+fi
+
+fpc \
+  -dSHERPA_ONNX_USE_SHARED_LIBS \
+  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
+  -Fl$SHERPA_ONNX_DIR/build/install/lib \
+  ./main.pas
+
+export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
+export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
+
+if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+  tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+  rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+fi
+
+if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+fi
+
+if [ ! -f ./0-four-speakers-zh.wav ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+fi
+
+./main
diff --git a/sherpa-onnx/pascal-api/sherpa_onnx.pas b/sherpa-onnx/pascal-api/sherpa_onnx.pas
index 7f05793e..1b24dec8 100644
--- a/sherpa-onnx/pascal-api/sherpa_onnx.pas
+++ b/sherpa-onnx/pascal-api/sherpa_onnx.pas
@@ -102,7 +102,7 @@ type
 
     function Generate(Text: AnsiString; SpeakerId: Integer;
       Speed: Single;
-      Callback:PSherpaOnnxGeneratedAudioCallbackWithArg;
+      Callback: PSherpaOnnxGeneratedAudioCallbackWithArg;
       Arg: Pointer
       ): TSherpaOnnxGeneratedAudio; overload;
 
@@ -398,6 +398,78 @@ type
     property GetHandle: Pointer Read Handle;
   end;
 
+
+  TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig = record
+    Model: AnsiString;
+    function ToString: AnsiString;
+  end;
+
+  TSherpaOnnxOfflineSpeakerSegmentationModelConfig = record
+    Pyannote: TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig;
+    NumThreads: Integer;
+    Debug: Boolean;
+    Provider: AnsiString;
+    function ToString: AnsiString;
+    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerSegmentationModelConfig);
+  end;
+
+  TSherpaOnnxFastClusteringConfig = record
+    NumClusters: Integer; { -1 when the number of speakers is unknown; Threshold is used then }
+    Threshold: Single; { a larger value yields fewer clusters, i.e., fewer speakers }
+    function ToString: AnsiString;
+    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFastClusteringConfig);
+  end;
+
+  TSherpaOnnxSpeakerEmbeddingExtractorConfig = record
+    Model: AnsiString;
+    NumThreads: Integer;
+    Debug: Boolean;
+    Provider: AnsiString;
+    function ToString: AnsiString;
+    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSpeakerEmbeddingExtractorConfig);
+  end;
+
+  TSherpaOnnxOfflineSpeakerDiarizationConfig = record
+    Segmentation: TSherpaOnnxOfflineSpeakerSegmentationModelConfig;
+    Embedding: TSherpaOnnxSpeakerEmbeddingExtractorConfig;
+    Clustering: TSherpaOnnxFastClusteringConfig;
+    MinDurationOn: Single;
+    MinDurationOff: Single;
+    function ToString: AnsiString;
+    class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerDiarizationConfig);
+  end;
+
+  TSherpaOnnxOfflineSpeakerDiarizationSegment = record
+    Start: Single;
+    Stop: Single;
+    Speaker: Integer;
+    function ToString: AnsiString;
+  end;
+
+  TSherpaOnnxOfflineSpeakerDiarizationSegmentArray = array of TSherpaOnnxOfflineSpeakerDiarizationSegment;
+
+  PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg = ^TSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg;
+
+  TSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg = function(
+    NumProcessedChunks: cint32;
+    NumTotalChunks: cint32): cint32; cdecl;
+
+  TSherpaOnnxOfflineSpeakerDiarization = class
+  private
+    Handle: Pointer;
+    SampleRate: Integer;
+    _Config: TSherpaOnnxOfflineSpeakerDiarizationConfig;
+  public
+    constructor Create(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig);
+    destructor Destroy; override;
+    procedure SetConfig(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig);
+    function Process(Samples: array of Single): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray; overload;
+    function Process(Samples: array of Single; Callback: PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray; overload;
+    property GetHandle: Pointer Read Handle;
+    property GetSampleRate: Integer Read SampleRate;
+  end;
+
+
   { It supports reading a single channel wave with 16-bit encoded samples.
     Samples are normalized to the range [-1, 1].
   }
@@ -656,6 +728,47 @@ type
 
   PSherpaOnnxResampleOut = ^SherpaOnnxResampleOut;
 
+  SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig = record
+    Model: PAnsiChar;
+  end;
+
+  SherpaOnnxOfflineSpeakerSegmentationModelConfig = record
+    Pyannote: SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig;
+    NumThreads: cint32;
+    Debug: cint32;
+    Provider: PAnsiChar;
+  end;
+
+  SherpaOnnxFastClusteringConfig = record
+    NumClusters: cint32;
+    Threshold: cfloat;
+  end;
+
+  SherpaOnnxSpeakerEmbeddingExtractorConfig = record
+    Model: PAnsiChar;
+    NumThreads: cint32;
+    Debug: cint32;
+    Provider: PAnsiChar;
+  end;
+
+  SherpaOnnxOfflineSpeakerDiarizationConfig = record
+    Segmentation: SherpaOnnxOfflineSpeakerSegmentationModelConfig;
+    Embedding: SherpaOnnxSpeakerEmbeddingExtractorConfig;
+    Clustering: SherpaOnnxFastClusteringConfig;
+    MinDurationOn: cfloat;
+    MinDurationOff: cfloat;
+  end;
+
+  SherpaOnnxOfflineSpeakerDiarizationSegment = record
+    Start: cfloat;
+    Stop: cfloat;
+    Speaker: cint32;
+  end;
+
+  PSherpaOnnxOfflineSpeakerDiarizationSegment = ^SherpaOnnxOfflineSpeakerDiarizationSegment;
+
+  PSherpaOnnxOfflineSpeakerDiarizationConfig = ^SherpaOnnxOfflineSpeakerDiarizationConfig;
+
 function SherpaOnnxCreateLinearResampler(SampleRateInHz: cint32;
   SampleRateOutHz: cint32;
   FilterCutoffHz: cfloat;
@@ -677,6 +790,37 @@ procedure SherpaOnnxLinearResamplerResampleFree(P: PSherpaOnnxResampleOut); cdec
 procedure SherpaOnnxLinearResamplerReset(P: Pointer); cdecl;
   external SherpaOnnxLibName;
 
+function SherpaOnnxCreateOfflineSpeakerDiarization(Config: PSherpaOnnxOfflineSpeakerDiarizationConfig): Pointer; cdecl;
+  external SherpaOnnxLibName;
+
+procedure SherpaOnnxDestroyOfflineSpeakerDiarization(P: Pointer); cdecl;
+  external SherpaOnnxLibName;
+
+function SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(P: Pointer): cint32; cdecl;
+  external SherpaOnnxLibName;
+
+procedure SherpaOnnxOfflineSpeakerDiarizationSetConfig(P: Pointer; Config: PSherpaOnnxOfflineSpeakerDiarizationConfig); cdecl;
+  external SherpaOnnxLibName;
+
+function SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(P: Pointer): cint32; cdecl;
+  external SherpaOnnxLibName;
+
+function SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(P: Pointer): PSherpaOnnxOfflineSpeakerDiarizationSegment; cdecl;
+  external SherpaOnnxLibName;
+
+procedure SherpaOnnxOfflineSpeakerDiarizationDestroySegment(P: Pointer); cdecl;
+  external SherpaOnnxLibName;
+
+function SherpaOnnxOfflineSpeakerDiarizationProcess(P: Pointer; Samples: pcfloat; N: cint32): Pointer; cdecl;
+  external SherpaOnnxLibName;
+
+function SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(P: Pointer;
+  Samples: pcfloat; N: cint32; Callback: PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg): Pointer; cdecl;
+  external SherpaOnnxLibName;
+
+procedure SherpaOnnxOfflineSpeakerDiarizationDestroyResult(P: Pointer); cdecl;
+  external SherpaOnnxLibName;
+
 function SherpaOnnxCreateOfflineTts(Config: PSherpaOnnxOfflineTtsConfig): Pointer; cdecl;
   external SherpaOnnxLibName;
 
@@ -1773,7 +1917,7 @@ end;
 
 function TSherpaOnnxOfflineTts.Generate(Text: AnsiString; SpeakerId: Integer;
   Speed: Single;
-  Callback:PSherpaOnnxGeneratedAudioCallbackWithArg;
+  Callback: PSherpaOnnxGeneratedAudioCallbackWithArg;
   Arg: Pointer
   ): TSherpaOnnxGeneratedAudio;
 var
@@ -1847,4 +1991,209 @@ begin
   SherpaOnnxLinearResamplerReset(Self.Handle);
 end;
 
+function TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(' +
+    'Model := %s)',[Self.Model]);
+end;
+
+function TSherpaOnnxOfflineSpeakerSegmentationModelConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxOfflineSpeakerSegmentationModelConfig(' +
+    'Pyannote := %s, ' +
+    'NumThreads := %d, ' +
+    'Debug := %s, ' +
+    'Provider := %s)',
+    [Self.Pyannote.ToString, Self.NumThreads,
+     Self.Debug.ToString, Self.Provider]);
+end;
+
+class operator TSherpaOnnxOfflineSpeakerSegmentationModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerSegmentationModelConfig);
+begin
+  Dest.NumThreads := 1;
+  Dest.Debug := False;
+  Dest.Provider := 'cpu';
+end;
+
+function TSherpaOnnxFastClusteringConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxFastClusteringConfig(' +
+    'NumClusters := %d, Threshold := %.3f)',
+    [Self.NumClusters, Self.Threshold]);
+end;
+
+class operator TSherpaOnnxFastClusteringConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFastClusteringConfig);
+begin
+  Dest.NumClusters := -1;
+  Dest.Threshold := 0.5;
+end;
+
+function TSherpaOnnxSpeakerEmbeddingExtractorConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxSpeakerEmbeddingExtractorConfig(' +
+    'Model := %s, '+
+    'NumThreads := %d, '+
+    'Debug := %s, '+
+    'Provider := %s)',
+    [Self.Model, Self.NumThreads, Self.Debug.ToString, Self.Provider]);
+end;
+
+class operator TSherpaOnnxSpeakerEmbeddingExtractorConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSpeakerEmbeddingExtractorConfig);
+begin
+  Dest.NumThreads := 1;
+  Dest.Debug := False;
+  Dest.Provider := 'cpu';
+end;
+
+function TSherpaOnnxOfflineSpeakerDiarizationConfig.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxOfflineSpeakerDiarizationConfig(' +
+    'Segmentation := %s, '+
+    'Embedding := %s, '+
+    'Clustering := %s, '+
+    'MinDurationOn := %.3f, '+
+    'MinDurationOff := %.3f)',
+    [Self.Segmentation.ToString, Self.Embedding.ToString,
+     Self.Clustering.ToString, Self.MinDurationOn, Self.MinDurationOff]);
+end;
+
+class operator TSherpaOnnxOfflineSpeakerDiarizationConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerDiarizationConfig);
+begin
+  Dest.MinDurationOn := 0.2;
+  Dest.MinDurationOff := 0.5;
+end;
+
+function TSherpaOnnxOfflineSpeakerDiarizationSegment.ToString: AnsiString;
+begin
+  Result := Format('TSherpaOnnxOfflineSpeakerDiarizationSegment(' +
+    'Start := %.3f, '+
+    'Stop := %.3f, '+
+    'Speaker := %d)',
+    [Self.Start, Self.Stop, Self.Speaker]);
+end;
+
+{ Creates the native diarization object from Config. If the native call
+  returns nil, Handle is nil and SampleRate is 0; check GetHandle after Create. }
+constructor TSherpaOnnxOfflineSpeakerDiarization.Create(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig);
+var
+  C: SherpaOnnxOfflineSpeakerDiarizationConfig;
+begin
+  C := Default(SherpaOnnxOfflineSpeakerDiarizationConfig);
+  C.Segmentation.Pyannote.Model := PAnsiChar(Config.Segmentation.Pyannote.Model);
+  C.Segmentation.NumThreads := Config.Segmentation.NumThreads;
+  C.Segmentation.Debug := Ord(Config.Segmentation.Debug);
+  C.Segmentation.Provider := PAnsiChar(Config.Segmentation.Provider);
+
+  C.Embedding.Model := PAnsiChar(Config.Embedding.Model);
+  C.Embedding.NumThreads := Config.Embedding.NumThreads;
+  C.Embedding.Debug := Ord(Config.Embedding.Debug);
+  C.Embedding.Provider := PAnsiChar(Config.Embedding.Provider);
+
+  C.Clustering.NumClusters := Config.Clustering.NumClusters;
+  C.Clustering.Threshold := Config.Clustering.Threshold;
+
+  C.MinDurationOn := Config.MinDurationOn;
+  C.MinDurationOff := Config.MinDurationOff;
+
+  Self.Handle := SherpaOnnxCreateOfflineSpeakerDiarization(@C);
+  Self._Config := Config;
+  Self.SampleRate := 0;
+
+  if Self.Handle <> nil then
+  begin
+    Self.SampleRate := SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(Self.Handle);
+  end;
+end;
+
+{ Releases the native object (if any) and then runs the inherited destructor. }
+destructor TSherpaOnnxOfflineSpeakerDiarization.Destroy;
+begin
+  if Self.Handle <> nil then
+  begin
+    SherpaOnnxDestroyOfflineSpeakerDiarization(Self.Handle);
+    Self.Handle := nil;
+  end;
+
+  inherited Destroy;
+end;
+
+{ Applies new clustering settings to an existing object. Only
+  Config.Clustering is forwarded; every other field of Config is ignored. }
+procedure TSherpaOnnxOfflineSpeakerDiarization.SetConfig(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig);
+var
+  C: SherpaOnnxOfflineSpeakerDiarizationConfig;
+begin
+  if Self.Handle = nil then
+    Exit;
+
+  C := Default(SherpaOnnxOfflineSpeakerDiarizationConfig);
+
+  C.Clustering.NumClusters := Config.Clustering.NumClusters;
+  C.Clustering.Threshold := Config.Clustering.Threshold;
+
+  SherpaOnnxOfflineSpeakerDiarizationSetConfig(Self.Handle, @C);
+
+  { Keep the cached copy in step with what was just applied. }
+  Self._Config.Clustering := Config.Clustering;
+end;
+
+{ Copies the segments of native result R (in the order returned by
+  ...ResultSortByStartTime) into a Pascal array, then frees R. Nil R gives nil. }
+function SherpaOnnxCollectDiarizationSegments(R: Pointer): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray;
+var
+  NumSegments: Integer;
+  I: Integer;
+  Segments: PSherpaOnnxOfflineSpeakerDiarizationSegment;
+begin
+  Result := nil;
+  if R = nil then
+    Exit;
+
+  NumSegments := SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(R);
+  Segments := SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(R);
+
+  { Only index into Segments when a buffer was actually returned. }
+  if (Segments <> nil) and (NumSegments > 0) then
+  begin
+    SetLength(Result, NumSegments);
+    for I := Low(Result) to High(Result) do
+    begin
+      Result[I].Start := Segments[I].Start;
+      Result[I].Stop := Segments[I].Stop;
+      Result[I].Speaker := Segments[I].Speaker;
+    end;
+  end;
+
+  if Segments <> nil then
+    SherpaOnnxOfflineSpeakerDiarizationDestroySegment(Segments);
+  SherpaOnnxOfflineSpeakerDiarizationDestroyResult(R);
+end;
+
+{ Runs diarization on Samples and returns the resulting segments.
+  Returns nil when Handle is nil, Samples is empty or the native call
+  returns no result. }
+function TSherpaOnnxOfflineSpeakerDiarization.Process(Samples: array of Single): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray;
+begin
+  Result := nil;
+  if (Self.Handle = nil) or (Length(Samples) = 0) then
+    Exit;
+
+  Result := SherpaOnnxCollectDiarizationSegments(
+    SherpaOnnxOfflineSpeakerDiarizationProcess(Self.Handle, pcfloat(Samples), Length(Samples)));
+end;
+
+{ Same as Process(Samples), but additionally passes Callback (a progress
+  callback type) to the native ...ProcessWithCallbackNoArg function. }
+function TSherpaOnnxOfflineSpeakerDiarization.Process(Samples: array of Single;
+  Callback: PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray;
+begin
+  Result := nil;
+  if (Self.Handle = nil) or (Length(Samples) = 0) then
+    Exit;
+
+  Result := SherpaOnnxCollectDiarizationSegments(
+    SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(Self.Handle,
+      pcfloat(Samples), Length(Samples), Callback));
+end;
+
 end.