Add Swift API for spoken language identification. (#696)
This commit is contained in:
3
.github/scripts/test-swift.sh
vendored
3
.github/scripts/test-swift.sh
vendored
@@ -7,6 +7,9 @@ echo "pwd: $PWD"
|
|||||||
cd swift-api-examples
|
cd swift-api-examples
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
|
./run-spoken-language-identification.sh
|
||||||
|
rm -rf sherpa-onnx-whisper*
|
||||||
|
|
||||||
mkdir -p /Users/fangjun/Desktop
|
mkdir -p /Users/fangjun/Desktop
|
||||||
pushd /Users/fangjun/Desktop
|
pushd /Users/fangjun/Desktop
|
||||||
curl -SL -O https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
|
curl -SL -O https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
|
||||||
|
|||||||
1
swift-api-examples/.gitignore
vendored
1
swift-api-examples/.gitignore
vendored
@@ -1,6 +1,7 @@
|
|||||||
decode-file
|
decode-file
|
||||||
decode-file-non-streaming
|
decode-file-non-streaming
|
||||||
generate-subtitles
|
generate-subtitles
|
||||||
|
spoken-language-identification
|
||||||
tts
|
tts
|
||||||
vits-vctk
|
vits-vctk
|
||||||
sherpa-onnx-paraformer-zh-2023-09-14
|
sherpa-onnx-paraformer-zh-2023-09-14
|
||||||
|
|||||||
@@ -713,3 +713,86 @@ class SherpaOnnxOfflineTtsWrapper {
|
|||||||
return SherpaOnnxGeneratedAudioWrapper(audio: audio)
|
return SherpaOnnxGeneratedAudioWrapper(audio: audio)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// spoken language identification
|
||||||
|
|
||||||
|
/// Builds the C-level Whisper configuration used for spoken language
/// identification.
///
/// - Parameters:
///   - encoder: Encoder model path/identifier, converted with `toCPointer`.
///   - decoder: Decoder model path/identifier, converted with `toCPointer`.
///   - tailPaddings: Forwarded to the C struct's `tail_paddings` field as an
///     `Int32`. Defaults to -1 (NOTE(review): presumably "use the library
///     default" — confirm against the C API).
/// - Returns: A populated `SherpaOnnxSpokenLanguageIdentificationWhisperConfig`.
func sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
  encoder: String,
  decoder: String,
  tailPaddings: Int = -1
) -> SherpaOnnxSpokenLanguageIdentificationWhisperConfig {
  let encoderPointer = toCPointer(encoder)
  let decoderPointer = toCPointer(decoder)
  let padding = Int32(tailPaddings)

  return SherpaOnnxSpokenLanguageIdentificationWhisperConfig(
    encoder: encoderPointer,
    decoder: decoderPointer,
    tail_paddings: padding
  )
}
|
||||||
|
|
||||||
|
/// Builds the top-level C configuration for spoken language identification.
///
/// - Parameters:
///   - whisper: Whisper model configuration, stored as-is.
///   - numThreads: Forwarded to `num_threads` as an `Int32`. Defaults to 1.
///   - debug: Forwarded to `debug` as an `Int32`. Defaults to 0.
///   - provider: Provider name, converted with `toCPointer`. Defaults to "cpu".
/// - Returns: A populated `SherpaOnnxSpokenLanguageIdentificationConfig`.
func sherpaOnnxSpokenLanguageIdentificationConfig(
  whisper: SherpaOnnxSpokenLanguageIdentificationWhisperConfig,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxSpokenLanguageIdentificationConfig {
  let threadCount = Int32(numThreads)
  let debugFlag = Int32(debug)
  let providerPointer = toCPointer(provider)

  return SherpaOnnxSpokenLanguageIdentificationConfig(
    whisper: whisper,
    num_threads: threadCount,
    debug: debugFlag,
    provider: providerPointer
  )
}
|
||||||
|
|
||||||
|
/// Owns a language-identification result produced by the C API and exposes
/// it to Swift.
///
/// The wrapped pointer, when non-nil, is released with
/// `SherpaOnnxDestroySpokenLanguageIdentificationResult` on deallocation.
class SherpaOnnxSpokenLanguageIdentificationResultWrapper {
  /// A pointer to the underlying counterpart in C. May be `nil`.
  let result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!

  /// Return the detected language.
  /// en for English
  /// zh for Chinese
  /// es for Spanish
  /// de for German
  /// etc.
  ///
  /// Returns an empty string when there is no underlying result, or when the
  /// result carries no language string, rather than dereferencing a null
  /// pointer.
  var lang: String {
    guard let result, let cLang = result.pointee.lang else {
      return ""
    }
    return String(cString: cLang)
  }

  /// Wraps `result`; this object becomes responsible for destroying it.
  init(result: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>!) {
    self.result = result
  }

  deinit {
    // Only hand a real pointer back to the C destroy function.
    if let result {
      SherpaOnnxDestroySpokenLanguageIdentificationResult(result)
    }
  }
}
|
||||||
|
|
||||||
|
/// Swift wrapper around the C spoken-language-identification object.
///
/// The underlying object is created in `init` and, when non-nil, destroyed in
/// `deinit`.
class SherpaOnnxSpokenLanguageIdentificationWrapper {
  /// A pointer to the underlying counterpart in C
  let slid: OpaquePointer!

  /// Creates the underlying C object by passing `config` to
  /// `SherpaOnnxCreateSpokenLanguageIdentification`.
  init(
    config: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationConfig>!
  ) {
    slid = SherpaOnnxCreateSpokenLanguageIdentification(config)
  }

  deinit {
    guard let slid else { return }
    SherpaOnnxDestroySpokenLanguageIdentification(slid)
  }

  /// Runs language identification on a buffer of audio samples.
  ///
  /// A temporary offline stream is created, fed with `samples`, used for the
  /// computation, and destroyed before this method returns.
  ///
  /// - Parameters:
  ///   - samples: Audio samples to analyse.
  ///   - sampleRate: Sample rate passed along with the samples. Defaults to 16000.
  /// - Returns: A wrapper owning the result pointer returned by the C API.
  func decode(samples: [Float], sampleRate: Int = 16000)
    -> SherpaOnnxSpokenLanguageIdentificationResultWrapper
  {
    let offlineStream: OpaquePointer! =
      SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid)

    let rate = Int32(sampleRate)
    let sampleCount = Int32(samples.count)
    AcceptWaveformOffline(offlineStream, rate, samples, sampleCount)

    let identified: UnsafePointer<SherpaOnnxSpokenLanguageIdentificationResult>? =
      SherpaOnnxSpokenLanguageIdentificationCompute(slid, offlineStream)

    // The stream is only needed for the computation above.
    DestroyOfflineStream(offlineStream)

    return SherpaOnnxSpokenLanguageIdentificationResultWrapper(result: identified)
  }
}
|
||||||
|
|||||||
36
swift-api-examples/run-spoken-language-identification.sh
Executable file
36
swift-api-examples/run-spoken-language-identification.sh
Executable file
@@ -0,0 +1,36 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds (when the binary is missing) and runs the Swift spoken language
# identification example. All paths are relative to the current directory.

set -ex

# The example links against the libraries installed under ../build-swift-macos.
if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Fetch and unpack the test model only when its directory is absent.
if [ ! -d ./sherpa-onnx-whisper-tiny ]; then
  echo "Download a pre-trained model for testing."

  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

# Reuse an existing binary; otherwise compile and strip it.
if [ -e ./spoken-language-identification ]; then
  echo "./spoken-language-identification exists - skip building"
else
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./spoken-language-identification.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o spoken-language-identification

  strip spoken-language-identification
fi

# Put the installed libraries on the dynamic loader path before running.
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./spoken-language-identification
|
||||||
57
swift-api-examples/spoken-language-identification.swift
Normal file
57
swift-api-examples/spoken-language-identification.swift
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
import AVFoundation
|
||||||
|
|
||||||
|
extension AudioBuffer {
  /// Copies the buffer's contents into a new `[Float]`.
  func array() -> [Float] {
    let samples = UnsafeBufferPointer<Float>(self)
    return Array(samples)
  }
}
|
||||||
|
|
||||||
|
extension AVAudioPCMBuffer {
  /// Returns the samples held in `audioBufferList`'s `mBuffers` entry as a
  /// `[Float]`, via `AudioBuffer.array()`.
  func array() -> [Float] {
    let buffer = audioBufferList.pointee.mBuffers
    return buffer.array()
  }
}
|
||||||
|
|
||||||
|
/// Runs the spoken language identification example end to end.
///
/// Loads the whisper tiny int8 encoder/decoder from
/// `./sherpa-onnx-whisper-tiny`, reads `test_wavs/0.wav` from the same
/// directory, and prints the detected language code.
///
/// The program terminates if the wave file cannot be opened or read, or if
/// the audio buffer cannot be allocated.
func run() {
  let encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx"
  let decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx"

  let whisperConfig = sherpaOnnxSpokenLanguageIdentificationWhisperConfig(
    encoder: encoder,
    decoder: decoder
  )

  var config = sherpaOnnxSpokenLanguageIdentificationConfig(
    whisper: whisperConfig,
    numThreads: 1,
    debug: 1,
    provider: "cpu"
  )
  let filePath = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"

  let slid = SherpaOnnxSpokenLanguageIdentificationWrapper(config: &config)

  let fileURL = URL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL)

  // The example expects 16 kHz, mono, float32 input.
  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)

  // Unwrap the buffer once instead of force-unwrapping it at each use.
  guard
    let audioFileBuffer = AVAudioPCMBuffer(
      pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  else {
    fatalError("Failed to allocate an audio buffer for \(filePath)")
  }

  try! audioFile.read(into: audioFileBuffer)
  let samples: [Float] = audioFileBuffer.array()
  let result = slid.decode(samples: samples)

  print("\nDetected language is:\n\(result.lang)")
}
|
||||||
|
|
||||||
|
/// Program entry point; `main()` simply invokes `run()`.
@main
struct App {
  static func main() { run() }
}
|
||||||
Reference in New Issue
Block a user