Add iOS support (#65)

This commit is contained in:
Fangjun Kuang
2023-02-25 21:56:25 +08:00
committed by GitHub
parent fb1e24bebb
commit 475caf22f9
34 changed files with 2669 additions and 23 deletions

View File

@@ -0,0 +1,36 @@
//
// AppDelegate.swift
// SherpaOnnx
//
// Created by fangjun on 2023/2/25.
//
import UIKit
@main
class AppDelegate: UIResponder, UIApplicationDelegate {
func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
// Override point for customization after application launch.
return true
}
// MARK: UISceneSession Lifecycle
func application(_ application: UIApplication, configurationForConnecting connectingSceneSession: UISceneSession, options: UIScene.ConnectionOptions) -> UISceneConfiguration {
// Called when a new scene session is being created.
// Use this method to select a configuration to create the new scene with.
return UISceneConfiguration(name: "Default Configuration", sessionRole: connectingSceneSession.role)
}
func application(_ application: UIApplication, didDiscardSceneSessions sceneSessions: Set<UISceneSession>) {
// Called when the user discards a scene session.
// If any sessions were discarded while the application was not running, this will be called shortly after application:didFinishLaunchingWithOptions.
// Use this method to release any resources that were specific to the discarded scenes, as they will not return.
}
}

View File

@@ -0,0 +1,11 @@
{
"colors" : [
{
"idiom" : "universal"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,14 @@
{
"images" : [
{
"filename" : "k2-1024x1024.png",
"idiom" : "universal",
"platform" : "ios",
"size" : "1024x1024"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 411 KiB

View File

@@ -0,0 +1,6 @@
{
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
<dependencies>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies>
<scenes>
<!--View Controller-->
<scene sceneID="EHf-IW-A2E">
<objects>
<viewController id="01J-lp-oVM" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<color key="backgroundColor" xcode11CocoaTouchSystemColor="systemBackgroundColor" cocoaTouchSystemColor="whiteColor"/>
<viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
</view>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="53" y="375"/>
</scene>
</scenes>
</document>

View File

@@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="21507" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<device id="retina6_12" orientation="portrait" appearance="light"/>
<dependencies>
<deployment identifier="iOS"/>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="21505"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="System colors in document resources" minToolsVersion="11.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies>
<scenes>
<!--View Controller-->
<scene sceneID="tne-QT-ifu">
<objects>
<viewController id="BYZ-38-t0r" customClass="ViewController" customModule="SherpaNcnn" customModuleProvider="target" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
<rect key="frame" x="0.0" y="0.0" width="393" height="852"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="system" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="7q8-Y3-WbJ">
<rect key="frame" x="166" y="773" width="61.333333333333343" height="35"/>
<state key="normal" title="Button"/>
<buttonConfiguration key="configuration" style="plain" title="Start"/>
<connections>
<action selector="onRecordBtnClick:" destination="BYZ-38-t0r" eventType="touchUpInside" id="rS6-DT-XWm"/>
</connections>
</button>
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Label" lineBreakMode="tailTruncation" numberOfLines="0" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="jfS-7J-m9C">
<rect key="frame" x="8" y="67" width="377" height="20.333333333333329"/>
<fontDescription key="fontDescription" type="system" pointSize="17"/>
<nil key="textColor"/>
<nil key="highlightedColor"/>
</label>
</subviews>
<viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
<color key="backgroundColor" systemColor="systemBackgroundColor"/>
<constraints>
<constraint firstItem="jfS-7J-m9C" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="8" id="HX3-rI-U9E"/>
<constraint firstItem="jfS-7J-m9C" firstAttribute="top" secondItem="6Tk-OE-BBY" secondAttribute="top" constant="8" id="NEv-PD-DHj"/>
<constraint firstItem="7q8-Y3-WbJ" firstAttribute="centerX" secondItem="8bC-Xf-vdC" secondAttribute="centerX" id="Nha-gf-R2b"/>
<constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="jfS-7J-m9C" secondAttribute="trailing" constant="8" id="P2f-hG-O2e"/>
<constraint firstAttribute="bottomMargin" secondItem="7q8-Y3-WbJ" secondAttribute="bottom" constant="10" id="Pgb-4G-ySa"/>
</constraints>
</view>
<connections>
<outlet property="recordBtn" destination="7q8-Y3-WbJ" id="mFd-cu-zjn"/>
<outlet property="resultLabel" destination="jfS-7J-m9C" id="xQU-ID-m5Q"/>
</connections>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="32.824427480916029" y="3.5211267605633805"/>
</scene>
</scenes>
<resources>
<systemColor name="systemBackgroundColor">
<color white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</systemColor>
</resources>
</document>

View File

@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>NSMicrophoneUsageDescription</key>
<string>Need microphone access for Next-gen Kaldi to work</string>
<key>UIApplicationSceneManifest</key>
<dict>
<key>UIApplicationSupportsMultipleScenes</key>
<false/>
<key>UISceneConfigurations</key>
<dict>
<key>UIWindowSceneSessionRoleApplication</key>
<array>
<dict>
<key>UISceneConfigurationName</key>
<string>Default Configuration</string>
<key>UISceneDelegateClassName</key>
<string>$(PRODUCT_MODULE_NAME).SceneDelegate</string>
<key>UISceneStoryboardFile</key>
<string>Main</string>
</dict>
</array>
</dict>
</dict>
</dict>
</plist>

View File

@@ -0,0 +1,35 @@
import Foundation
func getResource(_ forResource: String, _ ofType: String) -> String {
let path = Bundle.main.path(forResource: forResource, ofType: ofType)
precondition(
path != nil,
"\(forResource).\(ofType) does not exist!\n" + "Remember to change \n"
+ " Build Phases -> Copy Bundle Resources\n" + "to add it!"
)
return path!
}
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download pre-trained models
/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineTransducerModelConfig {
let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
let tokens = getResource("tokens", "txt")
return sherpaOnnxOnlineTransducerModelConfig(
encoder: encoder,
decoder: decoder,
joiner: joiner,
tokens: tokens,
numThreads: 2
)
}
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to add more models if you need

View File

@@ -0,0 +1,52 @@
//
// SceneDelegate.swift
// SherpaOnnx
//
// Created by fangjun on 2023/2/25.
//
import UIKit
class SceneDelegate: UIResponder, UIWindowSceneDelegate {
var window: UIWindow?
func scene(_ scene: UIScene, willConnectTo session: UISceneSession, options connectionOptions: UIScene.ConnectionOptions) {
// Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`.
// If using a storyboard, the `window` property will automatically be initialized and attached to the scene.
// This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead).
guard let _ = (scene as? UIWindowScene) else { return }
}
func sceneDidDisconnect(_ scene: UIScene) {
// Called as the scene is being released by the system.
// This occurs shortly after the scene enters the background, or when its session is discarded.
// Release any resources associated with this scene that can be re-created the next time the scene connects.
// The scene may re-connect later, as its session was not necessarily discarded (see `application:didDiscardSceneSessions` instead).
}
func sceneDidBecomeActive(_ scene: UIScene) {
// Called when the scene has moved from an inactive state to an active state.
// Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive.
}
func sceneWillResignActive(_ scene: UIScene) {
// Called when the scene will move from an active state to an inactive state.
// This may occur due to temporary interruptions (ex. an incoming phone call).
}
func sceneWillEnterForeground(_ scene: UIScene) {
// Called as the scene transitions from the background to the foreground.
// Use this method to undo the changes made on entering the background.
}
func sceneDidEnterBackground(_ scene: UIScene) {
// Called as the scene transitions from the foreground to the background.
// Use this method to save data, release shared resources, and store enough scene-specific state information
// to restore the scene back to its current state.
}
}

View File

@@ -0,0 +1,197 @@
//
// ViewController.swift
// SherpaOnnx
//
// Created by fangjun on 2023/1/28.
//
import AVFoundation
import UIKit
extension AudioBuffer {
func array() -> [Float] {
return Array(UnsafeBufferPointer(self))
}
}
extension AVAudioPCMBuffer {
func array() -> [Float] {
return self.audioBufferList.pointee.mBuffers.array()
}
}
class ViewController: UIViewController {
@IBOutlet weak var resultLabel: UILabel!
@IBOutlet weak var recordBtn: UIButton!
var audioEngine: AVAudioEngine? = nil
var recognizer: SherpaOnnxRecognizer! = nil
/// It saves the decoded results so far
var sentences: [String] = [] {
didSet {
updateLabel()
}
}
var lastSentence: String = ""
let maxSentence: Int = 20
var results: String {
if sentences.isEmpty && lastSentence.isEmpty {
return ""
}
if sentences.isEmpty {
return "0: \(lastSentence.lowercased())"
}
let start = max(sentences.count - maxSentence, 0)
if lastSentence.isEmpty {
return sentences.enumerated().map { (index, s) in "\(index): \(s.lowercased())" }[start...]
.joined(separator: "\n")
} else {
return sentences.enumerated().map { (index, s) in "\(index): \(s.lowercased())" }[start...]
.joined(separator: "\n") + "\n\(sentences.count): \(lastSentence.lowercased())"
}
}
func updateLabel() {
DispatchQueue.main.async {
self.resultLabel.text = self.results
}
}
override func viewDidLoad() {
super.viewDidLoad()
// Do any additional setup after loading the view.
resultLabel.text = "ASR with Next-gen Kaldi\n\nPress the Start button to run!"
recordBtn.setTitle("Start", for: .normal)
initRecognizer()
initRecorder()
}
@IBAction func onRecordBtnClick(_ sender: UIButton) {
if recordBtn.currentTitle == "Start" {
startRecorder()
recordBtn.setTitle("Stop", for: .normal)
} else {
stopRecorder()
recordBtn.setTitle("Start", for: .normal)
}
}
func initRecognizer() {
// Please select one model that is best suitable for you.
//
// You can also modify Model.swift to add new pre-trained models from
// https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
let modelConfig = getBilingualStreamZhEnZipformer20230220()
let featConfig = sherpaOnnxFeatureConfig(
sampleRate: 16000,
featureDim: 80)
var config = sherpaOnnxOnlineRecognizerConfig(
featConfig: featConfig,
modelConfig: modelConfig,
enableEndpoint: true,
rule1MinTrailingSilence: 2.4,
rule2MinTrailingSilence: 0.8,
rule3MinUtteranceLength: 30
)
recognizer = SherpaOnnxRecognizer(config: &config)
}
func initRecorder() {
print("init recorder")
audioEngine = AVAudioEngine()
let inputNode = self.audioEngine?.inputNode
let bus = 0
let inputFormat = inputNode?.outputFormat(forBus: bus)
let outputFormat = AVAudioFormat(
commonFormat: .pcmFormatFloat32,
sampleRate: 16000, channels: 1,
interleaved: false)!
let converter = AVAudioConverter(from: inputFormat!, to: outputFormat)!
inputNode!.installTap(
onBus: bus,
bufferSize: 1024,
format: inputFormat
) {
(buffer: AVAudioPCMBuffer, when: AVAudioTime) in
var newBufferAvailable = true
let inputCallback: AVAudioConverterInputBlock = {
inNumPackets, outStatus in
if newBufferAvailable {
outStatus.pointee = .haveData
newBufferAvailable = false
return buffer
} else {
outStatus.pointee = .noDataNow
return nil
}
}
let convertedBuffer = AVAudioPCMBuffer(
pcmFormat: outputFormat,
frameCapacity:
AVAudioFrameCount(outputFormat.sampleRate)
* buffer.frameLength
/ AVAudioFrameCount(buffer.format.sampleRate))!
var error: NSError?
let _ = converter.convert(
to: convertedBuffer,
error: &error, withInputFrom: inputCallback)
// TODO(fangjun): Handle status != haveData
let array = convertedBuffer.array()
if !array.isEmpty {
self.recognizer.acceptWaveform(samples: array)
while (self.recognizer.isReady()){
self.recognizer.decode()
}
let isEndpoint = self.recognizer.isEndpoint()
let text = self.recognizer.getResult().text
if !text.isEmpty && self.lastSentence != text {
self.lastSentence = text
self.updateLabel()
print(text)
}
if isEndpoint {
if !text.isEmpty {
let tmp = self.lastSentence
self.lastSentence = ""
self.sentences.append(tmp)
}
self.recognizer.reset()
}
}
}
}
func startRecorder() {
lastSentence = ""
sentences = []
do {
try self.audioEngine?.start()
} catch let error as NSError {
print("Got an error starting audioEngine: \(error.domain), \(error)")
}
print("started")
}
func stopRecorder() {
audioEngine?.stop()
print("stopped")
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 411 KiB