Add Android APK for Silero VAD (#335)

This commit is contained in:
Fangjun Kuang
2023-09-23 20:39:13 +08:00
committed by GitHub
parent 65ec4dc741
commit 6e60a77d89
68 changed files with 1562 additions and 45 deletions

View File

@@ -38,6 +38,7 @@ jobs:
shell: bash
run: |
export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
./build-apk-vad.sh
./build-apk-two-pass.sh
./build-apk.sh

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)
set(SHERPA_ONNX_VERSION "1.7.18")
set(SHERPA_ONNX_VERSION "1.7.19")
# Disable warning about
#

5
android/README.md Normal file
View File

@@ -0,0 +1,5 @@
# Introduction
Please refer to
https://k2-fsa.github.io/sherpa/onnx/android/index.html
for usage.

View File

@@ -1,3 +1,4 @@
// Copyright (c) 2023 Xiaomi Corporation
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager

View File

@@ -1,3 +1,4 @@
// Copyright (c) 2023 Xiaomi Corporation
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager

15
android/SherpaOnnxVad/.gitignore vendored Normal file
View File

@@ -0,0 +1,15 @@
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties

3
android/SherpaOnnxVad/.idea/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml

View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<bytecodeTargetLevel target="11" />
</component>
</project>

19
android/SherpaOnnxVad/.idea/gradle.xml generated Normal file
View File

@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="GradleMigrationSettings" migrationVersion="1" />
<component name="GradleSettings">
<option name="linkedExternalProjectsSettings">
<GradleProjectSettings>
<option name="testRunner" value="GRADLE" />
<option name="distributionType" value="DEFAULT_WRAPPED" />
<option name="externalProjectPath" value="$PROJECT_DIR$" />
<option name="modules">
<set>
<option value="$PROJECT_DIR$" />
<option value="$PROJECT_DIR$/app" />
</set>
</option>
</GradleProjectSettings>
</option>
</component>
</project>

10
android/SherpaOnnxVad/.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" default="true" project-jdk-name="Android Studio default JDK" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/build/classes" />
</component>
<component name="ProjectType">
<option name="id" value="Android" />
</component>
</project>

6
android/SherpaOnnxVad/.idea/vcs.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
</component>
</project>

1
android/SherpaOnnxVad/app/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/build

View File

@@ -0,0 +1,44 @@
plugins {
id 'com.android.application'
id 'org.jetbrains.kotlin.android'
}
android {
namespace 'com.k2fsa.sherpa.onnx'
compileSdk 33
defaultConfig {
applicationId "com.k2fsa.sherpa.onnx"
minSdk 21
targetSdk 33
versionCode 1
versionName "1.0"
testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
}
buildTypes {
release {
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
}
}
compileOptions {
sourceCompatibility JavaVersion.VERSION_1_8
targetCompatibility JavaVersion.VERSION_1_8
}
kotlinOptions {
jvmTarget = '1.8'
}
}
dependencies {
implementation 'androidx.core:core-ktx:1.7.0'
implementation 'androidx.appcompat:appcompat:1.6.1'
implementation 'com.google.android.material:material:1.9.0'
implementation 'androidx.constraintlayout:constraintlayout:2.1.4'
testImplementation 'junit:junit:4.13.2'
androidTestImplementation 'androidx.test.ext:junit:1.1.5'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1'
}

View File

@@ -0,0 +1,21 @@
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

View File

@@ -0,0 +1,24 @@
package com.k2fsa.sherpa.onnx
import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.Assert.*
/**
* Instrumented test, which will execute on an Android device.
*
* See [testing documentation](http://d.android.com/tools/testing).
*/
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
@Test
fun useAppContext() {
// Context of the app under test.
val appContext = InstrumentationRegistry.getInstrumentation().targetContext
assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName)
}
}

View File

@@ -0,0 +1,32 @@
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools">
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<application
android:allowBackup="true"
android:dataExtractionRules="@xml/data_extraction_rules"
android:fullBackupContent="@xml/backup_rules"
android:icon="@mipmap/ic_launcher"
android:label="@string/app_name"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/Theme.SherpaOnnxVad"
tools:targetApi="31">
<activity
android:name=".MainActivity"
android:exported="true">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
<meta-data
android:name="android.app.lib_name"
android:value="" />
</activity>
</application>
</manifest>

View File

@@ -0,0 +1 @@
*.onnx

View File

@@ -0,0 +1,174 @@
package com.k2fsa.sherpa.onnx
import android.Manifest
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.os.Bundle
import android.util.Log
import android.view.View
import android.widget.Button
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import kotlin.concurrent.thread
private const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
class MainActivity : AppCompatActivity() {
private lateinit var recordButton: Button
private lateinit var circle: View
private lateinit var vad: Vad
private var audioRecord: AudioRecord? = null
private var recordingThread: Thread? = null
private val audioSource = MediaRecorder.AudioSource.MIC
private val sampleRateInHz = 16000
private val channelConfig = AudioFormat.CHANNEL_IN_MONO
// Note: We don't use AudioFormat.ENCODING_PCM_FLOAT
// since the AudioRecord.read(float[]) needs API level >= 23
// but we are targeting API level >= 21
private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
@Volatile
private var isRecording: Boolean = false
override fun onRequestPermissionsResult(
requestCode: Int, permissions: Array<String>, grantResults: IntArray
) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults)
val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
grantResults[0] == PackageManager.PERMISSION_GRANTED
} else {
false
}
if (!permissionToRecordAccepted) {
Log.e(TAG, "Audio record is disallowed")
finish()
}
Log.i(TAG, "Audio record is permitted")
}
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
setContentView(R.layout.activity_main)
ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
Log.i(TAG, "Start to initialize model")
initVadModel()
Log.i(TAG, "Finished initializing model")
circle= findViewById(R.id.powerCircle)
recordButton = findViewById(R.id.record_button)
recordButton.setOnClickListener { onclick() }
}
private fun onclick() {
if (!isRecording) {
val ret = initMicrophone()
if (!ret) {
Log.e(TAG, "Failed to initialize microphone")
return
}
Log.i(TAG, "state: ${audioRecord?.state}")
audioRecord!!.startRecording()
recordButton.setText(R.string.stop)
isRecording = true
vad.reset()
recordingThread = thread(true) {
processSamples()
}
Log.i(TAG, "Started recording")
onVad(false)
} else {
isRecording = false
audioRecord!!.stop()
audioRecord!!.release()
audioRecord = null
recordButton.setText(R.string.start)
onVad(false)
Log.i(TAG, "Stopped recording")
}
}
private fun onVad(isSpeech: Boolean) {
if(isSpeech) {
circle.background = resources.getDrawable(R.drawable.red_circle)
} else {
circle.background = resources.getDrawable(R.drawable.black_circle)
}
}
private fun initVadModel() {
val type = 0
println("Select VAD model type ${type}")
val config = getVadModelConfig(type)
vad = Vad(
assetManager = application.assets,
config = config!!,
)
}
private fun initMicrophone(): Boolean {
if (ActivityCompat.checkSelfPermission(
this, Manifest.permission.RECORD_AUDIO
) != PackageManager.PERMISSION_GRANTED
) {
ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
return false
}
val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
Log.i(
TAG, "buffer size in milliseconds: ${numBytes * 1000.0f / sampleRateInHz}"
)
audioRecord = AudioRecord(
audioSource,
sampleRateInHz,
channelConfig,
audioFormat,
numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
)
return true
}
private fun processSamples() {
Log.i(TAG, "processing samples")
val bufferSize = 512 // in samples
val buffer = ShortArray(bufferSize)
while (isRecording) {
val ret = audioRecord?.read(buffer, 0, buffer.size)
if (ret != null && ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
vad.acceptWaveform(samples)
while(!vad.empty()) {vad.pop();}
val isSpeechDetected = vad.isSpeechDetected()
runOnUiThread {
onVad(isSpeechDetected)
}
}
}
}
}

View File

@@ -0,0 +1,101 @@
// Copyright (c) 2023 Xiaomi Corporation
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
data class SileroVadModelConfig(
var model: String,
var threshold: Float = 0.5F,
var minSilenceDuration: Float = 0.25F,
var minSpeechDuration: Float = 0.25F,
var windowSize: Int = 512,
)
data class VadModelConfig(
var sileroVadModelConfig: SileroVadModelConfig,
var sampleRate: Int = 16000,
var numThreads: Int = 1,
var provider: String = "cpu",
var debug: Boolean = false,
)
class Vad(
assetManager: AssetManager? = null,
var config: VadModelConfig,
) {
private val ptr: Long
init {
if (assetManager != null) {
ptr = new(assetManager, config)
} else {
ptr = newFromFile(config)
}
}
protected fun finalize() {
delete(ptr)
}
fun acceptWaveform(samples: FloatArray) = acceptWaveform(ptr, samples)
fun empty(): Boolean = empty(ptr)
fun pop() = pop(ptr)
// return an array containing
// [start: Int, samples: FloatArray]
fun front() = front(ptr)
fun isSpeechDetected(): Boolean = isSpeechDetected(ptr)
fun reset() = reset(ptr)
private external fun delete(ptr: Long)
private external fun new(
assetManager: AssetManager,
config: VadModelConfig,
): Long
private external fun newFromFile(
config: VadModelConfig,
): Long
private external fun acceptWaveform(ptr: Long, samples: FloatArray)
private external fun empty(ptr: Long): Boolean
private external fun pop(ptr: Long)
private external fun front(ptr: Long): Array<Any>
private external fun isSpeechDetected(ptr: Long): Boolean
private external fun reset(ptr: Long)
companion object {
init {
System.loadLibrary("sherpa-onnx-jni")
}
}
}
// Please visit
// https://github.com/snakers4/silero-vad/blob/master/files/silero_vad.onnx
// to download silero_vad.onnx
// and put it inside the assets/
// directory
fun getVadModelConfig(type: Int): VadModelConfig? {
when (type) {
0 -> {
return VadModelConfig(
sileroVadModelConfig = SileroVadModelConfig(
model = "silero_vad.onnx",
threshold = 0.5F,
minSilenceDuration = 0.25F,
minSpeechDuration = 0.25F,
windowSize = 512,
),
sampleRate = 16000,
numThreads = 1,
provider = "cpu",
)
}
}
return null;
}

View File

@@ -0,0 +1 @@
*.so

View File

@@ -0,0 +1,30 @@
<vector xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:aapt="http://schemas.android.com/aapt"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
<aapt:attr name="android:fillColor">
<gradient
android:endX="85.84757"
android:endY="92.4963"
android:startX="42.9492"
android:startY="49.59793"
android:type="linear">
<item
android:color="#44000000"
android:offset="0.0" />
<item
android:color="#00000000"
android:offset="1.0" />
</gradient>
</aapt:attr>
</path>
<path
android:fillColor="#FFFFFF"
android:fillType="nonZero"
android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
android:strokeWidth="1"
android:strokeColor="#00000000" />
</vector>

View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="utf-8"?>
<selector xmlns:android="http://schemas.android.com/apk/res/android">
<item>
<shape android:shape="oval">
<solid android:color="#FF000000"/>
<size
android:width="300dp"
android:height="300dp"/>
</shape>
</item>
</selector>

View File

@@ -0,0 +1,170 @@
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path
android:fillColor="#3DDC84"
android:pathData="M0,0h108v108h-108z" />
<path
android:fillColor="#00000000"
android:pathData="M9,0L9,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,0L19,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M29,0L29,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M39,0L39,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M49,0L49,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M59,0L59,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M69,0L69,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M79,0L79,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M89,0L89,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M99,0L99,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,9L108,9"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,19L108,19"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,29L108,29"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,39L108,39"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,49L108,49"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,59L108,59"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,69L108,69"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,79L108,79"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,89L108,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,99L108,99"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,29L89,29"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,39L89,39"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,49L89,49"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,59L89,59"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,69L89,69"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,79L89,79"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M29,19L29,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M39,19L39,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M49,19L49,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M59,19L59,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M69,19L69,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M79,19L79,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
</vector>

View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="utf-8"?>
<selector xmlns:android="http://schemas.android.com/apk/res/android">
<item>
<shape android:shape="oval">
<solid android:color="#FFFF0000"/>
<size
android:width="300dp"
android:height="300dp"/>
</shape>
</item>
</selector>

View File

@@ -0,0 +1,43 @@
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent"
tools:context=".MainActivity">
<LinearLayout
android:layout_width="match_parent"
android:layout_height="match_parent"
android:gravity="bottom"
android:orientation="vertical"
>
<Space
android:layout_width="match_parent"
android:layout_height="10dp" />
<LinearLayout
android:id="@+id/powerCircle"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_gravity="center_horizontal"
android:background="@drawable/black_circle"
android:orientation="vertical" />
<Space
android:layout_width="match_parent"
android:layout_height="200dp" />
<Button
android:id="@+id/record_button"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:text="@string/start" />
</LinearLayout>
</androidx.constraintlayout.widget.ConstraintLayout>

View File

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

View File

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 982 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 KiB

View File

@@ -0,0 +1,16 @@
<resources xmlns:tools="http://schemas.android.com/tools">
<!-- Base application theme. -->
<style name="Theme.SherpaOnnxVad" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
<!-- Primary brand color. -->
<item name="colorPrimary">@color/purple_200</item>
<item name="colorPrimaryVariant">@color/purple_700</item>
<item name="colorOnPrimary">@color/black</item>
<!-- Secondary brand color. -->
<item name="colorSecondary">@color/teal_200</item>
<item name="colorSecondaryVariant">@color/teal_200</item>
<item name="colorOnSecondary">@color/black</item>
<!-- Status bar color. -->
<item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
<!-- Customize your theme here. -->
</style>
</resources>

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="purple_200">#FFBB86FC</color>
<color name="purple_500">#FF6200EE</color>
<color name="purple_700">#FF3700B3</color>
<color name="teal_200">#FF03DAC5</color>
<color name="teal_700">#FF018786</color>
<color name="black">#FF000000</color>
<color name="white">#FFFFFFFF</color>
</resources>

View File

@@ -0,0 +1,7 @@
<resources>
<string name="app_name">Next-gen Kaldi: SileroVAD</string>
<string name="hint">Click the Start button to play Silero VAD with Next-gen Kaldi.</string>
<string name="start">Start</string>
<string name="stop">Stop</string>
</resources>

View File

@@ -0,0 +1,16 @@
<resources xmlns:tools="http://schemas.android.com/tools">
<!-- Base application theme. -->
<style name="Theme.SherpaOnnxVad" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
<!-- Primary brand color. -->
<item name="colorPrimary">@color/purple_500</item>
<item name="colorPrimaryVariant">@color/purple_700</item>
<item name="colorOnPrimary">@color/white</item>
<!-- Secondary brand color. -->
<item name="colorSecondary">@color/teal_200</item>
<item name="colorSecondaryVariant">@color/teal_700</item>
<item name="colorOnSecondary">@color/black</item>
<!-- Status bar color. -->
<item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
<!-- Customize your theme here. -->
</style>
</resources>

View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="utf-8"?><!--
Sample backup rules file; uncomment and customize as necessary.
See https://developer.android.com/guide/topics/data/autobackup
for details.
Note: This file is ignored for devices older that API 31
See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
<!--
<include domain="sharedpref" path="."/>
<exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

View File

@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="utf-8"?><!--
Sample data extraction rules file; uncomment and customize as necessary.
See https://developer.android.com/about/versions/12/backup-restore#xml-changes
for details.
-->
<data-extraction-rules>
<cloud-backup>
<!-- TODO: Use <include> and <exclude> to control what is backed up.
<include .../>
<exclude .../>
-->
</cloud-backup>
<!--
<device-transfer>
<include .../>
<exclude .../>
</device-transfer>
-->
</data-extraction-rules>

View File

@@ -0,0 +1,17 @@
package com.k2fsa.sherpa.onnx
import org.junit.Test
import org.junit.Assert.*
/**
* Example local unit test, which will execute on the development machine (host).
*
* See [testing documentation](http://d.android.com/tools/testing).
*/
class ExampleUnitTest {
@Test
fun addition_isCorrect() {
assertEquals(4, 2 + 2)
}
}

View File

@@ -0,0 +1,6 @@
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
id 'com.android.application' version '7.3.1' apply false
id 'com.android.library' version '7.3.1' apply false
id 'org.jetbrains.kotlin.android' version '1.7.20' apply false
}

View File

@@ -0,0 +1,23 @@
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

View File

@@ -0,0 +1,6 @@
#Sat Sep 23 10:24:21 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME

185
android/SherpaOnnxVad/gradlew vendored Executable file
View File

@@ -0,0 +1,185 @@
#!/usr/bin/env sh
#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
warn () {
echo "$*"
}
die () {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=`expr $i + 1`
done
case $i in
0) set -- ;;
1) set -- "$args0" ;;
2) set -- "$args0" "$args1" ;;
3) set -- "$args0" "$args1" "$args2" ;;
4) set -- "$args0" "$args1" "$args2" "$args3" ;;
5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Escape application args
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=`save "$@"`
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
exec "$JAVACMD" "$@"

89
android/SherpaOnnxVad/gradlew.bat vendored Normal file
View File

@@ -0,0 +1,89 @@
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto execute
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

View File

@@ -0,0 +1,16 @@
pluginManagement {
repositories {
gradlePluginPortal()
google()
mavenCentral()
}
}
dependencyResolutionManagement {
repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
repositories {
google()
mavenCentral()
}
}
rootProject.name = "SherpaOnnxVad"
include ':app'

View File

@@ -45,7 +45,9 @@ sleep 1
onnxruntime_version=v1.16.0
if [ ! -f ./android-onnxruntime-libs/$onnxruntime_version/jni/arm64-v8a/libonnxruntime.so ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
if [ ! -d android-onnxruntime-libs ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
fi
pushd android-onnxruntime-libs
git lfs pull --include "$onnxruntime_version/jni/arm64-v8a/libonnxruntime.so"
ln -s $onnxruntime_version/jni .

View File

@@ -46,7 +46,9 @@ sleep 1
onnxruntime_version=v1.16.0
if [ ! -f ./android-onnxruntime-libs/$onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
if [ ! -d android-onnxruntime-libs ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
fi
pushd android-onnxruntime-libs
git lfs pull --include "$onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so"
ln -s $onnxruntime_version/jni .

View File

@@ -46,7 +46,9 @@ sleep 1
onnxruntime_version=v1.16.0
if [ ! -f ./android-onnxruntime-libs/$onnxruntime_version/jni/x86_64/libonnxruntime.so ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
if [ ! -d android-onnxruntime-libs ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
fi
pushd android-onnxruntime-libs
git lfs pull --include "$onnxruntime_version/jni/x86_64/libonnxruntime.so"
ln -s $onnxruntime_version/jni .

View File

@@ -46,7 +46,9 @@ sleep 1
onnxruntime_version=v1.16.0
if [ ! -f ./android-onnxruntime-libs/$onnxruntime_version/jni/x86/libonnxruntime.so ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
if [ ! -d android-onnxruntime-libs ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
fi
pushd android-onnxruntime-libs
git lfs pull --include "$onnxruntime_version/jni/x86/libonnxruntime.so"
ln -s $onnxruntime_version/jni .

66
build-apk-vad.sh Executable file
View File

@@ -0,0 +1,66 @@
#!/usr/bin/env bash
# Please set the environment variable ANDROID_NDK
# before running this script
# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"
set -e
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
log "Building APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh
mkdir -p apks
log "https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx"
# Download the model
pushd ./android/SherpaOnnxVad/app/src/main/assets/
wget https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
popd
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
log "------------------------------------------------------------"
log "build apk for $arch"
log "------------------------------------------------------------"
src_arch=$arch
if [ $arch == "armeabi-v7a" ]; then
src_arch=armv7-eabi
elif [ $arch == "x86_64" ]; then
src_arch=x86-64
fi
ls -lh ./build-android-$src_arch/install/lib/*.so
cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/
pushd ./android/SherpaOnnxVad
./gradlew build
popd
mv android/SherpaOnnxVad/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-silero-vad.apk
ls -lh apks
rm -v ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/*.so
done
rm -rf ./android/SherpaOnnxVad/app/src/main/assets/*.onnx
ls -lh apks/

1
kotlin-api-examples/Vad.kt Symbolic link
View File

@@ -0,0 +1 @@
../android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt

View File

@@ -32,7 +32,7 @@ std::vector<std::vector<std::string>> SplitToBatches(
process_num += batch_size;
}
if (itr != input.cend()) {
outputs.emplace_back(itr, input.cend());
outputs.emplace_back(itr, input.cend());
}
return outputs;
}
@@ -41,8 +41,8 @@ std::vector<std::string> LoadScpFile(const std::string &wav_scp_path) {
std::vector<std::string> wav_paths;
std::ifstream in(wav_scp_path);
if (!in.is_open()) {
fprintf(stderr, "Failed to open file: %s.\n", wav_scp_path.c_str());
return wav_paths;
fprintf(stderr, "Failed to open file: %s.\n", wav_scp_path.c_str());
return wav_paths;
}
std::string line, column1, column2;
while (std::getline(in, line)) {
@@ -55,8 +55,8 @@ std::vector<std::string> LoadScpFile(const std::string &wav_scp_path) {
}
void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
sherpa_onnx::OfflineRecognizer* recognizer,
float* total_length, float* total_time) {
sherpa_onnx::OfflineRecognizer *recognizer,
float *total_length, float *total_time) {
std::vector<std::unique_ptr<sherpa_onnx::OfflineStream>> ss;
std::vector<sherpa_onnx::OfflineStream *> ss_pointers;
float duration = 0.0f;
@@ -70,7 +70,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
if (!is_ok) {
fprintf(stderr, "Failed to read %s\n", wav_filename.c_str());
continue;
continue;
}
duration += samples.size() / static_cast<float>(sampling_rate);
auto s = recognizer->CreateStream();
@@ -97,7 +97,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
if (!is_ok) {
fprintf(stderr, "Failed to read %s\n", wav_filename.c_str());
continue;
continue;
}
duration += samples.size() / static_cast<float>(sampling_rate);
auto s = recognizer->CreateStream();
@@ -109,9 +109,9 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
recognizer->DecodeStreams(ss_pointers.data(), ss_pointers.size());
const auto end = std::chrono::steady_clock::now();
float elapsed_seconds =
std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
.count() /
1000.;
std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
.count() /
1000.;
elapsed_seconds_batch += elapsed_seconds;
int i = 0;
for (const auto &wav_filename : wav_paths) {
@@ -122,7 +122,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
ss_pointers.clear();
ss.clear();
}
fprintf(stderr, "thread %lu.\n", std::this_thread::get_id());
{
std::lock_guard<std::mutex> guard(mtx);
*total_length += duration;
@@ -132,7 +132,6 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
}
}
int main(int32_t argc, char *argv[]) {
const char *kUsageMessage = R"usage(
Speech recognition using non-streaming models with sherpa-onnx.
@@ -223,17 +222,17 @@ https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)usage";
std::string wav_scp = ""; // file path, kaldi style wav list.
int32_t nj = 1; // thread number
int32_t batch_size = 1; // number of wav files processed at once.
int32_t nj = 1; // thread number
int32_t batch_size = 1; // number of wav files processed at once.
sherpa_onnx::ParseOptions po(kUsageMessage);
sherpa_onnx::OfflineRecognizerConfig config;
config.Register(&po);
po.Register("wav-scp", &wav_scp,
"a file including wav-id and wav-path, kaldi style wav list."
"default="". when it is not empty, wav files which positional "
"default="
". when it is not empty, wav files which positional "
"parameters provide are invalid.");
po.Register("nj", &nj,
"multi-thread num for decoding, default=1");
po.Register("nj", &nj, "multi-thread num for decoding, default=1");
po.Register("batch-size", &batch_size,
"number of wav files processed at once during the decoding"
"process. default=1");
@@ -262,7 +261,8 @@ for a list of pre-trained models to download.
1000.;
fprintf(stderr,
"Started nj: %d, batch_size: %d, wav_path: %s. recognizer init time: "
"%.6f\n", nj, batch_size, wav_scp.c_str(), elapsed_seconds);
"%.6f\n",
nj, batch_size, wav_scp.c_str(), elapsed_seconds);
std::this_thread::sleep_for(std::chrono::seconds(10)); // sleep 10s
std::vector<std::string> wav_paths;
if (!wav_scp.empty()) {
@@ -282,12 +282,12 @@ for a list of pre-trained models to download.
float total_length = 0.0f;
float total_time = 0.0f;
for (int i = 0; i < nj; i++) {
threads.emplace_back(std::thread(AsrInference, batch_wav_paths,
&recognizer, &total_length, &total_time));
threads.emplace_back(std::thread(AsrInference, batch_wav_paths, &recognizer,
&total_length, &total_time));
}
for (auto& thread : threads) {
thread.join();
for (auto &thread : threads) {
thread.join();
}
fprintf(stderr, "num threads: %d\n", config.model_config.num_threads);
@@ -297,8 +297,8 @@ for a list of pre-trained models to download.
}
fprintf(stderr, "Elapsed seconds: %.3f s\n", total_time);
float rtf = total_time / total_length;
fprintf(stderr, "Real time factor (RTF): %.6f / %.6f = %.4f\n",
total_time, total_length, rtf);
fprintf(stderr, "Real time factor (RTF): %.6f / %.6f = %.4f\n", total_time,
total_length, rtf);
fprintf(stderr, "SPEEDUP: %.4f\n", 1.0 / rtf);
return 0;

View File

@@ -37,6 +37,29 @@ class SileroVadModel::Impl {
min_speech_samples_ = sample_rate_ * config_.silero_vad.min_speech_duration;
}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const VadModelConfig &config)
: config_(config),
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)),
allocator_{} {
auto buf = ReadFile(mgr, config.silero_vad.model);
Init(buf.data(), buf.size());
sample_rate_ = config.sample_rate;
if (sample_rate_ != 16000) {
SHERPA_ONNX_LOGE("Expected sample rate 16000. Given: %d",
config.sample_rate);
exit(-1);
}
min_silence_samples_ =
sample_rate_ * config_.silero_vad.min_silence_duration;
min_speech_samples_ = sample_rate_ * config_.silero_vad.min_speech_duration;
}
#endif
void Reset() {
// 2 - number of LSTM layer
// 1 - batch size
@@ -260,6 +283,11 @@ class SileroVadModel::Impl {
SileroVadModel::SileroVadModel(const VadModelConfig &config)
: impl_(std::make_unique<Impl>(config)) {}
#if __ANDROID_API__ >= 9
SileroVadModel::SileroVadModel(AAssetManager *mgr, const VadModelConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
SileroVadModel::~SileroVadModel() = default;
void SileroVadModel::Reset() { return impl_->Reset(); }

View File

@@ -6,6 +6,11 @@
#include <memory>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/vad-model.h"
namespace sherpa_onnx {
@@ -13,6 +18,11 @@ namespace sherpa_onnx {
class SileroVadModel : public VadModel {
public:
explicit SileroVadModel(const VadModelConfig &config);
#if __ANDROID_API__ >= 9
SileroVadModel(AAssetManager *mgr, const VadModelConfig &config);
#endif
~SileroVadModel() override;
// reset the internal model states

View File

@@ -13,4 +13,12 @@ std::unique_ptr<VadModel> VadModel::Create(const VadModelConfig &config) {
return std::make_unique<SileroVadModel>(config);
}
#if __ANDROID_API__ >= 9
std::unique_ptr<VadModel> VadModel::Create(AAssetManager *mgr,
const VadModelConfig &config) {
// TODO(fangjun): Support other VAD models.
return std::make_unique<SileroVadModel>(mgr, config);
}
#endif
} // namespace sherpa_onnx

View File

@@ -6,6 +6,11 @@
#include <memory>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/vad-model-config.h"
namespace sherpa_onnx {
@@ -16,6 +21,11 @@ class VadModel {
static std::unique_ptr<VadModel> Create(const VadModelConfig &config);
#if __ANDROID_API__ >= 9
static std::unique_ptr<VadModel> Create(AAssetManager *mgr,
const VadModelConfig &config);
#endif
// reset the internal model states
virtual void Reset() = 0;

View File

@@ -19,10 +19,32 @@ class VoiceActivityDetector::Impl {
config_(config),
buffer_(buffer_size_in_seconds * config.sample_rate) {}
void AcceptWaveform(const float *samples, int32_t n) {
buffer_.Push(samples, n);
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const VadModelConfig &config,
float buffer_size_in_seconds = 60)
: model_(VadModel::Create(mgr, config)),
config_(config),
buffer_(buffer_size_in_seconds * config.sample_rate) {}
#endif
void AcceptWaveform(const float *samples, int32_t n) {
int32_t window_size = model_->WindowSize();
// note n is usally window_size and there is no need to use
// an extra buffer here
last_.insert(last_.end(), samples, samples + n);
int32_t k = static_cast<int32_t>(last_.size()) / window_size;
const float *p = last_.data();
bool is_speech = false;
for (int32_t i = 0; i != k; ++i, p += window_size) {
buffer_.Push(p, window_size);
is_speech = model_->IsSpeech(p, window_size);
}
last_ = std::vector<float>(
p, static_cast<const float *>(last_.data()) + last_.size());
bool is_speech = model_->IsSpeech(samples, n);
if (is_speech) {
if (start_ == -1) {
// beginning of speech
@@ -31,15 +53,15 @@ class VoiceActivityDetector::Impl {
}
} else {
// non-speech
if (start_ != -1) {
if (start_ != -1 && buffer_.Size()) {
// end of speech, save the speech segment
int32_t end = buffer_.Tail() - model_->MinSilenceDurationSamples();
std::vector<float> samples = buffer_.Get(start_, end - start_);
std::vector<float> s = buffer_.Get(start_, end - start_);
SpeechSegment segment;
segment.start = start_;
segment.samples = std::move(samples);
segment.samples = std::move(s);
segments_.push(std::move(segment));
@@ -73,6 +95,7 @@ class VoiceActivityDetector::Impl {
std::unique_ptr<VadModel> model_;
VadModelConfig config_;
CircularBuffer buffer_;
std::vector<float> last_;
int32_t start_ = -1;
};
@@ -81,6 +104,13 @@ VoiceActivityDetector::VoiceActivityDetector(
const VadModelConfig &config, float buffer_size_in_seconds /*= 60*/)
: impl_(std::make_unique<Impl>(config, buffer_size_in_seconds)) {}
#if __ANDROID_API__ >= 9
VoiceActivityDetector::VoiceActivityDetector(
AAssetManager *mgr, const VadModelConfig &config,
float buffer_size_in_seconds /*= 60*/)
: impl_(std::make_unique<Impl>(mgr, config, buffer_size_in_seconds)) {}
#endif
VoiceActivityDetector::~VoiceActivityDetector() = default;
void VoiceActivityDetector::AcceptWaveform(const float *samples, int32_t n) {

View File

@@ -7,6 +7,11 @@
#include <memory>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/vad-model-config.h"
namespace sherpa_onnx {
@@ -20,6 +25,12 @@ class VoiceActivityDetector {
public:
explicit VoiceActivityDetector(const VadModelConfig &config,
float buffer_size_in_seconds = 60);
#if __ANDROID_API__ >= 9
VoiceActivityDetector(AAssetManager *mgr, const VadModelConfig &config,
float buffer_size_in_seconds = 60);
#endif
~VoiceActivityDetector();
void AcceptWaveform(const float *samples, int32_t n);

View File

@@ -23,6 +23,7 @@
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#define SHERPA_ONNX_EXTERN_C extern "C"
@@ -106,6 +107,33 @@ class SherpaOnnxOffline {
OfflineRecognizer recognizer_;
};
class SherpaOnnxVad {
public:
#if __ANDROID_API__ >= 9
SherpaOnnxVad(AAssetManager *mgr, const VadModelConfig &config)
: vad_(mgr, config) {}
#endif
explicit SherpaOnnxVad(const VadModelConfig &config) : vad_(config) {}
void AcceptWaveform(const float *samples, int32_t n) {
vad_.AcceptWaveform(samples, n);
}
bool Empty() const { return vad_.Empty(); }
void Pop() { vad_.Pop(); }
const SpeechSegment &Front() const { return vad_.Front(); }
bool IsSpeechDetected() const { return vad_.IsSpeechDetected(); }
void Reset() { vad_.Reset(); }
private:
VoiceActivityDetector vad_;
};
static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) {
OnlineRecognizerConfig ans;
@@ -411,8 +439,165 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
return ans;
}
static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) {
VadModelConfig ans;
jclass cls = env->GetObjectClass(config);
jfieldID fid;
// silero_vad
fid = env->GetFieldID(cls, "sileroVadModelConfig",
"Lcom/k2fsa/sherpa/onnx/SileroVadModelConfig;");
jobject silero_vad_config = env->GetObjectField(config, fid);
jclass silero_vad_config_cls = env->GetObjectClass(silero_vad_config);
fid = env->GetFieldID(silero_vad_config_cls, "model", "Ljava/lang/String;");
auto s = (jstring)env->GetObjectField(silero_vad_config, fid);
auto p = env->GetStringUTFChars(s, nullptr);
ans.silero_vad.model = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(silero_vad_config_cls, "threshold", "F");
ans.silero_vad.threshold = env->GetFloatField(silero_vad_config, fid);
fid = env->GetFieldID(silero_vad_config_cls, "minSilenceDuration", "F");
ans.silero_vad.min_silence_duration =
env->GetFloatField(silero_vad_config, fid);
fid = env->GetFieldID(silero_vad_config_cls, "minSpeechDuration", "F");
ans.silero_vad.min_speech_duration =
env->GetFloatField(silero_vad_config, fid);
fid = env->GetFieldID(silero_vad_config_cls, "windowSize", "I");
ans.silero_vad.window_size = env->GetIntField(silero_vad_config, fid);
fid = env->GetFieldID(cls, "sampleRate", "I");
ans.sample_rate = env->GetIntField(config, fid);
fid = env->GetFieldID(cls, "numThreads", "I");
ans.num_threads = env->GetIntField(config, fid);
fid = env->GetFieldID(cls, "provider", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.provider = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(cls, "debug", "Z");
ans.debug = env->GetBooleanField(config, fid);
return ans;
}
} // namespace sherpa_onnx
SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_Vad_new(
JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
if (!mgr) {
SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
}
#endif
auto config = sherpa_onnx::GetVadModelConfig(env, _config);
SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
auto model = new sherpa_onnx::SherpaOnnxVad(
#if __ANDROID_API__ >= 9
mgr,
#endif
config);
return (jlong)model;
}
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_Vad_newFromFile(
JNIEnv *env, jobject /*obj*/, jobject _config) {
auto config = sherpa_onnx::GetVadModelConfig(env, _config);
SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
auto model = new sherpa_onnx::SherpaOnnxVad(config);
return (jlong)model;
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_delete(JNIEnv *env,
jobject /*obj*/,
jlong ptr) {
delete reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_acceptWaveform(
JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
jfloat *p = env->GetFloatArrayElements(samples, nullptr);
jsize n = env->GetArrayLength(samples);
model->AcceptWaveform(p, n);
env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT bool JNICALL Java_com_k2fsa_sherpa_onnx_Vad_empty(JNIEnv *env,
jobject /*obj*/,
jlong ptr) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
return model->Empty();
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_pop(JNIEnv *env,
jobject /*obj*/,
jlong ptr) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
model->Pop();
}
// see
// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables
static jobject NewInteger(JNIEnv *env, int32_t value) {
jclass cls = env->FindClass("java/lang/Integer");
jmethodID constructor = env->GetMethodID(cls, "<init>", "(I)V");
return env->NewObject(cls, constructor, value);
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT jobjectArray JNICALL
Java_com_k2fsa_sherpa_onnx_Vad_front(JNIEnv *env, jobject /*obj*/, jlong ptr) {
const auto &front =
reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr)->Front();
jfloatArray samples_arr = env->NewFloatArray(front.samples.size());
env->SetFloatArrayRegion(samples_arr, 0, front.samples.size(),
front.samples.data());
jobjectArray obj_arr = (jobjectArray)env->NewObjectArray(
2, env->FindClass("java/lang/Object"), nullptr);
env->SetObjectArrayElement(obj_arr, 0, NewInteger(env, front.start));
env->SetObjectArrayElement(obj_arr, 1, samples_arr);
return obj_arr;
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT bool JNICALL Java_com_k2fsa_sherpa_onnx_Vad_isSpeechDetected(
JNIEnv *env, jobject /*obj*/, jlong ptr) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
return model->IsSpeechDetected();
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(JNIEnv *env,
jobject /*obj*/,
jlong ptr) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
model->Reset();
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnx_new(
JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
@@ -564,12 +749,12 @@ SHERPA_ONNX_EXTERN_C
JNIEXPORT jobjectArray JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnx_getTokens(
JNIEnv *env, jobject /*obj*/, jlong ptr) {
auto tokens = reinterpret_cast<sherpa_onnx::SherpaOnnx *>(ptr)->GetTokens();
int size = tokens.size();
int32_t size = tokens.size();
jclass stringClass = env->FindClass("java/lang/String");
// convert C++ list into jni string array
jobjectArray result = env->NewObjectArray(size, stringClass, NULL);
for (int i = 0; i < size; i++) {
for (int32_t i = 0; i < size; i++) {
// Convert the C++ string to a C string
const char *cstr = tokens[i].c_str();
@@ -583,14 +768,6 @@ JNIEXPORT jobjectArray JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnx_getTokens(
return result;
}
// see
// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables
static jobject NewInteger(JNIEnv *env, int32_t value) {
jclass cls = env->FindClass("java/lang/Integer");
jmethodID constructor = env->GetMethodID(cls, "<init>", "(I)V");
return env->NewObject(cls, constructor, value);
}
static jobjectArray ReadWaveImpl(JNIEnv *env, std::istream &is,
const char *p_filename) {
bool is_ok = false;