Add Android demo for speaker recognition (#536)

See pre-built Android APKs at 
https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html
This commit is contained in:
Fangjun Kuang
2024-01-23 16:50:52 +08:00
committed by GitHub
parent 626775e5e2
commit bbd7c7fc18
73 changed files with 3022 additions and 6 deletions

View File

@@ -0,0 +1,30 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Manifest of the speaker identification demo app. -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools">
<!-- Microphone access; MainActivity also requests this permission at runtime. -->
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<application
android:allowBackup="true"
android:dataExtractionRules="@xml/data_extraction_rules"
android:fullBackupContent="@xml/backup_rules"
android:icon="@mipmap/ic_launcher"
android:label="@string/app_name"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/Theme.SherpaOnnxSpeakerIdentification"
tools:targetApi="31">
<!-- The only activity; the MAIN/LAUNCHER filter makes it the app's entry point. -->
<activity
android:name=".MainActivity"
android:exported="true"
android:label="@string/app_name"
android:theme="@style/Theme.SherpaOnnxSpeakerIdentification">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>

View File

@@ -0,0 +1,13 @@
package com.k2fsa.sherpa.onnx.speaker.identification
import androidx.compose.ui.graphics.vector.ImageVector
/**
 * One entry of the bottom navigation bar.
 *
 * @property title Text used for the item's label and for its icon's content description.
 * @property image Icon drawn for the item.
 * @property route Navigation route opened when the item is clicked.
 */
data class BarItem (
val title: String,
// see https://www.composables.com/icons
// and
// https://developer.android.com/reference/kotlin/androidx/compose/material/icons/filled/package-summary
val image: ImageVector,
val route: String,
)

View File

@@ -0,0 +1,179 @@
package com.k2fsa.sherpa.onnx.speaker.identification
import android.Manifest
import android.content.pm.PackageManager
import android.os.Bundle
import android.util.Log
import android.widget.Toast
import androidx.activity.ComponentActivity
import androidx.activity.compose.setContent
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.CenterAlignedTopAppBar
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.Icon
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.NavigationBar
import androidx.compose.material3.NavigationBarItem
import androidx.compose.material3.Scaffold
import androidx.compose.material3.Surface
import androidx.compose.material3.Text
import androidx.compose.material3.TopAppBarDefaults
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.ui.Modifier
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.tooling.preview.Preview
import androidx.core.app.ActivityCompat
import androidx.navigation.NavGraph.Companion.findStartDestination
import androidx.navigation.NavHostController
import androidx.navigation.compose.NavHost
import androidx.navigation.compose.composable
import androidx.navigation.compose.currentBackStackEntryAsState
import androidx.navigation.compose.rememberNavController
import com.k2fsa.sherpa.onnx.SpeakerRecognition
import com.k2fsa.sherpa.onnx.speaker.identification.screens.HelpScreen
import com.k2fsa.sherpa.onnx.speaker.identification.screens.HomeScreen
import com.k2fsa.sherpa.onnx.speaker.identification.screens.RegisterScreen
import com.k2fsa.sherpa.onnx.speaker.identification.screens.ViewScreen
import com.k2fsa.sherpa.onnx.speaker.identification.ui.theme.SherpaOnnxSpeakerIdentificationTheme
// Log tag used by this activity; the screen and JNI wrapper files import it for their own logging.
const val TAG = "sherpa-onnx-speaker"
// Request code passed to ActivityCompat.requestPermissions() and compared in
// onRequestPermissionsResult().
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
/**
 * The app's only activity.
 *
 * It initializes [SpeakerRecognition], shows [MainScreen] inside the app theme
 * and asks the user for the microphone permission. If that permission is
 * refused, the activity shows a toast and closes itself.
 */
class MainActivity : ComponentActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)

        // Initialize before the UI is set up: the screens read
        // SpeakerRecognition.manager while they are being composed.
        SpeakerRecognition.initExtractor(this.assets)

        setContent {
            SherpaOnnxSpeakerIdentificationTheme {
                // A surface container using the 'background' color from the theme
                Surface(
                    modifier = Modifier.fillMaxSize(),
                    color = MaterialTheme.colorScheme.background
                ) {
                    MainScreen()
                }
            }
        }

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
    }

    /**
     * Handles the answer to the microphone permission request made in [onCreate].
     *
     * Results for other request codes are ignored. An empty [grantResults]
     * array (the request was cancelled) is treated as a refusal.
     */
    @Deprecated("Deprecated in Java")
    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)

        if (requestCode != REQUEST_RECORD_AUDIO_PERMISSION) {
            // Not the request issued by this activity; nothing to decide here.
            return
        }

        // firstOrNull() instead of [0]: the array may be empty.
        val permissionToRecordAccepted =
            grantResults.firstOrNull() == PackageManager.PERMISSION_GRANTED

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            Toast.makeText(
                this,
                "This App needs access to the microphone",
                Toast.LENGTH_SHORT
            )
                .show()
            finish()
            // Do not fall through to the "permitted" log message below.
            return
        }

        Log.i(TAG, "Audio record is permitted")
    }
}
/**
 * Root composable of the app: a scaffold with a centered title bar, the
 * navigation host as content and the bottom navigation bar.
 *
 * @param modifier Modifier applied to the scaffold.
 */
@OptIn(ExperimentalMaterial3Api::class)
@Composable
fun MainScreen(modifier: Modifier = Modifier) {
    // One controller shared by the host and the bottom bar so both agree on
    // the current destination.
    val navController = rememberNavController()

    Scaffold(
        modifier = modifier,
        topBar = {
            CenterAlignedTopAppBar(
                colors = TopAppBarDefaults.topAppBarColors(
                    containerColor = MaterialTheme.colorScheme.primaryContainer,
                    titleContentColor = MaterialTheme.colorScheme.primary,
                ),
                title = {
                    Text(
                        "Next-gen Kaldi: Speaker Identification",
                        fontWeight = FontWeight.Bold,
                    )
                },
            )
        },
        content = { padding ->
            // Apply the scaffold's padding to the screen content.
            Column(Modifier.padding(padding)) {
                NavigationHost(navController = navController)
            }
        },
        bottomBar = {
            BottomNavigationBar(navController = navController)
        }
    )
}
/**
 * Navigation graph of the app. Each route declared in [NavRoutes] is mapped
 * to its screen; the home route is the start destination.
 *
 * @param navController Controller that drives this host.
 */
@Composable
fun NavigationHost(navController: NavHostController) {
    NavHost(
        navController = navController,
        startDestination = NavRoutes.Home.route,
    ) {
        composable(route = NavRoutes.Home.route) { HomeScreen() }
        composable(route = NavRoutes.Register.route) { RegisterScreen() }
        composable(route = NavRoutes.View.route) { ViewScreen() }
        composable(route = NavRoutes.Help.route) { HelpScreen() }
    }
}
/**
 * Bottom bar with one item per entry of [NavBarItems.BarItems].
 *
 * The item whose route equals the route of the current back stack entry is
 * shown as selected. Clicking an item navigates to its route.
 *
 * @param navController Controller used to observe and change the destination.
 */
@Composable
fun BottomNavigationBar(navController: NavHostController) {
    NavigationBar {
        val currentEntry by navController.currentBackStackEntryAsState()
        val activeRoute = currentEntry?.destination?.route

        for (item in NavBarItems.BarItems) {
            val isSelected = activeRoute == item.route

            NavigationBarItem(
                selected = isSelected,
                onClick = {
                    navController.navigate(item.route) {
                        // Pop up to the graph's start destination, saving state on
                        // the way, and restore saved state for the target.
                        popUpTo(navController.graph.findStartDestination().id) {
                            saveState = true
                        }
                        launchSingleTop = true
                        restoreState = true
                    }
                },
                icon = {
                    Icon(imageVector = item.image, contentDescription = item.title)
                },
                label = {
                    Text(text = item.title)
                },
            )
        }
    }
}
/**
 * Design-time preview of [MainScreen] wrapped in the app theme.
 */
@Preview(showBackground = true)
@Composable
fun MainScreenPreview() {
SherpaOnnxSpeakerIdentificationTheme {
MainScreen()
}
}

View File

@@ -0,0 +1,33 @@
package com.k2fsa.sherpa.onnx.speaker.identification
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.AccountCircle
import androidx.compose.material.icons.filled.Add
import androidx.compose.material.icons.filled.Home
import androidx.compose.material.icons.filled.Info
/**
 * Holds the entries of the bottom navigation bar, in display order.
 */
object NavBarItems {
    /** Home, Register, View and Help, each with its icon and route string. */
    val BarItems: List<BarItem> = listOf(
        BarItem(title = "Home", image = Icons.Filled.Home, route = "home"),
        BarItem(title = "Register", image = Icons.Filled.Add, route = "register"),
        BarItem(title = "View", image = Icons.Filled.AccountCircle, route = "view"),
        BarItem(title = "Help", image = Icons.Filled.Info, route = "help"),
    )
}

View File

@@ -0,0 +1,8 @@
package com.k2fsa.sherpa.onnx.speaker.identification
/**
 * The navigation destinations of the app.
 *
 * @property route Route string identifying the destination.
 */
sealed class NavRoutes(val route: String) {
object Home: NavRoutes("home")
object Register: NavRoutes("register")
object View: NavRoutes("view")
object Help: NavRoutes("help")
}

View File

@@ -0,0 +1,189 @@
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
import android.util.Log
import com.k2fsa.sherpa.onnx.speaker.identification.TAG
/**
 * Settings handed to the native constructor of [SpeakerEmbeddingExtractor].
 *
 * The fields are interpreted by the native library; the notes below marked
 * "presumably" are not verifiable from this file.
 *
 * @property model Model file to load.
 * @property numThreads Presumably the number of threads used for inference (TODO confirm).
 * @property debug Presumably enables extra native-side logging (TODO confirm).
 * @property provider Presumably the execution provider name, "cpu" by default (TODO confirm).
 */
data class SpeakerEmbeddingExtractorConfig(
val model: String,
var numThreads: Int = 1,
var debug: Boolean = false,
var provider: String = "cpu",
)
/**
 * Kotlin wrapper around a native stream object used to feed audio to a
 * [SpeakerEmbeddingExtractor].
 *
 * @property ptr Handle of the native object; set to 0 once it has been deleted.
 */
class SpeakerEmbeddingExtractorStream(var ptr: Long) {
    /** Passes [samples] recorded at [sampleRate] to the native stream. */
    fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
        acceptWaveform(ptr, samples, sampleRate)

    /** Tells the native stream that no more samples will follow. */
    fun inputFinished() = inputFinished(ptr)

    /**
     * Deletes the native object if it has not been deleted yet.
     *
     * The check makes the call idempotent: [release] and a later finalization
     * by the garbage collector do not both call the native delete.
     */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    // NOTE(review): not called anywhere in this file; kept because the
    // declaration was part of the original class.
    private external fun myTest(ptr: Long, v: Array<FloatArray>)

    /** Frees the native object immediately instead of waiting for finalization. */
    fun release() = finalize()

    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
    private external fun inputFinished(ptr: Long)
    private external fun delete(ptr: Long)

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
/**
 * Kotlin wrapper around the native speaker embedding extractor.
 *
 * @param assetManager When non-null, the native object is created through the
 *   asset-manager variant of the constructor; otherwise the file-based variant
 *   is used.
 * @param config Settings passed to the native constructor.
 */
class SpeakerEmbeddingExtractor(
    assetManager: AssetManager? = null,
    config: SpeakerEmbeddingExtractorConfig,
) {
    // Handle of the native object; 0 once it has been deleted.
    private var ptr: Long = if (assetManager != null) {
        new(assetManager, config)
    } else {
        newFromFile(config)
    }

    /**
     * Deletes the native object if it has not been deleted yet.
     *
     * The check makes the call idempotent: [release] and a later finalization
     * by the garbage collector do not both call the native delete.
     */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    /** Frees the native object immediately instead of waiting for finalization. */
    fun release() = finalize()

    /** Creates a new native stream and wraps it. */
    fun createStream(): SpeakerEmbeddingExtractorStream {
        val p = createStream(ptr)
        return SpeakerEmbeddingExtractorStream(p)
    }

    /** Asks the native extractor whether [stream] is ready for [compute]. */
    fun isReady(stream: SpeakerEmbeddingExtractorStream) = isReady(ptr, stream.ptr)

    /** Computes the embedding for [stream] and returns it as a float array. */
    fun compute(stream: SpeakerEmbeddingExtractorStream) = compute(ptr, stream.ptr)

    /** Returns the dimension reported by the native extractor. */
    fun dim() = dim(ptr)

    private external fun new(
        assetManager: AssetManager,
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun newFromFile(
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun delete(ptr: Long)
    private external fun createStream(ptr: Long): Long
    private external fun isReady(ptr: Long, streamPtr: Long): Boolean
    private external fun compute(ptr: Long, streamPtr: Long): FloatArray
    private external fun dim(ptr: Long): Int

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
/**
 * Kotlin wrapper around the native store of named speaker embeddings.
 *
 * @property dim Embedding dimension passed to the native constructor.
 */
class SpeakerEmbeddingManager(val dim: Int) {
    // Handle of the native object; 0 once it has been deleted.
    private var ptr: Long = new(dim)

    /**
     * Deletes the native object if it has not been deleted yet.
     *
     * The check makes the call idempotent: [release] and a later finalization
     * by the garbage collector do not both call the native delete.
     */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    /** Frees the native object immediately instead of waiting for finalization. */
    fun release() = finalize()

    /** Registers one [embedding] under [name]; returns the native success flag. */
    fun add(name: String, embedding: FloatArray) = add(ptr, name, embedding)

    /** Registers several embeddings under [name]; returns the native success flag. */
    fun add(name: String, embedding: Array<FloatArray>) = addList(ptr, name, embedding)

    /** Removes the speaker called [name]; returns the native success flag. */
    fun remove(name: String) = remove(ptr, name)

    /**
     * Looks up the speaker matching [embedding] with the given [threshold].
     * The home screen of this app treats an empty result as "unknown speaker".
     */
    fun search(embedding: FloatArray, threshold: Float) = search(ptr, embedding, threshold)

    /** Asks the native manager whether [embedding] matches speaker [name] at [threshold]. */
    fun verify(name: String, embedding: FloatArray, threshold: Float) =
        verify(ptr, name, embedding, threshold)

    /** Returns whether a speaker called [name] is registered. */
    fun contains(name: String) = contains(ptr, name)

    /** Returns the number of registered speakers. */
    fun numSpeakers() = numSpeakers(ptr)

    /** Returns the names of all registered speakers. */
    fun allSpeakerNames() = allSpeakerNames(ptr)

    private external fun new(dim: Int): Long
    private external fun delete(ptr: Long)
    private external fun add(ptr: Long, name: String, embedding: FloatArray): Boolean
    private external fun addList(ptr: Long, name: String, embedding: Array<FloatArray>): Boolean
    private external fun remove(ptr: Long, name: String): Boolean
    private external fun search(ptr: Long, embedding: FloatArray, threshold: Float): String
    private external fun verify(
        ptr: Long,
        name: String,
        embedding: FloatArray,
        threshold: Float
    ): Boolean

    private external fun contains(ptr: Long, name: String): Boolean
    private external fun numSpeakers(ptr: Long): Int
    private external fun allSpeakerNames(ptr: Long): Array<String>

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
// Please download the model file from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
// and put it inside the assets directory.
//
// Please don't put it in a subdirectory of assets
private val modelName = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
/**
 * Process-wide holder of the speaker embedding extractor and the speaker
 * manager. [initExtractor] must be called once before [extractor] or
 * [manager] is read.
 */
object SpeakerRecognition {
    var _extractor: SpeakerEmbeddingExtractor? = null
    var _manager: SpeakerEmbeddingManager? = null

    /** The extractor created by [initExtractor]; throws if it was not called. */
    val extractor: SpeakerEmbeddingExtractor
        get() = checkNotNull(_extractor) {
            "SpeakerRecognition.initExtractor() must be called before using extractor"
        }

    /** The manager created by [initExtractor]; throws if it was not called. */
    val manager: SpeakerEmbeddingManager
        get() = checkNotNull(_manager) {
            "SpeakerRecognition.initExtractor() must be called before using manager"
        }

    /**
     * Creates the extractor and a manager sized to the extractor's dimension.
     * Calls after the first successful one return immediately.
     *
     * @param assetManager Forwarded to [SpeakerEmbeddingExtractor].
     */
    fun initExtractor(assetManager: AssetManager? = null) {
        synchronized(this) {
            if (_extractor != null) {
                return
            }
            Log.i(TAG, "Initializing speaker embedding extractor")

            val newExtractor = SpeakerEmbeddingExtractor(
                assetManager = assetManager,
                config = SpeakerEmbeddingExtractorConfig(
                    model = modelName,
                    numThreads = 2,
                    debug = false,
                    provider = "cpu",
                )
            )

            // Publish the extractor only after the manager exists. Otherwise a
            // failure while creating the manager would leave _extractor set,
            // and the early return above would skip the manager forever.
            _manager = SpeakerEmbeddingManager(dim = newExtractor.dim())
            _extractor = newExtractor
        }
    }
}

View File

@@ -0,0 +1,29 @@
package com.k2fsa.sherpa.onnx.speaker.identification.screens
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.padding
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.ui.Modifier
import androidx.compose.ui.unit.dp
@Composable
fun HelpScreen() {
Box(modifier= Modifier.fillMaxSize()) {
Column(
modifier = Modifier.padding(16.dp)
) {
Text("Please see http://github.com/k2-fsa/sherpa-onnx ")
Spacer(modifier = Modifier.height(16.dp))
Text("https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models")
Spacer(modifier = Modifier.height(16.dp))
Text("https://k2-fsa.github.io/sherpa/social-groups.html")
Spacer(modifier = Modifier.height(16.dp))
Text("Everything is open-sourced!")
}
}
}

View File

@@ -0,0 +1,228 @@
package com.k2fsa.sherpa.onnx.speaker.identification.screens
import android.Manifest
import android.annotation.SuppressLint
import android.app.Activity
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.util.Log
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.width
import androidx.compose.material3.Button
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Slider
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.res.stringResource
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.dp
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.SpeakerRecognition
import com.k2fsa.sherpa.onnx.speaker.identification.R
import com.k2fsa.sherpa.onnx.speaker.identification.TAG
import kotlin.concurrent.thread
// Recorder of the recording in progress, or null when nothing is being recorded.
private var audioRecord: AudioRecord? = null

// Audio chunks of the latest recording, appended by the reader thread.
private var sampleList: MutableList<FloatArray>? = null

// Thread that reads from audioRecord while a recording is in progress.
private var recordingThread: Thread? = null

// Value of the result state that means "show nothing".
private val clearedResult = "-cleared-"

/**
 * Home tab: the user picks a threshold, records speech with the start/stop
 * button, and the name returned by `SpeakerRecognition.manager.search` is
 * displayed. An empty name is displayed as "Unknown speaker".
 */
@Composable
fun HomeScreen() {
    val activity = LocalContext.current as Activity

    var threshold by remember { mutableStateOf(0.5F) }
    var detectedName by remember { mutableStateOf(clearedResult) }
    var isStarted by remember { mutableStateOf(false) }

    val onRecordingButtonClick: () -> Unit = {
        isStarted = !isStarted
        if (isStarted) {
            if (ActivityCompat.checkSelfPermission(
                    activity,
                    Manifest.permission.RECORD_AUDIO
                ) != PackageManager.PERMISSION_GRANTED
            ) {
                Log.i(TAG, "Recording is not allowed")
                // Nothing was started, so the button must not switch to "stop".
                isStarted = false
            } else {
                // recording is allowed
                val audioSource = MediaRecorder.AudioSource.MIC
                val channelConfig = AudioFormat.CHANNEL_IN_MONO
                val audioFormat = AudioFormat.ENCODING_PCM_16BIT
                val numBytes =
                    AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)

                val recorder = AudioRecord(
                    audioSource,
                    sampleRateInHz,
                    channelConfig,
                    audioFormat,
                    numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
                )
                audioRecord = recorder

                sampleList = null
                detectedName = clearedResult

                // recording is started here
                recordingThread = thread(start = true) {
                    Log.i(TAG, "processing samples")
                    val interval = 0.1 // i.e., 100 ms
                    val bufferSize = (interval * sampleRateInHz).toInt() // in samples
                    val buffer = ShortArray(bufferSize)

                    // Use the captured recorder, not the file-level variable,
                    // which the UI thread sets to null when recording stops.
                    recorder.startRecording()
                    while (isStarted) {
                        val n = recorder.read(buffer, 0, buffer.size)
                        if (n < 0) {
                            // A negative value is an error code, not a sample count.
                            Log.e(TAG, "AudioRecord.read failed: $n")
                            break
                        }
                        if (n > 0) {
                            val samples = FloatArray(n) { buffer[it] / 32768.0f }
                            val chunks = sampleList
                            if (chunks == null) {
                                sampleList = mutableListOf(samples)
                            } else {
                                chunks.add(samples)
                            }
                        }
                    }
                    Log.i(TAG, "Home: Recording is stopped. ${sampleList?.count()}")
                }
            }
        } else {
            // recording is stopped here
            audioRecord?.stop()

            // Give the reader thread a moment to finish before the recorder is
            // released and sampleList is read on this thread.
            recordingThread?.join(1000)
            recordingThread = null

            audioRecord?.release()
            audioRecord = null

            sampleList?.let { chunks ->
                val stream = SpeakerRecognition.extractor.createStream()
                for (samples in chunks) {
                    stream.acceptWaveform(samples = samples, sampleRate = sampleRateInHz)
                }
                stream.inputFinished()
                if (SpeakerRecognition.extractor.isReady(stream)) {
                    val embedding = SpeakerRecognition.extractor.compute(stream)
                    detectedName = SpeakerRecognition.manager.search(
                        embedding = embedding,
                        threshold = threshold,
                    )
                }
            }
        }
    }

    val onThresholdChange = { newValue: Float ->
        threshold = newValue
    }

    Box(
        modifier = Modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter,
    ) {
        Column(
            horizontalAlignment = Alignment.CenterHorizontally,
        ) {
            HomeThresholdRow(
                threshold = threshold,
                onValueChange = onThresholdChange,
            )
            HomeButtonRow(
                isStarted = isStarted,
                onRecordingButtonClick = onRecordingButtonClick,
                onClearButtonClick = {
                    detectedName = clearedResult
                },
            )
            Spacer(modifier = Modifier.height(48.dp))

            when {
                // Cleared: show nothing.
                detectedName == clearedResult -> Unit

                detectedName.isNotEmpty() -> Text(
                    text = "Speaker: $detectedName",
                    style = MaterialTheme.typography.headlineLarge,
                    fontWeight = FontWeight.Bold,
                )

                else -> Text(
                    text = "Unknown speaker",
                    style = MaterialTheme.typography.headlineLarge,
                    fontWeight = FontWeight.Bold,
                )
            }
        }
    }
}
/**
 * Row with the start/stop recording button and the clear button.
 *
 * The recording button is enabled only while at least one speaker is
 * registered in `SpeakerRecognition.manager`.
 *
 * @param modifier Modifier applied to the row.
 * @param isStarted Whether a recording is in progress; selects the button label.
 * @param onRecordingButtonClick Called when the start/stop button is clicked.
 * @param onClearButtonClick Called when the clear button is clicked.
 */
@Composable
private fun HomeButtonRow(
    modifier: Modifier = Modifier,
    isStarted: Boolean,
    onRecordingButtonClick: () -> Unit,
    onClearButtonClick: () -> Unit,
) {
    // Read on every composition. A state object that is not remembered would
    // be recreated each time anyway, so a plain value is equivalent.
    val numSpeakers = SpeakerRecognition.manager.numSpeakers()

    Row(
        modifier = modifier.fillMaxWidth(),
        horizontalArrangement = Arrangement.Center,
    ) {
        Button(
            enabled = numSpeakers > 0,
            onClick = onRecordingButtonClick
        ) {
            Text(text = stringResource(if (isStarted) R.string.stop else R.string.start))
        }

        Spacer(modifier = Modifier.width(24.dp))

        Button(onClick = onClearButtonClick) {
            Text(text = stringResource(id = R.string.clear))
        }
    }
}
/**
 * Shows the current threshold with two decimals and a slider to change it
 * within 0.1..1.0.
 *
 * @param modifier Modifier applied to the enclosing column.
 * @param threshold Current threshold value.
 * @param onValueChange Called with the new value when the slider is moved.
 */
@Composable
fun HomeThresholdRow(
    modifier: Modifier = Modifier,
    threshold: Float,
    onValueChange: (Float) -> Unit,
) {
    // The caller's modifier goes on the root only; the children get their own
    // modifiers so that caller-supplied modifiers are not applied twice.
    Column(modifier = modifier) {
        Text(
            text = "Threshold: " + String.format("%.2f", threshold),
            style = MaterialTheme.typography.headlineMedium,
            fontWeight = FontWeight.Bold,
            modifier = Modifier.padding(bottom = 8.dp, top = 8.dp),
        )
        Slider(
            value = threshold,
            onValueChange = onValueChange,
            valueRange = 0.1F..1.0F,
            modifier = Modifier.fillMaxWidth(),
        )
    }
}

View File

@@ -0,0 +1,254 @@
package com.k2fsa.sherpa.onnx.speaker.identification.screens
import android.Manifest
import android.annotation.SuppressLint
import android.app.Activity
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.util.Log
import android.widget.Toast
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.Spacer
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.width
import androidx.compose.material3.Button
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.OutlinedTextField
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.res.stringResource
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.tooling.preview.Preview
import androidx.compose.ui.unit.dp
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.SpeakerRecognition
import com.k2fsa.sherpa.onnx.speaker.identification.R
import com.k2fsa.sherpa.onnx.speaker.identification.TAG
import kotlin.concurrent.thread
// Recorder of the recording in progress, or null when nothing is being recorded.
private var audioRecord: AudioRecord? = null

// Audio chunks of the latest recording, appended by the reader thread.
private var sampleList: MutableList<FloatArray>? = null

// One embedding per finished recording of the speaker being registered.
private var embeddingList: MutableList<FloatArray>? = null

// Thread that reads from audioRecord while a recording is in progress.
private var recordingThread: Thread? = null

// Sample rate used for recording and passed to the extractor stream.
val sampleRateInHz = 16000

/**
 * Register tab: the user types a speaker name, makes one or more recordings,
 * and adds the speaker with the embeddings of those recordings to
 * `SpeakerRecognition.manager`.
 *
 * @param modifier Modifier applied to the root box.
 */
@Preview
@Composable
fun RegisterScreen(modifier: Modifier = Modifier) {
    val activity = LocalContext.current as Activity

    // Drops embeddings left over from an earlier visit the first time this
    // screen is composed. Setting the flag back to true (after a successful
    // add) also triggers a recomposition that refreshes the counter below.
    var firstTime by remember { mutableStateOf(true) }
    if (firstTime) {
        firstTime = false
        // clear states
        embeddingList = null
    }

    // embeddingList is not observable state, so this is recomputed on each
    // recomposition.
    val numberAudio = embeddingList?.count() ?: 0

    Box(
        modifier = modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter
    ) {
        var speakerName by remember { mutableStateOf("") }
        val onSpeakerNameChange = { newName: String -> speakerName = newName }

        var isStarted by remember { mutableStateOf(false) }

        val onRecordingButtonClick: () -> Unit = {
            isStarted = !isStarted
            if (isStarted) {
                if (ActivityCompat.checkSelfPermission(
                        activity,
                        Manifest.permission.RECORD_AUDIO
                    ) != PackageManager.PERMISSION_GRANTED
                ) {
                    Log.i(TAG, "Recording is not allowed")
                    // Nothing was started, so the button must not switch to "stop".
                    isStarted = false
                } else {
                    // recording is allowed
                    val audioSource = MediaRecorder.AudioSource.MIC
                    val channelConfig = AudioFormat.CHANNEL_IN_MONO
                    val audioFormat = AudioFormat.ENCODING_PCM_16BIT
                    val numBytes =
                        AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)

                    val recorder = AudioRecord(
                        audioSource,
                        sampleRateInHz,
                        channelConfig,
                        audioFormat,
                        numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
                    )
                    audioRecord = recorder

                    sampleList = null

                    // recording is started here
                    recordingThread = thread(start = true) {
                        Log.i(TAG, "processing samples")
                        val interval = 0.1 // i.e., 100 ms
                        val bufferSize = (interval * sampleRateInHz).toInt() // in samples
                        val buffer = ShortArray(bufferSize)

                        // Use the captured recorder, not the file-level variable,
                        // which the UI thread sets to null when recording stops.
                        recorder.startRecording()
                        while (isStarted) {
                            val n = recorder.read(buffer, 0, buffer.size)
                            if (n < 0) {
                                // A negative value is an error code, not a sample count.
                                Log.e(TAG, "AudioRecord.read failed: $n")
                                break
                            }
                            if (n > 0) {
                                val samples = FloatArray(n) { buffer[it] / 32768.0f }
                                val chunks = sampleList
                                if (chunks == null) {
                                    sampleList = mutableListOf(samples)
                                } else {
                                    chunks.add(samples)
                                }
                            }
                        }
                        Log.i(TAG, "Recording is stopped. ${sampleList?.count()}")
                    }
                }
            } else {
                // recording is stopped here
                audioRecord?.stop()

                // Give the reader thread a moment to finish before the recorder
                // is released and sampleList is read on this thread.
                recordingThread?.join(1000)
                recordingThread = null

                audioRecord?.release()
                audioRecord = null

                sampleList?.let { chunks ->
                    val stream = SpeakerRecognition.extractor.createStream()
                    for (samples in chunks) {
                        stream.acceptWaveform(samples = samples, sampleRate = sampleRateInHz)
                    }
                    stream.inputFinished()
                    if (SpeakerRecognition.extractor.isReady(stream)) {
                        val embedding = SpeakerRecognition.extractor.compute(stream)
                        val embeddings = embeddingList
                        if (embeddings == null) {
                            embeddingList = mutableListOf(embedding)
                        } else {
                            embeddings.add(embedding)
                        }
                    }
                }
            }
        }

        val onAddButtonClick: () -> Unit = {
            val name = speakerName.trim()
            val embeddings = embeddingList

            if (name.isEmpty()) {
                Toast.makeText(
                    activity,
                    "please input a speaker name",
                    Toast.LENGTH_SHORT
                ).show()
            } else if (SpeakerRecognition.manager.contains(name)) {
                Toast.makeText(
                    activity,
                    "A speaker with $speakerName already exists. Please choose a new name",
                    Toast.LENGTH_SHORT
                ).show()
            } else if (embeddings == null || embeddings.isEmpty()) {
                // The Add button is disabled in this state; this guard avoids a
                // crash if the handler is reached anyway.
                Log.i(TAG, "No recordings available for $name")
            } else {
                val ok = SpeakerRecognition.manager.add(name, embedding = embeddings.toTypedArray())
                if (ok) {
                    Log.i(TAG, "Added $name successfully")
                    Toast.makeText(
                        activity,
                        "Added $name",
                        Toast.LENGTH_SHORT
                    ).show()

                    // Start over for the next speaker.
                    embeddingList = null
                    sampleList = null
                    speakerName = ""
                    firstTime = true
                } else {
                    Log.i(TAG, "Failed to add $name")
                    Toast.makeText(
                        activity,
                        "Failed to add $name",
                        Toast.LENGTH_SHORT
                    ).show()
                }
            }
        }

        Column(horizontalAlignment = Alignment.CenterHorizontally) {
            SpeakerNameRow(speakerName = speakerName, onValueChange = onSpeakerNameChange)
            Text(
                "Number of recordings: $numberAudio",
                modifier = Modifier.padding(24.dp),
                style = MaterialTheme.typography.headlineMedium,
                fontWeight = FontWeight.Bold,
            )
            RegisterSpeakerButtonRow(
                isStarted = isStarted,
                onRecordingButtonClick = onRecordingButtonClick,
                onAddButtonClick = onAddButtonClick,
            )
        }
    }
}
/**
 * Single-line outlined text field for the speaker name.
 *
 * @param modifier Base modifier; full width and 8 dp padding are appended to it.
 * @param speakerName Text currently shown in the field.
 * @param onValueChange Called with the new text whenever the user edits it.
 */
@Composable
fun SpeakerNameRow(
    modifier: Modifier = Modifier,
    speakerName: String,
    onValueChange: (String) -> Unit
) {
    val fieldModifier = modifier
        .fillMaxWidth()
        .padding(8.dp)

    OutlinedTextField(
        value = speakerName,
        onValueChange = onValueChange,
        modifier = fieldModifier,
        label = { Text("Please input the speaker name") },
        singleLine = true,
    )
}
/**
 * Row with the start/stop recording button and the add button.
 *
 * The add button is enabled only while at least one embedding has been
 * collected for the speaker being registered.
 *
 * @param modifier Modifier applied to the row.
 * @param isStarted Whether a recording is in progress; selects the button label.
 * @param onRecordingButtonClick Called when the start/stop button is clicked.
 * @param onAddButtonClick Called when the add button is clicked.
 */
@Composable
fun RegisterSpeakerButtonRow(
    modifier: Modifier = Modifier,
    isStarted: Boolean,
    onRecordingButtonClick: () -> Unit,
    onAddButtonClick: () -> Unit,
) {
    // Read on every composition. A state object that is not remembered would
    // be recreated each time anyway, so a plain value is equivalent.
    val numberAudio = embeddingList?.count() ?: 0

    Row(
        modifier = modifier.fillMaxWidth(),
        horizontalArrangement = Arrangement.Center,
    ) {
        Button(onClick = onRecordingButtonClick) {
            Text(text = stringResource(if (isStarted) R.string.stop else R.string.start))
        }

        Spacer(modifier = Modifier.width(24.dp))

        Button(
            enabled = numberAudio > 0,
            onClick = onAddButtonClick,
        ) {
            Text(text = stringResource(id = R.string.add))
        }
    }
}

View File

@@ -0,0 +1,113 @@
package com.k2fsa.sherpa.onnx.speaker.identification.screens
import android.annotation.SuppressLint
import androidx.compose.foundation.ExperimentalFoundationApi
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.lazy.LazyColumn
import androidx.compose.foundation.lazy.items
import androidx.compose.material3.Button
import androidx.compose.material3.Checkbox
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Surface
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.runtime.toMutableStateList
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.unit.dp
import com.k2fsa.sherpa.onnx.SpeakerRecognition
/**
 * UI model of one registered speaker shown in the list of the view screen.
 *
 * @property name Name of the speaker.
 */
class SpeakerName(val name: String) {
// State copy of the name; not read anywhere in this file.
val nameState = mutableStateOf(name)
// Whether the row's checkbox is ticked, i.e. the speaker is selected for deletion.
val checked = mutableStateOf(false)
// Stores the new checkbox value.
fun onCheckedChange(newValue: Boolean) {
checked.value = newValue
}
}
/**
 * View tab: lists the registered speakers with a checkbox each and a button
 * that removes the checked ones from `SpeakerRecognition.manager`.
 */
@OptIn(ExperimentalFoundationApi::class)
@Composable
fun ViewScreen() {
    // The names are fetched from the manager once, inside remember, instead
    // of on every recomposition; only the remembered list is used afterwards.
    val allSpeakerNameList = remember {
        SpeakerRecognition.manager.allSpeakerNames()
            .map { SpeakerName(it) }
            .toMutableStateList()
    }

    // Whether the delete button is enabled.
    var enabled by remember {
        mutableStateOf(SpeakerRecognition.manager.numSpeakers() > 0)
    }

    Box(
        modifier = Modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter
    ) {
        Column(
            modifier = Modifier.padding(16.dp),
            horizontalAlignment = Alignment.CenterHorizontally,
        ) {
            Button(
                enabled = enabled,
                onClick = {
                    // Collect first, then mutate, so the list is not changed
                    // while it is being iterated.
                    val toRemove = allSpeakerNameList.filter { it.checked.value }
                    toRemove.forEach { SpeakerRecognition.manager.remove(it.name) }
                    allSpeakerNameList.removeAll(toRemove)
                    enabled = SpeakerRecognition.manager.numSpeakers() > 0
                }) {
                Text("Delete selected")
            }

            LazyColumn(modifier = Modifier.fillMaxSize()) {
                items(allSpeakerNameList) { s: SpeakerName ->
                    ViewRow(speakerName = s)
                }
            }
        }
    }
}
/**
 * One row of the speaker list: the speaker's name followed by a checkbox
 * bound to [SpeakerName.checked].
 *
 * @param modifier Modifier applied to the row's surface.
 * @param speakerName Speaker shown in this row.
 */
@Composable
fun ViewRow(
    modifier: Modifier = Modifier,
    speakerName: SpeakerName
) {
    // The caller's modifier goes on the root surface only. Reusing it on the
    // nested Row and Text would apply caller-supplied modifiers three times.
    Surface(
        modifier = modifier
            .fillMaxWidth()
            .padding(8.dp),
        color = MaterialTheme.colorScheme.inversePrimary,
    ) {
        Row(
            horizontalArrangement = Arrangement.Center,
            verticalAlignment = Alignment.CenterVertically,
        ) {
            Text(
                text = speakerName.name,
                modifier = Modifier.weight(1.0F),
            )
            Checkbox(
                checked = speakerName.checked.value,
                onCheckedChange = { speakerName.onCheckedChange(it) }
            )
        }
    }
}

View File

@@ -0,0 +1,11 @@
package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme
import androidx.compose.ui.graphics.Color
// Colors used as primary / secondary / tertiary of the dark color scheme.
val Purple80 = Color(0xFFD0BCFF)
val PurpleGrey80 = Color(0xFFCCC2DC)
val Pink80 = Color(0xFFEFB8C8)
// Colors used as primary / secondary / tertiary of the light color scheme.
val Purple40 = Color(0xFF6650a4)
val PurpleGrey40 = Color(0xFF625b71)
val Pink40 = Color(0xFF7D5260)

View File

@@ -0,0 +1,70 @@
package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme
import android.app.Activity
import android.os.Build
import androidx.compose.foundation.isSystemInDarkTheme
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.darkColorScheme
import androidx.compose.material3.dynamicDarkColorScheme
import androidx.compose.material3.dynamicLightColorScheme
import androidx.compose.material3.lightColorScheme
import androidx.compose.runtime.Composable
import androidx.compose.runtime.SideEffect
import androidx.compose.ui.graphics.toArgb
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.platform.LocalView
import androidx.core.view.WindowCompat
// Color scheme used when the dark theme is active and dynamic color is not used.
private val DarkColorScheme = darkColorScheme(
primary = Purple80,
secondary = PurpleGrey80,
tertiary = Pink80
)
// Color scheme used when the light theme is active and dynamic color is not used.
private val LightColorScheme = lightColorScheme(
primary = Purple40,
secondary = PurpleGrey40,
tertiary = Pink40
/* Other default colors to override
background = Color(0xFFFFFBFE),
surface = Color(0xFFFFFBFE),
onPrimary = Color.White,
onSecondary = Color.White,
onTertiary = Color.White,
onBackground = Color(0xFF1C1B1F),
onSurface = Color(0xFF1C1B1F),
*/
)
/**
 * Material 3 theme of the app.
 *
 * Picks a dynamic color scheme when [dynamicColor] is set and the device runs
 * Android S or newer, otherwise the fixed dark or light scheme. Outside of
 * edit mode it also sets the status bar color to the scheme's primary color.
 *
 * @param darkTheme Whether to use the dark variant; follows the system by default.
 * @param dynamicColor Whether to prefer the dynamic color scheme when available.
 * @param content Composable content shown inside the theme.
 */
@Composable
fun SherpaOnnxSpeakerIdentificationTheme(
    darkTheme: Boolean = isSystemInDarkTheme(),
    // Dynamic color is available on Android 12+
    dynamicColor: Boolean = true,
    content: @Composable () -> Unit
) {
    // The SDK check stays inline in the condition so that the API-level
    // guard is visible right next to the dynamic color calls.
    val colorScheme = if (dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
        val context = LocalContext.current
        if (darkTheme) {
            dynamicDarkColorScheme(context)
        } else {
            dynamicLightColorScheme(context)
        }
    } else if (darkTheme) {
        DarkColorScheme
    } else {
        LightColorScheme
    }

    val currentView = LocalView.current
    if (!currentView.isInEditMode) {
        SideEffect {
            val hostActivity = currentView.context as Activity
            val activityWindow = hostActivity.window
            activityWindow.statusBarColor = colorScheme.primary.toArgb()

            val insetsController = WindowCompat.getInsetsController(activityWindow, currentView)
            insetsController.isAppearanceLightStatusBars = darkTheme
        }
    }

    MaterialTheme(
        colorScheme = colorScheme,
        typography = Typography,
        content = content
    )
}

View File

@@ -0,0 +1,34 @@
package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme
import androidx.compose.material3.Typography
import androidx.compose.ui.text.TextStyle
import androidx.compose.ui.text.font.FontFamily
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.sp
// Set of Material typography styles to start with.
// Only bodyLarge is overridden; it is passed to MaterialTheme by the app theme.
val Typography = Typography(
bodyLarge = TextStyle(
fontFamily = FontFamily.Default,
fontWeight = FontWeight.Normal,
fontSize = 16.sp,
lineHeight = 24.sp,
letterSpacing = 0.5.sp
)
/* Other default text styles to override
titleLarge = TextStyle(
fontFamily = FontFamily.Default,
fontWeight = FontWeight.Normal,
fontSize = 22.sp,
lineHeight = 28.sp,
letterSpacing = 0.sp
),
labelSmall = TextStyle(
fontFamily = FontFamily.Default,
fontWeight = FontWeight.Medium,
fontSize = 11.sp,
lineHeight = 16.sp,
letterSpacing = 0.5.sp
)
*/
)

View File

@@ -0,0 +1,30 @@
<vector xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:aapt="http://schemas.android.com/aapt"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
<aapt:attr name="android:fillColor">
<gradient
android:endX="85.84757"
android:endY="92.4963"
android:startX="42.9492"
android:startY="49.59793"
android:type="linear">
<item
android:color="#44000000"
android:offset="0.0" />
<item
android:color="#00000000"
android:offset="1.0" />
</gradient>
</aapt:attr>
</path>
<path
android:fillColor="#FFFFFF"
android:fillType="nonZero"
android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
android:strokeWidth="1"
android:strokeColor="#00000000" />
</vector>

View File

@@ -0,0 +1,170 @@
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path
android:fillColor="#3DDC84"
android:pathData="M0,0h108v108h-108z" />
<path
android:fillColor="#00000000"
android:pathData="M9,0L9,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,0L19,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M29,0L29,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M39,0L39,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M49,0L49,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M59,0L59,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M69,0L69,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M79,0L79,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M89,0L89,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M99,0L99,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,9L108,9"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,19L108,19"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,29L108,29"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,39L108,39"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,49L108,49"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,59L108,59"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,69L108,69"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,79L108,79"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,89L108,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,99L108,99"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,29L89,29"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,39L89,39"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,49L89,49"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,59L89,59"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,69L89,69"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,79L89,79"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M29,19L29,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M39,19L39,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M49,19L49,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M59,19L59,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M69,19L69,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M79,19L79,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
</vector>

View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 982 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 KiB

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="purple_200">#FFBB86FC</color>
<color name="purple_500">#FF6200EE</color>
<color name="purple_700">#FF3700B3</color>
<color name="teal_200">#FF03DAC5</color>
<color name="teal_700">#FF018786</color>
<color name="black">#FF000000</color>
<color name="white">#FFFFFFFF</color>
</resources>

View File

@@ -0,0 +1,7 @@
<resources>
<string name="app_name">Speaker Identification</string>
<string name="start">Start recording</string>
<string name="stop">Stop recording</string>
<string name="add">Add speaker</string>
<string name="clear">Clear result</string>
</resources>

View File

@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<style name="Theme.SherpaOnnxSpeakerIdentification" parent="android:Theme.Material.Light.NoActionBar" />
</resources>

View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="utf-8"?><!--
Sample backup rules file; uncomment and customize as necessary.
See https://developer.android.com/guide/topics/data/autobackup
for details.
Note: This file is ignored for devices older than API 31
See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
<!--
<include domain="sharedpref" path="."/>
<exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

View File

@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="utf-8"?><!--
Sample data extraction rules file; uncomment and customize as necessary.
See https://developer.android.com/about/versions/12/backup-restore#xml-changes
for details.
-->
<data-extraction-rules>
<cloud-backup>
<!-- TODO: Use <include> and <exclude> to control what is backed up.
<include .../>
<exclude .../>
-->
</cloud-backup>
<!--
<device-transfer>
<include .../>
<exclude .../>
</device-transfer>
-->
</data-extraction-rules>