Android demo for speaker diarization (#1423)
@@ -0,0 +1,32 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:tools="http://schemas.android.com/tools">
|
||||
|
||||
<uses-permission
|
||||
android:name="android.permission.READ_EXTERNAL_STORAGE"
|
||||
android:maxSdkVersion="32" />
|
||||
|
||||
<application
|
||||
android:allowBackup="true"
|
||||
android:dataExtractionRules="@xml/data_extraction_rules"
|
||||
android:fullBackupContent="@xml/backup_rules"
|
||||
android:icon="@mipmap/ic_launcher"
|
||||
android:label="@string/app_name"
|
||||
android:roundIcon="@mipmap/ic_launcher_round"
|
||||
android:supportsRtl="true"
|
||||
android:theme="@style/Theme.SherpaOnnxSpeakerDiarization"
|
||||
tools:targetApi="31">
|
||||
<activity
|
||||
android:name=".MainActivity"
|
||||
android:exported="true"
|
||||
android:label="@string/app_name"
|
||||
android:theme="@style/Theme.SherpaOnnxSpeakerDiarization">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN" />
|
||||
|
||||
<category android:name="android.intent.category.LAUNCHER" />
|
||||
</intent-filter>
|
||||
</activity>
|
||||
</application>
|
||||
|
||||
</manifest>
|
||||
@@ -0,0 +1,13 @@
|
||||
package com.k2fsa.sherpa.onnx.speaker.diarization
|
||||
|
||||
import androidx.compose.ui.graphics.vector.ImageVector
|
||||
|
||||
/**
 * One entry of the bottom navigation bar.
 *
 * Icons can be browsed at https://www.composables.com/icons and
 * https://developer.android.com/reference/kotlin/androidx/compose/material/icons/filled/package-summary
 *
 * @property title text shown as the item's label and used as the icon's content description
 * @property image vector icon drawn for the item
 * @property route navigation route opened when the item is clicked
 */
data class BarItem(
    val title: String,
    val image: ImageVector,
    val route: String,
)
|
||||
@@ -0,0 +1,132 @@
|
||||
package com.k2fsa.sherpa.onnx.speaker.diarization
|
||||
|
||||
import android.os.Bundle
|
||||
import androidx.activity.ComponentActivity
|
||||
import androidx.activity.compose.setContent
|
||||
import androidx.activity.enableEdgeToEdge
|
||||
import androidx.compose.foundation.layout.Column
|
||||
import androidx.compose.foundation.layout.fillMaxSize
|
||||
import androidx.compose.foundation.layout.padding
|
||||
import androidx.compose.material3.CenterAlignedTopAppBar
|
||||
import androidx.compose.material3.ExperimentalMaterial3Api
|
||||
import androidx.compose.material3.Icon
|
||||
import androidx.compose.material3.MaterialTheme
|
||||
import androidx.compose.material3.NavigationBar
|
||||
import androidx.compose.material3.NavigationBarItem
|
||||
import androidx.compose.material3.Scaffold
|
||||
import androidx.compose.material3.Surface
|
||||
import androidx.compose.material3.Text
|
||||
import androidx.compose.material3.TopAppBarDefaults
|
||||
import androidx.compose.runtime.Composable
|
||||
import androidx.compose.runtime.getValue
|
||||
import androidx.compose.ui.Modifier
|
||||
import androidx.compose.ui.text.font.FontWeight
|
||||
import androidx.compose.ui.tooling.preview.Preview
|
||||
import androidx.navigation.NavGraph.Companion.findStartDestination
|
||||
import androidx.navigation.NavHostController
|
||||
import androidx.navigation.compose.NavHost
|
||||
import androidx.navigation.compose.composable
|
||||
import androidx.navigation.compose.currentBackStackEntryAsState
|
||||
import androidx.navigation.compose.rememberNavController
|
||||
import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HelpScreen
|
||||
import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HomeScreen
|
||||
import com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme.SherpaOnnxSpeakerDiarizationTheme
|
||||
|
||||
const val TAG = "sherpa-onnx-sd"
|
||||
|
||||
/**
 * Launcher activity: installs the Compose UI, then initializes the shared
 * [SpeakerDiarizationObject] with this activity's assets.
 */
class MainActivity : ComponentActivity() {
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        enableEdgeToEdge()
        setContent { Content() }
        SpeakerDiarizationObject.initSpeakerDiarization(assets)
    }

    /** Themed root: a full-size surface in the theme's background color hosting [MainScreen]. */
    @Composable
    private fun Content() {
        SherpaOnnxSpeakerDiarizationTheme {
            Surface(
                modifier = Modifier.fillMaxSize(),
                color = MaterialTheme.colorScheme.background,
            ) {
                MainScreen()
            }
        }
    }
}
|
||||
|
||||
/**
 * Root screen: a scaffold with a centered title bar, the navigation host as
 * content, and the bottom navigation bar.
 *
 * @param modifier applied to the scaffold; defaults to an empty [Modifier]
 */
@OptIn(ExperimentalMaterial3Api::class)
@Composable
fun MainScreen(modifier: Modifier = Modifier) {
    val navController = rememberNavController()

    Scaffold(
        // Forward the caller's modifier; previously the parameter was silently ignored.
        modifier = modifier,
        topBar = {
            CenterAlignedTopAppBar(
                colors = TopAppBarDefaults.topAppBarColors(
                    containerColor = MaterialTheme.colorScheme.primaryContainer,
                    titleContentColor = MaterialTheme.colorScheme.primary,
                ),
                title = {
                    Text(
                        "Next-gen Kaldi: Speaker Diarization",
                        fontWeight = FontWeight.Bold,
                    )
                },
            )
        },
        bottomBar = {
            BottomNavigationBar(navController = navController)
        },
        content = { padding ->
            // Apply the scaffold's insets so content is not drawn under the bars.
            Column(Modifier.padding(padding)) {
                NavigationHost(navController = navController)
            }
        },
    )
}
|
||||
|
||||
/**
 * Declares the navigation graph: the home route (start destination) shows
 * [HomeScreen] and the help route shows [HelpScreen].
 *
 * @param navController controller that drives this graph
 */
@Composable
fun NavigationHost(navController: NavHostController) {
    val homeRoute = NavRoutes.Home.route
    val helpRoute = NavRoutes.Help.route

    NavHost(navController = navController, startDestination = homeRoute) {
        composable(route = homeRoute) { HomeScreen() }
        composable(route = helpRoute) { HelpScreen() }
    }
}
|
||||
|
||||
/**
 * Bottom bar with one item per entry of [NavBarItems.BarItems]; the item whose
 * route equals the current back stack destination's route is shown as selected.
 *
 * @param navController controller used to observe the current destination and to navigate
 */
@Composable
fun BottomNavigationBar(navController: NavHostController) {
    NavigationBar {
        val entry by navController.currentBackStackEntryAsState()
        val activeRoute = entry?.destination?.route

        for (item in NavBarItems.BarItems) {
            NavigationBarItem(
                selected = item.route == activeRoute,
                onClick = {
                    navController.navigate(item.route) {
                        // Pop up to the graph's start destination, saving state on the way.
                        popUpTo(navController.graph.findStartDestination().id) {
                            saveState = true
                        }
                        launchSingleTop = true
                        restoreState = true
                    }
                },
                icon = {
                    Icon(imageVector = item.image, contentDescription = item.title)
                },
                label = {
                    Text(text = item.title)
                },
            )
        }
    }
}
|
||||
|
||||
/** Design-time preview of [MainScreen] wrapped in the app theme. */
@Preview(showBackground = true)
@Composable
fun MainScreenPreview() = SherpaOnnxSpeakerDiarizationTheme { MainScreen() }
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.k2fsa.sherpa.onnx.speaker.diarization
|
||||
|
||||
import androidx.compose.material.icons.Icons
|
||||
import androidx.compose.material.icons.filled.Home
|
||||
import androidx.compose.material.icons.filled.Info
|
||||
|
||||
/** Items shown in the bottom navigation bar, in display order. */
object NavBarItems {
    // Routes come from NavRoutes so the bar and the navigation graph cannot drift apart.
    val BarItems = listOf(
        BarItem(
            title = "Home",
            image = Icons.Filled.Home,
            route = NavRoutes.Home.route,
        ),
        BarItem(
            title = "Help",
            image = Icons.Filled.Info,
            route = NavRoutes.Help.route,
        ),
    )
}
|
||||
@@ -0,0 +1,6 @@
|
||||
package com.k2fsa.sherpa.onnx.speaker.diarization
|
||||
|
||||
/**
 * Closed set of navigation destinations.
 *
 * @property route string identifying the destination
 */
sealed class NavRoutes(val route: String) {
    /** Destination with route "home". */
    object Home : NavRoutes(route = "home")

    /** Destination with route "help". */
    object Help : NavRoutes(route = "help")
}
|
||||
@@ -0,0 +1 @@
|
||||
../../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt
|
||||
@@ -0,0 +1,137 @@
|
||||
package com.k2fsa.sherpa.onnx.speaker.diarization.screens
|
||||
|
||||
import android.content.Context
|
||||
import android.media.AudioFormat
|
||||
import android.media.MediaCodec
|
||||
import android.media.MediaExtractor
|
||||
import android.media.MediaFormat
|
||||
import android.net.Uri
|
||||
|
||||
/**
 * Result of reading an audio file.
 *
 * All fields default to `null`.
 *
 * Equality and hashing compare [samples] by content: the generated data-class
 * implementations would compare the array by reference.
 *
 * @property sampleRate sample rate of the decoded audio
 * @property samples decoded samples
 * @property msg message describing why reading failed
 */
data class WaveData(
    val sampleRate: Int? = null,
    val samples: FloatArray? = null,
    val msg: String? = null,
) {
    override fun equals(other: Any?): Boolean {
        if (this === other) return true
        if (other !is WaveData) return false
        return sampleRate == other.sampleRate &&
            msg == other.msg &&
            samples.contentEquals(other.samples)
    }

    override fun hashCode(): Int {
        var result = sampleRate ?: 0
        result = 31 * result + samples.contentHashCode()
        result = 31 * result + (msg?.hashCode() ?: 0)
        return result
    }
}
|
||||
|
||||
/**
 * Decodes the first audio track found in [uri] into float samples.
 *
 * Only tracks whose format reports 16-bit PCM encoding are accepted. Every
 * 16-bit value (assumed little endian) is divided by 32768. Values are
 * converted in stream order; channels are not separated.
 *
 * References
 *  - https://gist.github.com/a-m-s/1991ab18fbcb0fcc2cf9
 *  - https://github.com/taehwandev/MediaCodecExample/blob/master/app/src/main/java/tech/thdev/mediacodecexample/audio/AACAudioDecoderThread.kt
 *
 * @param context context used to open [uri]
 * @param uri location of the audio file
 * @return a [WaveData] with `sampleRate` and `samples` on success; otherwise a
 *   [WaveData] whose `msg` explains the failure (no audio track, unsupported
 *   encoding, or no decoded samples)
 */
fun readUri(context: Context, uri: Uri): WaveData {
    val extractor = MediaExtractor()
    try {
        extractor.setDataSource(context, uri, null)

        for (i in 0 until extractor.trackCount) {
            val format = extractor.getTrackFormat(i)
            val mime = format.getString(MediaFormat.KEY_MIME)
            if (mime?.startsWith("audio/") != true) continue

            // Only the first audio track is considered.
            extractor.selectTrack(i)
            return decodePcm16Track(extractor, format, mime)
        }

        return WaveData(msg = "not an audio file")
    } finally {
        // Release on every path, including early returns and exceptions.
        extractor.release()
    }
}

/** Runs the selected track of [extractor] through a decoder and collects its 16-bit PCM output. */
private fun decodePcm16Track(
    extractor: MediaExtractor,
    format: MediaFormat,
    mime: String,
): WaveData {
    val encoding = try {
        format.getInteger(MediaFormat.KEY_PCM_ENCODING)
    } catch (_: Exception) {
        -1 // encoding not readable from the format: treated as unsupported
    }
    if (encoding != AudioFormat.ENCODING_PCM_16BIT) {
        return WaveData(msg = "We support only 16-bit encoded wave files")
    }

    val sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
    val timeoutUs = 10_000L
    val chunks = mutableListOf<FloatArray>()

    val decoder = MediaCodec.createDecoderByType(mime)
    try {
        decoder.configure(format, null, null, 0)
        decoder.start()

        val info = MediaCodec.BufferInfo()
        var inputDone = false
        var outputDone = false

        while (!outputDone) {
            if (!inputDone) {
                val inIndex = decoder.dequeueInputBuffer(timeoutUs)
                // Index 0 is a valid buffer; only negative values mean "none available".
                if (inIndex >= 0) {
                    val inBuffer = checkNotNull(decoder.getInputBuffer(inIndex)) {
                        "decoder returned no input buffer for index $inIndex"
                    }
                    val size = extractor.readSampleData(inBuffer, 0)
                    if (size < 0) {
                        decoder.queueInputBuffer(
                            inIndex,
                            0,
                            0,
                            0,
                            MediaCodec.BUFFER_FLAG_END_OF_STREAM,
                        )
                        inputDone = true
                    } else {
                        decoder.queueInputBuffer(inIndex, 0, size, extractor.sampleTime, 0)
                        extractor.advance()
                    }
                }
            }

            val outIndex = decoder.dequeueOutputBuffer(info, timeoutUs)
            if (outIndex < 0) continue // nothing decoded yet, or an informational code

            if (info.size > 0) {
                val outBuffer = checkNotNull(decoder.getOutputBuffer(outIndex)) {
                    "decoder returned no output buffer for index $outIndex"
                }
                val bytes = ByteArray(info.size)
                outBuffer.get(bytes)
                chunks.add(pcm16LeToFloat(bytes))
            }

            // Stop only on the end-of-stream flag; data in that last buffer was kept above.
            outputDone = (info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0
            decoder.releaseOutputBuffer(outIndex, false)
        }

        decoder.stop()
    } finally {
        decoder.release()
    }

    val total = chunks.sumOf { it.size }
    if (total == 0) {
        return WaveData(msg = "Failed to read selected file")
    }

    val merged = FloatArray(total)
    var offset = 0
    for (chunk in chunks) {
        chunk.copyInto(merged, offset)
        offset += chunk.size
    }

    return WaveData(sampleRate = sampleRate, samples = merged)
}

/** Converts little-endian signed 16-bit PCM bytes to floats by dividing each value by 32768. */
private fun pcm16LeToFloat(bytes: ByteArray): FloatArray =
    FloatArray(bytes.size / 2) { k ->
        // The low byte must be read as unsigned; the high byte carries the sign.
        val low = bytes[2 * k].toInt() and 0xFF
        val high = bytes[2 * k + 1].toInt()
        ((high shl 8) or low) / 32768.0f
    }
|
||||
@@ -0,0 +1,66 @@
|
||||
package com.k2fsa.sherpa.onnx.speaker.diarization
|
||||
|
||||
import android.content.res.AssetManager
|
||||
import android.util.Log
|
||||
import com.k2fsa.sherpa.onnx.FastClusteringConfig
|
||||
import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarization
|
||||
import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarizationConfig
|
||||
import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationModelConfig
|
||||
import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationPyannoteModelConfig
|
||||
import com.k2fsa.sherpa.onnx.SpeakerEmbeddingExtractorConfig
|
||||
|
||||
// Please download
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
|
||||
// then extract it, rename model.onnx to segmentation.onnx, and move
|
||||
// segmentation.onnx to the assets folder
|
||||
val segmentationModel = "segmentation.onnx"
|
||||
|
||||
// please download it from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||
// and move it to the assets folder
|
||||
val embeddingModel = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
|
||||
|
||||
// in the end, your assets folder should look like below
|
||||
/*
|
||||
(py38) fangjuns-MacBook-Pro:assets fangjun$ pwd
|
||||
/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxSpeakerDiarization/app/src/main/assets
|
||||
(py38) fangjuns-MacBook-Pro:assets fangjun$ ls -lh
|
||||
total 89048
|
||||
-rw-r--r-- 1 fangjun staff 38M Oct 12 20:28 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||
-rw-r--r-- 1 fangjun staff 5.7M Oct 12 20:28 segmentation.onnx
|
||||
*/
|
||||
|
||||
/**
 * Process-wide holder of the [OfflineSpeakerDiarization] instance.
 *
 * Call [initSpeakerDiarization] before reading [sd].
 */
object SpeakerDiarizationObject {
    var _sd: OfflineSpeakerDiarization? = null

    /** The initialized instance; throws if [initSpeakerDiarization] has not stored one yet. */
    val sd: OfflineSpeakerDiarization
        get() = _sd!!

    /**
     * Creates the diarization instance once; later calls are no-ops.
     *
     * @param assetManager passed through to [OfflineSpeakerDiarization]
     */
    fun initSpeakerDiarization(assetManager: AssetManager? = null) {
        synchronized(this) {
            if (_sd == null) {
                Log.i(TAG, "Initializing sherpa-onnx speaker diarization")
                _sd = OfflineSpeakerDiarization(
                    assetManager = assetManager,
                    config = defaultConfig(),
                )
            }
        }
    }

    /** Builds the configuration used when the instance is first created. */
    private fun defaultConfig(): OfflineSpeakerDiarizationConfig {
        val segmentation = OfflineSpeakerSegmentationModelConfig(
            pyannote = OfflineSpeakerSegmentationPyannoteModelConfig(
                segmentationModel
            ),
            debug = true,
        )
        val embedding = SpeakerEmbeddingExtractorConfig(
            model = embeddingModel,
            debug = true,
            numThreads = 2,
        )
        val clustering = FastClusteringConfig(numClusters = -1, threshold = 0.5f)

        return OfflineSpeakerDiarizationConfig(
            segmentation = segmentation,
            embedding = embedding,
            clustering = clustering,
            minDurationOn = 0.2f,
            minDurationOff = 0.5f,
        )
    }
}
|
||||
@@ -0,0 +1 @@
|
||||
../../../../../../../../../../../../sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt
|
||||
@@ -0,0 +1,38 @@
|
||||
package com.k2fsa.sherpa.onnx.speaker.diarization.screens
|
||||
|
||||
import androidx.compose.foundation.layout.Box
|
||||
import androidx.compose.foundation.layout.Column
|
||||
import androidx.compose.foundation.layout.Spacer
|
||||
import androidx.compose.foundation.layout.fillMaxSize
|
||||
import androidx.compose.foundation.layout.height
|
||||
import androidx.compose.foundation.layout.padding
|
||||
import androidx.compose.material3.Text
|
||||
import androidx.compose.runtime.Composable
|
||||
import androidx.compose.ui.Modifier
|
||||
import androidx.compose.ui.unit.dp
|
||||
import androidx.compose.ui.unit.sp
|
||||
|
||||
/** Static help page: usage notes, model credits, a project link, and a closing line. */
@Composable
fun HelpScreen() {
    val usage = "This app accepts only 16kHz 16-bit 1-channel *.wav files. " +
        "It has two arguments: Number of speakers and clustering threshold. " +
        "If you know the actual number of speakers in the file, please set it. " +
        "Otherwise, please set it to 0. In that case, you have to set the threshold. " +
        "A larger threshold leads to fewer segmented speakers."
    val credits = "The speaker segmentation model is from " +
        "pyannote-audio (https://huggingface.co/pyannote/segmentation-3.0), " +
        "whereas the embedding extractor model is from 3D-Speaker (https://github.com/modelscope/3D-Speaker)"
    val projectLink = "Please see http://github.com/k2-fsa/sherpa-onnx "

    Box(modifier = Modifier.fillMaxSize()) {
        Column(modifier = Modifier.padding(8.dp)) {
            // Each paragraph is followed by the same 5.dp gap.
            for (paragraph in listOf(usage, credits, projectLink)) {
                Text(paragraph)
                Spacer(modifier = Modifier.height(5.dp))
            }
            Text("Everything is open-sourced!", fontSize = 20.sp)
        }
    }
}
|
||||
@@ -0,0 +1,210 @@
|
||||
package com.k2fsa.sherpa.onnx.speaker.diarization.screens
|
||||
|
||||
import android.util.Log
|
||||
import androidx.activity.compose.rememberLauncherForActivityResult
|
||||
import androidx.activity.result.contract.ActivityResultContracts
|
||||
import androidx.compose.foundation.layout.Arrangement
|
||||
import androidx.compose.foundation.layout.Column
|
||||
import androidx.compose.foundation.layout.Row
|
||||
import androidx.compose.foundation.layout.Spacer
|
||||
import androidx.compose.foundation.layout.fillMaxWidth
|
||||
import androidx.compose.foundation.layout.padding
|
||||
import androidx.compose.foundation.layout.size
|
||||
import androidx.compose.foundation.rememberScrollState
|
||||
import androidx.compose.foundation.verticalScroll
|
||||
import androidx.compose.material3.Button
|
||||
import androidx.compose.material3.OutlinedTextField
|
||||
import androidx.compose.material3.Text
|
||||
import androidx.compose.runtime.Composable
|
||||
import androidx.compose.runtime.getValue
|
||||
import androidx.compose.runtime.mutableStateOf
|
||||
import androidx.compose.runtime.remember
|
||||
import androidx.compose.runtime.setValue
|
||||
import androidx.compose.ui.Alignment
|
||||
import androidx.compose.ui.Modifier
|
||||
import androidx.compose.ui.platform.LocalClipboardManager
|
||||
import androidx.compose.ui.platform.LocalContext
|
||||
import androidx.compose.ui.text.AnnotatedString
|
||||
import androidx.compose.ui.unit.dp
|
||||
import androidx.compose.ui.unit.sp
|
||||
import androidx.documentfile.provider.DocumentFile
|
||||
import com.k2fsa.sherpa.onnx.speaker.diarization.SpeakerDiarizationObject
|
||||
import com.k2fsa.sherpa.onnx.speaker.diarization.TAG
|
||||
import kotlin.concurrent.thread
|
||||
|
||||
|
||||
private var samples: FloatArray? = null
|
||||
|
||||
/**
 * Main screen: lets the user pick an audio file, set the number of speakers and
 * the clustering threshold, run diarization on a background thread, and copy
 * the resulting segment list.
 *
 * NOTE(review): the selected file is decoded by `readUri` directly inside the
 * document-picker result callback — confirm this is acceptable for large files.
 */
@Composable
fun HomeScreen() {
    val context = LocalContext.current
    val clipboardManager = LocalClipboardManager.current

    var filename by remember { mutableStateOf("") }
    var status by remember { mutableStateOf("") }
    var progress by remember { mutableStateOf("") }
    var done by remember { mutableStateOf(false) }
    var fileIsOk by remember { mutableStateOf(false) }
    var started by remember { mutableStateOf(false) }
    var numSpeakers by remember { mutableStateOf(0) }
    var threshold by remember { mutableStateOf(0.5f) }

    // Progress callback handed to the diarizer; it always returns 0.
    val callback = { numProcessedChunks: Int, numTotalChunks: Int, _: Long ->
        val percent = if (numTotalChunks > 0) {
            100.0 * numProcessedChunks / numTotalChunks
        } else {
            0.0 // avoid showing NaN/Infinity when no total is reported
        }
        progress = "%.2f%%".format(percent)
        Log.i(TAG, progress)
        0
    }

    val launcher = rememberLauncherForActivityResult(ActivityResultContracts.OpenDocument()) { uri ->
        if (uri != null) {
            val documentFile = DocumentFile.fromSingleUri(context, uri)
            filename = documentFile?.name ?: ""

            progress = ""
            done = false
            fileIsOk = false

            if (filename.isNotEmpty()) {
                val data = readUri(context, uri)
                Log.i(TAG, "sample rate: ${data.sampleRate}")
                Log.i(TAG, "numSamples: ${data.samples?.size ?: 0}")

                val decoded = data.samples
                if (data.msg != null) {
                    Log.i(TAG, "failed to read $filename")
                    status = data.msg
                } else if (data.sampleRate != SpeakerDiarizationObject.sd.sampleRate()) {
                    status =
                        "Expected sample rate: ${SpeakerDiarizationObject.sd.sampleRate()}. Given wave file with sample rate: ${data.sampleRate}"
                } else if (decoded == null) {
                    // Not expected when msg is null, but do not crash on it.
                    status = "Failed to read selected file"
                } else {
                    samples = decoded
                    fileIsOk = true
                }
            }
        }
    }

    Column(
        modifier = Modifier.padding(10.dp),
        verticalArrangement = Arrangement.Top,
    ) {
        Row(
            modifier = Modifier.fillMaxWidth(),
            horizontalArrangement = Arrangement.SpaceEvenly,
            verticalAlignment = Alignment.CenterVertically
        ) {
            Button(onClick = {
                launcher.launch(arrayOf("audio/*"))
            }) {
                Text("Select a .wav file")
            }

            Button(
                enabled = fileIsOk && !started,
                onClick = click@{
                    // Snapshot the samples so the worker is unaffected by a later file selection.
                    val input = samples ?: return@click

                    Log.i(TAG, "started")
                    Log.i(TAG, "num samples: ${input.size}")
                    started = true
                    progress = ""

                    val config = SpeakerDiarizationObject.sd.config
                    config.clustering.numClusters = numSpeakers
                    config.clustering.threshold = threshold

                    SpeakerDiarizationObject.sd.setConfig(config)

                    thread(true) {
                        done = false
                        status = "Started! Please wait"
                        val segments = SpeakerDiarizationObject.sd.processWithCallback(
                            input,
                            callback = callback,
                        )

                        // Build the whole result first so the UI state is written once.
                        val result = buildString {
                            for (s in segments) {
                                val start = "%.2f".format(s.start)
                                val end = "%.2f".format(s.end)
                                val speaker = "speaker_%02d".format(s.speaker)
                                append("$start -- $end $speaker\n")
                                Log.i(TAG, "$start -- $end $speaker")
                            }
                        }

                        status = result
                        // Reveal the copy button only once the result text is in place.
                        done = true
                        started = false

                        Log.i(TAG, result)
                    }
                },
            ) {
                Text("Start")
            }

            if (progress.isNotEmpty()) {
                Text(progress, fontSize = 25.sp)
            }
        }

        Row(
            modifier = Modifier.fillMaxWidth(),
            horizontalArrangement = Arrangement.SpaceEvenly,
            verticalAlignment = Alignment.CenterVertically
        ) {
            OutlinedTextField(
                value = numSpeakers.toString(),
                onValueChange = {
                    // Blank or non-numeric input falls back to 0.
                    numSpeakers = it.toIntOrNull() ?: 0
                },
                label = {
                    Text("Number of Speakers")
                },
            )
        }

        Row(
            modifier = Modifier.fillMaxWidth(),
            horizontalArrangement = Arrangement.SpaceEvenly,
            verticalAlignment = Alignment.CenterVertically
        ) {
            OutlinedTextField(
                value = threshold.toString(),
                onValueChange = {
                    // Blank or non-numeric input falls back to 0.5.
                    threshold = it.toFloatOrNull() ?: 0.5f
                },
                label = {
                    Text("Clustering threshold")
                },
            )
        }

        if (filename.isNotEmpty()) {
            Text(text = "Selected $filename")
            Spacer(Modifier.size(20.dp))
        }

        if (done) {
            Button(onClick = {
                clipboardManager.setText(AnnotatedString(status))
                progress = "Copied!"
            }) {
                Text("Copy result")
            }
            Spacer(Modifier.size(20.dp))
        }

        if (status.isNotEmpty()) {
            Text(
                status,
                modifier = Modifier.verticalScroll(rememberScrollState()),
            )
        }
    }
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
|
||||
|
||||
import androidx.compose.ui.graphics.Color
|
||||
|
||||
val Purple80 = Color(0xFFD0BCFF)
|
||||
val PurpleGrey80 = Color(0xFFCCC2DC)
|
||||
val Pink80 = Color(0xFFEFB8C8)
|
||||
|
||||
val Purple40 = Color(0xFF6650a4)
|
||||
val PurpleGrey40 = Color(0xFF625b71)
|
||||
val Pink40 = Color(0xFF7D5260)
|
||||
@@ -0,0 +1,58 @@
|
||||
package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
|
||||
|
||||
import android.app.Activity
|
||||
import android.os.Build
|
||||
import androidx.compose.foundation.isSystemInDarkTheme
|
||||
import androidx.compose.material3.MaterialTheme
|
||||
import androidx.compose.material3.darkColorScheme
|
||||
import androidx.compose.material3.dynamicDarkColorScheme
|
||||
import androidx.compose.material3.dynamicLightColorScheme
|
||||
import androidx.compose.material3.lightColorScheme
|
||||
import androidx.compose.runtime.Composable
|
||||
import androidx.compose.ui.platform.LocalContext
|
||||
|
||||
private val DarkColorScheme = darkColorScheme(
|
||||
primary = Purple80,
|
||||
secondary = PurpleGrey80,
|
||||
tertiary = Pink80
|
||||
)
|
||||
|
||||
private val LightColorScheme = lightColorScheme(
|
||||
primary = Purple40,
|
||||
secondary = PurpleGrey40,
|
||||
tertiary = Pink40
|
||||
|
||||
/* Other default colors to override
|
||||
background = Color(0xFFFFFBFE),
|
||||
surface = Color(0xFFFFFBFE),
|
||||
onPrimary = Color.White,
|
||||
onSecondary = Color.White,
|
||||
onTertiary = Color.White,
|
||||
onBackground = Color(0xFF1C1B1F),
|
||||
onSurface = Color(0xFF1C1B1F),
|
||||
*/
|
||||
)
|
||||
|
||||
/**
 * App theme wrapper around [MaterialTheme].
 *
 * @param darkTheme whether to use a dark scheme; defaults to the system setting
 * @param dynamicColor whether to use a dynamic color scheme when the device
 *   runs Android 12 (API level S) or newer
 * @param content composable content drawn with the theme
 */
@Composable
fun SherpaOnnxSpeakerDiarizationTheme(
    darkTheme: Boolean = isSystemInDarkTheme(),
    dynamicColor: Boolean = true,
    content: @Composable () -> Unit
) {
    val colorScheme =
        if (dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
            val context = LocalContext.current
            if (darkTheme) {
                dynamicDarkColorScheme(context)
            } else {
                dynamicLightColorScheme(context)
            }
        } else if (darkTheme) {
            DarkColorScheme
        } else {
            LightColorScheme
        }

    MaterialTheme(
        colorScheme = colorScheme,
        typography = Typography,
        content = content,
    )
}
|
||||
@@ -0,0 +1,34 @@
|
||||
package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
|
||||
|
||||
import androidx.compose.material3.Typography
|
||||
import androidx.compose.ui.text.TextStyle
|
||||
import androidx.compose.ui.text.font.FontFamily
|
||||
import androidx.compose.ui.text.font.FontWeight
|
||||
import androidx.compose.ui.unit.sp
|
||||
|
||||
// Set of Material typography styles to start with
|
||||
val Typography = Typography(
|
||||
bodyLarge = TextStyle(
|
||||
fontFamily = FontFamily.Default,
|
||||
fontWeight = FontWeight.Normal,
|
||||
fontSize = 16.sp,
|
||||
lineHeight = 24.sp,
|
||||
letterSpacing = 0.5.sp
|
||||
)
|
||||
/* Other default text styles to override
|
||||
titleLarge = TextStyle(
|
||||
fontFamily = FontFamily.Default,
|
||||
fontWeight = FontWeight.Normal,
|
||||
fontSize = 22.sp,
|
||||
lineHeight = 28.sp,
|
||||
letterSpacing = 0.sp
|
||||
),
|
||||
labelSmall = TextStyle(
|
||||
fontFamily = FontFamily.Default,
|
||||
fontWeight = FontWeight.Medium,
|
||||
fontSize = 11.sp,
|
||||
lineHeight = 16.sp,
|
||||
letterSpacing = 0.5.sp
|
||||
)
|
||||
*/
|
||||
)
|
||||
@@ -0,0 +1,30 @@
|
||||
<vector xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:aapt="http://schemas.android.com/aapt"
|
||||
android:width="108dp"
|
||||
android:height="108dp"
|
||||
android:viewportWidth="108"
|
||||
android:viewportHeight="108">
|
||||
<path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
|
||||
<aapt:attr name="android:fillColor">
|
||||
<gradient
|
||||
android:endX="85.84757"
|
||||
android:endY="92.4963"
|
||||
android:startX="42.9492"
|
||||
android:startY="49.59793"
|
||||
android:type="linear">
|
||||
<item
|
||||
android:color="#44000000"
|
||||
android:offset="0.0" />
|
||||
<item
|
||||
android:color="#00000000"
|
||||
android:offset="1.0" />
|
||||
</gradient>
|
||||
</aapt:attr>
|
||||
</path>
|
||||
<path
|
||||
android:fillColor="#FFFFFF"
|
||||
android:fillType="nonZero"
|
||||
android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
|
||||
android:strokeWidth="1"
|
||||
android:strokeColor="#00000000" />
|
||||
</vector>
|
||||
@@ -0,0 +1,170 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<vector xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
android:width="108dp"
|
||||
android:height="108dp"
|
||||
android:viewportWidth="108"
|
||||
android:viewportHeight="108">
|
||||
<path
|
||||
android:fillColor="#3DDC84"
|
||||
android:pathData="M0,0h108v108h-108z" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M9,0L9,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,0L19,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M29,0L29,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M39,0L39,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M49,0L49,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M59,0L59,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M69,0L69,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M79,0L79,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M89,0L89,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M99,0L99,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,9L108,9"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,19L108,19"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,29L108,29"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,39L108,39"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,49L108,49"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,59L108,59"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,69L108,69"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,79L108,79"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,89L108,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,99L108,99"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,29L89,29"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,39L89,39"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,49L89,49"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,59L89,59"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,69L89,69"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,79L89,79"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M29,19L29,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M39,19L39,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M49,19L49,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M59,19L59,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M69,19L69,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M79,19L79,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
</vector>
|
||||
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||
<background android:drawable="@drawable/ic_launcher_background" />
|
||||
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
||||
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
|
||||
</adaptive-icon>
|
||||
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||
<background android:drawable="@drawable/ic_launcher_background" />
|
||||
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
||||
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
|
||||
</adaptive-icon>
|
||||
|
After Width: | Height: | Size: 1.4 KiB |
|
After Width: | Height: | Size: 2.8 KiB |
|
After Width: | Height: | Size: 982 B |
|
After Width: | Height: | Size: 1.7 KiB |
|
After Width: | Height: | Size: 1.9 KiB |
|
After Width: | Height: | Size: 3.8 KiB |
|
After Width: | Height: | Size: 2.8 KiB |
|
After Width: | Height: | Size: 5.8 KiB |
|
After Width: | Height: | Size: 3.8 KiB |
|
After Width: | Height: | Size: 7.6 KiB |
@@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<resources>
|
||||
<color name="purple_200">#FFBB86FC</color>
|
||||
<color name="purple_500">#FF6200EE</color>
|
||||
<color name="purple_700">#FF3700B3</color>
|
||||
<color name="teal_200">#FF03DAC5</color>
|
||||
<color name="teal_700">#FF018786</color>
|
||||
<color name="black">#FF000000</color>
|
||||
<color name="white">#FFFFFFFF</color>
|
||||
</resources>
|
||||
@@ -0,0 +1,3 @@
|
||||
<resources>
|
||||
<string name="app_name">SherpaOnnxSpeakerDiarization</string>
|
||||
</resources>
|
||||
@@ -0,0 +1,5 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<resources>
|
||||
|
||||
<style name="Theme.SherpaOnnxSpeakerDiarization" parent="android:Theme.Material.Light.NoActionBar" />
|
||||
</resources>
|
||||
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="utf-8"?><!--
|
||||
Sample backup rules file; uncomment and customize as necessary.
|
||||
See https://developer.android.com/guide/topics/data/autobackup
|
||||
for details.
|
||||
Note: This file is ignored for devices older than API 31
|
||||
See https://developer.android.com/about/versions/12/backup-restore
|
||||
-->
|
||||
<full-backup-content>
|
||||
<!--
|
||||
<include domain="sharedpref" path="."/>
|
||||
<exclude domain="sharedpref" path="device.xml"/>
|
||||
-->
|
||||
</full-backup-content>
|
||||
@@ -0,0 +1,19 @@
|
||||
<?xml version="1.0" encoding="utf-8"?><!--
|
||||
Sample data extraction rules file; uncomment and customize as necessary.
|
||||
See https://developer.android.com/about/versions/12/backup-restore#xml-changes
|
||||
for details.
|
||||
-->
|
||||
<data-extraction-rules>
|
||||
<cloud-backup>
|
||||
<!-- TODO: Use <include> and <exclude> to control what is backed up.
|
||||
<include .../>
|
||||
<exclude .../>
|
||||
-->
|
||||
</cloud-backup>
|
||||
<!--
|
||||
<device-transfer>
|
||||
<include .../>
|
||||
<exclude .../>
|
||||
</device-transfer>
|
||||
-->
|
||||
</data-extraction-rules>
|
||||