sync from b7516

2026-01-16 11:16:14 +08:00
parent f4ae4cc7da
commit 6ee41dd9e3
380 changed files with 18435 additions and 38806 deletions

View File

@@ -26,7 +26,7 @@ android {
arguments += "-DBUILD_SHARED_LIBS=ON"
arguments += "-DLLAMA_BUILD_COMMON=ON"
arguments += "-DLLAMA_OPENSSL=OFF"
arguments += "-DLLAMA_CURL=OFF"
arguments += "-DGGML_NATIVE=OFF"
arguments += "-DGGML_BACKEND_DL=ON"

View File

@@ -560,6 +560,6 @@ Java_com_arm_aichat_internal_InferenceEngineImpl_unload(JNIEnv * /*unused*/, job
extern "C"
JNIEXPORT void JNICALL
Java_com_arm_aichat_internal_InferenceEngineImpl_shutdown(JNIEnv *, jobject /*unused*/) {
Java_com_arm_aichat_internal_InferenceEngineImpl_shutdown(JNIEnv *env, jobject /*unused*/) {
llama_backend_free();
}
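For context, JNI binds this exported symbol purely by name: `Java_com_arm_aichat_internal_InferenceEngineImpl_shutdown` resolves to an `external` member named `shutdown` on `InferenceEngineImpl` in package `com.arm.aichat.internal`. A minimal Kotlin-side sketch follows; the actual Kotlin declaration and the native library name are not part of this diff and are assumed here.

package com.arm.aichat.internal

// Hypothetical sketch of the Kotlin counterpart to the JNI function above.
internal class InferenceEngineImpl private constructor() {
    // No parameters and Unit return match the C signature
    // void shutdown(JNIEnv*, jobject).
    private external fun shutdown()

    companion object {
        init {
            System.loadLibrary("aichat") // assumed native library name
        }
    }
}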

View File

@@ -38,7 +38,7 @@ interface InferenceEngine {
     /**
      * Unloads the currently loaded model.
      */
-    fun cleanUp()
+    suspend fun cleanUp()

     /**
      * Cleans up resources when the engine is no longer needed.

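Making `cleanUp()` a suspend function moves the blocking unload off the caller's thread, so callers now invoke it from a coroutine rather than relying on the old `runBlocking`-based implementation. A minimal caller sketch; the scope and `engine` reference are assumed, not part of this diff.

import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.launch

// Sketch: release the model from any coroutine scope instead of
// blocking the calling thread.
fun releaseModel(scope: CoroutineScope, engine: InferenceEngine) {
    scope.launch {
        engine.cleanUp() // suspends until the engine's dispatcher finishes unloading
    }
}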
View File

@@ -15,11 +15,9 @@ import kotlinx.coroutines.cancel
 import kotlinx.coroutines.flow.Flow
 import kotlinx.coroutines.flow.MutableStateFlow
 import kotlinx.coroutines.flow.StateFlow
-import kotlinx.coroutines.flow.asStateFlow
 import kotlinx.coroutines.flow.flow
 import kotlinx.coroutines.flow.flowOn
 import kotlinx.coroutines.launch
-import kotlinx.coroutines.runBlocking
 import kotlinx.coroutines.withContext
 import java.io.File
 import java.io.IOException
@@ -111,11 +109,9 @@ internal class InferenceEngineImpl private constructor(
     private val _state =
         MutableStateFlow<InferenceEngine.State>(InferenceEngine.State.Uninitialized)
-    override val state: StateFlow<InferenceEngine.State> = _state.asStateFlow()
+    override val state: StateFlow<InferenceEngine.State> = _state

     private var _readyForSystemPrompt = false

-    @Volatile
-    private var _cancelGeneration = false

     /**
      * Single-threaded coroutine dispatcher & scope for LLama asynchronous operations
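Dropping the `asStateFlow()` wrapper here is consistent with the removed import above: assigning the `MutableStateFlow` to a property declared as `StateFlow` already hides the mutable API at the type level, though unlike the wrapper it could still be cast back to `MutableStateFlow`. A standalone sketch of the pattern:

import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow

class Counter {
    private val _count = MutableStateFlow(0)

    // Exposed as read-only by declared type alone; no wrapper object
    // is allocated, but the instance underneath is still mutable.
    val count: StateFlow<Int> = _count

    fun increment() {
        _count.value += 1
    }
}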
@@ -173,8 +169,6 @@ internal class InferenceEngineImpl private constructor(
             }
             Log.i(TAG, "Model loaded!")
             _readyForSystemPrompt = true
-            _cancelGeneration = false
             _state.value = InferenceEngine.State.ModelReady
         } catch (e: Exception) {
             Log.e(TAG, (e.message ?: "Error loading model") + "\n" + pathToModel, e)
@@ -237,19 +231,15 @@ internal class InferenceEngineImpl private constructor(
Log.i(TAG, "User prompt processed. Generating assistant prompt...")
_state.value = InferenceEngine.State.Generating
while (!_cancelGeneration) {
while (true) {
generateNextToken()?.let { utf8token ->
if (utf8token.isNotEmpty()) emit(utf8token)
} ?: break
}
if (_cancelGeneration) {
Log.i(TAG, "Assistant generation aborted per requested.")
} else {
Log.i(TAG, "Assistant generation complete. Awaiting user prompt...")
}
Log.i(TAG, "Assistant generation complete. Awaiting user prompt...")
_state.value = InferenceEngine.State.ModelReady
} catch (e: CancellationException) {
Log.i(TAG, "Assistant generation's flow collection cancelled.")
Log.i(TAG, "Generation cancelled by user.")
_state.value = InferenceEngine.State.ModelReady
throw e
} catch (e: Exception) {
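With the `@Volatile _cancelGeneration` flag gone, aborting generation presumably relies on structured concurrency instead: cancelling the coroutine that collects the token flow raises `CancellationException` inside the flow builder, which the new catch branch logs and rethrows. A caller-side sketch; the `generate` method name is hypothetical, since the producing API is not shown in this hunk.

import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Job
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.collect
import kotlinx.coroutines.launch

// Hypothetical token-producing API, assumed for illustration only.
interface TokenSource { fun generate(prompt: String): Flow<String> }

fun startGeneration(scope: CoroutineScope, source: TokenSource, prompt: String): Job =
    scope.launch {
        source.generate(prompt).collect { token -> print(token) }
    }

// Cancelling the returned Job cancels flow collection; the engine then hits
// catch (e: CancellationException), logs "Generation cancelled by user.",
// resets state to ModelReady, and rethrows.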
@@ -278,9 +268,8 @@ internal class InferenceEngineImpl private constructor(
     /**
      * Unloads the model and frees resources, or reset error states
      */
-    override fun cleanUp() {
-        _cancelGeneration = true
-        runBlocking(llamaDispatcher) {
+    override suspend fun cleanUp() =
+        withContext(llamaDispatcher) {
             when (val state = _state.value) {
                 is InferenceEngine.State.ModelReady -> {
                     Log.i(TAG, "Unloading model and free resources...")
@@ -304,21 +293,17 @@ internal class InferenceEngineImpl private constructor(
                 else -> throw IllegalStateException("Cannot unload model in ${state.javaClass.simpleName}")
             }
         }
-    }

     /**
      * Cancel all ongoing coroutines and free GGML backends
      */
     override fun destroy() {
-        _cancelGeneration = true
-        runBlocking(llamaDispatcher) {
-            _readyForSystemPrompt = false
-            when(_state.value) {
-                is InferenceEngine.State.Uninitialized -> {}
-                is InferenceEngine.State.Initialized -> shutdown()
-                else -> { unload(); shutdown() }
-            }
-        }
+        _readyForSystemPrompt = false
         llamaScope.cancel()
+        when(_state.value) {
+            is InferenceEngine.State.Uninitialized -> {}
+            is InferenceEngine.State.Initialized -> shutdown()
+            else -> { unload(); shutdown() }
+        }
     }
 }
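Taken together, teardown now splits into a suspending `cleanUp()` (model unload on the llama dispatcher) followed by a non-suspending `destroy()` (scope cancellation and backend shutdown). A sketch of the implied ordering; the `engine` reference is assumed, and `runBlocking` is used only to illustrate a call at process exit.

import kotlinx.coroutines.runBlocking

// Sketch only: full teardown. In app code cleanUp() would normally
// run inside an existing coroutine scope rather than runBlocking.
fun teardown(engine: InferenceEngine) {
    runBlocking {
        engine.cleanUp() // suspend: unloads the model via withContext(llamaDispatcher)
    }
    engine.destroy()     // cancels llamaScope and frees GGML backends
}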