Add JavaScript API for Moonshine models (#1480)

This commit is contained in:
Fangjun Kuang
2024-10-27 11:31:01 +08:00
committed by GitHub
parent 3d3edabb5f
commit 6f261d39f3
13 changed files with 719 additions and 88 deletions

View File

@@ -35,6 +35,10 @@ function freeConfig(config, Module) {
freeConfig(config.whisper, Module)
}
if ('moonshine' in config) {
freeConfig(config.moonshine, Module)
}
if ('tdnn' in config) {
freeConfig(config.tdnn, Module)
}
@@ -563,7 +567,7 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
const n = encoderLen + decoderLen + languageLen + taskLen;
const buffer = Module._malloc(n);
const len = 5 * 4; // 4 pointers
const len = 5 * 4; // 4 pointers + 1 int32
const ptr = Module._malloc(len);
let offset = 0;
@@ -598,6 +602,55 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
}
}
// Lays out the Moonshine model config in memory obtained from
// Module._malloc.
//
// The four path strings (config.preprocessor, config.encoder,
// config.uncachedDecoder, config.cachedDecoder; a missing or falsy entry is
// treated as '') are written back to back into a single buffer via
// Module.stringToUTF8. A second allocation of 4 slots, 4 bytes each, receives
// the address of each string, in the order listed above.
//
// Returns an object with:
//   buffer - address of the allocation holding the string bytes
//   ptr    - address of the allocation holding the 4 string addresses
//   len    - size in bytes of the allocation at `ptr`
function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) {
  const slotSize = 4;  // bytes reserved per string address

  // Field order here fixes the slot order in the block at `ptr`.
  const paths = [
    config.preprocessor || '',
    config.encoder || '',
    config.uncachedDecoder || '',
    config.cachedDecoder || '',
  ];

  // One extra byte per string on top of its UTF-8 length.
  const sizes = paths.map((path) => Module.lengthBytesUTF8(path) + 1);
  const totalBytes = sizes.reduce((sum, size) => sum + size, 0);

  const buffer = Module._malloc(totalBytes);
  const len = paths.length * slotSize;
  const ptr = Module._malloc(len);

  // First pass: copy every string and remember where each one starts.
  const addresses = [];
  let cursor = 0;
  paths.forEach((path, i) => {
    const address = buffer + cursor;
    Module.stringToUTF8(path, address, sizes[i]);
    addresses.push(address);
    cursor += sizes[i];
  });

  // Second pass: store each string's address in its slot.
  addresses.forEach((address, i) => {
    Module.setValue(ptr + i * slotSize, address, 'i8*');
  });

  return {buffer, ptr, len};
}
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
const n = Module.lengthBytesUTF8(config.model || '') + 1;
const buffer = Module._malloc(n);
@@ -693,6 +746,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
};
}
if (!('moonshine' in config)) {
config.moonshine = {
preprocessor: '',
encoder: '',
uncachedDecoder: '',
cachedDecoder: '',
};
}
if (!('tdnn' in config)) {
config.tdnn = {
model: '',
@@ -724,8 +786,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
const senseVoice =
initSherpaOnnxOfflineSenseVoiceModelConfig(config.senseVoice, Module);
const moonshine =
initSherpaOnnxOfflineMoonshineModelConfig(config.moonshine, Module);
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
tdnn.len + 8 * 4 + senseVoice.len;
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len;
const ptr = Module._malloc(len);
@@ -745,7 +810,6 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
Module._CopyHeap(tdnn.ptr, tdnn.len, ptr + offset);
offset += tdnn.len;
const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
@@ -817,11 +881,14 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
offset += 4;
Module._CopyHeap(senseVoice.ptr, senseVoice.len, ptr + offset);
offset += senseVoice.len;
Module._CopyHeap(moonshine.ptr, moonshine.len, ptr + offset);
return {
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
senseVoice: senseVoice,
senseVoice: senseVoice, moonshine: moonshine,
}
}

View File

@@ -15,6 +15,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, "");
@@ -25,7 +26,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) +
sizeof(SherpaOnnxOfflineWhisperModelConfig) +
sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 +
sizeof(SherpaOnnxOfflineSenseVoiceModelConfig),
sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
sizeof(SherpaOnnxOfflineMoonshineModelConfig),
"");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
@@ -66,6 +68,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
auto whisper = &model_config->whisper;
auto tdnn = &model_config->tdnn;
auto sense_voice = &model_config->sense_voice;
auto moonshine = &model_config->moonshine;
fprintf(stdout, "----------offline transducer model config----------\n");
fprintf(stdout, "encoder: %s\n", transducer->encoder);
@@ -93,6 +96,12 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
fprintf(stdout, "language: %s\n", sense_voice->language);
fprintf(stdout, "use_itn: %d\n", sense_voice->use_itn);
fprintf(stdout, "----------offline moonshine model config----------\n");
fprintf(stdout, "preprocessor: %s\n", moonshine->preprocessor);
fprintf(stdout, "encoder: %s\n", moonshine->encoder);
fprintf(stdout, "uncached_decoder: %s\n", moonshine->uncached_decoder);
fprintf(stdout, "cached_decoder: %s\n", moonshine->cached_decoder);
fprintf(stdout, "tokens: %s\n", model_config->tokens);
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
fprintf(stdout, "provider: %s\n", model_config->provider);