Add WebAssembly for NodeJS. (#628)

This commit is contained in:
Fangjun Kuang
2024-03-03 20:00:36 +08:00
committed by GitHub
parent ac6825ff11
commit ed06ced16f
39 changed files with 1450 additions and 1887 deletions

View File

@@ -1,3 +0,0 @@
Language: JavaScript
JavaScriptQuotes: Double

View File

@@ -7,3 +7,5 @@ It processes everything locally without accessing the Internet.
Please refer to
https://github.com/k2-fsa/sherpa-onnx/tree/master/nodejs-examples
for examples.
You need Node >= 18 for this package.

View File

@@ -1,726 +1,26 @@
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
// Please use
//
// npm install ffi-napi ref-struct-napi
//
// before you use this file
//
//
// Please use node 13. node 16, 18, 20, and 21 are known not working.
// See also
// https://github.com/node-ffi-napi/node-ffi-napi/issues/244
// and
// https://github.com/node-ffi-napi/node-ffi-napi/issues/97
"use strict"
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
'use strict'
const debug = require("debug")("sherpa-onnx");
const os = require("os");
const path = require("path");
const ffi = require("ffi-napi");
const ref = require("ref-napi");
const fs = require("fs");
var ArrayType = require("ref-array-napi");
const wasmModule = require('./sherpa-onnx-wasm-nodejs.js')();
const sherpa_onnx_asr = require('./sherpa-onnx-asr.js');
const sherpa_onnx_tts = require('./sherpa-onnx-tts.js');
const FloatArray = ArrayType(ref.types.float);
const StructType = require("ref-struct-napi");
const cstring = ref.types.CString;
const cstringPtr = ref.refType(cstring);
const int32_t = ref.types.int32;
const float = ref.types.float;
const floatPtr = ref.refType(float);
const SherpaOnnxOnlineTransducerModelConfig = StructType({
"encoder" : cstring,
"decoder" : cstring,
"joiner" : cstring,
});
const SherpaOnnxOnlineParaformerModelConfig = StructType({
"encoder" : cstring,
"decoder" : cstring,
});
const SherpaOnnxOnlineZipformer2CtcModelConfig = StructType({
"model" : cstring,
});
const SherpaOnnxOnlineModelConfig = StructType({
"transducer" : SherpaOnnxOnlineTransducerModelConfig,
"paraformer" : SherpaOnnxOnlineParaformerModelConfig,
"zipformer2Ctc" : SherpaOnnxOnlineZipformer2CtcModelConfig,
"tokens" : cstring,
"numThreads" : int32_t,
"provider" : cstring,
"debug" : int32_t,
"modelType" : cstring,
});
const SherpaOnnxFeatureConfig = StructType({
"sampleRate" : int32_t,
"featureDim" : int32_t,
});
const SherpaOnnxOnlineRecognizerConfig = StructType({
"featConfig" : SherpaOnnxFeatureConfig,
"modelConfig" : SherpaOnnxOnlineModelConfig,
"decodingMethod" : cstring,
"maxActivePaths" : int32_t,
"enableEndpoint" : int32_t,
"rule1MinTrailingSilence" : float,
"rule2MinTrailingSilence" : float,
"rule3MinUtteranceLength" : float,
"hotwordsFile" : cstring,
"hotwordsScore" : float,
});
const SherpaOnnxOnlineRecognizerResult = StructType({
"text" : cstring,
"tokens" : cstring,
"tokensArr" : cstringPtr,
"timestamps" : floatPtr,
"count" : int32_t,
"json" : cstring,
});
const SherpaOnnxOnlineRecognizerPtr = ref.refType(ref.types.void);
const SherpaOnnxOnlineStreamPtr = ref.refType(ref.types.void);
const SherpaOnnxOnlineStreamPtrPtr = ref.refType(SherpaOnnxOnlineStreamPtr);
const SherpaOnnxOnlineRecognizerResultPtr =
ref.refType(SherpaOnnxOnlineRecognizerResult);
const SherpaOnnxOnlineRecognizerConfigPtr =
ref.refType(SherpaOnnxOnlineRecognizerConfig);
const SherpaOnnxOfflineTransducerModelConfig = StructType({
"encoder" : cstring,
"decoder" : cstring,
"joiner" : cstring,
});
const SherpaOnnxOfflineParaformerModelConfig = StructType({
"model" : cstring,
});
const SherpaOnnxOfflineNemoEncDecCtcModelConfig = StructType({
"model" : cstring,
});
const SherpaOnnxOfflineWhisperModelConfig = StructType({
"encoder" : cstring,
"decoder" : cstring,
});
const SherpaOnnxOfflineTdnnModelConfig = StructType({
"model" : cstring,
});
const SherpaOnnxOfflineLMConfig = StructType({
"model" : cstring,
"scale" : float,
});
const SherpaOnnxOfflineModelConfig = StructType({
"transducer" : SherpaOnnxOfflineTransducerModelConfig,
"paraformer" : SherpaOnnxOfflineParaformerModelConfig,
"nemoCtc" : SherpaOnnxOfflineNemoEncDecCtcModelConfig,
"whisper" : SherpaOnnxOfflineWhisperModelConfig,
"tdnn" : SherpaOnnxOfflineTdnnModelConfig,
"tokens" : cstring,
"numThreads" : int32_t,
"debug" : int32_t,
"provider" : cstring,
"modelType" : cstring,
});
const SherpaOnnxOfflineRecognizerConfig = StructType({
"featConfig" : SherpaOnnxFeatureConfig,
"modelConfig" : SherpaOnnxOfflineModelConfig,
"lmConfig" : SherpaOnnxOfflineLMConfig,
"decodingMethod" : cstring,
"maxActivePaths" : int32_t,
"hotwordsFile" : cstring,
"hotwordsScore" : float,
});
const SherpaOnnxOfflineRecognizerResult = StructType({
"text" : cstring,
"timestamps" : floatPtr,
"count" : int32_t,
});
const SherpaOnnxOfflineRecognizerPtr = ref.refType(ref.types.void);
const SherpaOnnxOfflineStreamPtr = ref.refType(ref.types.void);
const SherpaOnnxOfflineStreamPtrPtr = ref.refType(SherpaOnnxOfflineStreamPtr);
const SherpaOnnxOfflineRecognizerResultPtr =
ref.refType(SherpaOnnxOfflineRecognizerResult);
const SherpaOnnxOfflineRecognizerConfigPtr =
ref.refType(SherpaOnnxOfflineRecognizerConfig);
// vad
const SherpaOnnxSileroVadModelConfig = StructType({
"model" : cstring,
"threshold" : float,
"minSilenceDuration" : float,
"minSpeechDuration" : float,
"windowSize" : int32_t,
});
const SherpaOnnxVadModelConfig = StructType({
"sileroVad" : SherpaOnnxSileroVadModelConfig,
"sampleRate" : int32_t,
"numThreads" : int32_t,
"provider" : cstring,
"debug" : int32_t,
});
const SherpaOnnxSpeechSegment = StructType({
"start" : int32_t,
"samples" : FloatArray,
"n" : int32_t,
});
const SherpaOnnxVadModelConfigPtr = ref.refType(SherpaOnnxVadModelConfig);
const SherpaOnnxSpeechSegmentPtr = ref.refType(SherpaOnnxSpeechSegment);
const SherpaOnnxCircularBufferPtr = ref.refType(ref.types.void);
const SherpaOnnxVoiceActivityDetectorPtr = ref.refType(ref.types.void);
// tts
const SherpaOnnxOfflineTtsVitsModelConfig = StructType({
"model" : cstring,
"lexicon" : cstring,
"tokens" : cstring,
"dataDir" : cstring,
"noiseScale" : float,
"noiseScaleW" : float,
"lengthScale" : float,
});
const SherpaOnnxOfflineTtsModelConfig = StructType({
"vits" : SherpaOnnxOfflineTtsVitsModelConfig,
"numThreads" : int32_t,
"debug" : int32_t,
"provider" : cstring,
});
const SherpaOnnxOfflineTtsConfig = StructType({
"model" : SherpaOnnxOfflineTtsModelConfig,
"ruleFsts" : cstring,
"maxNumSentences" : int32_t,
});
const SherpaOnnxGeneratedAudio = StructType({
"samples" : FloatArray,
"n" : int32_t,
"sampleRate" : int32_t,
});
const SherpaOnnxOfflineTtsVitsModelConfigPtr =
ref.refType(SherpaOnnxOfflineTtsVitsModelConfig);
const SherpaOnnxOfflineTtsConfigPtr = ref.refType(SherpaOnnxOfflineTtsConfig);
const SherpaOnnxGeneratedAudioPtr = ref.refType(SherpaOnnxGeneratedAudio);
const SherpaOnnxOfflineTtsPtr = ref.refType(ref.types.void);
const SherpaOnnxDisplayPtr = ref.refType(ref.types.void);
let soname;
if (os.platform() == "win32") {
// see https://nodejs.org/api/process.html#processarch
if (process.arch == "x64") {
let currentPath = process.env.Path;
let dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x64"));
process.env.Path = currentPath + path.delimiter + dllDirectory;
soname = path.join(__dirname, "lib", "win-x64", "sherpa-onnx-c-api.dll")
} else if (process.arch == "ia32") {
let currentPath = process.env.Path;
let dllDirectory = path.resolve(path.join(__dirname, "lib", "win-x86"));
process.env.Path = currentPath + path.delimiter + dllDirectory;
soname = path.join(__dirname, "lib", "win-x86", "sherpa-onnx-c-api.dll")
} else {
throw new Error(
`Support only Windows x86 and x64 for now. Given ${process.arch}`);
}
} else if (os.platform() == "darwin") {
if (process.arch == "x64") {
soname =
path.join(__dirname, "lib", "osx-x64", "libsherpa-onnx-c-api.dylib");
} else if (process.arch == "arm64") {
soname =
path.join(__dirname, "lib", "osx-arm64", "libsherpa-onnx-c-api.dylib");
} else {
throw new Error(
`Support only macOS x64 and arm64 for now. Given ${process.arch}`);
}
} else if (os.platform() == "linux") {
if (process.arch == "x64") {
soname =
path.join(__dirname, "lib", "linux-x64", "libsherpa-onnx-c-api.so");
} else {
throw new Error(`Support only Linux x64 for now. Given ${process.arch}`);
}
} else {
throw new Error(`Unsupported platform ${os.platform()}`);
function createOnlineRecognizer(config) {
return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
}
if (!fs.existsSync(soname)) {
throw new Error(`Cannot find file ${soname}. Please make sure you have run
./build.sh`);
function createOfflineRecognizer(config) {
return new sherpa_onnx_asr.OfflineRecognizer(config, wasmModule);
}
debug("soname ", soname)
const libsherpa_onnx = ffi.Library(soname, {
// online asr
"CreateOnlineRecognizer" : [
SherpaOnnxOnlineRecognizerPtr, [ SherpaOnnxOnlineRecognizerConfigPtr ]
],
"DestroyOnlineRecognizer" : [ "void", [ SherpaOnnxOnlineRecognizerPtr ] ],
"CreateOnlineStream" :
[ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr ] ],
"CreateOnlineStreamWithHotwords" :
[ SherpaOnnxOnlineStreamPtr, [ SherpaOnnxOnlineRecognizerPtr, cstring ] ],
"DestroyOnlineStream" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ],
"AcceptWaveform" :
[ "void", [ SherpaOnnxOnlineStreamPtr, int32_t, floatPtr, int32_t ] ],
"IsOnlineStreamReady" :
[ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
"DecodeOnlineStream" :
[ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
"DecodeMultipleOnlineStreams" : [
"void",
[ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtrPtr, int32_t ]
],
"GetOnlineStreamResult" : [
SherpaOnnxOnlineRecognizerResultPtr,
[ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ]
],
"DestroyOnlineRecognizerResult" :
[ "void", [ SherpaOnnxOnlineRecognizerResultPtr ] ],
"Reset" :
[ "void", [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
"InputFinished" : [ "void", [ SherpaOnnxOnlineStreamPtr ] ],
"IsEndpoint" :
[ int32_t, [ SherpaOnnxOnlineRecognizerPtr, SherpaOnnxOnlineStreamPtr ] ],
// offline asr
"CreateOfflineRecognizer" : [
SherpaOnnxOfflineRecognizerPtr, [ SherpaOnnxOfflineRecognizerConfigPtr ]
],
"DestroyOfflineRecognizer" : [ "void", [ SherpaOnnxOfflineRecognizerPtr ] ],
"CreateOfflineStream" :
[ SherpaOnnxOfflineStreamPtr, [ SherpaOnnxOfflineRecognizerPtr ] ],
"DestroyOfflineStream" : [ "void", [ SherpaOnnxOfflineStreamPtr ] ],
"AcceptWaveformOffline" :
[ "void", [ SherpaOnnxOfflineStreamPtr, int32_t, floatPtr, int32_t ] ],
"DecodeOfflineStream" : [
"void", [ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtr ]
],
"DecodeMultipleOfflineStreams" : [
"void",
[ SherpaOnnxOfflineRecognizerPtr, SherpaOnnxOfflineStreamPtrPtr, int32_t ]
],
"GetOfflineStreamResult" :
[ SherpaOnnxOfflineRecognizerResultPtr, [ SherpaOnnxOfflineStreamPtr ] ],
"DestroyOfflineRecognizerResult" :
[ "void", [ SherpaOnnxOfflineRecognizerResultPtr ] ],
// vad
"SherpaOnnxCreateCircularBuffer" :
[ SherpaOnnxCircularBufferPtr, [ int32_t ] ],
"SherpaOnnxDestroyCircularBuffer" :
[ "void", [ SherpaOnnxCircularBufferPtr ] ],
"SherpaOnnxCircularBufferPush" :
[ "void", [ SherpaOnnxCircularBufferPtr, floatPtr, int32_t ] ],
"SherpaOnnxCircularBufferGet" :
[ FloatArray, [ SherpaOnnxCircularBufferPtr, int32_t, int32_t ] ],
"SherpaOnnxCircularBufferFree" : [ "void", [ FloatArray ] ],
"SherpaOnnxCircularBufferPop" :
[ "void", [ SherpaOnnxCircularBufferPtr, int32_t ] ],
"SherpaOnnxCircularBufferSize" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ],
"SherpaOnnxCircularBufferHead" : [ int32_t, [ SherpaOnnxCircularBufferPtr ] ],
"SherpaOnnxCircularBufferReset" : [ "void", [ SherpaOnnxCircularBufferPtr ] ],
"SherpaOnnxCreateVoiceActivityDetector" : [
SherpaOnnxVoiceActivityDetectorPtr, [ SherpaOnnxVadModelConfigPtr, float ]
],
"SherpaOnnxDestroyVoiceActivityDetector" :
[ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
"SherpaOnnxVoiceActivityDetectorAcceptWaveform" :
[ "void", [ SherpaOnnxVoiceActivityDetectorPtr, floatPtr, int32_t ] ],
"SherpaOnnxVoiceActivityDetectorEmpty" :
[ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
"SherpaOnnxVoiceActivityDetectorDetected" :
[ int32_t, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
"SherpaOnnxVoiceActivityDetectorPop" :
[ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
"SherpaOnnxVoiceActivityDetectorClear" :
[ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
"SherpaOnnxVoiceActivityDetectorFront" :
[ SherpaOnnxSpeechSegmentPtr, [ SherpaOnnxVoiceActivityDetectorPtr ] ],
"SherpaOnnxDestroySpeechSegment" : [ "void", [ SherpaOnnxSpeechSegmentPtr ] ],
"SherpaOnnxVoiceActivityDetectorReset" :
[ "void", [ SherpaOnnxVoiceActivityDetectorPtr ] ],
// tts
"SherpaOnnxCreateOfflineTts" :
[ SherpaOnnxOfflineTtsPtr, [ SherpaOnnxOfflineTtsConfigPtr ] ],
"SherpaOnnxDestroyOfflineTts" : [ "void", [ SherpaOnnxOfflineTtsPtr ] ],
"SherpaOnnxOfflineTtsGenerate" : [
SherpaOnnxGeneratedAudioPtr,
[ SherpaOnnxOfflineTtsPtr, cstring, int32_t, float ]
],
"SherpaOnnxDestroyOfflineTtsGeneratedAudio" :
[ "void", [ SherpaOnnxGeneratedAudioPtr ] ],
"SherpaOnnxWriteWave" : [ "void", [ floatPtr, int32_t, int32_t, cstring ] ],
// display
"CreateDisplay" : [ SherpaOnnxDisplayPtr, [ int32_t ] ],
"DestroyDisplay" : [ "void", [ SherpaOnnxDisplayPtr ] ],
"SherpaOnnxPrint" : [ "void", [ SherpaOnnxDisplayPtr, int32_t, cstring ] ],
});
class Display {
constructor(maxWordPerLine) {
this.handle = libsherpa_onnx.CreateDisplay(maxWordPerLine);
}
free() {
if (this.handle) {
libsherpa_onnx.DestroyDisplay(this.handle);
this.handle = null;
}
}
print(idx, s) { libsherpa_onnx.SherpaOnnxPrint(this.handle, idx, s); }
};
class OnlineResult {
constructor(text) { this.text = Buffer.from(text, "utf-8").toString(); }
};
class OnlineStream {
constructor(handle) { this.handle = handle }
free() {
if (this.handle) {
libsherpa_onnx.DestroyOnlineStream(this.handle);
this.handle = null;
}
}
/**
* @param sampleRate {Number}
* @param samples {Float32Array} Containing samples in the range [-1, 1]
*/
acceptWaveform(sampleRate, samples) {
libsherpa_onnx.AcceptWaveform(this.handle, sampleRate, samples,
samples.length);
}
};
class OnlineRecognizer {
constructor(config) {
this.config = config;
this.recognizer_handle =
libsherpa_onnx.CreateOnlineRecognizer(config.ref());
}
free() {
if (this.recognizer_handle) {
libsherpa_onnx.DestroyOnlineRecognizer(this.recognizer_handle);
this.recognizer_handle = null;
}
}
createStream() {
let handle = libsherpa_onnx.CreateOnlineStream(this.recognizer_handle);
return new OnlineStream(handle);
}
isReady(stream) {
return libsherpa_onnx.IsOnlineStreamReady(this.recognizer_handle,
stream.handle)
}
isEndpoint(stream) {
return libsherpa_onnx.IsEndpoint(this.recognizer_handle, stream.handle);
}
reset(stream) { libsherpa_onnx.Reset(this.recognizer_handle, stream.handle); }
decode(stream) {
libsherpa_onnx.DecodeOnlineStream(this.recognizer_handle, stream.handle)
}
getResult(stream) {
let handle = libsherpa_onnx.GetOnlineStreamResult(this.recognizer_handle,
stream.handle);
let r = handle.deref();
let ans = new OnlineResult(r.text);
libsherpa_onnx.DestroyOnlineRecognizerResult(handle);
return ans
}
};
class OfflineResult {
constructor(text) { this.text = Buffer.from(text, "utf-8").toString(); }
};
class OfflineStream {
constructor(handle) { this.handle = handle }
free() {
if (this.handle) {
libsherpa_onnx.DestroyOfflineStream(this.handle);
this.handle = null;
}
}
/**
* @param sampleRate {Number}
* @param samples {Float32Array} Containing samples in the range [-1, 1]
*/
acceptWaveform(sampleRate, samples) {
libsherpa_onnx.AcceptWaveformOffline(this.handle, sampleRate, samples,
samples.length);
}
};
class OfflineRecognizer {
constructor(config) {
this.config = config;
this.recognizer_handle =
libsherpa_onnx.CreateOfflineRecognizer(config.ref());
}
free() {
if (this.recognizer_handle) {
libsherpa_onnx.DestroyOfflineRecognizer(this.recognizer_handle);
this.recognizer_handle = null;
}
}
createStream() {
let handle = libsherpa_onnx.CreateOfflineStream(this.recognizer_handle);
return new OfflineStream(handle);
}
decode(stream) {
libsherpa_onnx.DecodeOfflineStream(this.recognizer_handle, stream.handle)
}
getResult(stream) {
let handle = libsherpa_onnx.GetOfflineStreamResult(stream.handle);
let r = handle.deref();
let ans = new OfflineResult(r.text);
libsherpa_onnx.DestroyOfflineRecognizerResult(handle);
return ans
}
};
class SpeechSegment {
constructor(start, samples) {
this.start = start;
this.samples = samples;
}
};
// this buffer holds only float entries.
class CircularBuffer {
/**
* @param capacity {int} The capacity of the circular buffer.
*/
constructor(capacity) {
this.handle = libsherpa_onnx.SherpaOnnxCreateCircularBuffer(capacity);
}
free() {
if (this.handle) {
libsherpa_onnx.SherpaOnnxDestroyCircularBuffer(this.handle);
this.handle = null;
}
}
/**
* @param samples {Float32Array}
*/
push(samples) {
libsherpa_onnx.SherpaOnnxCircularBufferPush(this.handle, samples,
samples.length);
}
get(startIndex, n) {
let data =
libsherpa_onnx.SherpaOnnxCircularBufferGet(this.handle, startIndex, n);
// https://tootallnate.github.io/ref/#exports-reinterpret
const buffer = data.buffer.reinterpret(n * ref.sizeof.float).buffer;
// create a copy since we are going to free the buffer at the end
let s = new Float32Array(buffer).slice(0);
libsherpa_onnx.SherpaOnnxCircularBufferFree(data);
return s;
}
pop(n) { libsherpa_onnx.SherpaOnnxCircularBufferPop(this.handle, n); }
size() { return libsherpa_onnx.SherpaOnnxCircularBufferSize(this.handle); }
head() { return libsherpa_onnx.SherpaOnnxCircularBufferHead(this.handle); }
reset() { libsherpa_onnx.SherpaOnnxCircularBufferReset(this.handle); }
};
class VoiceActivityDetector {
constructor(config, bufferSizeInSeconds) {
this.config = config;
this.handle = libsherpa_onnx.SherpaOnnxCreateVoiceActivityDetector(
config.ref(), bufferSizeInSeconds);
}
free() {
if (this.handle) {
libsherpa_onnx.SherpaOnnxDestroyVoiceActivityDetector(this.handle);
}
}
acceptWaveform(samples) {
libsherpa_onnx.SherpaOnnxVoiceActivityDetectorAcceptWaveform(
this.handle, samples, samples.length);
}
isEmpty() {
return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorEmpty(this.handle);
}
isDetected() {
return libsherpa_onnx.SherpaOnnxVoiceActivityDetectorDetected(this.handle);
}
pop() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorPop(this.handle); }
clear() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorClear(this.handle); }
reset() { libsherpa_onnx.SherpaOnnxVoiceActivityDetectorReset(this.handle); }
front() {
let segment =
libsherpa_onnx.SherpaOnnxVoiceActivityDetectorFront(this.handle);
let buffer =
segment.deref()
.samples.buffer.reinterpret(segment.deref().n * ref.sizeof.float)
.buffer;
let samples = new Float32Array(buffer).slice(0);
let ans = new SpeechSegment(segment.deref().start, samples);
libsherpa_onnx.SherpaOnnxDestroySpeechSegment(segment);
return ans;
}
};
class GeneratedAudio {
constructor(sampleRate, samples) {
this.sampleRate = sampleRate;
this.samples = samples;
}
save(filename) {
libsherpa_onnx.SherpaOnnxWriteWave(this.samples, this.samples.length,
this.sampleRate, filename);
}
};
class OfflineTts {
constructor(config) {
this.config = config;
this.handle = libsherpa_onnx.SherpaOnnxCreateOfflineTts(config.ref());
}
free() {
if (this.handle) {
libsherpa_onnx.SherpaOnnxDestroyOfflineTts(this.handle);
this.handle = null;
}
}
generate(text, sid, speed) {
let r = libsherpa_onnx.SherpaOnnxOfflineTtsGenerate(this.handle, text, sid,
speed);
const buffer =
r.deref()
.samples.buffer.reinterpret(r.deref().n * ref.sizeof.float)
.buffer;
let samples = new Float32Array(buffer).slice(0);
let sampleRate = r.deref().sampleRate;
let generatedAudio = new GeneratedAudio(sampleRate, samples);
libsherpa_onnx.SherpaOnnxDestroyOfflineTtsGeneratedAudio(r);
return generatedAudio;
}
};
// online asr
const OnlineTransducerModelConfig = SherpaOnnxOnlineTransducerModelConfig;
const OnlineModelConfig = SherpaOnnxOnlineModelConfig;
const FeatureConfig = SherpaOnnxFeatureConfig;
const OnlineRecognizerConfig = SherpaOnnxOnlineRecognizerConfig;
const OnlineParaformerModelConfig = SherpaOnnxOnlineParaformerModelConfig;
const OnlineZipformer2CtcModelConfig = SherpaOnnxOnlineZipformer2CtcModelConfig;
// offline asr
const OfflineTransducerModelConfig = SherpaOnnxOfflineTransducerModelConfig;
const OfflineModelConfig = SherpaOnnxOfflineModelConfig;
const OfflineRecognizerConfig = SherpaOnnxOfflineRecognizerConfig;
const OfflineParaformerModelConfig = SherpaOnnxOfflineParaformerModelConfig;
const OfflineWhisperModelConfig = SherpaOnnxOfflineWhisperModelConfig;
const OfflineNemoEncDecCtcModelConfig =
SherpaOnnxOfflineNemoEncDecCtcModelConfig;
const OfflineTdnnModelConfig = SherpaOnnxOfflineTdnnModelConfig;
// vad
const SileroVadModelConfig = SherpaOnnxSileroVadModelConfig;
const VadModelConfig = SherpaOnnxVadModelConfig;
// tts
const OfflineTtsVitsModelConfig = SherpaOnnxOfflineTtsVitsModelConfig;
const OfflineTtsModelConfig = SherpaOnnxOfflineTtsModelConfig;
const OfflineTtsConfig = SherpaOnnxOfflineTtsConfig;
function createOfflineTts(config) {
return sherpa_onnx_tts.createOfflineTts(wasmModule, config);
}
// Note: online means streaming and offline means non-streaming here.
// Both of them don't require internet connection.
module.exports = {
// online asr
OnlineTransducerModelConfig,
OnlineModelConfig,
FeatureConfig,
OnlineRecognizerConfig,
OnlineRecognizer,
OnlineStream,
OnlineParaformerModelConfig,
OnlineZipformer2CtcModelConfig,
// offline asr
OfflineRecognizer,
OfflineStream,
OfflineTransducerModelConfig,
OfflineModelConfig,
OfflineRecognizerConfig,
OfflineParaformerModelConfig,
OfflineWhisperModelConfig,
OfflineNemoEncDecCtcModelConfig,
OfflineTdnnModelConfig,
// vad
SileroVadModelConfig,
VadModelConfig,
CircularBuffer,
VoiceActivityDetector,
// tts
OfflineTtsVitsModelConfig,
OfflineTtsModelConfig,
OfflineTtsConfig,
OfflineTts,
//
Display,
createOnlineRecognizer,
createOfflineRecognizer,
createOfflineTts,
};

View File

@@ -1,7 +1,7 @@
{
"name": "sherpa-onnx2",
"version": "1.8.10",
"description": "Real-time speech recognition with Next-gen Kaldi",
"name": "sherpa-onnx",
"version": "SHERPA_ONNX_VERSION",
"description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
@@ -11,15 +11,30 @@
"url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
},
"keywords": [
"speech-to-text",
"text-to-speech",
"speech to text",
"text to speech",
"transcription",
"real-time speech recognition",
"without internet connect",
"without internet connection",
"embedded systems",
"open source",
"zipformer",
"asr",
"speech"
"tts",
"stt",
"c++",
"onnxruntime",
"onnx",
"ai",
"next-gen kaldi",
"offline",
"privacy",
"open source",
"streaming speech recognition",
"speech",
"recognition",
"WebAssembly",
"wasm"
],
"author": "The next-gen Kaldi team",
"license": "Apache-2.0",
@@ -28,10 +43,5 @@
},
"homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
"dependencies": {
"ffi-napi": "^4.0.3",
"npm": "^6.14.18",
"ref-array-napi": "^1.2.2",
"ref-napi": "^3.0.3",
"ref-struct-napi": "^1.1.1"
}
}

View File

@@ -1,50 +0,0 @@
{
"name": "sherpa-onnx",
"version": "SHERPA_ONNX_VERSION",
"description": "Real-time speech recognition with Next-gen Kaldi",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"repository": {
"type": "git",
"url": "git+https://github.com/k2-fsa/sherpa-onnx.git"
},
"keywords": [
"speech to text",
"text to speech",
"transcription",
"real-time speech recognition",
"without internet connect",
"embedded systems",
"open source",
"zipformer",
"asr",
"tts",
"stt",
"c++",
"onnxruntime",
"onnx",
"ai",
"next-gen kaldi",
"offline",
"privacy",
"open source",
"streaming speech recognition",
"speech",
"recognition"
],
"author": "The next-gen Kaldi team",
"license": "Apache-2.0",
"bugs": {
"url": "https://github.com/k2-fsa/sherpa-onnx/issues"
},
"homepage": "https://github.com/k2-fsa/sherpa-onnx#readme",
"dependencies": {
"ffi-napi": "^4.0.3",
"npm": "^6.14.18",
"ref-array-napi": "^1.2.2",
"ref-napi": "^3.0.3",
"ref-struct-napi": "^1.1.1"
}
}

View File

@@ -1,123 +0,0 @@
#!/usr/bin/env bash
set -ex
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(realpath $SCRIPT_DIR/../..)
echo "SCRIPT_DIR: $SCRIPT_DIR"
echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"
sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g ./package.json.in
cp package.json.in package.json
rm package.json.in
rm package.json.in.bak
rm .clang-format
function windows_x64() {
echo "Process Windows (x64)"
mkdir -p lib/win-x64
dst=$(realpath lib/win-x64)
mkdir t
cd t
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst
rm -fv $dst/sherpa-onnx-portaudio.dll
cd ..
rm -rf t
}
function windows_x86() {
echo "Process Windows (x86)"
mkdir -p lib/win-x86
dst=$(realpath lib/win-x86)
mkdir t
cd t
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst
rm -fv $dst/sherpa-onnx-portaudio.dll
cd ..
rm -rf t
}
function linux_x64() {
echo "Process Linux (x64)"
mkdir -p lib/linux-x64
dst=$(realpath lib/linux-x64)
mkdir t
cd t
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_28_x86_64.whl
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_28_x86_64.whl
cp -v sherpa_onnx/lib/*.so* $dst
rm -v $dst/libcargs.so
rm -v $dst/libsherpa-onnx-portaudio.so
rm -v $dst/libsherpa-onnx-fst.so
rm -v $dst/libonnxruntime.so
cd ..
rm -rf t
}
function osx_x64() {
echo "Process osx-x64"
mkdir -p lib/osx-x64
dst=$(realpath lib/osx-x64)
mkdir t
cd t
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl
cp -v sherpa_onnx/lib/*.dylib $dst/
rm -v $dst/libonnxruntime.dylib
rm -v $dst/libcargs.dylib
rm -v $dst/libsherpa-onnx-fst.dylib
rm -v $dst/libsherpa-onnx-portaudio.dylib
cd ..
rm -rf t
}
function osx_arm64() {
echo "Process osx-arm64"
mkdir -p lib/osx-arm64
dst=$(realpath lib/osx-arm64)
mkdir t
cd t
wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl
unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl
cp -v sherpa_onnx/lib/*.dylib $dst/
rm -v $dst/libonnxruntime.dylib
rm -v $dst/libcargs.dylib
rm -v $dst/libsherpa-onnx-fst.dylib
rm -v $dst/libsherpa-onnx-portaudio.dylib
cd ..
rm -rf t
}
windows_x64
ls -lh lib/win-x64
windows_x86
ls -lh lib/win-x86
linux_x64
ls -lh lib/linux-x64
osx_x64
ls -lh lib/osx-x64
osx_arm64
ls -lh lib/osx-arm64