Support adding punctuations to text for node-addon-api (#876)
This commit is contained in:
@@ -31,6 +31,12 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PAT
|
||||
|
||||
The following tables list the examples in this folder.
|
||||
|
||||
## Add punctuations to text
|
||||
|
||||
|File| Description|
|
||||
|---|---|
|
||||
|[./test_punctuation.js](./test_punctuation.js)| Add punctuations to input text using [CT transformer](https://modelscope.cn/models/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary). It supports both Chinese and English.|
|
||||
|
||||
## Voice activity detection (VAD)
|
||||
|
||||
|File| Description|
|
||||
@@ -309,3 +315,13 @@ git clone https://github.com/csukuangfj/sr-data
|
||||
|
||||
node ./test_speaker_identification.js
|
||||
```
|
||||
|
||||
### Add punctuations
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
|
||||
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
|
||||
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
|
||||
|
||||
node ./test_punctuation.js
|
||||
```
|
||||
|
||||
32
nodejs-addon-examples/test_punctuation.js
Normal file
32
nodejs-addon-examples/test_punctuation.js
Normal file
@@ -0,0 +1,32 @@
|
||||
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
const sherpa_onnx = require('sherpa-onnx-node');
|
||||
|
||||
// Please download the punctuation model files from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
|
||||
/**
 * Create the punctuation object used by this example.
 *
 * The configuration points at the CT-transformer ONNX model inside the
 * extracted `sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12`
 * directory, turns on debug mode, and requests one thread on the CPU
 * provider.
 *
 * NOTE(review): the model path is relative, so the script presumably has to
 * be run from the directory where the model archive was extracted - confirm.
 *
 * @returns {object} A new `sherpa_onnx.Punctuation` instance.
 */
function createPunctuation() {
  const modelFile =
      './sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx';

  // Everything the constructor receives lives under the `model` key.
  const model = {
    ctTransformer: modelFile,
    debug: true,
    numThreads: 1,
    provider: 'cpu',
  };

  return new sherpa_onnx.Punctuation({model});
}
|
||||
|
||||
// Build the punctuation object once and reuse it for every sample below.
const punct = createPunctuation();

// Unpunctuated samples: mixed Chinese/English, Chinese only, English only.
const sentences = [
  '这是一个测试你好吗How are you我很好thank you are you ok谢谢你',
  '我们都是木头人不会说话不会动',
  'The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry',
];

// Print each sample next to its punctuated form, separated by '---' rules.
console.log('---');
sentences.forEach((sentence) => {
  const punctuated = punct.addPunct(sentence);
  console.log(`Input: ${sentence}`);
  console.log(`Output: ${punctuated}`);
  console.log('---');
});
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
const sherpa_onnx = require('sherpa-onnx-node');
|
||||
|
||||
// Please download whisper multi-lingual models from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||
function createSpokenLanguageID() {
|
||||
const config = {
|
||||
whisper: {
|
||||
|
||||
Reference in New Issue
Block a user