Support adding punctuation to text for node-addon-api (#876)

This commit is contained in:
Fangjun Kuang
2024-05-14 19:28:56 +08:00
committed by GitHub
parent d19f50b799
commit 75630b986b
12 changed files with 218 additions and 3 deletions

View File

@@ -31,6 +31,12 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PAT
The following tables list the examples in this folder.
## Add punctuations to text
|File| Description|
|---|---|
|[./test_punctuation.js](./test_punctuation.js)| Add punctuation to input text using a [CT transformer](https://modelscope.cn/models/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary) model. It supports both Chinese and English.|
## Voice activity detection (VAD)
|File| Description|
@@ -309,3 +315,13 @@ git clone https://github.com/csukuangfj/sr-data
node ./test_speaker_identification.js
```
### Add punctuations
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
node ./test_punctuation.js
```

View File

@@ -0,0 +1,32 @@
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
const sherpa_onnx = require('sherpa-onnx-node');
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
/**
 * Construct the punctuation object used by this example.
 *
 * The configuration points at the CT transformer `model.onnx` file inside the
 * extracted model directory (path is given relative to `./`), enables debug
 * output, and requests a single thread on the CPU provider.
 *
 * @returns {object} A new `sherpa_onnx.Punctuation` instance.
 */
function createPunctuation() {
  const ctTransformer =
      './sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx';

  const model = {
    ctTransformer,
    debug: true,
    numThreads: 1,
    provider: 'cpu',
  };

  return new sherpa_onnx.Punctuation({model});
}
// Run the punctuation model over a few unpunctuated samples (mixed
// Chinese/English, Chinese only, English only) and print each input next to
// the text returned by addPunct().
const punct = createPunctuation();

const sentences = [
  '这是一个测试你好吗How are you我很好thank you are you ok谢谢你',
  '我们都是木头人不会说话不会动',
  'The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry',
];

// A separator line is printed before the first sample and after every sample.
const separator = '---';
console.log(separator);

sentences.forEach((rawText) => {
  const withPunct = punct.addPunct(rawText);
  console.log(`Input: ${rawText}`);
  console.log(`Output: ${withPunct}`);
  console.log(separator);
});

View File

@@ -2,6 +2,8 @@
const sherpa_onnx = require('sherpa-onnx-node');
// Please download whisper multi-lingual models from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
function createSpokenLanguageID() {
const config = {
whisper: {