Add swift online punctuation (#1661)
This commit is contained in:
@@ -24,6 +24,7 @@
|
|||||||
#include "sherpa-onnx/csrc/macros.h"
|
#include "sherpa-onnx/csrc/macros.h"
|
||||||
#include "sherpa-onnx/csrc/offline-punctuation.h"
|
#include "sherpa-onnx/csrc/offline-punctuation.h"
|
||||||
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||||
|
#include "sherpa-onnx/csrc/online-punctuation.h"
|
||||||
#include "sherpa-onnx/csrc/online-recognizer.h"
|
#include "sherpa-onnx/csrc/online-recognizer.h"
|
||||||
#include "sherpa-onnx/csrc/resample.h"
|
#include "sherpa-onnx/csrc/resample.h"
|
||||||
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
|
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
|
||||||
@@ -1717,6 +1718,53 @@ const char *SherpaOfflinePunctuationAddPunct(
|
|||||||
|
|
||||||
void SherpaOfflinePunctuationFreeText(const char *text) { delete[] text; }
|
void SherpaOfflinePunctuationFreeText(const char *text) { delete[] text; }
|
||||||
|
|
||||||
|
struct SherpaOnnxOnlinePunctuation {
|
||||||
|
std::unique_ptr<sherpa_onnx::OnlinePunctuation> impl;
|
||||||
|
};
|
||||||
|
|
||||||
|
const SherpaOnnxOnlinePunctuation *SherpaOnnxCreateOnlinePunctuation(
|
||||||
|
const SherpaOnnxOnlinePunctuationConfig *config) {
|
||||||
|
auto p = new SherpaOnnxOnlinePunctuation;
|
||||||
|
try {
|
||||||
|
sherpa_onnx::OnlinePunctuationConfig punctuation_config;
|
||||||
|
punctuation_config.model.cnn_bilstm = SHERPA_ONNX_OR(config->model.cnn_bilstm, "");
|
||||||
|
punctuation_config.model.bpe_vocab = SHERPA_ONNX_OR(config->model.bpe_vocab, "");
|
||||||
|
punctuation_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
||||||
|
punctuation_config.model.debug = config->model.debug;
|
||||||
|
punctuation_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
|
||||||
|
|
||||||
|
p->impl =
|
||||||
|
std::make_unique<sherpa_onnx::OnlinePunctuation>(punctuation_config);
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
SHERPA_ONNX_LOGE("Failed to create online punctuation: %s", e.what());
|
||||||
|
delete p;
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SherpaOnnxDestroyOnlinePunctuation(const SherpaOnnxOnlinePunctuation *p) {
|
||||||
|
delete p;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *SherpaOnnxOnlinePunctuationAddPunct(
|
||||||
|
const SherpaOnnxOnlinePunctuation *punctuation, const char *text) {
|
||||||
|
if (!punctuation || !text) return nullptr;
|
||||||
|
|
||||||
|
try {
|
||||||
|
std::string s = punctuation->impl->AddPunctuationWithCase(text);
|
||||||
|
char *p = new char[s.size() + 1];
|
||||||
|
std::copy(s.begin(), s.end(), p);
|
||||||
|
p[s.size()] = '\0';
|
||||||
|
return p;
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
SHERPA_ONNX_LOGE("Failed to add punctuation: %s", e.what());
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void SherpaOnnxOnlinePunctuationFreeText(const char *text) { delete[] text; }
|
||||||
|
|
||||||
struct SherpaOnnxLinearResampler {
|
struct SherpaOnnxLinearResampler {
|
||||||
std::unique_ptr<sherpa_onnx::LinearResample> impl;
|
std::unique_ptr<sherpa_onnx::LinearResample> impl;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1369,6 +1369,39 @@ SHERPA_ONNX_API const char *SherpaOfflinePunctuationAddPunct(
|
|||||||
|
|
||||||
SHERPA_ONNX_API void SherpaOfflinePunctuationFreeText(const char *text);
|
SHERPA_ONNX_API void SherpaOfflinePunctuationFreeText(const char *text);
|
||||||
|
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxOnlinePunctuationModelConfig {
|
||||||
|
const char *cnn_bilstm;
|
||||||
|
const char *bpe_vocab;
|
||||||
|
int32_t num_threads;
|
||||||
|
int32_t debug;
|
||||||
|
const char *provider;
|
||||||
|
} SherpaOnnxOnlinePunctuationModelConfig;
|
||||||
|
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxOnlinePunctuationConfig {
|
||||||
|
SherpaOnnxOnlinePunctuationModelConfig model;
|
||||||
|
} SherpaOnnxOnlinePunctuationConfig;
|
||||||
|
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxOnlinePunctuation SherpaOnnxOnlinePunctuation;
|
||||||
|
|
||||||
|
// Create an online punctuation processor. The user has to invoke
|
||||||
|
// SherpaOnnxDestroyOnlinePunctuation() to free the returned pointer
|
||||||
|
// to avoid memory leak
|
||||||
|
SHERPA_ONNX_API const SherpaOnnxOnlinePunctuation *SherpaOnnxCreateOnlinePunctuation(
|
||||||
|
const SherpaOnnxOnlinePunctuationConfig *config);
|
||||||
|
|
||||||
|
// Free a pointer returned by SherpaOnnxCreateOnlinePunctuation()
|
||||||
|
SHERPA_ONNX_API void SherpaOnnxDestroyOnlinePunctuation(
|
||||||
|
const SherpaOnnxOnlinePunctuation *punctuation);
|
||||||
|
|
||||||
|
// Add punctuations to the input text. The user has to invoke
|
||||||
|
// SherpaOnnxOnlinePunctuationFreeText() to free the returned pointer
|
||||||
|
// to avoid memory leak
|
||||||
|
SHERPA_ONNX_API const char *SherpaOnnxOnlinePunctuationAddPunct(
|
||||||
|
const SherpaOnnxOnlinePunctuation *punctuation, const char *text);
|
||||||
|
|
||||||
|
// Free a pointer returned by SherpaOnnxOnlinePunctuationAddPunct()
|
||||||
|
SHERPA_ONNX_API void SherpaOnnxOnlinePunctuationFreeText(const char *text);
|
||||||
|
|
||||||
// for resampling
|
// for resampling
|
||||||
SHERPA_ONNX_API typedef struct SherpaOnnxLinearResampler
|
SHERPA_ONNX_API typedef struct SherpaOnnxLinearResampler
|
||||||
SherpaOnnxLinearResampler;
|
SherpaOnnxLinearResampler;
|
||||||
|
|||||||
@@ -1095,6 +1095,52 @@ class SherpaOnnxOfflinePunctuationWrapper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func sherpaOnnxOnlinePunctuationModelConfig(
|
||||||
|
cnnBiLstm: String,
|
||||||
|
bpeVocab: String,
|
||||||
|
numThreads: Int = 1,
|
||||||
|
debug: Int = 0,
|
||||||
|
provider: String = "cpu"
|
||||||
|
) -> SherpaOnnxOnlinePunctuationModelConfig {
|
||||||
|
return SherpaOnnxOnlinePunctuationModelConfig(
|
||||||
|
cnn_bilstm: toCPointer(cnnBiLstm),
|
||||||
|
bpe_vocab: toCPointer(bpeVocab),
|
||||||
|
num_threads: Int32(numThreads),
|
||||||
|
debug: Int32(debug),
|
||||||
|
provider: toCPointer(provider))
|
||||||
|
}
|
||||||
|
|
||||||
|
func sherpaOnnxOnlinePunctuationConfig(
|
||||||
|
model: SherpaOnnxOnlinePunctuationModelConfig
|
||||||
|
) -> SherpaOnnxOnlinePunctuationConfig {
|
||||||
|
return SherpaOnnxOnlinePunctuationConfig(model: model)
|
||||||
|
}
|
||||||
|
|
||||||
|
class SherpaOnnxOnlinePunctuationWrapper {
|
||||||
|
/// A pointer to the underlying counterpart in C
|
||||||
|
let ptr: OpaquePointer!
|
||||||
|
|
||||||
|
/// Constructor taking a model config
|
||||||
|
init(
|
||||||
|
config: UnsafePointer<SherpaOnnxOnlinePunctuationConfig>!
|
||||||
|
) {
|
||||||
|
ptr = SherpaOnnxCreateOnlinePunctuation(config)
|
||||||
|
}
|
||||||
|
|
||||||
|
deinit {
|
||||||
|
if let ptr {
|
||||||
|
SherpaOnnxDestroyOnlinePunctuation(ptr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func addPunct(text: String) -> String {
|
||||||
|
let cText = SherpaOnnxOnlinePunctuationAddPunct(ptr, toCPointer(text))
|
||||||
|
let ans = String(cString: cText!)
|
||||||
|
SherpaOnnxOnlinePunctuationFreeText(cText)
|
||||||
|
return ans
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func sherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(model: String)
|
func sherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(model: String)
|
||||||
-> SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig
|
-> SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig
|
||||||
{
|
{
|
||||||
|
|||||||
35
swift-api-examples/add-punctuation-online.swift
Normal file
35
swift-api-examples/add-punctuation-online.swift
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
func run() {
|
||||||
|
let model = "./sherpa-onnx-online-punct-en-2024-08-06/model.onnx"
|
||||||
|
let bpe = "./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab"
|
||||||
|
|
||||||
|
// Create model config
|
||||||
|
let modelConfig = sherpaOnnxOnlinePunctuationModelConfig(
|
||||||
|
cnnBiLstm: model,
|
||||||
|
bpeVocab: bpe
|
||||||
|
)
|
||||||
|
|
||||||
|
// Create punctuation config
|
||||||
|
var config = sherpaOnnxOnlinePunctuationConfig(model: modelConfig)
|
||||||
|
|
||||||
|
// Create punctuation instance
|
||||||
|
let punct = SherpaOnnxOnlinePunctuationWrapper(config: &config)
|
||||||
|
|
||||||
|
// Test texts
|
||||||
|
let textList = [
|
||||||
|
"how are you i am fine thank you",
|
||||||
|
"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry"
|
||||||
|
]
|
||||||
|
|
||||||
|
// Process each text
|
||||||
|
for i in 0..<textList.count {
|
||||||
|
let t = punct.addPunct(text: textList[i])
|
||||||
|
print("\nresult is:\n\(t)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@main
|
||||||
|
struct App {
|
||||||
|
static func main() {
|
||||||
|
run()
|
||||||
|
}
|
||||||
|
}
|
||||||
36
swift-api-examples/run-add-punctuations-online.sh
Executable file
36
swift-api-examples/run-add-punctuations-online.sh
Executable file
@@ -0,0 +1,36 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
if [ ! -d ../build-swift-macos ]; then
|
||||||
|
echo "Please run ../build-swift-macos.sh first!"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Download and extract the online punctuation model if not exists
|
||||||
|
if [ ! -d ./sherpa-onnx-online-punct-en-2024-08-06 ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
|
||||||
|
rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -e ./add-punctuation-online ]; then
|
||||||
|
# Note: We use -lc++ to link against libc++ instead of libstdc++
|
||||||
|
swiftc \
|
||||||
|
-lc++ \
|
||||||
|
-I ../build-swift-macos/install/include \
|
||||||
|
-import-objc-header ./SherpaOnnx-Bridging-Header.h \
|
||||||
|
./add-punctuation-online.swift ./SherpaOnnx.swift \
|
||||||
|
-L ../build-swift-macos/install/lib/ \
|
||||||
|
-l sherpa-onnx \
|
||||||
|
-l onnxruntime \
|
||||||
|
-o ./add-punctuation-online
|
||||||
|
|
||||||
|
strip ./add-punctuation-online
|
||||||
|
else
|
||||||
|
echo "./add-punctuation-online exists - skip building"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Set library path and run the executable
|
||||||
|
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
./add-punctuation-online
|
||||||
Reference in New Issue
Block a user