Add Java and Kotlin API for punctuation models (#818)
This commit is contained in:
@@ -9,6 +9,11 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/csrc/math.h"
|
||||
#include "sherpa-onnx/csrc/offline-ct-transformer-model.h"
|
||||
@@ -24,6 +29,12 @@ class OfflinePunctuationCtTransformerImpl : public OfflinePunctuationImpl {
|
||||
const OfflinePunctuationConfig &config)
|
||||
: config_(config), model_(config.model) {}
|
||||
|
||||
#if __ANDROID_API__ >= 9
  // Android-only constructor. Keeps a copy of `config` and constructs the
  // model from `mgr` together with `config.model`.
  OfflinePunctuationCtTransformerImpl(AAssetManager *mgr,
                                      const OfflinePunctuationConfig &config)
      : config_(config),
        model_(mgr, config.model) {}
#endif
|
||||
|
||||
std::string AddPunctuation(const std::string &text) const override {
|
||||
if (text.empty()) {
|
||||
return {};
|
||||
|
||||
@@ -4,6 +4,11 @@
|
||||
|
||||
#include "sherpa-onnx/csrc/offline-punctuation-impl.h"
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/csrc/offline-punctuation-ct-transformer-impl.h"
|
||||
|
||||
@@ -19,4 +24,16 @@ std::unique_ptr<OfflinePunctuationImpl> OfflinePunctuationImpl::Create(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
#if __ANDROID_API__ >= 9
// Android factory overload.
//
// Returns a CT-transformer based implementation when
// `config.model.ct_transformer` is non-empty. Otherwise an error is logged
// and a null pointer is returned.
std::unique_ptr<OfflinePunctuationImpl> OfflinePunctuationImpl::Create(
    AAssetManager *mgr, const OfflinePunctuationConfig &config) {
  const bool has_ct_transformer = !config.model.ct_transformer.empty();

  if (!has_ct_transformer) {
    SHERPA_ONNX_LOGE("Please specify a punctuation model! Return a null pointer");
    return nullptr;
  }

  return std::make_unique<OfflinePunctuationCtTransformerImpl>(mgr, config);
}
#endif
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
@@ -7,6 +7,10 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#include "sherpa-onnx/csrc/offline-punctuation.h"
|
||||
|
||||
@@ -19,6 +23,11 @@ class OfflinePunctuationImpl {
|
||||
static std::unique_ptr<OfflinePunctuationImpl> Create(
|
||||
const OfflinePunctuationConfig &config);
|
||||
|
||||
#if __ANDROID_API__ >= 9
  // Android overload of Create() that additionally receives an
  // AAssetManager pointer.
  static std::unique_ptr<OfflinePunctuationImpl> Create(
      AAssetManager *mgr,
      const OfflinePunctuationConfig &config);
#endif
|
||||
|
||||
virtual std::string AddPunctuation(const std::string &text) const = 0;
|
||||
};
|
||||
|
||||
|
||||
@@ -4,6 +4,11 @@
|
||||
|
||||
#include "sherpa-onnx/csrc/offline-punctuation.h"
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/csrc/offline-punctuation-impl.h"
|
||||
|
||||
@@ -33,6 +38,12 @@ std::string OfflinePunctuationConfig::ToString() const {
|
||||
// Creates the implementation object via OfflinePunctuationImpl::Create().
OfflinePunctuation::OfflinePunctuation(
    const OfflinePunctuationConfig &config)
    : impl_(OfflinePunctuationImpl::Create(config)) {}
|
||||
|
||||
#if __ANDROID_API__ >= 9
// Android-only constructor: forwards `mgr` and `config` to the matching
// OfflinePunctuationImpl::Create() overload.
OfflinePunctuation::OfflinePunctuation(
    AAssetManager *mgr, const OfflinePunctuationConfig &config)
    : impl_(OfflinePunctuationImpl::Create(mgr, config)) {}
#endif
|
||||
|
||||
OfflinePunctuation::~OfflinePunctuation() = default;
|
||||
|
||||
std::string OfflinePunctuation::AddPunctuation(const std::string &text) const {
|
||||
|
||||
@@ -8,6 +8,11 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#include "sherpa-onnx/csrc/offline-punctuation-model-config.h"
|
||||
#include "sherpa-onnx/csrc/parse-options.h"
|
||||
|
||||
@@ -33,6 +38,11 @@ class OfflinePunctuation {
|
||||
public:
|
||||
explicit OfflinePunctuation(const OfflinePunctuationConfig &config);
|
||||
|
||||
#if __ANDROID_API__ >= 9
  // Android-only constructor that additionally receives an AAssetManager
  // pointer.
  OfflinePunctuation(AAssetManager *mgr,
                     const OfflinePunctuationConfig &config);
#endif
|
||||
|
||||
~OfflinePunctuation();
|
||||
|
||||
// Add punctuation to the input text and return it.
|
||||
|
||||
@@ -40,6 +40,10 @@ java_files += SpokenLanguageIdentificationWhisperConfig.java
|
||||
java_files += SpokenLanguageIdentificationConfig.java
|
||||
java_files += SpokenLanguageIdentification.java
|
||||
|
||||
java_files += OfflinePunctuationModelConfig.java
|
||||
java_files += OfflinePunctuationConfig.java
|
||||
java_files += OfflinePunctuation.java
|
||||
|
||||
class_files := $(java_files:%.java=%.class)
|
||||
|
||||
java_files := $(addprefix src/$(package_dir)/,$(java_files))
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflinePunctuation {
|
||||
static {
|
||||
System.loadLibrary("sherpa-onnx-jni");
|
||||
}
|
||||
|
||||
private long ptr = 0; // this is the asr engine ptrss
|
||||
|
||||
public OfflinePunctuation(OfflinePunctuationConfig config) {
|
||||
ptr = newFromFile(config);
|
||||
}
|
||||
|
||||
public String addPunctuation(String text) {
|
||||
return addPunctuation(ptr, text);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void finalize() throws Throwable {
|
||||
release();
|
||||
}
|
||||
|
||||
// You'd better call it manually if it is not used anymore
|
||||
public void release() {
|
||||
if (this.ptr == 0) {
|
||||
return;
|
||||
}
|
||||
delete(this.ptr);
|
||||
this.ptr = 0;
|
||||
}
|
||||
|
||||
private native void delete(long ptr);
|
||||
|
||||
private native long newFromFile(OfflinePunctuationConfig config);
|
||||
|
||||
private native String addPunctuation(long ptr, String text);
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
public class OfflinePunctuationConfig {
|
||||
private final OfflinePunctuationModelConfig model;
|
||||
|
||||
private OfflinePunctuationConfig(Builder builder) {
|
||||
this.model = builder.model;
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public OfflinePunctuationModelConfig getModel() {
|
||||
return model;
|
||||
}
|
||||
|
||||
|
||||
public static class Builder {
|
||||
private OfflinePunctuationModelConfig model = OfflinePunctuationModelConfig.builder().build();
|
||||
|
||||
public OfflinePunctuationConfig build() {
|
||||
return new OfflinePunctuationConfig(this);
|
||||
}
|
||||
|
||||
public Builder setModel(OfflinePunctuationModelConfig model) {
|
||||
this.model = model;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
// Copyright 2024 Xiaomi Corporation
|
||||
|
||||
package com.k2fsa.sherpa.onnx;
|
||||
|
||||
/**
 * Model settings for offline punctuation.
 *
 * <p>Instances are immutable and created through {@link #builder()}. The JNI
 * layer reads the fields {@code ctTransformer}, {@code numThreads},
 * {@code debug} and {@code provider} by name, so the field names must not be
 * changed.
 */
public class OfflinePunctuationModelConfig {
    private final String ctTransformer;
    private final int numThreads;
    private final boolean debug;
    private final String provider;

    private OfflinePunctuationModelConfig(Builder builder) {
        this.ctTransformer = builder.ctTransformer;
        this.numThreads = builder.numThreads;
        this.debug = builder.debug;
        this.provider = builder.provider;
    }

    /** Returns a builder initialised with the default values. */
    public static Builder builder() {
        return new Builder();
    }

    /** Returns the CT-transformer model setting (empty by default). */
    public String getCtTransformer() {
        return ctTransformer;
    }

    /** Returns the configured number of threads (1 by default). */
    public int getNumThreads() {
        return numThreads;
    }

    /** Returns the debug flag (true by default). */
    public boolean getDebug() {
        return debug;
    }

    /** Returns the provider name ("cpu" by default). */
    public String getProvider() {
        return provider;
    }

    /** Builder for {@link OfflinePunctuationModelConfig}. */
    public static class Builder {
        private String ctTransformer = "";
        private int numThreads = 1;
        private boolean debug = true;
        private String provider = "cpu";

        /** Creates the config from the current builder state. */
        public OfflinePunctuationModelConfig build() {
            return new OfflinePunctuationModelConfig(this);
        }

        public Builder setCtTransformer(String ctTransformer) {
            this.ctTransformer = ctTransformer;
            return this;
        }

        public Builder setNumThreads(int numThreads) {
            this.numThreads = numThreads;
            return this;
        }

        public Builder setDebug(boolean debug) {
            this.debug = debug;
            return this;
        }

        public Builder setProvider(String provider) {
            this.provider = provider;
            return this;
        }
    }
}
|
||||
@@ -13,6 +13,7 @@ set(sources
|
||||
audio-tagging.cc
|
||||
jni.cc
|
||||
keyword-spotter.cc
|
||||
offline-punctuation.cc
|
||||
offline-recognizer.cc
|
||||
offline-stream.cc
|
||||
online-recognizer.cc
|
||||
|
||||
108
sherpa-onnx/jni/offline-punctuation.cc
Normal file
108
sherpa-onnx/jni/offline-punctuation.cc
Normal file
@@ -0,0 +1,108 @@
|
||||
// sherpa-onnx/jni/offline-punctuation.cc
|
||||
//
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
|
||||
#include "sherpa-onnx/csrc/offline-punctuation.h"
|
||||
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/jni/common.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Converts a Java/Kotlin OfflinePunctuationConfig object into its C++
// counterpart by reading the object's fields through JNI.
//
// @param env     JNI environment.
// @param config  A com.k2fsa.sherpa.onnx.OfflinePunctuationConfig instance.
// @return The populated C++ config. If the `model` field is null, an error is
//         logged and a default-constructed config is returned. String fields
//         that are null on the Java side are left empty.
static OfflinePunctuationConfig GetOfflinePunctuationConfig(JNIEnv *env,
                                                            jobject config) {
  // Reads the java.lang.String field `name` of `obj`. A null reference, or a
  // failed GetStringUTFChars(), yields an empty string instead of passing a
  // null pointer on.
  auto read_string = [env](jobject obj, jclass cls,
                           const char *name) -> std::string {
    jfieldID string_fid = env->GetFieldID(cls, name, "Ljava/lang/String;");
    auto s = static_cast<jstring>(env->GetObjectField(obj, string_fid));
    if (s == nullptr) {
      return {};
    }

    const char *p = env->GetStringUTFChars(s, nullptr);
    if (p == nullptr) {
      return {};
    }

    std::string value = p;
    env->ReleaseStringUTFChars(s, p);
    return value;
  };

  OfflinePunctuationConfig ans;

  jclass cls = env->GetObjectClass(config);
  jfieldID fid = env->GetFieldID(
      cls, "model", "Lcom/k2fsa/sherpa/onnx/OfflinePunctuationModelConfig;");
  jobject model_config = env->GetObjectField(config, fid);
  if (model_config == nullptr) {
    // GetObjectClass() must not be called with a null object.
    SHERPA_ONNX_LOGE("The model field of OfflinePunctuationConfig is null");
    return ans;
  }
  jclass model_config_cls = env->GetObjectClass(model_config);

  ans.model.ct_transformer =
      read_string(model_config, model_config_cls, "ctTransformer");

  fid = env->GetFieldID(model_config_cls, "numThreads", "I");
  ans.model.num_threads = env->GetIntField(model_config, fid);

  fid = env->GetFieldID(model_config_cls, "debug", "Z");
  ans.model.debug = env->GetBooleanField(model_config, fid);

  ans.model.provider = read_string(model_config, model_config_cls, "provider");

  return ans;
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
// JNI entry point for OfflinePunctuation.newFromAsset().
//
// Converts `_config` to a C++ OfflinePunctuationConfig, logs it, and creates
// a sherpa_onnx::OfflinePunctuation. On Android builds the asset manager
// obtained from `asset_manager` is passed to the constructor.
//
// @return The address of the new native object as a jlong, or 0 if the
//         asset manager could not be obtained.
//
// NOTE(review): unlike newFromFile, config.Validate() is not called here --
// presumably because the model is loaded through the asset manager; confirm.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_newFromAsset(
    JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
  AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  if (!mgr) {
    SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
    // Do not hand a null asset manager to the model loader.
    return 0;
  }
#endif
  auto config = sherpa_onnx::GetOfflinePunctuationConfig(env, _config);
  SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());

  // Ownership passes to the Java/Kotlin side, which frees the object through
  // the `delete` native method.
  auto model = new sherpa_onnx::OfflinePunctuation(
#if __ANDROID_API__ >= 9
      mgr,
#endif
      config);

  return reinterpret_cast<jlong>(model);
}
|
||||
|
||||
// JNI entry point for OfflinePunctuation.newFromFile().
//
// Converts `_config` to a C++ config, logs it and validates it. Returns 0 if
// validation fails; otherwise returns the address of a newly allocated
// sherpa_onnx::OfflinePunctuation as a jlong.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_newFromFile(JNIEnv *env,
                                                          jobject /*obj*/,
                                                          jobject _config) {
  auto punct_config = sherpa_onnx::GetOfflinePunctuationConfig(env, _config);
  SHERPA_ONNX_LOGE("config:\n%s", punct_config.ToString().c_str());

  const bool is_valid = punct_config.Validate();
  if (is_valid) {
    auto *punct = new sherpa_onnx::OfflinePunctuation(punct_config);
    return reinterpret_cast<jlong>(punct);
  }

  SHERPA_ONNX_LOGE("Errors found in config!");
  return 0;
}
|
||||
|
||||
// JNI entry point for OfflinePunctuation.delete(): destroys the native
// sherpa_onnx::OfflinePunctuation whose address is `ptr`.
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_delete(
    JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) {
  auto *punct = reinterpret_cast<sherpa_onnx::OfflinePunctuation *>(ptr);
  delete punct;
}
|
||||
|
||||
// JNI entry point for OfflinePunctuation.addPunctuation().
//
// @param ptr   Address of a sherpa_onnx::OfflinePunctuation, as returned by
//              newFromFile / newFromAsset.
// @param text  The input text.
// @return A new Java string holding the result of AddPunctuation(). If `ptr`
//         is 0 or `text` is null, an error is logged and `text` is returned
//         unchanged. If the characters of `text` cannot be obtained, nullptr
//         is returned.
SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL
Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_addPunctuation(JNIEnv *env,
                                                             jobject /*obj*/,
                                                             jlong ptr,
                                                             jstring text) {
  auto punct = reinterpret_cast<const sherpa_onnx::OfflinePunctuation *>(ptr);

  // A 0 handle (creation failed or the object was already released) or a
  // null string would otherwise be dereferenced below.
  if (punct == nullptr || text == nullptr) {
    SHERPA_ONNX_LOGE("Invalid arguments: ptr is 0 or text is null");
    return text;
  }

  const char *ptext = env->GetStringUTFChars(text, nullptr);
  if (ptext == nullptr) {
    // GetStringUTFChars() failed; nothing to release.
    return nullptr;
  }

  std::string result = punct->AddPunctuation(ptext);

  env->ReleaseStringUTFChars(text, ptext);

  return env->NewStringUTF(result.c_str());
}
|
||||
57
sherpa-onnx/kotlin-api/OfflinePunctuation.kt
Normal file
57
sherpa-onnx/kotlin-api/OfflinePunctuation.kt
Normal file
@@ -0,0 +1,57 @@
|
||||
package com.k2fsa.sherpa.onnx
|
||||
|
||||
import android.content.res.AssetManager
|
||||
|
||||
/**
 * Model settings for offline punctuation.
 *
 * The native layer reads these properties by name.
 *
 * @property ctTransformer the CT-transformer model setting (presumably the
 *   model file path or asset name -- confirm against the native loader)
 * @property numThreads number of threads, 1 by default
 * @property debug debug flag, off by default
 * @property provider provider name, "cpu" by default
 */
data class OfflinePunctuationModelConfig(
    var ctTransformer: String,
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu",
)
|
||||
|
||||
|
||||
/**
 * Configuration passed to [OfflinePunctuation].
 *
 * @property model the model settings
 */
data class OfflinePunctuationConfig(
    var model: OfflinePunctuationModelConfig,
)
|
||||
|
||||
/**
 * Kotlin wrapper around the native offline punctuation object.
 *
 * @param assetManager when non-null, the native object is created with
 *   `newFromAsset`; otherwise `newFromFile` is used
 * @param config configuration passed to the native layer
 */
class OfflinePunctuation(
    assetManager: AssetManager? = null,
    config: OfflinePunctuationConfig,
) {
    // Handle of the native object. 0 means there is no native object: creation
    // failed or the object has already been released. It is a `var` so that it
    // can be cleared after the native object is destroyed.
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            newFromAsset(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    // Destroys the native object at most once. Without clearing the handle,
    // calling release() and then letting the garbage collector run finalize()
    // would delete the same native pointer twice.
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    /** Destroys the native object. Safe to call more than once. */
    fun release() = finalize()

    /**
     * Adds punctuation to [text] and returns the result.
     *
     * @throws IllegalStateException if there is no native object (creation
     *   failed or [release] was already called)
     */
    fun addPunctuation(text: String): String {
        check(ptr != 0L) { "OfflinePunctuation is not initialized or has been released" }
        return addPunctuation(ptr, text)
    }

    private external fun delete(ptr: Long)

    private external fun addPunctuation(ptr: Long, text: String): String

    private external fun newFromAsset(
        assetManager: AssetManager,
        config: OfflinePunctuationConfig,
    ): Long

    private external fun newFromFile(
        config: OfflinePunctuationConfig,
    ): Long

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
|
||||
Reference in New Issue
Block a user