Inverse text normalization API for other programming languages (#1019)

This commit is contained in:
Fangjun Kuang
2024-06-17 17:02:39 +08:00
committed by GitHub
parent b0f7ed3ee3
commit 6e09933d99
39 changed files with 669 additions and 104 deletions

View File

@@ -388,6 +388,9 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
recognizer_config.hotwords_score =
SHERPA_ONNX_OR(config->hotwords_score, 1.5);
recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
if (config->model_config.debug) {
SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
}

View File

@@ -411,6 +411,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
/// Bonus score for each token in hotwords.
float hotwords_score;
const char *rule_fsts;
const char *rule_fars;
} SherpaOnnxOfflineRecognizerConfig;
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer

View File

@@ -137,11 +137,13 @@ class OfflineRecognizerConfig {
this.maxActivePaths = 4,
this.hotwordsFile = '',
this.hotwordsScore = 1.5,
this.ruleFsts = '',
this.ruleFars = '',
});
/// Returns a single-line description of this config that lists every field,
/// including [ruleFsts] and [ruleFars].
///
/// Only one `return` is kept: a stale earlier `return` that omitted the two
/// rule fields made the complete string below unreachable.
@override
String toString() {
  return 'OfflineRecognizerConfig(feat: $feat, model: $model, lm: $lm, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore, ruleFsts: $ruleFsts, ruleFars: $ruleFars)';
}
final FeatureConfig feat;
@@ -154,6 +156,9 @@ class OfflineRecognizerConfig {
final String hotwordsFile;
final double hotwordsScore;
final String ruleFsts;
final String ruleFars;
}
class OfflineRecognizerResult {
@@ -232,8 +237,13 @@ class OfflineRecognizer {
c.ref.hotwordsFile = config.hotwordsFile.toNativeUtf8();
c.ref.hotwordsScore = config.hotwordsScore;
c.ref.ruleFsts = config.ruleFsts.toNativeUtf8();
c.ref.ruleFars = config.ruleFars.toNativeUtf8();
final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr;
calloc.free(c.ref.ruleFars);
calloc.free(c.ref.ruleFsts);
calloc.free(c.ref.hotwordsFile);
calloc.free(c.ref.decodingMethod);
calloc.free(c.ref.lm.model);

View File

@@ -130,6 +130,9 @@ final class SherpaOnnxOfflineRecognizerConfig extends Struct {
@Float()
external double hotwordsScore;
external Pointer<Utf8> ruleFsts;
external Pointer<Utf8> ruleFars;
}
final class SherpaOnnxOnlineTransducerModelConfig extends Struct {

View File

@@ -9,6 +9,8 @@ public class OfflineRecognizerConfig {
private final int maxActivePaths;
private final String hotwordsFile;
private final float hotwordsScore;
private final String ruleFsts;
private final String ruleFars;
private OfflineRecognizerConfig(Builder builder) {
this.featConfig = builder.featConfig;
@@ -17,6 +19,8 @@ public class OfflineRecognizerConfig {
this.maxActivePaths = builder.maxActivePaths;
this.hotwordsFile = builder.hotwordsFile;
this.hotwordsScore = builder.hotwordsScore;
this.ruleFsts = builder.ruleFsts;
this.ruleFars = builder.ruleFars;
}
public static Builder builder() {
@@ -34,6 +38,8 @@ public class OfflineRecognizerConfig {
private int maxActivePaths = 4;
private String hotwordsFile = "";
private float hotwordsScore = 1.5f;
private String ruleFsts = "";
private String ruleFars = "";
public OfflineRecognizerConfig build() {
return new OfflineRecognizerConfig(this);
@@ -68,5 +74,15 @@ public class OfflineRecognizerConfig {
this.hotwordsScore = hotwordsScore;
return this;
}
/**
 * Sets the {@code ruleFsts} value that the built config will carry.
 *
 * <p>A {@code null} argument is stored as the empty string, which is also
 * this builder's default, so the built config never holds {@code null} here.
 *
 * @param ruleFsts value to store; {@code null} is treated as {@code ""}
 * @return this builder, to allow call chaining
 */
public Builder setRuleFsts(String ruleFsts) {
    // Keep the field non-null. NOTE(review): the native reader of this field
    // appears to dereference the String without a null check -- confirm.
    this.ruleFsts = (ruleFsts == null) ? "" : ruleFsts;
    return this;
}
/**
 * Sets the {@code ruleFars} value that the built config will carry.
 *
 * <p>A {@code null} argument is stored as the empty string, which is also
 * this builder's default, so the built config never holds {@code null} here.
 *
 * @param ruleFars value to store; {@code null} is treated as {@code ""}
 * @return this builder, to allow call chaining
 */
public Builder setRuleFars(String ruleFars) {
    // Keep the field non-null. NOTE(review): the native reader of this field
    // appears to dereference the String without a null check -- confirm.
    this.ruleFars = (ruleFars == null) ? "" : ruleFars;
    return this;
}
}
}

View File

@@ -34,6 +34,18 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
fid = env->GetFieldID(cls, "hotwordsScore", "F");
ans.hotwords_score = env->GetFloatField(config, fid);
fid = env->GetFieldID(cls, "ruleFsts", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.rule_fsts = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.rule_fars = p;
env->ReleaseStringUTFChars(s, p);
//---------- feat config ----------
fid = env->GetFieldID(cls, "featConfig",
"Lcom/k2fsa/sherpa/onnx/FeatureConfig;");

View File

@@ -53,6 +53,8 @@ data class OfflineRecognizerConfig(
var maxActivePaths: Int = 4,
var hotwordsFile: String = "",
var hotwordsScore: Float = 1.5f,
var ruleFsts: String = "",
var ruleFars: String = "",
)
class OfflineRecognizer(