Update kaldi-native-fbank. (#2259)
It now supports FFT sizes that are any even number, not only powers of 2.
This commit is contained in:
@@ -1,9 +1,9 @@
|
||||
function(download_kaldi_native_fbank)
|
||||
include(FetchContent)
|
||||
|
||||
set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.21.1.tar.gz")
|
||||
set(kaldi_native_fbank_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-native-fbank-1.21.1.tar.gz")
|
||||
set(kaldi_native_fbank_HASH "SHA256=37c1aa230b00fe062791d800d8fc50aa3de215918d3dce6440699e67275d859e")
|
||||
set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.21.2.tar.gz")
|
||||
set(kaldi_native_fbank_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-native-fbank-1.21.2.tar.gz")
|
||||
set(kaldi_native_fbank_HASH "SHA256=f4bd7d53fe8aeaecc4eda9680c72696bb86bf74e86371d81aacacd6f4ca3914d")
|
||||
|
||||
set(KALDI_NATIVE_FBANK_BUILD_TESTS OFF CACHE BOOL "" FORCE)
|
||||
set(KALDI_NATIVE_FBANK_BUILD_PYTHON OFF CACHE BOOL "" FORCE)
|
||||
@@ -12,11 +12,11 @@ function(download_kaldi_native_fbank)
|
||||
# If you don't have access to the Internet,
|
||||
# please pre-download kaldi-native-fbank
|
||||
set(possible_file_locations
|
||||
$ENV{HOME}/Downloads/kaldi-native-fbank-1.21.1.tar.gz
|
||||
${CMAKE_SOURCE_DIR}/kaldi-native-fbank-1.21.1.tar.gz
|
||||
${CMAKE_BINARY_DIR}/kaldi-native-fbank-1.21.1.tar.gz
|
||||
/tmp/kaldi-native-fbank-1.21.1.tar.gz
|
||||
/star-fj/fangjun/download/github/kaldi-native-fbank-1.21.1.tar.gz
|
||||
$ENV{HOME}/Downloads/kaldi-native-fbank-1.21.2.tar.gz
|
||||
${CMAKE_SOURCE_DIR}/kaldi-native-fbank-1.21.2.tar.gz
|
||||
${CMAKE_BINARY_DIR}/kaldi-native-fbank-1.21.2.tar.gz
|
||||
/tmp/kaldi-native-fbank-1.21.2.tar.gz
|
||||
/star-fj/fangjun/download/github/kaldi-native-fbank-1.21.2.tar.gz
|
||||
)
|
||||
|
||||
foreach(f IN LISTS possible_file_locations)
|
||||
|
||||
@@ -22,4 +22,4 @@ Cflags: -I"${includedir}"
|
||||
# Note: -lcargs is required only for the following file
|
||||
# https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
|
||||
# We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c
|
||||
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lonnxruntime -lssentencepiece_core -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_WITH_CARGS@ @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
|
||||
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lkissfft-float -lonnxruntime -lssentencepiece_core -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_WITH_CARGS@ @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
|
||||
|
||||
@@ -22,4 +22,4 @@ Cflags: -I"${includedir}"
|
||||
# Note: -lcargs is required only for the following file
|
||||
# https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
|
||||
# We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c
|
||||
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -lssentencepiece_core -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_WITH_CARGS@ @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
|
||||
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lkissfft-float -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -lssentencepiece_core -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_WITH_CARGS@ @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
|
||||
|
||||
@@ -18,9 +18,7 @@ def create_fbank():
|
||||
opts.frame_opts.preemph_coeff = 0
|
||||
opts.frame_opts.window_type = "hann"
|
||||
|
||||
# Even though GigaAM uses 400 for fft, here we use 512
|
||||
# since kaldi-native-fbank only supports fft for power of 2.
|
||||
opts.frame_opts.round_to_power_of_two = True
|
||||
opts.frame_opts.round_to_power_of_two = False
|
||||
|
||||
opts.mel_opts.low_freq = 0
|
||||
opts.mel_opts.high_freq = 8000
|
||||
|
||||
@@ -19,9 +19,7 @@ def create_fbank():
|
||||
opts.frame_opts.preemph_coeff = 0
|
||||
opts.frame_opts.window_type = "hann"
|
||||
|
||||
# Even though GigaAM uses 400 for fft, here we use 512
|
||||
# since kaldi-native-fbank only supports fft for power of 2.
|
||||
opts.frame_opts.round_to_power_of_two = True
|
||||
opts.frame_opts.round_to_power_of_two = False
|
||||
|
||||
opts.mel_opts.low_freq = 0
|
||||
opts.mel_opts.high_freq = 8000
|
||||
|
||||
@@ -197,6 +197,7 @@ class FeatureExtractor::Impl {
|
||||
opts_.frame_opts.remove_dc_offset = config_.remove_dc_offset;
|
||||
opts_.frame_opts.preemph_coeff = config_.preemph_coeff;
|
||||
opts_.frame_opts.window_type = config_.window_type;
|
||||
opts_.frame_opts.round_to_power_of_two = config_.round_to_power_of_two;
|
||||
|
||||
opts_.mel_opts.num_bins = config_.feature_dim;
|
||||
|
||||
@@ -216,6 +217,7 @@ class FeatureExtractor::Impl {
|
||||
mfcc_opts_.frame_opts.remove_dc_offset = config_.remove_dc_offset;
|
||||
mfcc_opts_.frame_opts.preemph_coeff = config_.preemph_coeff;
|
||||
mfcc_opts_.frame_opts.window_type = config_.window_type;
|
||||
mfcc_opts_.frame_opts.round_to_power_of_two = config_.round_to_power_of_two;
|
||||
|
||||
mfcc_opts_.mel_opts.num_bins = config_.feature_dim;
|
||||
|
||||
|
||||
@@ -79,6 +79,8 @@ struct FeatureExtractorConfig {
|
||||
|
||||
bool is_mfcc = false;
|
||||
|
||||
bool round_to_power_of_two = true;
|
||||
|
||||
std::string ToString() const;
|
||||
|
||||
void Register(ParseOptions *po);
|
||||
|
||||
@@ -109,6 +109,12 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
|
||||
config_.feat_config.preemph_coeff = 0;
|
||||
config_.feat_config.window_type = "hann";
|
||||
config_.feat_config.feature_dim = 64;
|
||||
|
||||
// see
|
||||
// https://github.com/salute-developers/GigaAM/blob/main/gigaam/preprocess.py#L68
|
||||
//
|
||||
// GigaAM uses n_fft 400
|
||||
config_.feat_config.round_to_power_of_two = false;
|
||||
} else {
|
||||
config_.feat_config.low_freq = 0;
|
||||
config_.feat_config.high_freq = 0;
|
||||
|
||||
@@ -156,6 +156,12 @@ class OfflineRecognizerTransducerNeMoImpl : public OfflineRecognizerImpl {
|
||||
config_.feat_config.preemph_coeff = 0;
|
||||
config_.feat_config.window_type = "hann";
|
||||
config_.feat_config.feature_dim = 64;
|
||||
|
||||
// see
|
||||
// https://github.com/salute-developers/GigaAM/blob/main/gigaam/preprocess.py#L68
|
||||
//
|
||||
// GigaAM uses n_fft 400
|
||||
config_.feat_config.round_to_power_of_two = false;
|
||||
} else {
|
||||
config_.feat_config.low_freq = 0;
|
||||
// config_.feat_config.high_freq = 8000;
|
||||
|
||||
Reference in New Issue
Block a user