Add CTC HLG decoding using OpenFst (#349)

This commit is contained in:
Fangjun Kuang
2023-10-08 11:32:39 +08:00
committed by GitHub
parent c12286fe5e
commit 407602445d
39 changed files with 964 additions and 56 deletions

48
cmake/eigen.cmake Normal file
View File

@@ -0,0 +1,48 @@
# Fetch Eigen 3.4.0 with FetchContent and add it to the build.
#
# A tarball already present in one of `possible_file_locations` is used
# instead of the network. Otherwise the primary URL is tried first and the
# mirror (eigen_URL2) is used as a fallback. The archive is always checked
# against eigen_HASH.
#
# Takes no arguments. Side effects: forces the cache entries BUILD_TESTING and
# EIGEN_BUILD_DOC to OFF and calls add_subdirectory() on the Eigen sources.
function(download_eigen)
  include(FetchContent)

  set(eigen_URL  "https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz")
  set(eigen_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/eigen-3.4.0.tar.gz")
  set(eigen_HASH "SHA256=8586084f71f9bde545ee7fa6d00288b264a2b7ac3607b974e54d13e7162c1c72")

  # If you don't have access to the Internet,
  # please pre-download eigen
  set(possible_file_locations
    $ENV{HOME}/Downloads/eigen-3.4.0.tar.gz
    ${PROJECT_SOURCE_DIR}/eigen-3.4.0.tar.gz
    ${PROJECT_BINARY_DIR}/eigen-3.4.0.tar.gz
    /tmp/eigen-3.4.0.tar.gz
    /star-fj/fangjun/download/github/eigen-3.4.0.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    # Quoted so that a path containing spaces or semicolons stays one argument.
    if(EXISTS "${f}")
      set(eigen_URL "${f}")
      file(TO_CMAKE_PATH "${eigen_URL}" eigen_URL)
      message(STATUS "Found local downloaded eigen: ${eigen_URL}")
      # A local archive needs no network fallback.
      set(eigen_URL2)
      break()
    endif()
  endforeach()

  # NOTE(review): BUILD_TESTING is a cache entry shared by the whole build
  # tree; forcing it here also affects every other project that reads it.
  set(BUILD_TESTING OFF CACHE BOOL "" FORCE)
  set(EIGEN_BUILD_DOC OFF CACHE BOOL "" FORCE)

  # Both URLs are passed so that the mirror is tried when the first one fails.
  # ${eigen_URL2} is intentionally unquoted: it expands to nothing when a local
  # archive was found above.
  FetchContent_Declare(eigen
    URL
      ${eigen_URL}
      ${eigen_URL2}
    URL_HASH ${eigen_HASH}
  )

  FetchContent_GetProperties(eigen)
  if(NOT eigen_POPULATED)
    message(STATUS "Downloading eigen from ${eigen_URL}")
    FetchContent_Populate(eigen)
  endif()
  message(STATUS "eigen is downloaded to ${eigen_SOURCE_DIR}")
  message(STATUS "eigen's binary dir is ${eigen_BINARY_DIR}")

  # EXCLUDE_FROM_ALL: only the Eigen targets that something depends on are built.
  add_subdirectory(${eigen_SOURCE_DIR} ${eigen_BINARY_DIR} EXCLUDE_FROM_ALL)
endfunction()

download_eigen()

78
cmake/kaldi-decoder.cmake Normal file
View File

@@ -0,0 +1,78 @@
# Fetch kaldi-decoder v0.2.3 with FetchContent, add it to the build, and
# register install rules for the libraries it provides.
#
# A tarball already present in one of `possible_file_locations` is used
# instead of the network. Otherwise the primary URL is tried first and the
# mirror (kaldi_decoder_URL2) is used as a fallback. The archive is always
# checked against kaldi_decoder_HASH.
#
# Takes no arguments. Reads SHERPA_ONNX_ENABLE_PYTHON, WIN32 and
# BUILD_SHARED_LIBS to choose the install destinations.
function(download_kaldi_decoder)
  include(FetchContent)

  set(kaldi_decoder_URL  "https://github.com/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.3.tar.gz")
  set(kaldi_decoder_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-decoder-0.2.3.tar.gz")
  set(kaldi_decoder_HASH "SHA256=98bf445a5b7961ccf3c3522317d900054eaadb6a9cdcf4531e7d9caece94a56d")

  # Python bindings of the sub-projects are switched off here.
  set(KALDI_DECODER_BUILD_PYTHON OFF CACHE BOOL "" FORCE)
  set(KALDIFST_BUILD_PYTHON OFF CACHE BOOL "" FORCE)

  # If you don't have access to the Internet,
  # please pre-download kaldi-decoder
  set(possible_file_locations
    $ENV{HOME}/Downloads/kaldi-decoder-0.2.3.tar.gz
    ${PROJECT_SOURCE_DIR}/kaldi-decoder-0.2.3.tar.gz
    ${PROJECT_BINARY_DIR}/kaldi-decoder-0.2.3.tar.gz
    /tmp/kaldi-decoder-0.2.3.tar.gz
    /star-fj/fangjun/download/github/kaldi-decoder-0.2.3.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    # Quoted so that a path containing spaces or semicolons stays one argument.
    if(EXISTS "${f}")
      set(kaldi_decoder_URL "${f}")
      file(TO_CMAKE_PATH "${kaldi_decoder_URL}" kaldi_decoder_URL)
      message(STATUS "Found local downloaded kaldi-decoder: ${kaldi_decoder_URL}")
      # A local archive needs no network fallback.
      set(kaldi_decoder_URL2)
      break()
    endif()
  endforeach()

  # ${kaldi_decoder_URL2} is intentionally unquoted: it expands to nothing when
  # a local archive was found above.
  FetchContent_Declare(kaldi_decoder
    URL
      ${kaldi_decoder_URL}
      ${kaldi_decoder_URL2}
    URL_HASH ${kaldi_decoder_HASH}
  )

  FetchContent_GetProperties(kaldi_decoder)
  if(NOT kaldi_decoder_POPULATED)
    message(STATUS "Downloading kaldi-decoder from ${kaldi_decoder_URL}")
    FetchContent_Populate(kaldi_decoder)
  endif()
  message(STATUS "kaldi-decoder is downloaded to ${kaldi_decoder_SOURCE_DIR}")
  message(STATUS "kaldi-decoder's binary dir is ${kaldi_decoder_BINARY_DIR}")

  # Directory-scoped include path, kept as in the original for targets that
  # do not link against kaldi-decoder-core.
  include_directories(${kaldi_decoder_SOURCE_DIR})
  add_subdirectory(${kaldi_decoder_SOURCE_DIR} ${kaldi_decoder_BINARY_DIR} EXCLUDE_FROM_ALL)

  # The variable populated by FetchContent is kaldi_decoder_SOURCE_DIR (with an
  # underscore). The hyphenated spelling is never set and would expand to
  # nothing, leaving "/" as the include directory.
  target_include_directories(kaldi-decoder-core
    INTERFACE
      ${kaldi_decoder_SOURCE_DIR}/
  )

  set(kaldi_decoder_install_targets
    kaldi-decoder-core
    kaldifst_core
    fst
  )

  # NOTE(review): the ".." destination for Python builds on Windows presumably
  # places the libraries next to the Python package - confirm with the
  # packaging scripts.
  if(SHERPA_ONNX_ENABLE_PYTHON AND WIN32)
    install(TARGETS ${kaldi_decoder_install_targets} DESTINATION ..)
  else()
    install(TARGETS ${kaldi_decoder_install_targets} DESTINATION lib)
  endif()

  # Shared-library builds on Windows additionally install into bin.
  if(WIN32 AND BUILD_SHARED_LIBS)
    install(TARGETS ${kaldi_decoder_install_targets} DESTINATION bin)
  endif()
endfunction()

download_kaldi_decoder()

62
cmake/kaldifst.cmake Normal file
View File

@@ -0,0 +1,62 @@
# Fetch kaldifst v1.7.6 with FetchContent, add it to the build, and rename the
# produced libraries with a "sherpa-onnx-" prefix.
#
# A tarball already present in one of `possible_file_locations` is used
# instead of the network. Otherwise the primary URL is tried first and the
# mirror (kaldifst_URL2) is used as a fallback. The archive is always checked
# against kaldifst_HASH.
#
# Takes no arguments. Side effects: forces KALDIFST_BUILD_TESTS and
# KALDIFST_BUILD_PYTHON to OFF in the cache, and modifies the include
# directories and OUTPUT_NAME of the `kaldifst_core` and `fst` targets.
function(download_kaldifst)
  include(FetchContent)

  set(kaldifst_URL  "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.6.tar.gz")
  set(kaldifst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/kaldifst-1.7.6.tar.gz")
  set(kaldifst_HASH "SHA256=79280c0bb08b5ed1a2ab7c21320a2b071f1f0eb10d2f047e8d6f027f0d32b4d2")

  # If you don't have access to the Internet,
  # please pre-download kaldifst
  set(possible_file_locations
    $ENV{HOME}/Downloads/kaldifst-1.7.6.tar.gz
    ${PROJECT_SOURCE_DIR}/kaldifst-1.7.6.tar.gz
    ${PROJECT_BINARY_DIR}/kaldifst-1.7.6.tar.gz
    /tmp/kaldifst-1.7.6.tar.gz
    /star-fj/fangjun/download/github/kaldifst-1.7.6.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    # Quoted so that a path containing spaces or semicolons stays one argument.
    if(EXISTS "${f}")
      set(kaldifst_URL "${f}")
      file(TO_CMAKE_PATH "${kaldifst_URL}" kaldifst_URL)
      message(STATUS "Found local downloaded kaldifst: ${kaldifst_URL}")
      # A local archive needs no network fallback.
      set(kaldifst_URL2)
      break()
    endif()
  endforeach()

  set(KALDIFST_BUILD_TESTS OFF CACHE BOOL "" FORCE)
  set(KALDIFST_BUILD_PYTHON OFF CACHE BOOL "" FORCE)

  # Both URLs are passed so that the mirror is tried when the first one fails.
  # ${kaldifst_URL2} is intentionally unquoted: it expands to nothing when a
  # local archive was found above.
  FetchContent_Declare(kaldifst
    URL
      ${kaldifst_URL}
      ${kaldifst_URL2}
    URL_HASH ${kaldifst_HASH}
  )

  FetchContent_GetProperties(kaldifst)
  if(NOT kaldifst_POPULATED)
    message(STATUS "Downloading kaldifst from ${kaldifst_URL}")
    FetchContent_Populate(kaldifst)
  endif()
  message(STATUS "kaldifst is downloaded to ${kaldifst_SOURCE_DIR}")
  message(STATUS "kaldifst's binary dir is ${kaldifst_BINARY_DIR}")

  # Make kaldifst's own CMake modules findable while its tree is configured.
  list(APPEND CMAKE_MODULE_PATH ${kaldifst_SOURCE_DIR}/cmake)

  add_subdirectory(${kaldifst_SOURCE_DIR} ${kaldifst_BINARY_DIR} EXCLUDE_FROM_ALL)

  target_include_directories(kaldifst_core
    PUBLIC
      ${kaldifst_SOURCE_DIR}/
  )

  # NOTE(review): openfst_SOURCE_DIR is not assigned anywhere in this function.
  # Presumably openfst is fetched by kaldifst's own CMake code - confirm. If
  # the variable is not visible in this scope, ask FetchContent for the source
  # directory it recorded, and skip the include path when it is unknown rather
  # than adding "/src/include".
  if(NOT openfst_SOURCE_DIR)
    FetchContent_GetProperties(openfst)
  endif()
  if(openfst_SOURCE_DIR)
    target_include_directories(fst
      PUBLIC
        ${openfst_SOURCE_DIR}/src/include
    )
  endif()

  # The output names set here are the ones the pkg-config file must list
  # (-lsherpa-onnx-kaldifst-core, -lsherpa-onnx-fst).
  set_target_properties(kaldifst_core PROPERTIES OUTPUT_NAME "sherpa-onnx-kaldifst-core")
  set_target_properties(fst PROPERTIES OUTPUT_NAME "sherpa-onnx-fst")
endfunction()

download_kaldifst()

View File

@@ -13,4 +13,4 @@ Cflags: -I"${includedir}"
# Note: -lcargs is required only for the following file
# https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
# We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-native-fbank-core -lcargs -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lonnxruntime -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lcargs -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@