Add dart API for SenseVoice (#1159)
This commit is contained in:
@@ -13,14 +13,15 @@ namespace sherpa_onnx {
|
||||
|
||||
void CudaConfig::Register(ParseOptions *po) {
|
||||
po->Register("cuda-cudnn-conv-algo-search", &cudnn_conv_algo_search,
|
||||
"CuDNN convolution algrorithm search");
|
||||
"CuDNN convolution algrorithm search");
|
||||
}
|
||||
|
||||
bool CudaConfig::Validate() const {
|
||||
if (cudnn_conv_algo_search < 1 || cudnn_conv_algo_search > 3) {
|
||||
SHERPA_ONNX_LOGE("cudnn_conv_algo_search: '%d' is not a valid option."
|
||||
"Options : [1,3]. Check OnnxRT docs",
|
||||
cudnn_conv_algo_search);
|
||||
SHERPA_ONNX_LOGE(
|
||||
"cudnn_conv_algo_search: '%d' is not a valid option."
|
||||
"Options : [1,3]. Check OnnxRT docs",
|
||||
cudnn_conv_algo_search);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@@ -37,41 +38,41 @@ std::string CudaConfig::ToString() const {
|
||||
|
||||
void TensorrtConfig::Register(ParseOptions *po) {
|
||||
po->Register("trt-max-workspace-size", &trt_max_workspace_size,
|
||||
"Set TensorRT EP GPU memory usage limit.");
|
||||
"Set TensorRT EP GPU memory usage limit.");
|
||||
po->Register("trt-max-partition-iterations", &trt_max_partition_iterations,
|
||||
"Limit partitioning iterations for model conversion.");
|
||||
"Limit partitioning iterations for model conversion.");
|
||||
po->Register("trt-min-subgraph-size", &trt_min_subgraph_size,
|
||||
"Set minimum size for subgraphs in partitioning.");
|
||||
"Set minimum size for subgraphs in partitioning.");
|
||||
po->Register("trt-fp16-enable", &trt_fp16_enable,
|
||||
"Enable FP16 precision for faster performance.");
|
||||
"Enable FP16 precision for faster performance.");
|
||||
po->Register("trt-detailed-build-log", &trt_detailed_build_log,
|
||||
"Enable detailed logging of build steps.");
|
||||
"Enable detailed logging of build steps.");
|
||||
po->Register("trt-engine-cache-enable", &trt_engine_cache_enable,
|
||||
"Enable caching of TensorRT engines.");
|
||||
"Enable caching of TensorRT engines.");
|
||||
po->Register("trt-timing-cache-enable", &trt_timing_cache_enable,
|
||||
"Enable use of timing cache to speed up builds.");
|
||||
"Enable use of timing cache to speed up builds.");
|
||||
po->Register("trt-engine-cache-path", &trt_engine_cache_path,
|
||||
"Set path to store cached TensorRT engines.");
|
||||
"Set path to store cached TensorRT engines.");
|
||||
po->Register("trt-timing-cache-path", &trt_timing_cache_path,
|
||||
"Set path for storing timing cache.");
|
||||
"Set path for storing timing cache.");
|
||||
po->Register("trt-dump-subgraphs", &trt_dump_subgraphs,
|
||||
"Dump optimized subgraphs for debugging.");
|
||||
"Dump optimized subgraphs for debugging.");
|
||||
}
|
||||
|
||||
bool TensorrtConfig::Validate() const {
|
||||
if (trt_max_workspace_size < 0) {
|
||||
SHERPA_ONNX_LOGE("trt_max_workspace_size: %lld is not valid.",
|
||||
trt_max_workspace_size);
|
||||
SHERPA_ONNX_LOGE("trt_max_workspace_size: %ld is not valid.",
|
||||
trt_max_workspace_size);
|
||||
return false;
|
||||
}
|
||||
if (trt_max_partition_iterations < 0) {
|
||||
SHERPA_ONNX_LOGE("trt_max_partition_iterations: %d is not valid.",
|
||||
trt_max_partition_iterations);
|
||||
trt_max_partition_iterations);
|
||||
return false;
|
||||
}
|
||||
if (trt_min_subgraph_size < 0) {
|
||||
SHERPA_ONNX_LOGE("trt_min_subgraph_size: %d is not valid.",
|
||||
trt_min_subgraph_size);
|
||||
trt_min_subgraph_size);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -83,23 +84,19 @@ std::string TensorrtConfig::ToString() const {
|
||||
|
||||
os << "TensorrtConfig(";
|
||||
os << "trt_max_workspace_size=" << trt_max_workspace_size << ", ";
|
||||
os << "trt_max_partition_iterations="
|
||||
<< trt_max_partition_iterations << ", ";
|
||||
os << "trt_max_partition_iterations=" << trt_max_partition_iterations << ", ";
|
||||
os << "trt_min_subgraph_size=" << trt_min_subgraph_size << ", ";
|
||||
os << "trt_fp16_enable=\""
|
||||
<< (trt_fp16_enable? "True" : "False") << "\", ";
|
||||
os << "trt_fp16_enable=\"" << (trt_fp16_enable ? "True" : "False") << "\", ";
|
||||
os << "trt_detailed_build_log=\""
|
||||
<< (trt_detailed_build_log? "True" : "False") << "\", ";
|
||||
<< (trt_detailed_build_log ? "True" : "False") << "\", ";
|
||||
os << "trt_engine_cache_enable=\""
|
||||
<< (trt_engine_cache_enable? "True" : "False") << "\", ";
|
||||
os << "trt_engine_cache_path=\""
|
||||
<< trt_engine_cache_path.c_str() << "\", ";
|
||||
<< (trt_engine_cache_enable ? "True" : "False") << "\", ";
|
||||
os << "trt_engine_cache_path=\"" << trt_engine_cache_path.c_str() << "\", ";
|
||||
os << "trt_timing_cache_enable=\""
|
||||
<< (trt_timing_cache_enable? "True" : "False") << "\", ";
|
||||
os << "trt_timing_cache_path=\""
|
||||
<< trt_timing_cache_path.c_str() << "\",";
|
||||
os << "trt_dump_subgraphs=\""
|
||||
<< (trt_dump_subgraphs? "True" : "False") << "\" )";
|
||||
<< (trt_timing_cache_enable ? "True" : "False") << "\", ";
|
||||
os << "trt_timing_cache_path=\"" << trt_timing_cache_path.c_str() << "\",";
|
||||
os << "trt_dump_subgraphs=\"" << (trt_dump_subgraphs ? "True" : "False")
|
||||
<< "\" )";
|
||||
return os.str();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user