Support Zipformer transducer ASR with whisper features. (#2321)
Adds support for Zipformer transducer ASR models that use Whisper-style features by introducing a new feature flag, parsing metadata, and integrating per-chunk normalization.
- Introduce UseWhisperFeature in the model interface and the Zipformer implementation.
- Parse the "feature" metadata to set the Whisper flag and wire it into the recognizer.
- Update the feature extraction logic to handle Whisper filterbanks with early returns.
This commit is contained in:
@@ -52,6 +52,8 @@ class OnlineZipformer2TransducerModel : public OnlineTransducerModel {
|
||||
// Reports the value currently stored in vocab_size_.
int32_t VocabSize() const override {
  return vocab_size_;
}
|
||||
// Hands back the allocator_ pointer held by this model.
OrtAllocator *Allocator() override {
  return allocator_;
}
|
||||
|
||||
// Reports the use_whisper_feature_ flag (false unless it has been set).
bool UseWhisperFeature() const override {
  return use_whisper_feature_;
}
|
||||
|
||||
private:
|
||||
void InitEncoder(void *model_data, size_t model_data_length);
|
||||
void InitDecoder(void *model_data, size_t model_data_length);
|
||||
@@ -103,6 +105,10 @@ class OnlineZipformer2TransducerModel : public OnlineTransducerModel {
|
||||
int32_t context_size_ = 0;
|
||||
int32_t vocab_size_ = 0;
|
||||
int32_t feature_dim_ = 80;
|
||||
|
||||
// Whether the model uses Whisper-style features, e.g., the models from
|
||||
// https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#streaming-with-ctc-head
|
||||
bool use_whisper_feature_ = false;
|
||||
};
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
Reference in New Issue
Block a user