Support GigaAM transducer V2 (#2136)
This commit is contained in:
@@ -20,7 +20,7 @@ def create_fbank():
|
||||
opts.frame_opts.window_type = "hann"
|
||||
|
||||
# Even though GigaAM uses 400 for fft, here we use 512
|
||||
# since kaldi-native-fbank only support fft for power of 2.
|
||||
# since kaldi-native-fbank only supports fft for power of 2.
|
||||
opts.frame_opts.round_to_power_of_two = True
|
||||
|
||||
opts.mel_opts.low_freq = 0
|
||||
@@ -166,12 +166,7 @@ class OnnxModel:
|
||||
target = torch.tensor([[token]], dtype=torch.int32).numpy()
|
||||
target_len = torch.tensor([1], dtype=torch.int32).numpy()
|
||||
|
||||
(
|
||||
decoder_out,
|
||||
decoder_out_length,
|
||||
state0_next,
|
||||
state1_next,
|
||||
) = self.decoder.run(
|
||||
(decoder_out, decoder_out_length, state0_next, state1_next,) = self.decoder.run(
|
||||
[
|
||||
self.decoder.get_outputs()[0].name,
|
||||
self.decoder.get_outputs()[1].name,
|
||||
@@ -213,8 +208,12 @@ def main():
|
||||
id2token = dict()
|
||||
with open("./tokens.txt", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
t, idx = line.split()
|
||||
id2token[int(idx)] = t
|
||||
fields = line.split()
|
||||
if len(fields) == 1:
|
||||
id2token[int(fields[0])] = " "
|
||||
else:
|
||||
t, idx = fields
|
||||
id2token[int(idx)] = t
|
||||
|
||||
fbank = create_fbank()
|
||||
audio, sample_rate = sf.read("./example.wav", dtype="float32", always_2d=True)
|
||||
|
||||
Reference in New Issue
Block a user