diff --git a/scripts/nemo/canary/test_180m_flash.py b/scripts/nemo/canary/test_180m_flash.py index 8654d13d..38fd3c43 100755 --- a/scripts/nemo/canary/test_180m_flash.py +++ b/scripts/nemo/canary/test_180m_flash.py @@ -228,8 +228,8 @@ def main(): features = compute_features(audio, fbank) if model.normalize_type != "": assert model.normalize_type == "per_feature", model.normalize_type - mean = features.mean(axis=1, keepdims=True) - stddev = features.std(axis=1, keepdims=True) + 1e-5 + mean = features.mean(axis=0, keepdims=True) + stddev = features.std(axis=0, keepdims=True) + 1e-5 features = (features - mean) / stddev features = np.expand_dims(features, axis=0) diff --git a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-ctc-non-streaming.py b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-ctc-non-streaming.py index 9eb91c88..8dbb1d02 100755 --- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-ctc-non-streaming.py +++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-ctc-non-streaming.py @@ -140,8 +140,8 @@ def main(): if model.normalize_type != "": assert model.normalize_type == "per_feature", model.normalize_type features = torch.from_numpy(features) - mean = features.mean(dim=1, keepdims=True) - stddev = features.std(dim=1, keepdims=True) + 1e-5 + mean = features.mean(dim=0, keepdims=True) + stddev = features.std(dim=0, keepdims=True) + 1e-5 features = (features - mean) / stddev features = features.numpy() diff --git a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-transducer-non-streaming.py b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-transducer-non-streaming.py index b0d23b5a..df2b6b97 100755 --- a/scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-transducer-non-streaming.py +++ b/scripts/nemo/fast-conformer-hybrid-transducer-ctc/test-onnx-transducer-non-streaming.py @@ -184,12 +184,7 @@ class OnnxModel: target = torch.tensor([[token]], dtype=torch.int32).numpy() target_len = torch.tensor([1], dtype=torch.int32).numpy() - ( - decoder_out, - decoder_out_length, - state0_next, - state1_next, - ) = self.decoder.run( + (decoder_out, decoder_out_length, state0_next, state1_next,) = self.decoder.run( [ self.decoder.get_outputs()[0].name, self.decoder.get_outputs()[1].name, @@ -267,8 +262,8 @@ def main(): if model.normalize_type != "": assert model.normalize_type == "per_feature", model.normalize_type features = torch.from_numpy(features) - mean = features.mean(dim=1, keepdims=True) - stddev = features.std(dim=1, keepdims=True) + 1e-5 + mean = features.mean(dim=0, keepdims=True) + stddev = features.std(dim=0, keepdims=True) + 1e-5 features = (features - mean) / stddev features = features.numpy() print(audio.shape) diff --git a/scripts/nemo/parakeet-tdt-0.6b-v2/test_onnx.py b/scripts/nemo/parakeet-tdt-0.6b-v2/test_onnx.py index 36ab4740..f5d2d0c7 100755 --- a/scripts/nemo/parakeet-tdt-0.6b-v2/test_onnx.py +++ b/scripts/nemo/parakeet-tdt-0.6b-v2/test_onnx.py @@ -233,8 +233,8 @@ def main(): if model.normalize_type != "": assert model.normalize_type == "per_feature", model.normalize_type features = torch.from_numpy(features) - mean = features.mean(dim=1, keepdims=True) - stddev = features.std(dim=1, keepdims=True) + 1e-5 + mean = features.mean(dim=0, keepdims=True) + stddev = features.std(dim=0, keepdims=True) + 1e-5 features = (features - mean) / stddev features = features.numpy() print(audio.shape)