Commit
·
705a1ad
1
Parent(s):
91121b0
add code
Browse files
README.md
CHANGED
|
@@ -48,6 +48,46 @@ Download the model by cloning the repository:
|
|
| 48 |
git clone https://huggingface.co/Intel/whisper-large-int8-static
|
| 49 |
```
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
## Metrics (Model Performance):
|
| 52 |
| Model | Model Size (GB) | wer |
|
| 53 |
|---|:---:|:---:|
|
|
|
|
| 48 |
git clone https://huggingface.co/Intel/whisper-large-int8-static
|
| 49 |
```
|
| 50 |
|
| 51 |
+
Evaluate the model with the code below:
|
| 52 |
+
```python
|
| 53 |
+
import os
|
| 54 |
+
from evaluate import load
|
| 55 |
+
from datasets import load_dataset
|
| 56 |
+
from transformers import WhisperForConditionalGeneration, WhisperProcessor, AutoConfig
|
| 57 |
+
|
| 58 |
+
model_name = 'openai/whisper-large'
|
| 59 |
+
model_path = 'whisper-large-int8-static'
|
| 60 |
+
processor = WhisperProcessor.from_pretrained(model_name)
|
| 61 |
+
model = WhisperForConditionalGeneration.from_pretrained(model_name)
|
| 62 |
+
config = AutoConfig.from_pretrained(model_name)
|
| 63 |
+
wer = load("wer")
|
| 64 |
+
librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test")
|
| 65 |
+
|
| 66 |
+
from optimum.onnxruntime import ORTModelForSpeechSeq2Seq
|
| 67 |
+
from transformers import PretrainedConfig
|
| 68 |
+
model_config = PretrainedConfig.from_pretrained(model_name)
|
| 69 |
+
predictions = []
|
| 70 |
+
references = []
|
| 71 |
+
sessions = ORTModelForSpeechSeq2Seq.load_model(
|
| 72 |
+
os.path.join(model_path, 'encoder_model.onnx'),
|
| 73 |
+
os.path.join(model_path, 'decoder_model.onnx'),
|
| 74 |
+
os.path.join(model_path, 'decoder_with_past_model.onnx'))
|
| 75 |
+
model = ORTModelForSpeechSeq2Seq(sessions[0], sessions[1], model_config, model_path, sessions[2])
|
| 76 |
+
for idx, batch in enumerate(librispeech_test_clean):
|
| 77 |
+
audio = batch["audio"]
|
| 78 |
+
input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features
|
| 79 |
+
reference = processor.tokenizer._normalize(batch['text'])
|
| 80 |
+
references.append(reference)
|
| 81 |
+
predicted_ids = model.generate(input_features)[0]
|
| 82 |
+
transcription = processor.decode(predicted_ids)
|
| 83 |
+
prediction = processor.tokenizer._normalize(transcription)
|
| 84 |
+
predictions.append(prediction)
|
| 85 |
+
wer_result = wer.compute(references=references, predictions=predictions)
|
| 86 |
+
print(f"Result wer: {wer_result * 100}")
|
| 87 |
+
accuracy = 1 - wer_result
|
| 88 |
+
print("Accuracy: %.5f" % accuracy)
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
## Metrics (Model Performance):
|
| 92 |
| Model | Model Size (GB) | wer |
|
| 93 |
|---|:---:|:---:|
|