Update README.md

README.md CHANGED
@@ -236,15 +236,13 @@ transcription.
 >>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
 >>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
 
->>> decoder_input_ids = processor.tokenizer.encode("<|startoftranscript|><|fr|><|transcribe|><|notimestamps|>", return_tensors="pt")
-
 >>> # load dummy dataset and read soundfiles
 >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
 >>> ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
 >>> input_speech = next(iter(ds))["audio"]["array"]
 >>> # tokenize
 >>> input_features = processor(input_speech, return_tensors="pt").input_features
->>> predicted_ids = model.generate(input_features, decoder_input_ids=decoder_input_ids)
+>>> predicted_ids = model.generate(input_features)
 >>> transcription = processor.batch_decode(predicted_ids)
 ['<|startoftranscript|><|fr|><|transcribe|><|notimestamps|> Un vrai travail intéressant va enfin être mené sur ce sujet.<|endoftext|>']
 
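With this change, the transcription example no longer builds `decoder_input_ids` by hand with `tokenizer.encode`; calling `generate` with only the input features lets Whisper fall back to its default prompt. A minimal self-contained version of the updated snippet might look like the sketch below; the import lines are an assumption, since the diff shows only the doctest body.

```python
# Minimal sketch of the updated transcription example (imports assumed,
# since the README diff shows only the doctest body).
from datasets import Audio, load_dataset
from transformers import WhisperForConditionalGeneration, WhisperProcessor

processor = WhisperProcessor.from_pretrained("openai/whisper-large")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")

# Stream one French Common Voice sample, resampled to Whisper's 16 kHz.
ds = load_dataset("common_voice", "fr", split="test", streaming=True)
ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
input_speech = next(iter(ds))["audio"]["array"]

# Convert the waveform to log-Mel features; generate without a decoder
# prompt so the model uses its default transcription behaviour.
input_features = processor(input_speech, return_tensors="pt").input_features
predicted_ids = model.generate(input_features)
print(processor.batch_decode(predicted_ids))
```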
@@ -266,15 +264,15 @@ The "<|translate|>" is used as the first decoder input token to specify the translation task.
 >>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
 >>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
 
->>> decoder_input_ids = processor.tokenizer.encode("<|startoftranscript|><|fr|><|translate|><|notimestamps|>", return_tensors="pt")
-
 >>> # load dummy dataset and read soundfiles
 >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
 >>> ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
 >>> input_speech = next(iter(ds))["audio"]["array"]
 >>> # tokenize
 >>> input_features = processor(input_speech, return_tensors="pt").input_features
->>> predicted_ids = model.generate(input_features, decoder_input_ids=decoder_input_ids)
+>>> forced_decoder_ids = processor.get_decoder_prompt_ids(language="fr", task="translate")
+
+>>> predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
 >>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
 [' A real interesting work will be done on this subject.']
 ```
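In the updated translation example, `get_decoder_prompt_ids` returns a list of (position, token_id) pairs that `generate` forces at the start of decoding, reproducing the `<|fr|><|translate|><|notimestamps|>` prompt the removed lines encoded by hand. A minimal sketch to inspect that correspondence, assuming the same processor as above:

```python
# Sketch: check that get_decoder_prompt_ids reproduces the hand-built prompt.
from transformers import WhisperProcessor

processor = WhisperProcessor.from_pretrained("openai/whisper-large")

# A list of (position, token_id) pairs; position 0 is left to the
# <|startoftranscript|> token that generate() emits on its own.
forced_decoder_ids = processor.get_decoder_prompt_ids(language="fr", task="translate")

# Decoding the forced ids should give back the special tokens the old
# example passed to tokenizer.encode.
print(processor.tokenizer.convert_ids_to_tokens([tid for _, tid in forced_decoder_ids]))
```

Passing these ids to `generate` pins the language and task; omitting them, as in the transcription hunk, leaves the choice to the model.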