# Kaiyeee's picture
# Create app.py
# 3d5c597 verified
import gradio as gr
import soundfile as sf
from transformers import AutoProcessor, pipeline
from optimum.intel.openvino import OVModelForSpeechSeq2Seq
# Load the Distil-Whisper checkpoint and its matching processor.
model_id = "distil-whisper/distil-large-v2"
# The processor supplies the tokenizer and feature extractor used by the
# pipeline created further down in this file.
processor = AutoProcessor.from_pretrained(model_id)
# export=True asks optimum-intel to convert the checkpoint to OpenVINO format
# while loading (per the optimum-intel docs) -- NOTE(review): this presumably
# repeats on every start-up unless a converted model is cached; confirm.
ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True)
# Cap generation at 128 newly generated tokens.
ov_model.generation_config.max_new_tokens = 128
# Wrap the OpenVINO model in a Hugging Face speech-recognition pipeline,
# reusing the tokenizer and feature extractor bundled in the processor.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=ov_model,
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    batch_size=16,
    chunk_length_s=15,  # chunk length in seconds for long-form audio
)
# Transcription function
def transcribe(audio):
    """Transcribe an audio file to text with the module-level ASR pipeline.

    Args:
        audio: Path to the audio file to transcribe (the Gradio input is
            declared with ``type="filepath"``), or ``None`` when nothing
            was submitted.

    Returns:
        The recognised text, or an empty string when ``audio`` is ``None``.
    """
    # Gradio hands over None when the user submits without providing audio;
    # reading it would raise, so answer with an empty transcript instead.
    if audio is None:
        return ""

    audio_array, sampling_rate = sf.read(audio, dtype="float32")

    # soundfile returns multi-channel audio as (frames, channels), but the
    # ASR pipeline only accepts a 1-D waveform: downmix to mono.
    if audio_array.ndim > 1:
        audio_array = audio_array.mean(axis=1)

    # Pass the file's real sampling rate along with the samples. A bare array
    # is assumed to already be at the model's rate, which silently produces
    # wrong transcripts for e.g. 44.1 kHz or 48 kHz recordings. With the dict
    # form the pipeline resamples as needed.
    result = pipe({"raw": audio_array, "sampling_rate": sampling_rate})
    return result["text"]
# Build the Gradio UI around transcribe(), then start serving it.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="🧠 Distil-Whisper + OpenVINO ASR",
    description="Upload audio to transcribe using Distil-Whisper accelerated with Intel OpenVINO.",
)
demo.launch()