Spaces:

Kaiyeee
/

MultiModal_Chat_Using_OpenVino_and_Whisper

Sleeping

Create app.py

3d5c597 verified 7 months ago

1.04 kB

	import gradio as gr
	import soundfile as sf
	from transformers import AutoProcessor, pipeline
	from optimum.intel.openvino import OVModelForSpeechSeq2Seq

	# Checkpoint identifier shared by the model and its processor.
	model_id = "distil-whisper/distil-large-v2"

	# Load the speech-to-text model through the OpenVINO wrapper
	# (export=True is passed so the checkpoint is exported on load), and cap
	# the number of tokens generated per call.
	ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True)
	ov_model.generation_config.max_new_tokens = 128

	# The processor supplies the tokenizer and feature extractor that are
	# handed to the pipeline.
	processor = AutoProcessor.from_pretrained(model_id)

	# Speech-recognition pipeline built around the OpenVINO model.
	pipe = pipeline(
	    task="automatic-speech-recognition",
	    model=ov_model,
	    # Pre/post-processing components come from the processor loaded above.
	    feature_extractor=processor.feature_extractor,
	    tokenizer=processor.tokenizer,
	    # Chunking and batching settings for the audio passed in.
	    chunk_length_s=15,
	    batch_size=16,
	)

	# Transcription function
	def transcribe(audio):
	    """Transcribe an audio file with the module-level ASR pipeline.

	    Args:
	        audio: Path to the audio file, as supplied by the
	            ``gr.Audio(type="filepath")`` input, or ``None`` when no clip
	            was provided.

	    Returns:
	        The transcribed text (the ``"text"`` field of the pipeline result).

	    Raises:
	        gr.Error: If no audio was provided.
	    """
	    if audio is None:
	        # Submitting the form without a clip yields None; report that to
	        # the user instead of failing inside soundfile.
	        raise gr.Error("Please upload or record an audio clip first.")

	    audio_array, sampling_rate = sf.read(audio)

	    # soundfile returns a (frames, channels) array for multi-channel files,
	    # while the ASR pipeline accepts single-channel input only: down-mix.
	    if audio_array.ndim > 1:
	        audio_array = audio_array.mean(axis=1)

	    # A bare array is taken to already be at the model's sampling rate.
	    # Passing the file's real rate alongside the samples lets the pipeline
	    # resample when the two differ instead of silently mis-transcribing.
	    # NOTE(review): the pipeline's resampling path may need torchaudio
	    # installed -- confirm it is in the Space's requirements.
	    result = pipe({"raw": audio_array, "sampling_rate": sampling_rate})
	    return result["text"]

	# Build the web UI around transcribe(), then start serving it.
	demo = gr.Interface(
	    fn=transcribe,
	    inputs=gr.Audio(type="filepath"),
	    outputs="text",
	    title="🧠 Distil-Whisper + OpenVINO ASR",
	    description="Upload audio to transcribe using Distil-Whisper accelerated with Intel OpenVINO.",
	)
	demo.launch()