"""Gradio demo that transcribes Faroese speech with a wav2vec2 model.

Audio is taken either from the browser (recording or file upload) or from an
m3u8 stream URL that is downloaded with ffmpeg.  Each new transcription is
appended to the text held in a ``gr.State`` value until the output is reset.
"""

import logging
import os
import tempfile
import time
from urllib.parse import urlparse

import ffmpeg  # Make sure it's ffmpeg-python
import gradio as gr
import torch
from transformers import pipeline

logger = logging.getLogger(__name__)

MODEL_ID = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
CHUNK_LENGTH_S = 50
# The URL comes from a public text box, so only plain web URLs are handed to
# ffmpeg; local paths and other ffmpeg protocols are rejected.
ALLOWED_URL_SCHEMES = ("http", "https")

# Check if GPU is available
use_gpu = torch.cuda.is_available()

# Pass ``device`` only when a GPU is present, so the CPU path keeps the
# library's own default device selection (as the original code did).
_device_kwargs = {"device": 0} if use_gpu else {}
p = pipeline("automatic-speech-recognition", model=MODEL_ID, **_device_kwargs)


def _remove_quietly(path):
    """Delete *path*; log instead of raising if the file cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        logger.warning("Could not remove temporary file %s", path, exc_info=True)


def extract_audio_from_m3u8(url):
    """Download the audio of an m3u8 stream into a fresh temporary file.

    Args:
        url: HTTP(S) address of the stream playlist.

    Returns:
        Path of the created ``.aac`` file.  The caller owns the file and is
        responsible for deleting it.

    Raises:
        ValueError: If *url* does not use an allowed scheme.
        Exception: Any error raised by ffmpeg-python while running ffmpeg is
            propagated after the partial output file has been removed.
    """
    url = url.strip()
    scheme = urlparse(url).scheme.lower()
    if scheme not in ALLOWED_URL_SCHEMES:
        raise ValueError(f"Unsupported URL scheme: {scheme!r}")

    # A unique file per request: a fixed name would let concurrent requests
    # overwrite each other's audio.
    fd, output_file = tempfile.mkstemp(suffix=".aac")
    os.close(fd)
    try:
        # mkstemp already created the file, hence overwrite_output=True.
        ffmpeg.input(url).output(output_file).run(overwrite_output=True)
    except Exception:
        _remove_quietly(output_file)
        raise
    return output_file


def transcribe_function(audio, state, m3u8_url):
    """Transcribe the given audio and append the text to the running output.

    Args:
        audio: File path of the recorded/uploaded audio, or a falsy value
            when nothing was provided.
        state: Text accumulated by previous transcriptions.
        m3u8_url: Optional stream URL; when non-blank it takes precedence
            over *audio*.

    Returns:
        A dict mapping the ``state_var`` and ``transcription_var`` components
        to their new values.  On failure the state is left unchanged and the
        transcription box shows an error message.
    """
    downloaded = None
    stream_url = (m3u8_url or "").strip()
    if stream_url:
        try:
            downloaded = extract_audio_from_m3u8(stream_url)
        except Exception:
            logger.exception("Could not extract audio from %s", stream_url)
            return {
                transcription_var: (
                    "An error occurred while extracting audio from the m3u8 URL."
                ),
                state_var: state,
            }
        audio = downloaded

    # If no audio is provided, return the current state
    if not audio:
        return {state_var: state, transcription_var: state}

    try:
        started = time.perf_counter()
        text = p(audio, chunk_length_s=CHUNK_LENGTH_S)["text"]
        logger.info("Transcribed %s in %.1fs", audio, time.perf_counter() - started)
    except Exception:
        logger.exception("Transcription failed for %s", audio)
        return {
            transcription_var: "An error occurred during transcription.",
            state_var: state,
        }
    finally:
        # Only files this function downloaded are deleted; uploads are left alone.
        if downloaded is not None:
            _remove_quietly(downloaded)

    state = f"{state}{text}\n"
    return {state_var: state, transcription_var: state}


def reset_output(transcription, state):
    """Reset the transcription box and the state to empty strings.

    Both arguments are ignored; they are accepted because the click handler
    is wired with these two components as inputs.
    """
    return "", ""


with gr.Blocks() as demo:
    state_var = gr.State("")

    with gr.Row():
        with gr.Column():
            # Single Audio component supporting both recording and file upload
            audio_input = gr.Audio(type="filepath", label="Audio (Record or Upload)")
            m3u8_url = gr.Textbox(label="m3u8 URL | E.g.: from kvf.fo or logting.fo")
        with gr.Column():
            # Use interactive=False to make the textbox read-only
            transcription_var = gr.Textbox(
                type="text", label="Transcription", interactive=False
            )

    with gr.Row():
        transcribe_button = gr.Button("Transcribe")
        reset_button = gr.Button("Reset output")

    transcribe_button.click(
        transcribe_function,
        [audio_input, state_var, m3u8_url],
        [transcription_var, state_var],
    )
    reset_button.click(
        reset_output,
        [transcription_var, state_var],
        [transcription_var, state_var],
    )


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    demo.launch()