Spaces:
Runtime error
Runtime error
| import tempfile | |
| import gradio as gr | |
| from gtts import gTTS | |
| import inference_script | |
| import vit_gpt2 | |
| import os | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
def process_image_and_generate_output(image, model_selection):
    """Caption an image with the selected model and read the caption aloud.

    Args:
        image: The value delivered by the Gradio ``"image"`` input; it is
            passed unchanged to the selected captioning backend.
        model_selection: Label of the radio option the user picked. It must
            equal one of the two model labels compared against below.

    Returns:
        A ``(caption, audio)`` tuple. ``audio`` is the raw MP3 bytes written
        by gTTS, or ``None`` when the selection is not recognised or the
        caption contains nothing to speak.
    """
    if model_selection == ('Basic Model (Trained only for 15 epochs without any hyperparameter tuning, utilizing '
                           'inception v3)'):
        result = inference_script.evaluate(image)
        # Drop the last space-separated token (presumably the end-of-sequence
        # marker emitted by the decoder -- confirm against inference_script).
        pred_caption = ' '.join(result).rsplit(' ', 1)[0]
        # Remove unknown-word placeholders, then collapse the gaps they leave
        # behind so the caption has no doubled or trailing spaces.
        pred_caption = ' '.join(pred_caption.replace('<unk>', '').split())
    elif model_selection == 'ViT-GPT2 (SOTA model for Image captioning)':
        result = vit_gpt2.predict_step(image)
        pred_caption = result[0]
    else:
        return "Invalid model selection", None

    # gTTS refuses empty text, so skip synthesis instead of failing the request.
    if not pred_caption or not pred_caption.strip():
        return pred_caption, None

    # Generate speech from the caption
    tts = gTTS(text=pred_caption, lang='en', slow=False)

    # Reserve a unique path and close the handle right away, so gTTS can
    # reopen the file by name on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
        audio_file_path = temp_audio.name

    try:
        tts.save(audio_file_path)
        # Read the audio file
        with open(audio_file_path, "rb") as f:
            audio_content = f.read()
    finally:
        # Clean up the temporary audio file even if synthesis or reading failed.
        os.unlink(audio_file_path)

    return pred_caption, audio_content
# Build the web UI: an image upload plus a model selector as inputs, and the
# generated caption (text) together with its spoken version (audio) as outputs.
iface = gr.Interface(
    fn=process_image_and_generate_output,
    inputs=[
        "image",
        gr.Radio(
            [
                "Basic Model (Trained only for 15 epochs without any "
                "hyperparameter tuning, utilizing inception v3)",
                "ViT-GPT2 (SOTA model for Image captioning)",
            ],
            label="Choose Model",
        ),
    ],
    outputs=["text", "audio"],
    title="Eye For Blind | Image Captioning & TTS",
    description="Generate a caption for the uploaded image and convert it to speech.",
)

# Start serving the interface.
iface.launch()