Spaces:

ojas121
/

speech_emotion_project

Running

App Files Community

ojas121 committed on Mar 11

Commit

a9f8ee6

verified ·

1 Parent(s): 17a00d3

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -24

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import streamlit as st
 import librosa
 import librosa.display
@@ -8,32 +10,31 @@ import wave
 import json
 from vosk import Model, KaldiRecognizer
 from transformers import pipeline
-import os
 from pydub import AudioSegment
 import noisereduce as nr
-import streamlit as st
-import subprocess
-try:
-    import librosa
-    st.write("✅ Librosa is installed successfully!")
-except ImportError:
-    st.write("❌ Librosa is missing! Installing now...")
-    subprocess.run(["pip", "install", "librosa"])
-    import librosa
-    st.write("✅ Librosa installed successfully!")
-# Load Vosk model
-MODEL_PATH = "vosk-model-small-en-us-0.15"
-if not os.path.exists(MODEL_PATH):
-    st.error("Vosk model not found! Please download and extract it.")
-    st.stop()
-model = Model(MODEL_PATH)
-# Streamlit UI
 st.title("🎙️ Speech Detection System using Mozilla Common Voice")
 st.write("Upload an audio file and get real-time speech-to-text, noise filtering, and emotion analysis.")
@@ -60,12 +61,12 @@ if uploaded_file:
     librosa.display.waveshow(y, sr=sr, ax=ax)
     st.pyplot(fig)
-    # Noise Reduction
     y_denoised = nr.reduce_noise(y=y, sr=sr)
     denoised_path = file_path.replace(".wav", "_denoised.wav")
     sf.write(denoised_path, y_denoised, sr)
-    # Speech-to-Text using Vosk
     def transcribe_audio(audio_path):
         wf = wave.open(audio_path, "rb")
         rec = KaldiRecognizer(model, wf.getframerate())
@@ -82,14 +83,13 @@ if uploaded_file:
     st.subheader("📝 Transcribed Text:")
     st.write(transcription)
-    # Emotion Detection
-    emotion_model = pipeline("audio-classification", model="superb/wav2vec2-large-xlsr-53")
     emotion_result = emotion_model(file_path)
     st.subheader("😊 Emotion Analysis:")
     st.write(emotion_result)
-    # Play original and denoised audio
     st.audio(file_path, format="audio/wav", start_time=0)
     st.subheader("🔊 Denoised Audio:")
     st.audio(denoised_path, format="audio/wav", start_time=0)

+import os
+import subprocess
 import streamlit as st
 import librosa
 import librosa.display
 import json
 from vosk import Model, KaldiRecognizer
 from transformers import pipeline
+from huggingface_hub import snapshot_download
 from pydub import AudioSegment
 import noisereduce as nr
+# ✅ Auto-Download Vosk Model (Speech-to-Text)
+VOSK_MODEL = "vosk-model-small-en-us-0.15"
+if not os.path.exists(VOSK_MODEL):
+    st.write("Downloading Vosk Model...")
+    subprocess.run(["wget", "-O", "vosk.zip", "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip"])
+    subprocess.run(["unzip", "vosk.zip"])
+    subprocess.run(["rm", "vosk.zip"])
+# Load Vosk model
+model = Model(VOSK_MODEL)
+# ✅ Auto-Download Wav2Vec2 Model (Emotion Detection)
+WAV2VEC_MODEL = "superb/wav2vec2-large-xlsr-53"
+if not os.path.exists(WAV2VEC_MODEL):
+    st.write(f"Downloading {WAV2VEC_MODEL}...")
+    snapshot_download(repo_id=WAV2VEC_MODEL, local_dir=WAV2VEC_MODEL)
+# Load emotion detection model
+emotion_model = pipeline("audio-classification", model=WAV2VEC_MODEL)
+# ✅ Streamlit UI
 st.title("🎙️ Speech Detection System using Mozilla Common Voice")
 st.write("Upload an audio file and get real-time speech-to-text, noise filtering, and emotion analysis.")
     librosa.display.waveshow(y, sr=sr, ax=ax)
     st.pyplot(fig)
+    # ✅ Noise Reduction
     y_denoised = nr.reduce_noise(y=y, sr=sr)
     denoised_path = file_path.replace(".wav", "_denoised.wav")
     sf.write(denoised_path, y_denoised, sr)
+    # ✅ Speech-to-Text using Vosk
     def transcribe_audio(audio_path):
         wf = wave.open(audio_path, "rb")
         rec = KaldiRecognizer(model, wf.getframerate())
     st.subheader("📝 Transcribed Text:")
     st.write(transcription)
+    # ✅ Emotion Detection
     emotion_result = emotion_model(file_path)
     st.subheader("😊 Emotion Analysis:")
     st.write(emotion_result)
+    # ✅ Play Original & Denoised Audio
     st.audio(file_path, format="audio/wav", start_time=0)
     st.subheader("🔊 Denoised Audio:")
     st.audio(denoised_path, format="audio/wav", start_time=0)