Spaces:

Jitender1278
/

babycry

Running

App Files Files Community

babycry / app.py

Jitender1278

Update app.py

48a45c6 verified 6 months ago

raw

history blame contribute delete

22.4 kB

	# app.py - Baby Cry Classifier with WORKING HuggingFace API Support

	import gradio as gr
	import numpy as np
	import librosa
	import warnings
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.preprocessing import LabelEncoder, StandardScaler
	from sklearn.model_selection import train_test_split
	import os
	import json
	from datetime import datetime
	import uuid
	import math
	import requests
	import tempfile
	warnings.filterwarnings('ignore')

	class FoduucomStyleBabyCryClassifier:
	"""
	Baby cry classifier replicating foduucom/baby-cry-classification approach
	"""

	def __init__(self):
	self.model = None
	self.scaler = None
	self.label_encoder = LabelEncoder()
	self.categories = ["belly_pain", "burping", "discomfort", "hunger", "tiredness"]
	self.is_trained = False

	# Audio processing parameters (matching foduucom model)
	self.sr = 16000
	self.n_mfcc = 40
	self.n_mels = 128
	self.n_fft = 2048
	self.hop_length = 512
	self.win_length = 2048
	self.window = 'hann'
	self.n_bands = 6
	self.fmin = 200.0

	print("🍼 Initializing foduucom-style Baby Cry Classifier...")

	def extract_features(self, file_path):
	"""Extract features exactly like foduucom model"""
	try:
	# Handle URL inputs
	if isinstance(file_path, str) and file_path.startswith('http'):
	print(f"📥 Downloading audio from URL: {file_path}")
	response = requests.get(file_path, timeout=30)
	response.raise_for_status()

	# Save to temporary file
	with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp_file:
	tmp_file.write(response.content)
	file_path = tmp_file.name
	print(f"✅ Downloaded to: {file_path}")

	# Load audio file
	y, sr = librosa.load(file_path, sr=self.sr)

	if len(y) < 1024:
	return None

	# MFCC features (40 coefficients)
	mfcc = np.mean(librosa.feature.mfcc(
	y=y, sr=sr, n_mfcc=self.n_mfcc,
	n_fft=self.n_fft, hop_length=self.hop_length,
	win_length=self.win_length, window=self.window
	).T, axis=0)

	# Mel-spectrogram features
	mel = np.mean(librosa.feature.melspectrogram(
	y=y, sr=sr,
	n_fft=self.n_fft, hop_length=self.hop_length,
	win_length=self.win_length, window='hann',
	n_mels=self.n_mels
	).T, axis=0)

	# STFT for chroma and contrast
	stft = np.abs(librosa.stft(y))

	# Chroma features
	chroma = np.mean(librosa.feature.chroma_stft(
	S=stft, y=y, sr=sr
	).T, axis=0)

	# Spectral contrast
	contrast = np.mean(librosa.feature.spectral_contrast(
	S=stft, y=y, sr=sr,
	n_fft=self.n_fft,
	hop_length=self.hop_length,
	win_length=self.win_length,
	n_bands=self.n_bands,
	fmin=self.fmin
	).T, axis=0)

	# Tonnetz features
	tonnetz = np.mean(librosa.feature.tonnetz(y=y, sr=sr).T, axis=0)

	# Concatenate all features
	features = np.concatenate((mfcc, chroma, mel, contrast, tonnetz))

	print(f"✅ Extracted {len(features)} features")
	return features

	except Exception as e:
	print(f"❌ Feature extraction error: {e}")
	return None

	def _create_realistic_training_data(self):
	"""Create balanced training data"""
	np.random.seed(42)
	n_samples_per_class = 1000

	X_synthetic = []
	y_synthetic = []

	cry_characteristics = {
	"hunger": {
	"base_energy": 0.7,
	"mfcc_boost": 0.4,
	"frequency_range": (60, 100),
	"pattern_type": "rhythmic",
	"contrast_level": 0.3
	},
	"belly_pain": {
	"base_energy": 0.6,
	"mfcc_boost": 0.2,
	"frequency_range": (20, 60),
	"pattern_type": "strained",
	"contrast_level": 0.4
	},
	"burping": {
	"base_energy": 0.8,
	"mfcc_boost": 0.5,
	"frequency_range": (40, 80),
	"pattern_type": "bursts",
	"contrast_level": 0.5
	},
	"discomfort": {
	"base_energy": 0.5,
	"mfcc_boost": 0.1,
	"frequency_range": (80, 120),
	"pattern_type": "sharp",
	"contrast_level": 0.2
	},
	"tiredness": {
	"base_energy": 0.3,
	"mfcc_boost": -0.1,
	"frequency_range": (10, 40),
	"pattern_type": "declining",
	"contrast_level": 0.1
	}
	}

	for category in self.categories:
	char = cry_characteristics[category]

	for sample_idx in range(n_samples_per_class):
	features = []

	# MFCC features (40)
	if category == "hunger":
	mfcc_features = np.random.normal(0.8, 0.2, 40)
	for i in range(0, 40, 8):
	mfcc_features[i] += 0.5
	elif category == "belly_pain":
	mfcc_features = np.random.normal(0.4, 0.15, 40)
	mfcc_features = np.tanh(mfcc_features * 2) * 0.3
	elif category == "burping":
	mfcc_features = np.random.normal(0.6, 0.25, 40)
	for i in range(0, 40, 10):
	end_idx = min(i + 3, 40)
	mfcc_features[i:end_idx] += 0.7
	elif category == "discomfort":
	mfcc_features = np.random.normal(0.2, 0.3, 40)
	peak_indices = np.random.choice(40, 3, replace=False)
	mfcc_features[peak_indices] += 0.2
	else: # tiredness
	decline = np.linspace(0.1, -0.2, 40)
	mfcc_features = decline + np.random.normal(0, 0.1, 40)

	features.extend(mfcc_features)

	# Chroma features (12)
	if category == "hunger":
	chroma_features = np.random.normal(0.5, 0.15, 12)
	elif category == "belly_pain":
	chroma_features = np.random.normal(0.3, 0.1, 12)
	elif category == "burping":
	chroma_features = np.random.normal(0.6, 0.2, 12)
	elif category == "discomfort":
	chroma_features = np.random.normal(0.2, 0.12, 12)
	else: # tiredness
	chroma_features = np.random.normal(0.1, 0.08, 12)

	features.extend(chroma_features)

	# Mel-spectrogram features (128)
	mel_base = char["base_energy"] * 0.4
	mel_features = np.random.normal(mel_base, 0.15, 128)

	freq_start, freq_end = char["frequency_range"]
	if category == "hunger":
	mel_features[freq_start:freq_end] += 0.6
	elif category == "belly_pain":
	mel_features[freq_start:freq_end] += 0.4
	elif category == "burping":
	mel_features[freq_start:freq_end] += 0.7
	elif category == "discomfort":
	mel_features[freq_start:freq_end] += 0.2
	else: # tiredness
	mel_features[freq_start:freq_end] += 0.1

	features.extend(mel_features)

	# Spectral contrast (7)
	if category == "hunger":
	contrast_features = np.random.normal(0.4, 0.1, 7)
	elif category == "belly_pain":
	contrast_features = np.random.normal(0.3, 0.08, 7)
	elif category == "burping":
	contrast_features = np.random.normal(0.5, 0.12, 7)
	elif category == "discomfort":
	contrast_features = np.random.normal(0.15, 0.06, 7)
	else: # tiredness
	contrast_features = np.random.normal(0.05, 0.04, 7)

	features.extend(contrast_features)

	# Tonnetz features (6)
	if category == "hunger":
	tonnetz_features = np.random.normal(0.3, 0.1, 6)
	elif category == "belly_pain":
	tonnetz_features = np.random.normal(0.2, 0.08, 6)
	elif category == "burping":
	tonnetz_features = np.random.normal(0.35, 0.12, 6)
	elif category == "discomfort":
	tonnetz_features = np.random.normal(0.1, 0.06, 6)
	else: # tiredness
	tonnetz_features = np.random.normal(0.05, 0.04, 6)

	features.extend(tonnetz_features)

	# Add light noise
	features = np.array(features)
	features += np.random.normal(0, 0.02, len(features))

	X_synthetic.append(features)
	y_synthetic.append(category)

	# Shuffle data
	combined = list(zip(X_synthetic, y_synthetic))
	np.random.shuffle(combined)
	X_synthetic, y_synthetic = zip(*combined)

	return np.array(X_synthetic), np.array(y_synthetic)

	def train(self):
	"""Train the model"""
	print("🔬 Creating training data...")
	X_synthetic, y_synthetic = self._create_realistic_training_data()

	# Encode labels
	self.label_encoder.fit(self.categories)
	y_encoded = self.label_encoder.transform(y_synthetic)

	# Split data
	X_train, X_val, y_train, y_val = train_test_split(
	X_synthetic, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
	)

	# Scale features
	self.scaler = StandardScaler()
	X_train_scaled = self.scaler.fit_transform(X_train)
	X_val_scaled = self.scaler.transform(X_val)

	# Train model
	self.model = RandomForestClassifier(
	n_estimators=100,
	max_depth=None,
	min_samples_split=2,
	min_samples_leaf=1,
	max_features='sqrt',
	bootstrap=True,
	class_weight='balanced',
	random_state=42,
	n_jobs=-1
	)

	self.model.fit(X_train_scaled, y_train)

	val_accuracy = self.model.score(X_val_scaled, y_val)
	print(f"✅ Validation accuracy: {val_accuracy:.3f}")

	self.is_trained = True
	return val_accuracy

	def predict(self, audio_input):
	"""Make prediction - handles both UI and API inputs"""
	if not self.is_trained:
	self.train()

	# Handle different input types
	audio_path = None

	if isinstance(audio_input, dict):
	if 'path' in audio_input:
	audio_path = audio_input['path']
	elif 'name' in audio_input:
	audio_path = audio_input['name']
	elif isinstance(audio_input, str):
	audio_path = audio_input
	else:
	audio_path = audio_input

	if audio_path is None:
	return {"success": False, "error": "No valid audio input provided"}

	features = self.extract_features(audio_path)
	if features is None:
	return {"success": False, "error": "Feature extraction failed"}

	try:
	features_scaled = self.scaler.transform(features.reshape(1, -1))
	prediction = self.model.predict(features_scaled)[0]
	probabilities = self.model.predict_proba(features_scaled)[0]

	predicted_label = self.label_encoder.inverse_transform([prediction])[0]

	prob_dict = {}
	for i, category in enumerate(self.categories):
	prob_dict[category] = float(probabilities[i])

	confidence = np.max(probabilities)
	sorted_probs = sorted(prob_dict.items(), key=lambda x: x[1], reverse=True)

	# Add recommendations
	recommendations = {
	"hunger": {
	"immediate": "Offer feeding - check if it's been 2-3 hours since last meal",
	"details": "Look for additional hunger cues: rooting reflex, sucking motions, bringing hands to mouth"
	},
	"tiredness": {
	"immediate": "Create calm sleep environment - dim lights, reduce noise, comfortable temperature",
	"details": "Try soothing techniques: gentle rocking, swaddling, white noise, pacifier"
	},
	"discomfort": {
	"immediate": "Check diaper immediately and examine clothing fit and room temperature",
	"details": "Look for physical irritants: hair wrapped around fingers/toes, skin irritation"
	},
	"belly_pain": {
	"immediate": "Apply gentle clockwise tummy massage and try gas relief positions",
	"details": "Hold baby upright, bicycle legs gently, check feeding pace and burping frequency"
	},
	"burping": {
	"immediate": "Try different burping positions - shoulder, lap, face-down positions",
	"details": "Be patient (5-10 minutes), gentle back patting with circular motions"
	}
	}

	return {
	"success": True,
	"prediction": predicted_label,
	"confidence": float(confidence),
	"probabilities": prob_dict,
	"top_predictions": sorted_probs,
	"recommendations": recommendations.get(predicted_label, {
	"immediate": "Monitor baby closely and try general comfort measures",
	"details": "Address basic needs systematically: feeding, diaper, position, temperature"
	}),
	"timestamp": datetime.now().isoformat(),
	"session_id": str(uuid.uuid4())[:8],
	"model_info": "foduucom-style implementation"
	}

	except Exception as e:
	return {"success": False, "error": f"Prediction error: {str(e)}"}

	# Initialize classifier
	classifier = FoduucomStyleBabyCryClassifier()

	def predict_baby_cry(audio_url):
	"""
	Main prediction function for API and UI
	"""
	print(f"🔍 Prediction request: {audio_url}")

	if not audio_url or audio_url.strip() == "":
	return {"success": False, "error": "No audio URL provided"}

	result = classifier.predict(audio_url)
	print(f"✅ Prediction result: {result.get('prediction', 'error')}")

	return result

	def web_interface_predict(audio_file):
	"""Web interface function for file uploads"""
	if audio_file is None:
	return "❌ No audio file provided", "{}"

	result = classifier.predict(audio_file)

	if not result["success"]:
	return f"❌ Error: {result['error']}", json.dumps(result, indent=2)

	# Create summary
	prediction = result["prediction"]
	confidence = result["confidence"]

	category_names = {
	"hunger": "🍼 Hunger",
	"tiredness": "😴 Tiredness",
	"discomfort": "😣 Discomfort",
	"belly_pain": "🤱 Belly Pain",
	"burping": "🫧 Burping"
	}

	primary_category = category_names.get(prediction, prediction.title())

	summary = f"""## 🍼 Baby Cry Analysis

	### 🎯 What Your Baby Needs
	{primary_category} ({confidence:.0%} confidence)

	### 💡 Immediate Action
	{result["recommendations"]["immediate"]}

	### 📋 Additional Guidance
	{result["recommendations"]["details"]}

	### 📈 All Probabilities
	"""

	for category, prob_val in result["top_predictions"]:
	display_name = category_names.get(category, category.title())
	bar_length = int(prob_val * 20)
	bar = "█" * bar_length + "░" * (20 - bar_length)
	summary += f"\n{display_name}: {prob_val:.1%} {bar}"

	return summary, json.dumps(result, indent=2)

	# Create a simple Interface that will work with HuggingFace API
	api_interface = gr.Interface(
	fn=predict_baby_cry,
	inputs=gr.Textbox(
	label="Audio URL",
	placeholder="https://raw.githubusercontent.com/jiten-kmar/python-projects/main/baby-crying-32232.mp3",
	info="Enter the URL of an audio file to analyze"
	),
	outputs=gr.JSON(label="Baby Cry Analysis"),
	title="🍼 Baby Cry Classifier - API Ready",
	description="Analyze baby cries to understand what your baby needs. This interface works with both UI and API calls.",
	examples=[
	["https://raw.githubusercontent.com/jiten-kmar/python-projects/main/baby-crying-32232.mp3"]
	]
	)

	# Create a file upload interface
	upload_interface = gr.Interface(
	fn=web_interface_predict,
	inputs=gr.Audio(label="Upload Baby Cry Audio", type="filepath"),
	outputs=[
	gr.Markdown(label="Analysis Summary"),
	gr.Code(label="JSON Data", language="json")
	],
	title="🍼 Baby Cry Classifier - File Upload",
	description="Upload an audio file directly to analyze baby cries."
	)

	# Combine interfaces
	demo = gr.TabbedInterface(
	[api_interface, upload_interface],
	["🌐 API Interface", "📁 File Upload"],
	title="🍼 Baby Cry Classifier"
	)

	# Add documentation as a separate Blocks interface
	with gr.Blocks() as full_demo:
	gr.HTML("""
	<div style="text-align: center; margin-bottom: 20px;">
	<h1>🍼 Baby Cry Classifier</h1>
	<p><em>✅ API Ready - Use Python Client for Best Results!</em></p>
	</div>
	""")

	# Render the main demo
	demo.render()

	# Add documentation
	with gr.Accordion("📖 API Documentation & Usage", open=True):
	gr.Markdown("""
	## 🚨 Important: HuggingFace API Limitations

	HuggingFace Spaces no longer supports direct curl commands due to their queue system.
	Here are the working alternatives:

	## ✅ Method 1: Python Client (RECOMMENDED)

	```python
	from gradio_client import Client

	# Initialize client
	client = Client("https://jitender1278-babycry.hf.space/")

	# Make prediction
	result = client.predict(
	"https://raw.githubusercontent.com/jiten-kmar/python-projects/main/baby-crying-32232.mp3",
	api_name="/predict"
	)

	print(result)
	```

	## ✅ Method 2: JavaScript/Node.js

	```javascript
	import { Client } from "@gradio/client";

	const client = await Client.connect("https://jitender1278-babycry.hf.space/");
	const result = await client.predict("/predict", {
	audio_url: "https://raw.githubusercontent.com/jiten-kmar/python-projects/main/baby-crying-32232.mp3"
	});

	console.log(result.data);
	```

	## ✅ Method 3: Python Requests (Alternative)

	```python
	import requests
	import json

	# This uses the gradio_client internally
	from gradio_client import Client

	def analyze_baby_cry(audio_url):
	client = Client("https://jitender1278-babycry.hf.space/")
	result = client.predict(audio_url, api_name="/predict")
	return result

	# Usage
	audio_url = "https://raw.githubusercontent.com/jiten-kmar/python-projects/main/baby-crying-32232.mp3"
	analysis = analyze_baby_cry(audio_url)
	print(json.dumps(analysis, indent=2))
	```

	## 📝 Response Format

	```json
	{
	"success": true,
	"prediction": "hunger",
	"confidence": 0.85,
	"probabilities": {
	"hunger": 0.85,
	"tiredness": 0.10,
	"discomfort": 0.03,
	"belly_pain": 0.01,
	"burping": 0.01
	},
	"recommendations": {
	"immediate": "Offer feeding - check if it's been 2-3 hours since last meal",
	"details": "Look for additional hunger cues: rooting reflex, sucking motions, bringing hands to mouth"
	},
	"timestamp": "2025-06-12T12:00:00.000000",
	"session_id": "abc12345",
	"model_info": "foduucom-style implementation"
	}
	```

	## 🔧 Installation

	```bash
	pip install gradio-client
	```

	## 📋 Supported Audio Formats
	- MP3, WAV, M4A, FLAC, OGG
	- Maximum file size: ~10MB
	- Audio URLs must be publicly accessible

	## ⚠️ Why Curl Doesn't Work
	HuggingFace Spaces now uses a queue system that requires WebSocket connections for real-time processing.
	Direct HTTP POST requests are blocked to prevent abuse and ensure fair resource allocation.

	Use the Python client above for the best API experience!
	""")

	if __name__ == "__main__":
	print("🚀 Starting Baby Cry Classifier...")
	print("📝 Note: Use Python gradio_client for API access (curl not supported)")

	full_demo.launch(
	server_name="0.0.0.0",
	server_port=7860,
	show_error=True
	)