mcp-nlp-analytics / src / sentiment_analyzer.py
"""
Sentiment Analysis Module
Analyzes emotional tone and sentiment evolution in messages.
"""
from textblob import TextBlob
from typing import List, Dict, Any
class SentimentAnalyzer:
"""Analyzes sentiment evolution across messages."""
def __init__(self):
"""Initialize sentiment analyzer."""
# Extended keyword lists with Spanish and English
self.positive_words = {
'love', 'excellent', 'amazing', 'fantastic', 'wonderful', 'great', 'good',
'perfect', 'best', 'awesome', 'brilliant', 'outstanding', 'superb', 'trust',
'confident', 'happy', 'thrilled', 'delighted', 'impressed', 'satisfied',
'encanta', 'excelente', 'perfecto', 'increible', 'genial', 'bueno', 'maravilloso',
            'fantastico', 'sobresaliente', 'impresionado', 'satisfecho', 'adoro',
            'me encanta', 'fantástico', 'fabuloso', 'me gusta', 'bien', 'obra'
}
self.negative_words = {
'hate', 'terrible', 'awful', 'horrible', 'bad', 'poor', 'worst',
'disappointed', 'frustrated', 'angry', 'annoyed', 'upset', 'problem',
'issue', 'bug', 'slow', 'expensive', 'difficult', 'fail', 'cancel',
'doubt', 'concern', 'worried', 'unsure', 'alternative', 'competitor',
'odio', 'terrible', 'horrible', 'malo', 'peor', 'problema', 'bugs',
            'caro', 'lento', 'difícil', 'fracaso', 'cancelar', 'competencia',
'competidor', 'preocupacion', 'inquietud', 'alternativa', 'dudoso',
            'cambiar', 'adios', 'adiós', 'otros developers', 'más barato',
'renunciar', 'renuncia', 'renuncie', 'partir', 'irme', 'me voy',
'dejar', 'abandonar', 'salir', 'terminar', 'fin', 'otro trabajo',
'mejor oferta', 'buscar', 'explorar', 'mejores', 'mejores roles'
}
def analyze_evolution(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Analyze how sentiment evolves across messages.
Args:
messages: List of {'timestamp': str, 'text': str, 'sender': str}
Returns:
Dictionary with sentiment evolution analysis
"""
if not messages:
return self._empty_analysis()
# Analyze each message
timeline = []
sentiments = []
for i, msg in enumerate(messages):
# Handle both strings and dicts
if isinstance(msg, dict):
text = msg.get('text', '')
timestamp = msg.get('timestamp', f'Message {i+1}')
elif isinstance(msg, str):
text = msg
timestamp = f'Message {i+1}'
else:
text = str(msg)
timestamp = f'Message {i+1}'
sentiment_score = self._calculate_sentiment(text)
sentiments.append(sentiment_score)
timeline.append({
'timestamp': timestamp,
'text': text[:100] + '...' if len(text) > 100 else text,
'sentiment_score': round(sentiment_score, 2),
'sentiment_state': self._sentiment_state(sentiment_score),
'message_index': i + 1
})
# Calculate trend
trend = self._calculate_trend(sentiments)
turning_points = self._find_turning_points(sentiments, timeline)
overall_change = sentiments[-1] - sentiments[0] if sentiments else 0
# Generate interpretation
interpretation = self._generate_interpretation(
sentiments, trend, turning_points
)
return {
'timeline': timeline,
'current_sentiment': round(sentiments[-1], 2) if sentiments else 0,
'initial_sentiment': round(sentiments[0], 2) if sentiments else 0,
'trend': trend,
'turning_points': turning_points,
'overall_change': round(overall_change, 2),
'interpretation': interpretation,
'message_count': len(messages)
}
def _calculate_sentiment(self, text: str) -> float:
"""
Calculate sentiment score from 0-100.
0 = very negative, 50 = neutral, 100 = very positive
Uses keyword matching primarily, TextBlob for fine-tuning.
"""
if not text:
return 50.0
text_lower = text.lower()
# Primary: Count positive and negative keywords
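        # Note: matching is substring-based, so multi-word phrases (e.g. 'me encanta')
        # are detected, and each keyword contributes at most once per message.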
positive_count = sum(1 for word in self.positive_words if word in text_lower)
negative_count = sum(1 for word in self.negative_words if word in text_lower)
# Base score from keywords
keyword_score = 50 + (positive_count * 10) - (negative_count * 10)
# Use TextBlob for fine-tuning
blob = TextBlob(text)
polarity = blob.sentiment.polarity # -1 to 1
textblob_score = (polarity + 1) * 50
# Combine: 70% keywords, 30% TextBlob
# Keywords are more reliable for detecting sentiment in conversations
final_score = (keyword_score * 0.7) + (textblob_score * 0.3)
# Ensure score is in valid range
return min(100, max(0, final_score))
def _sentiment_state(self, score: float) -> str:
"""Classify sentiment into states."""
if score >= 80:
return "EXTREMELY_POSITIVE"
elif score >= 60:
return "POSITIVE"
elif score >= 40:
return "NEUTRAL"
elif score >= 20:
return "NEGATIVE"
else:
return "EXTREMELY_NEGATIVE"
def _calculate_trend(self, sentiments: List[float]) -> str:
"""Determine overall trend."""
if len(sentiments) < 2:
return "insufficient_data"
        # Compare the average sentiment of the first and second halves
first_half = sum(sentiments[:len(sentiments)//2]) / max(1, len(sentiments)//2)
second_half = sum(sentiments[len(sentiments)//2:]) / max(1, len(sentiments) - len(sentiments)//2)
diff = second_half - first_half
if diff > 10:
return "IMPROVING"
elif diff < -10:
return "DECLINING"
else:
return "STABLE"
def _find_turning_points(self, sentiments: List[float], timeline: List[Dict]) -> List[Dict]:
"""Find significant sentiment changes."""
turning_points = []
for i in range(1, len(sentiments)):
change = abs(sentiments[i] - sentiments[i-1])
# Significant change: > 20 points
if change > 20:
turning_points.append({
'index': i,
'timestamp': timeline[i]['timestamp'],
'from_state': self._sentiment_state(sentiments[i-1]),
'to_state': self._sentiment_state(sentiments[i]),
'change_magnitude': round(change, 2),
'severity': 'CRITICAL' if change > 40 else 'HIGH' if change > 30 else 'MEDIUM'
})
return turning_points
def _generate_interpretation(self, sentiments: List[float], trend: str,
turning_points: List[Dict]) -> str:
"""Generate human-readable interpretation."""
if not sentiments:
return "No messages to analyze."
current = sentiments[-1]
initial = sentiments[0]
# Base interpretation
if trend == "DECLINING":
base = f"Sentiment is DECLINING overall (from {initial:.0f} to {current:.0f})"
elif trend == "IMPROVING":
base = f"Sentiment is IMPROVING overall (from {initial:.0f} to {current:.0f})"
else:
base = f"Sentiment is STABLE (around {current:.0f})"
# Add turning point info
if turning_points:
critical_points = [p for p in turning_points if p['severity'] == 'CRITICAL']
if critical_points:
base += f". WARNING: {len(critical_points)} critical sentiment shift(s) detected."
# Final assessment
if current < 30:
base += " RISK LEVEL: CRITICAL - Immediate intervention recommended."
elif current < 50:
base += " RISK LEVEL: HIGH - Attention needed soon."
elif current > 70:
base += " Status: POSITIVE - No immediate action needed."
return base
def _empty_analysis(self) -> Dict[str, Any]:
"""Return empty analysis structure."""
return {
'timeline': [],
'current_sentiment': 0,
'initial_sentiment': 0,
'trend': 'unknown',
'turning_points': [],
'overall_change': 0,
'interpretation': 'No data provided',
'message_count': 0
}
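

# Minimal usage sketch (illustrative only): the messages below are hypothetical
# examples of the {'timestamp', 'text', 'sender'} dicts that analyze_evolution expects.
if __name__ == "__main__":
    sample_messages = [
        {'timestamp': '2024-01-01', 'text': 'Me encanta el producto, excelente trabajo!', 'sender': 'client'},
        {'timestamp': '2024-01-08', 'text': 'Found a bug, the dashboard is slow.', 'sender': 'client'},
        {'timestamp': '2024-01-15', 'text': 'Still frustrated, we may cancel and look for an alternative.', 'sender': 'client'},
    ]

    analyzer = SentimentAnalyzer()
    result = analyzer.analyze_evolution(sample_messages)

    print(f"Trend: {result['trend']}")
    print(f"Overall change: {result['overall_change']}")
    print(result['interpretation'])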