""" |
|
|
Sentiment Analysis Module |
|
|
Analyzes emotional tone and sentiment evolution in messages. |
|
|
""" |
|
|
|
|
|
from textblob import TextBlob |
|
|
from typing import List, Dict, Any |
|
|
import re |
|
|
from datetime import datetime |
|
|
|
|
|
|
|
|
class SentimentAnalyzer:
    """Analyzes sentiment evolution across messages."""

    def __init__(self):
        """Initialize sentiment analyzer."""
        # Keyword lexicons (English and Spanish). Matching is done by simple
        # substring containment on the lowercased text, so multi-word phrases
        # such as 'me encanta' are also detected.
        self.positive_words = {
            'love', 'excellent', 'amazing', 'fantastic', 'wonderful', 'great', 'good',
            'perfect', 'best', 'awesome', 'brilliant', 'outstanding', 'superb', 'trust',
            'confident', 'happy', 'thrilled', 'delighted', 'impressed', 'satisfied',
            'encanta', 'excelente', 'perfecto', 'increible', 'genial', 'bueno', 'maravilloso',
            'fantastico', 'sobresaliente', 'impresionado', 'satisfecho', 'adoro',
            'me encanta', 'fantástico', 'fabuloso', 'me gusta', 'bien', 'obra'
        }

        self.negative_words = {
            'hate', 'terrible', 'awful', 'horrible', 'bad', 'poor', 'worst',
            'disappointed', 'frustrated', 'angry', 'annoyed', 'upset', 'problem',
            'issue', 'bug', 'slow', 'expensive', 'difficult', 'fail', 'cancel',
            'doubt', 'concern', 'worried', 'unsure', 'alternative', 'competitor',
            'odio', 'malo', 'peor', 'problema', 'bugs',
            'caro', 'lento', 'difícil', 'fracaso', 'cancelar', 'competencia',
            'competidor', 'preocupacion', 'inquietud', 'alternativa', 'dudoso',
            'cambiar', 'adios', 'adiós', 'otros developers', 'más barato',
            'renunciar', 'renuncia', 'renuncie', 'partir', 'irme', 'me voy',
            'dejar', 'abandonar', 'salir', 'terminar', 'fin', 'otro trabajo',
            'mejor oferta', 'buscar', 'explorar', 'mejores', 'mejores roles'
        }

    def analyze_evolution(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Analyze how sentiment evolves across messages.

        Args:
            messages: List of {'timestamp': str, 'text': str, 'sender': str}

        Returns:
            Dictionary with sentiment evolution analysis.
        """
        if not messages:
            return self._empty_analysis()

        timeline = []
        sentiments = []

        for i, msg in enumerate(messages):
            if isinstance(msg, dict):
                text = msg.get('text', '')
                timestamp = msg.get('timestamp', f'Message {i+1}')
            elif isinstance(msg, str):
                text = msg
                timestamp = f'Message {i+1}'
            else:
                text = str(msg)
                timestamp = f'Message {i+1}'

            sentiment_score = self._calculate_sentiment(text)
            sentiments.append(sentiment_score)

            timeline.append({
                'timestamp': timestamp,
                'text': (text[:100] + '...') if len(text) > 100 else text,
                'sentiment_score': round(sentiment_score, 2),
                'sentiment_state': self._sentiment_state(sentiment_score),
                'message_index': i + 1
            })

        trend = self._calculate_trend(sentiments)
        turning_points = self._find_turning_points(sentiments, timeline)
        overall_change = sentiments[-1] - sentiments[0] if sentiments else 0

        interpretation = self._generate_interpretation(
            sentiments, trend, turning_points
        )

        return {
            'timeline': timeline,
            'current_sentiment': round(sentiments[-1], 2) if sentiments else 0,
            'initial_sentiment': round(sentiments[0], 2) if sentiments else 0,
            'trend': trend,
            'turning_points': turning_points,
            'overall_change': round(overall_change, 2),
            'interpretation': interpretation,
            'message_count': len(messages)
        }

    def _calculate_sentiment(self, text: str) -> float:
        """
        Calculate a sentiment score from 0-100.

        0 = very negative, 50 = neutral, 100 = very positive.
        Keyword matching drives the score; TextBlob polarity fine-tunes it.
        """
        if not text:
            return 50.0

        text_lower = text.lower()

        # Count keyword hits via substring containment on the lowercased text.
        positive_count = sum(1 for word in self.positive_words if word in text_lower)
        negative_count = sum(1 for word in self.negative_words if word in text_lower)

        # Start from neutral (50) and shift by 10 points per keyword hit,
        # e.g. 2 positive hits and 1 negative hit -> 50 + 20 - 10 = 60.
        keyword_score = 50 + (positive_count * 10) - (negative_count * 10)

        # TextBlob polarity is in [-1, 1]; rescale it to the same 0-100 range.
        blob = TextBlob(text)
        polarity = blob.sentiment.polarity
        textblob_score = (polarity + 1) * 50

        # Blend both signals, weighting the keyword score more heavily.
        final_score = (keyword_score * 0.7) + (textblob_score * 0.3)

        # Clamp to the 0-100 range.
        return min(100, max(0, final_score))

    def _sentiment_state(self, score: float) -> str:
        """Classify a sentiment score into a discrete state."""
        if score >= 80:
            return "EXTREMELY_POSITIVE"
        elif score >= 60:
            return "POSITIVE"
        elif score >= 40:
            return "NEUTRAL"
        elif score >= 20:
            return "NEGATIVE"
        else:
            return "EXTREMELY_NEGATIVE"

    def _calculate_trend(self, sentiments: List[float]) -> str:
        """Determine the overall trend by comparing the first and second halves."""
        if len(sentiments) < 2:
            return "insufficient_data"

        # Compare the average sentiment of the first half against the second half.
        first_half = sum(sentiments[:len(sentiments)//2]) / max(1, len(sentiments)//2)
        second_half = sum(sentiments[len(sentiments)//2:]) / max(1, len(sentiments) - len(sentiments)//2)

        diff = second_half - first_half

        if diff > 10:
            return "IMPROVING"
        elif diff < -10:
            return "DECLINING"
        else:
            return "STABLE"

    def _find_turning_points(self, sentiments: List[float], timeline: List[Dict]) -> List[Dict]:
        """Find significant sentiment changes between consecutive messages."""
        turning_points = []

        for i in range(1, len(sentiments)):
            change = abs(sentiments[i] - sentiments[i-1])

            # A swing of more than 20 points counts as a turning point.
            if change > 20:
                turning_points.append({
                    'index': i,
                    'timestamp': timeline[i]['timestamp'],
                    'from_state': self._sentiment_state(sentiments[i-1]),
                    'to_state': self._sentiment_state(sentiments[i]),
                    'change_magnitude': round(change, 2),
                    'severity': 'CRITICAL' if change > 40 else 'HIGH' if change > 30 else 'MEDIUM'
                })

        return turning_points

    def _generate_interpretation(self, sentiments: List[float], trend: str,
                                 turning_points: List[Dict]) -> str:
        """Generate a human-readable interpretation."""
        if not sentiments:
            return "No messages to analyze."

        current = sentiments[-1]
        initial = sentiments[0]

        if trend == "DECLINING":
            base = f"Sentiment is DECLINING overall (from {initial:.0f} to {current:.0f})"
        elif trend == "IMPROVING":
            base = f"Sentiment is IMPROVING overall (from {initial:.0f} to {current:.0f})"
        else:
            base = f"Sentiment is STABLE (around {current:.0f})"

        if turning_points:
            critical_points = [p for p in turning_points if p['severity'] == 'CRITICAL']
            if critical_points:
                base += f". WARNING: {len(critical_points)} critical sentiment shift(s) detected."

        if current < 30:
            base += " RISK LEVEL: CRITICAL - Immediate intervention recommended."
        elif current < 50:
            base += " RISK LEVEL: HIGH - Attention needed soon."
        elif current > 70:
            base += " Status: POSITIVE - No immediate action needed."

        return base

    def _empty_analysis(self) -> Dict[str, Any]:
        """Return an empty analysis structure."""
        return {
            'timeline': [],
            'current_sentiment': 0,
            'initial_sentiment': 0,
            'trend': 'unknown',
            'turning_points': [],
            'overall_change': 0,
            'interpretation': 'No data provided',
            'message_count': 0
        }
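

# The demo below is a minimal usage sketch, not part of the module's API: the
# sample messages and timestamps are hypothetical, and the exact scores depend
# on the installed TextBlob data, so the printed numbers are only illustrative.
if __name__ == "__main__":
    analyzer = SentimentAnalyzer()
    sample_messages = [
        {'timestamp': '2024-01-01', 'text': 'I love this product, it is excellent!', 'sender': 'client'},
        {'timestamp': '2024-01-08', 'text': 'There is a problem, the app is slow.', 'sender': 'client'},
        {'timestamp': '2024-01-15', 'text': 'Very disappointed, considering a competitor.', 'sender': 'client'},
    ]
    result = analyzer.analyze_evolution(sample_messages)
    print(result['trend'], result['current_sentiment'])
    print(result['interpretation'])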