Spaces:

Rejeno
/

SentimentAnalysis

Running

Regino

nasf

3f37371 9 months ago

5.65 kB

	import logging
	import re

	import joblib
	import matplotlib.pyplot as plt
	import nltk
	import pandas as pd
	import seaborn as sns
	import streamlit as st
	from nltk.corpus import stopwords
	from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
	from wordcloud import WordCloud

	# Load the English stopword list, downloading the NLTK corpus only when it is
	# actually missing. The corpus reader raises LookupError when the data is not
	# installed, so a normal run with the corpus present never calls the downloader.
	try:
	    stop_words = set(stopwords.words("english"))
	except LookupError:
	    nltk.download("stopwords")
	    stop_words = set(stopwords.words("english"))

	# Load the trained model and TF-IDF vectorizer.
	# NOTE(review): joblib.load unpickles arbitrary objects; these files must come
	# from a trusted source.
	model = joblib.load("sentiment_model.pkl")
	vectorizer = joblib.load("tfidf_vectorizer.pkl")

	# The CSV has no header row, so the column names are supplied explicitly.
	column_names = ["id", "place", "label", "text"]
	df = pd.read_csv("twitter_training.csv", names=column_names, header=None)

	def preprocess_text(text):
	    """Return a cleaned, lowercase version of *text* without stopwords.

	    The input is converted with ``str()``, lowercased, every non-word
	    character is replaced by a space, whitespace runs are collapsed, and
	    tokens found in the module-level ``stop_words`` set are dropped. The
	    remaining tokens are joined with single spaces.
	    """
	    lowered = str(text).lower()
	    # Punctuation and other non-word characters become token separators.
	    without_symbols = re.sub(r"\W", " ", lowered)
	    collapsed = re.sub(r"\s+", " ", without_symbols).strip()
	    kept_tokens = (token for token in collapsed.split() if token not in stop_words)
	    return " ".join(kept_tokens)

	# Evaluate the model on the validation CSV and derive the values shown on the
	# "Model Metrics" page: overall accuracy, a per-class report table and a
	# confusion matrix. If any step fails, all three are set to None so the page
	# can show a warning instead of crashing the app.
	try:
	    test_df = pd.read_csv("twitter_validation.csv", names=column_names, header=None)
	    # Clean the validation text the same way predict_sentiment() cleans user
	    # input, so the reported metrics describe the pipeline the app really uses.
	    X_test = vectorizer.transform(test_df["text"].map(preprocess_text))
	    y_test = test_df["label"]
	    y_pred = model.predict(X_test)

	    # Model metrics
	    accuracy = accuracy_score(y_test, y_pred)
	    classification_report_text = classification_report(y_test, y_pred, output_dict=True)
	    # Transpose so each class/average is a row and each metric a column.
	    class_report_df = pd.DataFrame(classification_report_text).T.round(2)

	    # Confusion matrix restricted to these three labels, in this order.
	    cm = confusion_matrix(y_test, y_pred, labels=["Positive", "Neutral", "Negative"])

	except Exception:
	    # Broad catch is intentional (best-effort metrics), but the cause is logged
	    # with its traceback rather than being discarded silently.
	    logging.getLogger(__name__).exception("Could not compute model metrics from twitter_validation.csv")
	    accuracy = None
	    class_report_df = None
	    cm = None

	def predict_sentiment(user_input):
	    """Return the model's predicted label for a single piece of text.

	    The text is cleaned with ``preprocess_text``, turned into a one-row
	    feature matrix by the module-level ``vectorizer`` and passed to the
	    module-level ``model``; the first (only) prediction is returned.
	    """
	    features = vectorizer.transform([preprocess_text(user_input)])
	    return model.predict(features)[0]

	# --- Sidebar: app description and page selector ---
	sidebar_intro = (
	    "This app performs Sentiment Analysis on text using Machine Learning. "
	    "It classifies text as Positive, Neutral, or Negative based on its sentiment."
	)
	st.sidebar.title("🔍 Sentiment Analysis App")
	st.sidebar.markdown(sidebar_intro)

	st.sidebar.header("📌 Navigation")
	# `page` holds the selected option and drives the page dispatch further down.
	page = st.sidebar.radio(
	    "Go to:",
	    ["📂 Dataset", "📊 Visualizations", "📈 Model Metrics", "🤖 Sentiment Predictor"],
	)

	# --- Main area: title and short explanation, shown on every page ---
	st.title("📢 Twitter Sentiment Analysis")
	app_intro = (
	    "This application uses Natural Language Processing (NLP) and "
	    "Logistic Regression to analyze the sentiment of tweets. The model is trained using a dataset "
	    "of tweets labeled as Positive, Neutral, or Negative."
	)
	st.markdown(app_intro)

	# Render the page selected in the sidebar. Each figure is closed after it is
	# handed to Streamlit: pyplot keeps every figure it creates alive until it is
	# closed, so figures would otherwise pile up each time the script runs.

	# One colour per sentiment label, shared by the pie and bar charts so a label
	# is always drawn in the same colour.
	sentiment_colors = {"Positive": "green", "Neutral": "gray", "Negative": "red", "Irrelevant": "blue"}
	# Axis labels for the confusion-matrix heatmap.
	matrix_labels = ["Positive", "Neutral", "Negative"]

	# 📂 Dataset Page
	if page == "📂 Dataset":
	    st.header("📂 Dataset Preview")
	    # Preview a fixed slice of rows (positions 49 to 104).
	    st.dataframe(df.iloc[49:105])

	# 📊 Visualization Page
	elif page == "📊 Visualizations":
	    st.header("📊 Data Visualizations")

	    # Pie Chart of Sentiments
	    st.subheader("🥧 Sentiment Distribution")
	    label_counts = df["label"].value_counts()
	    # value_counts() orders labels by frequency, so colours are looked up by
	    # label rather than by position; unknown labels get a neutral fallback.
	    pie_colors = [sentiment_colors.get(label, "lightgray") for label in label_counts.index]
	    fig, ax = plt.subplots(figsize=(5, 5))
	    label_counts.plot(kind="pie", autopct="%1.1f%%", colors=pie_colors, ax=ax)
	    ax.set_title("Sentiment Distribution")
	    ax.set_ylabel("")
	    st.pyplot(fig)
	    plt.close(fig)

	    # Bar Chart of Sentiment Counts
	    st.subheader("📊 Sentiment Count (Bar Chart)")
	    fig, ax = plt.subplots(figsize=(6, 4))
	    sns.countplot(x=df["label"], palette=sentiment_colors, ax=ax)
	    ax.set_xlabel("Sentiment Type")
	    ax.set_ylabel("Count")
	    ax.set_title("Distribution of Sentiments")
	    st.pyplot(fig)
	    plt.close(fig)

	    # Word Cloud for Most Frequent Words
	    st.subheader("☁️ Word Cloud of Most Common Words")
	    text_data = " ".join(df["text"].astype(str))
	    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text_data)
	    fig, ax = plt.subplots(figsize=(8, 4))
	    ax.imshow(wordcloud, interpolation="bilinear")
	    ax.axis("off")
	    st.pyplot(fig)
	    plt.close(fig)

	# 📈 Model Metrics Page
	elif page == "📈 Model Metrics":
	    st.header("📈 Model Performance")

	    # accuracy, class_report_df and cm are None when the evaluation step failed.
	    if accuracy is not None:
	        st.write(f"✅ Accuracy: {accuracy * 100:.2f}%")
	    else:
	        st.warning("⚠️ Could not calculate accuracy. Please check the test dataset.")

	    if class_report_df is not None and not class_report_df.empty:
	        st.subheader("📌 Classification Report")
	        st.dataframe(class_report_df)
	    else:
	        st.warning("⚠️ Classification report is empty.")

	    # An all-zero matrix is treated the same as a missing one.
	    if cm is not None and cm.any():
	        st.subheader("🔥 Confusion Matrix")
	        fig, ax = plt.subplots(figsize=(6, 5))
	        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=matrix_labels, yticklabels=matrix_labels, ax=ax)
	        ax.set_xlabel("Predicted")
	        ax.set_ylabel("Actual")
	        ax.set_title("Confusion Matrix")
	        st.pyplot(fig)
	        plt.close(fig)
	    else:
	        st.warning("⚠️ Confusion matrix could not be generated.")

	# 🤖 Sentiment Predictor Page
	elif page == "🤖 Sentiment Predictor":
	    st.header("🤖 Sentiment Analysis")
	    st.markdown("Enter a sentence below, and the model will predict whether it is Positive, Neutral, or Negative.")

	    user_input = st.text_area("Type your sentence here:", "")

	    if st.button("Analyze Sentiment"):
	        # Whitespace-only input is rejected rather than sent to the model.
	        if user_input.strip():
	            sentiment_result = predict_sentiment(user_input)
	            st.markdown(f"### 🔍 Prediction: {sentiment_result}")
	        else:
	            st.warning("Please enter some text to analyze.")