Spaces:
Running
Running
import logging
import re

import joblib
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import seaborn as sns
import streamlit as st
from nltk.corpus import stopwords
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from wordcloud import WordCloud
# Fetch the NLTK stopword corpus only when it is missing locally, so that the
# download server is not contacted on every run of this script.
try:
    nltk.data.find("corpora/stopwords")
except LookupError:
    nltk.download("stopwords")

# Kept as a set so that per-token membership checks are constant time.
stop_words = set(stopwords.words("english"))
# Deserialize the fitted classifier and its companion TF-IDF vectorizer.
# NOTE: joblib.load unpickles its input, so these files must be trusted.
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")

# The training CSV is read as header-less; column names are supplied here.
column_names = ["id", "place", "label", "text"]
df = pd.read_csv("twitter_training.csv", header=None, names=column_names)
def preprocess_text(text):
    """Normalize raw text before it is vectorized.

    The input is coerced to ``str`` and lower-cased, every non-word
    character is replaced by a space, runs of whitespace are collapsed,
    and tokens found in the module-level ``stop_words`` set are dropped.

    Returns the remaining tokens joined by single spaces.
    """
    lowered = str(text).lower()
    # Replace each non-word character with a space.
    without_symbols = re.sub(r"\W", " ", lowered)
    # Collapse repeated whitespace and trim both ends.
    collapsed = re.sub(r"\s+", " ", without_symbols).strip()
    kept_tokens = [token for token in collapsed.split() if token not in stop_words]
    return " ".join(kept_tokens)
# Evaluate the loaded model on the validation CSV. This is best-effort: if any
# step fails, the three result names are set to None so the "Model Metrics"
# page can show its warnings instead of the whole app crashing.
try:
    test_df = pd.read_csv("twitter_validation.csv", names=column_names, header=None)
    # NOTE(review): the validation text is vectorized as-is, whereas
    # predict_sentiment() cleans its input with preprocess_text() first.
    # Confirm which of the two matches how the model was trained.
    X_test = vectorizer.transform(test_df["text"].astype(str))
    y_test = test_df["label"]
    y_pred = model.predict(X_test)

    # Overall accuracy plus the classification report as a rounded table.
    accuracy = accuracy_score(y_test, y_pred)
    classification_report_text = classification_report(y_test, y_pred, output_dict=True)
    class_report_df = pd.DataFrame(classification_report_text).T.round(2)

    # NOTE(review): only these three labels are tabulated here, while the
    # visualizations also colour an "Irrelevant" label. Confirm that leaving
    # it out of the confusion matrix is intended.
    cm = confusion_matrix(y_test, y_pred, labels=["Positive", "Neutral", "Negative"])
except Exception:
    # Record the cause (with traceback) rather than discarding it; the UI
    # still falls back to the warning messages.
    logging.getLogger(__name__).exception(
        "Could not compute model metrics from twitter_validation.csv"
    )
    accuracy = None
    class_report_df = None
    cm = None
def predict_sentiment(user_input):
    """Return the model's predicted label for a single piece of text.

    The text is cleaned with ``preprocess_text``, transformed by the
    module-level ``vectorizer`` as a one-item batch, and passed to the
    module-level ``model``; the first (only) prediction is returned.
    """
    features = vectorizer.transform([preprocess_text(user_input)])
    return model.predict(features)[0]
# --- Sidebar: app summary and page selector ---
_sidebar = st.sidebar
_sidebar.title("π Sentiment Analysis App")
_sidebar_summary = (
    "This app performs **Sentiment Analysis** on text using **Machine Learning**. "
    "It classifies text as **Positive, Neutral, or Negative** based on its sentiment."
)
_sidebar.markdown(_sidebar_summary)
_sidebar.header("π Navigation")

# The selected label is compared verbatim by the page dispatch further down,
# so these option strings must stay in sync with those comparisons.
_page_options = ["π Dataset", "π Visualizations", "π Model Metrics", "π€ Sentiment Predictor"]
page = _sidebar.radio("Go to:", _page_options)

# --- Main area: title and short description, rendered for every page ---
st.title("π’ Twitter Sentiment Analysis")
_app_intro = (
    "This application uses **Natural Language Processing (NLP)** and "
    "**Logistic Regression** to analyze the sentiment of tweets. The model is trained using a dataset "
    "of tweets labeled as **Positive, Neutral, or Negative**."
)
st.markdown(_app_intro)
# Colour for each sentiment label, shared by the pie and bar charts so that a
# given label is always drawn in the same colour.
SENTIMENT_COLORS = {"Positive": "green", "Neutral": "gray", "Negative": "red", "Irrelevant": "blue"}


def _show_figure(fig):
    """Render a Matplotlib figure in Streamlit, then close it.

    Figures created with ``plt.subplots`` stay registered with pyplot until
    they are closed, so closing here keeps them from piling up across runs.
    """
    st.pyplot(fig)
    plt.close(fig)


# Dataset page: show a fixed slice of the training data.
if page == "π Dataset":
    st.header("π Dataset Preview")
    st.dataframe(df.iloc[49:105])

# Visualizations page: label distribution (pie + bar) and a word cloud.
elif page == "π Visualizations":
    st.header("π Data Visualizations")

    # Pie chart. value_counts() orders labels by frequency, so the colours are
    # looked up per label rather than passed as a fixed positional list.
    st.subheader("π₯§ Sentiment Distribution")
    label_counts = df["label"].value_counts()
    pie_colors = [SENTIMENT_COLORS.get(label, "lightgray") for label in label_counts.index]
    fig, ax = plt.subplots(figsize=(5, 5))
    label_counts.plot(kind="pie", autopct="%1.1f%%", colors=pie_colors, ax=ax)
    ax.set_title("Sentiment Distribution")
    ax.set_ylabel("")
    _show_figure(fig)

    # Bar chart of the same counts.
    st.subheader("π Sentiment Count (Bar Chart)")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(x=df["label"], palette=SENTIMENT_COLORS, ax=ax)
    ax.set_xlabel("Sentiment Type")
    ax.set_ylabel("Count")
    ax.set_title("Distribution of Sentiments")
    _show_figure(fig)

    # Word cloud. Missing tweets are dropped first; otherwise astype(str)
    # would turn each of them into the literal word "nan".
    st.subheader("βοΈ Word Cloud of Most Common Words")
    text_data = " ".join(df["text"].dropna().astype(str))
    if text_data.strip():
        wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text_data)
        fig, ax = plt.subplots(figsize=(8, 4))
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        _show_figure(fig)
    else:
        st.warning("No text available to build a word cloud.")

# Model metrics page: each section falls back to a warning when its value
# could not be computed.
elif page == "π Model Metrics":
    st.header("π Model Performance")

    if accuracy is not None:
        st.write(f"β **Accuracy:** {accuracy * 100:.2f}%")
    else:
        st.warning("β οΈ Could not calculate accuracy. Please check the test dataset.")

    if class_report_df is not None and not class_report_df.empty:
        st.subheader("π Classification Report")
        st.dataframe(class_report_df)
    else:
        st.warning("β οΈ Classification report is empty.")

    # An all-zero matrix is treated the same as a missing one.
    if cm is not None and cm.any():
        st.subheader("π₯ Confusion Matrix")
        cm_labels = ["Positive", "Neutral", "Negative"]
        fig, ax = plt.subplots(figsize=(6, 5))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=cm_labels, yticklabels=cm_labels, ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        _show_figure(fig)
    else:
        st.warning("β οΈ Confusion matrix could not be generated.")

# Predictor page: classify free text typed by the user.
elif page == "π€ Sentiment Predictor":
    st.header("π€ Sentiment Analysis")
    st.markdown("Enter a sentence below, and the model will predict whether it is **Positive, Neutral, or Negative**.")
    user_input = st.text_area("Type your sentence here:", "")
    if st.button("Analyze Sentiment"):
        # Whitespace-only input is rejected instead of being sent to the model.
        if user_input.strip():
            sentiment_result = predict_sentiment(user_input)
            st.markdown(f"### π Prediction: **{sentiment_result}**")
        else:
            st.warning("Please enter some text to analyze.")