# Hugging Face file-viewer header (page residue, kept as a comment):
#   sukhmani1303 - "Upload app.py with huggingface_hub" - commit d49ff4a (verified) - 7.03 kB
import streamlit as st
import pandas as pd
import numpy as np
import os
import plotly.express as px
from io import StringIO
from datetime import datetime, timedelta
# Debug: report, for each artifact the app relies on, whether it exists
# relative to the current working directory.
st.write("Debug: Checking file paths...")
files_to_check = [
    "inference.py",
    "new_best_improved_model.pth",
    "scaler.pkl",
    "feature_names.json",
]
for required_file in files_to_check:
    # Absent files are surfaced as errors; present ones are simply listed.
    if not os.path.exists(required_file):
        st.error(f"{required_file} not found")
        continue
    st.write(f"{required_file} found")
# Everything below needs the helpers from inference.py; when the import
# fails, show the reason in the page and halt this script run.
try:
    from inference import forecast, load_model_and_artifacts
except Exception as import_error:
    st.error(f"Error importing inference: {import_error!s}")
    st.stop()
st.title("Store Sales Forecasting")


@st.cache_resource
def _load_cached_artifacts():
    """Load the model, scaler and feature names once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the loader keeps those reruns from reloading the artifacts each time.
    The returned objects are shared between reruns and sessions, so callers
    must treat them as read-only.

    Returns:
        Whatever ``load_model_and_artifacts()`` returns; the caller unpacks
        it as ``(model, scaler, feature_names)``.
    """
    return load_model_and_artifacts()


try:
    model, scaler, feature_names = _load_cached_artifacts()
    st.success("Model and artifacts loaded successfully")
except Exception as e:
    # Nothing below can work without the model, so report and end this run.
    st.error(f"Error loading model or artifacts: {str(e)}")
    st.stop()
# Display model metrics.
# The figures are fixed values written into the script; nothing here
# computes them at runtime.
st.header("Model Performance Metrics")
metrics = dict(MAE=710.75, RMSE=1108.51, MAPE=7.14, R2=0.8633)
metric_lines = (
    f"- **MAE**: ${metrics['MAE']:.2f}",
    f"- **RMSE**: ${metrics['RMSE']:.2f}",
    f"- **MAPE**: {metrics['MAPE']:.2f}%",
    f"- **R² Score**: {metrics['R2']:.4f}",
)
# One st.write call per metric, in the order listed above.
for metric_line in metric_lines:
    st.write(metric_line)
# Synthetic data generation
st.header("Test with Synthetic Data")


def _build_synthetic_frame(feature_names, sequence_length=21, seed=42):
    """Build a reproducible frame of synthetic model inputs.

    Args:
        feature_names: Column names, in the order the model expects them.
        sequence_length: Number of rows (one per day) to generate.
        seed: Seed applied to NumPy's global random generator.

    Returns:
        DataFrame with one column per feature, indexed by the
        ``sequence_length`` consecutive dates ending today. Features that
        match none of the rules below are left as zeros.

    Raises:
        ValueError: If a lag feature's name does not end in ``_<integer>``.
    """
    names = list(feature_names)
    np.random.seed(seed)
    synthetic_data = np.zeros((sequence_length, len(names)))
    phase = np.linspace(0, 2 * np.pi, sequence_length)
    lag_columns = []  # filled after the loop, once the base series exists

    # Generate realistic data based on training sales range (~$3,000–19,000)
    for i, feature in enumerate(names):
        if feature == "sales":
            # Mean=8954.97, std=3307.49
            synthetic_data[:, i] = np.random.normal(8955, 3307, sequence_length)
        elif feature == "onpromotion":
            synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.8, 0.2])
        elif feature in ("dayofweek_sin", "month_sin"):
            synthetic_data[:, i] = np.sin(phase)
        elif feature in ("dayofweek_cos", "month_cos"):
            # Cosine encodings get a cosine wave so they differ from the
            # matching sine column.
            synthetic_data[:, i] = np.cos(phase)
        elif feature == "trend":
            synthetic_data[:, i] = np.linspace(0, sequence_length, sequence_length)
        elif feature == "is_weekend":
            synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.7, 0.3])
        elif feature == "quarter":
            synthetic_data[:, i] = np.random.choice([1, 2, 3, 4], sequence_length)
        elif "lag" in feature:
            # Deferred: a lag column copies the base series, which may appear
            # later in the feature list. Lags draw no random numbers, so
            # deferring them does not change the values of other columns.
            lag_columns.append((i, feature))
        elif "ma" in feature or "std" in feature:
            synthetic_data[:, i] = np.random.normal(8955, 1000, sequence_length)
        elif "ratio" in feature:
            synthetic_data[:, i] = np.random.normal(1, 0.2, sequence_length)
        elif "promo" in feature:
            synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.8, 0.2])
        elif feature == "dcoilwtico":
            synthetic_data[:, i] = np.random.normal(80, 10, sequence_length)
        elif feature == "is_holiday":
            synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.9, 0.1])

    # Lag features are shifted copies of the sales column, looked up by name;
    # when there is no "sales" feature the first column is used instead.
    base_index = names.index("sales") if "sales" in names else 0
    for i, feature in lag_columns:
        shift = int(feature.split('_')[-1])
        # np.roll wraps around, so the first `shift` rows hold values taken
        # from the end of the series.
        synthetic_data[:, i] = np.roll(synthetic_data[:, base_index], shift)

    frame = pd.DataFrame(synthetic_data, columns=names)
    end_date = datetime.now().date()
    frame.index = [
        end_date - timedelta(days=offset)
        for offset in range(sequence_length - 1, -1, -1)
    ]
    return frame


if st.button("Generate Synthetic Sample Data"):
    st.session_state["synthetic_df"] = _build_synthetic_frame(feature_names)

# Render from session state rather than from the button's return value: the
# button is only True on the run triggered by its own click, so clicking the
# download button below (which reruns the script) would otherwise make the
# preview and forecast disappear.
if "synthetic_df" in st.session_state:
    synthetic_df = st.session_state["synthetic_df"]
    sequence_length, n_features = synthetic_df.shape
    end_date = synthetic_df.index[-1]  # last generated date

    st.subheader("Synthetic Sample Data Preview")
    st.write(synthetic_df.head())
    st.download_button(
        label="Download Synthetic Sample CSV",
        data=synthetic_df.to_csv(),
        file_name="synthetic_sample.csv",
        mime="text/csv"
    )
    try:
        sequences = synthetic_df[feature_names].values
        sequences = scaler.transform(sequences)
        sequences = sequences.reshape(-1, sequence_length, n_features)
        # NOTE(review): assumes forecast() returns, per input sequence, 13
        # predictions and 13 uncertainties — confirm against inference.py.
        predictions, uncertainties = forecast(model, scaler, sequences)
        forecast_dates = [end_date + timedelta(days=x*7) for x in range(1, 14)]
        df_predictions = pd.DataFrame({
            'Date': forecast_dates,
            'Predicted Sales ($)': predictions[0],
            'Uncertainty ($)': uncertainties[0]
        })
        st.subheader("Forecast Results")
        st.table(df_predictions)
        fig = px.line(df_predictions, x='Date', y='Predicted Sales ($)', title='13-Week Sales Forecast')
        fig.add_scatter(
            x=df_predictions['Date'],
            y=df_predictions['Predicted Sales ($)'] + df_predictions['Uncertainty ($)'],
            mode='lines', name='Upper Bound', line=dict(dash='dash', color='red')
        )
        # 'tonexty' shades the area between this trace and the upper bound
        # added just before it.
        fig.add_scatter(
            x=df_predictions['Date'],
            y=df_predictions['Predicted Sales ($)'] - df_predictions['Uncertainty ($)'],
            mode='lines', name='Lower Bound', line=dict(dash='dash', color='red'),
            fill='tonexty', fillcolor='rgba(255, 0, 0, 0.1)'
        )
        st.plotly_chart(fig)
    except Exception as e:
        st.error(f"Error generating forecast: {str(e)}")
# Manual CSV upload


def _find_csv_problems(data, feature_names, expected_rows):
    """List the reasons an uploaded frame cannot be used as model input.

    Args:
        data: DataFrame parsed from the uploaded CSV.
        feature_names: Columns the model requires.
        expected_rows: Exact number of rows the frame must have.

    Returns:
        A list of human-readable problem descriptions; empty when the frame
        has the right row count, all required columns, and only numeric,
        non-missing values in those columns.
    """
    problems = []
    if len(data) != expected_rows:
        problems.append(f"found {len(data)} rows")
    missing = [name for name in feature_names if name not in data.columns]
    if missing:
        problems.append(f"missing columns: {', '.join(missing)}")
    # Only columns that exist can be checked for bad values. Coercion turns
    # unparseable cells into NaN so both cases are caught by one test.
    unusable = [
        name for name in feature_names
        if name in data.columns
        and pd.to_numeric(data[name], errors="coerce").isna().any()
    ]
    if unusable:
        problems.append(f"missing or non-numeric values in: {', '.join(unusable)}")
    return problems


st.header("Upload Your Own Data")
expected_rows = 21  # rows per input sequence, matching the reshape below
st.write(f"Upload a CSV with {expected_rows} timesteps and {len(feature_names)} features.")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
if uploaded_file is not None:
    try:
        data = pd.read_csv(uploaded_file)
        problems = _find_csv_problems(data, feature_names, expected_rows)
        if problems:
            st.error(
                f"Invalid CSV. Ensure {expected_rows} rows and columns: "
                f"{', '.join(feature_names)}. Problems found: {'; '.join(problems)}."
            )
        else:
            # Extra columns in the upload are ignored; only the model's
            # features are selected, in the model's order.
            sequences = data[feature_names].to_numpy(dtype=float)
            sequences = scaler.transform(sequences)
            sequences = sequences.reshape(-1, expected_rows, len(feature_names))
            # NOTE(review): assumes forecast() returns, per input sequence, 13
            # predictions and 13 uncertainties — confirm against inference.py.
            predictions, uncertainties = forecast(model, scaler, sequences)
            df_predictions = pd.DataFrame({
                'Week': range(1, 14),
                'Predicted Sales ($)': predictions[0],
                'Uncertainty ($)': uncertainties[0]
            })
            st.subheader("Forecast Results")
            st.table(df_predictions)
            fig = px.line(df_predictions, x='Week', y='Predicted Sales ($)', title='13-Week Sales Forecast')
            fig.add_scatter(
                x=df_predictions['Week'],
                y=df_predictions['Predicted Sales ($)'] + df_predictions['Uncertainty ($)'],
                mode='lines', name='Upper Bound', line=dict(dash='dash', color='red')
            )
            # 'tonexty' shades the area between this trace and the upper
            # bound added just before it.
            fig.add_scatter(
                x=df_predictions['Week'],
                y=df_predictions['Predicted Sales ($)'] - df_predictions['Uncertainty ($)'],
                mode='lines', name='Lower Bound', line=dict(dash='dash', color='red'),
                fill='tonexty', fillcolor='rgba(255, 0, 0, 0.1)'
            )
            st.plotly_chart(fig)
    except Exception as e:
        st.error(f"Error processing CSV or making prediction: {str(e)}")