|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import os |
|
|
import plotly.express as px |
|
|
from io import StringIO |
|
|
from datetime import datetime, timedelta |
|
|
|
|
|
|
|
|
# Startup diagnostic: report, one line per file, whether each artifact the app
# expects exists at the given relative path (resolved against the current
# working directory by os.path.exists).
st.write("Debug: Checking file paths...")
files_to_check = [
    "inference.py",
    "new_best_improved_model.pth",
    "scaler.pkl",
    "feature_names.json",
]
for artifact in files_to_check:
    # Missing files are only reported here; the script keeps running.
    if not os.path.exists(artifact):
        st.error(f"{artifact} not found")
        continue
    st.write(f"{artifact} found")
|
|
|
|
|
# Import the project-local forecasting helpers. Any failure while importing
# (not just ImportError) is shown in the UI and the script run is halted,
# because nothing below can work without these two names.
try:
    from inference import load_model_and_artifacts, forecast
except Exception as import_failure:
    failure_message = f"Error importing inference: {str(import_failure)}"
    st.error(failure_message)
    st.stop()
|
|
|
|
|
st.title("Store Sales Forecasting")


def _load_artifacts_once():
    """Return the ``(model, scaler, feature_names)`` tuple from ``load_model_and_artifacts``."""
    return load_model_and_artifacts()


# Streamlit re-executes this script on every widget interaction. Without
# caching, the model and scaler would be reloaded on each rerun. Use
# st.cache_resource when the installed Streamlit provides it; otherwise fall
# back to the plain (uncached) call so older versions keep working.
if hasattr(st, "cache_resource"):
    _load_artifacts_once = st.cache_resource(_load_artifacts_once)

try:
    model, scaler, feature_names = _load_artifacts_once()
    st.success("Model and artifacts loaded successfully")
except Exception as e:
    # Nothing below can run without the model, so report and halt this run.
    st.error(f"Error loading model or artifacts: {str(e)}")
    st.stop()
|
|
|
|
|
|
|
|
st.header("Model Performance Metrics")

# These figures are hard-coded constants; nothing in this script computes them.
metrics = dict(MAE=710.75, RMSE=1108.51, MAPE=7.14, R2=0.8633)

# One markdown bullet per metric, each written as its own element, in a fixed
# order: MAE, RMSE, MAPE, R².
metric_lines = [
    f"- **MAE**: ${metrics['MAE']:.2f}",
    f"- **RMSE**: ${metrics['RMSE']:.2f}",
    f"- **MAPE**: {metrics['MAPE']:.2f}%",
    f"- **R² Score**: {metrics['R2']:.4f}",
]
for metric_line in metric_lines:
    st.write(metric_line)
|
|
|
|
|
|
|
|
st.header("Test with Synthetic Data")

# Defined at module level (not inside the button branch) so that the stored
# sample can be re-rendered on reruns where the button itself reads False.
sequence_length = 21
n_features = len(feature_names)


def _build_synthetic_frame(names, n_steps):
    """Build a reproducible, date-indexed frame of synthetic feature values.

    Args:
        names: Feature names, one per output column, in column order.
        n_steps: Number of rows to generate.

    Returns:
        DataFrame with ``n_steps`` rows, indexed by consecutive calendar dates
        ending today, and one column per entry of ``names``. A feature that
        matches none of the rules below is left at zero.

    Raises:
        ValueError: If a feature containing "lag" (in any column but the
            first) does not end in ``_<integer>``.
    """
    np.random.seed(42)  # fixed seed: every click produces the same sample
    values = np.zeros((n_steps, len(names)))

    # Exact-name rules are tested before the substring rules.
    for col, feature in enumerate(names):
        if feature == "sales":
            values[:, col] = np.random.normal(8955, 3307, n_steps)
        elif feature == "onpromotion":
            values[:, col] = np.random.choice([0, 1], n_steps, p=[0.8, 0.2])
        elif feature in ("dayofweek_sin", "dayofweek_cos", "month_sin", "month_cos"):
            # NOTE(review): all four cyclical features receive the same single
            # sine period, including the *_cos ones - confirm this is intended.
            values[:, col] = np.sin(np.linspace(0, 2 * np.pi, n_steps))
        elif feature == "trend":
            values[:, col] = np.linspace(0, n_steps, n_steps)
        elif feature == "is_weekend":
            values[:, col] = np.random.choice([0, 1], n_steps, p=[0.7, 0.3])
        elif feature == "quarter":
            values[:, col] = np.random.choice([1, 2, 3, 4], n_steps)
        elif "lag" in feature:
            # The shift amount is the integer after the last underscore.
            # NOTE(review): this rolls column 0, which presumably holds
            # "sales" - confirm against the order of feature_names.
            # A lag feature in column 0 itself stays at zero.
            if col > 0:
                values[:, col] = np.roll(values[:, 0], int(feature.split('_')[-1]))
        elif "ma" in feature or "std" in feature:
            values[:, col] = np.random.normal(8955, 1000, n_steps)
        elif "ratio" in feature:
            values[:, col] = np.random.normal(1, 0.2, n_steps)
        elif "promo" in feature:
            values[:, col] = np.random.choice([0, 1], n_steps, p=[0.8, 0.2])
        elif feature == "dcoilwtico":
            values[:, col] = np.random.normal(80, 10, n_steps)
        elif feature == "is_holiday":
            values[:, col] = np.random.choice([0, 1], n_steps, p=[0.9, 0.1])

    frame = pd.DataFrame(values, columns=names)
    end_date = datetime.now().date()
    # Oldest date first, today last.
    frame.index = [end_date - timedelta(days=back) for back in range(n_steps - 1, -1, -1)]
    return frame


def _forecast_synthetic(frame):
    """Run the model on ``frame`` and return the forecast as a table.

    Args:
        frame: Date-indexed DataFrame containing every column in
            ``feature_names`` and ``sequence_length`` rows.

    Returns:
        DataFrame with columns 'Date', 'Predicted Sales ($)' and
        'Uncertainty ($)'; dates are 1..13 weeks after the last index entry.
    """
    # NOTE(review): the data is scaled here and the scaler is also handed to
    # forecast() - confirm forecast() does not scale its input again.
    scaled = scaler.transform(frame[feature_names].values)
    batch = scaled.reshape(-1, sequence_length, n_features)
    predictions, uncertainties = forecast(model, scaler, batch)

    last_day = frame.index[-1]
    return pd.DataFrame({
        'Date': [last_day + timedelta(days=week * 7) for week in range(1, 14)],
        'Predicted Sales ($)': predictions[0],
        'Uncertainty ($)': uncertainties[0],
    })


if st.button("Generate Synthetic Sample Data"):
    # Generate and forecast once per click, then keep both results in session
    # state. st.button is True only for the run triggered by the click; any
    # later rerun (for example pressing the download button) would otherwise
    # wipe the preview, the download button and the forecast.
    generated = _build_synthetic_frame(feature_names, sequence_length)
    st.session_state["synthetic_df"] = generated
    try:
        st.session_state["synthetic_forecast"] = _forecast_synthetic(generated)
        st.session_state["synthetic_forecast_error"] = None
    except Exception as e:
        st.session_state["synthetic_forecast"] = None
        st.session_state["synthetic_forecast_error"] = str(e)

if "synthetic_df" in st.session_state:
    synthetic_df = st.session_state["synthetic_df"]

    st.subheader("Synthetic Sample Data Preview")
    st.write(synthetic_df.head())

    # to_csv() without a target returns the CSV text directly.
    st.download_button(
        label="Download Synthetic Sample CSV",
        data=synthetic_df.to_csv(),
        file_name="synthetic_sample.csv",
        mime="text/csv"
    )

    forecast_error = st.session_state.get("synthetic_forecast_error")
    df_predictions = st.session_state.get("synthetic_forecast")
    if forecast_error is not None:
        st.error(f"Error generating forecast: {forecast_error}")
    elif df_predictions is not None:
        try:
            st.subheader("Forecast Results")
            st.table(df_predictions)

            fig = px.line(df_predictions, x='Date', y='Predicted Sales ($)', title='13-Week Sales Forecast')
            fig.add_scatter(
                x=df_predictions['Date'],
                y=df_predictions['Predicted Sales ($)'] + df_predictions['Uncertainty ($)'],
                mode='lines', name='Upper Bound', line=dict(dash='dash', color='red')
            )
            # 'tonexty' shades the area between this trace and the one added
            # just before it (the upper bound).
            fig.add_scatter(
                x=df_predictions['Date'],
                y=df_predictions['Predicted Sales ($)'] - df_predictions['Uncertainty ($)'],
                mode='lines', name='Lower Bound', line=dict(dash='dash', color='red'),
                fill='tonexty', fillcolor='rgba(255, 0, 0, 0.1)'
            )
            st.plotly_chart(fig)
        except Exception as e:
            st.error(f"Error generating forecast: {str(e)}")
|
|
|
|
|
|
|
|
st.header("Upload Your Own Data")

# Single source of truth for the required row count; the feature count shown
# to the user is derived from feature_names so the text cannot drift from the
# validation below.
expected_rows = 21
st.write(f"Upload a CSV with {expected_rows} timesteps and {len(feature_names)} features.")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    try:
        data = pd.read_csv(uploaded_file)
        missing_columns = [name for name in feature_names if name not in data.columns]

        if not missing_columns and len(data) == expected_rows:
            # Select the columns in feature_names order; extra columns in the
            # upload are ignored.
            # NOTE(review): the data is scaled here and the scaler is also
            # handed to forecast() - confirm forecast() does not scale again.
            sequences = data[feature_names].values
            sequences = scaler.transform(sequences)
            sequences = sequences.reshape(-1, expected_rows, len(feature_names))
            predictions, uncertainties = forecast(model, scaler, sequences)

            df_predictions = pd.DataFrame({
                'Week': range(1, 14),
                'Predicted Sales ($)': predictions[0],
                'Uncertainty ($)': uncertainties[0]
            })
            st.subheader("Forecast Results")
            st.table(df_predictions)

            fig = px.line(df_predictions, x='Week', y='Predicted Sales ($)', title='13-Week Sales Forecast')
            fig.add_scatter(
                x=df_predictions['Week'],
                y=df_predictions['Predicted Sales ($)'] + df_predictions['Uncertainty ($)'],
                mode='lines', name='Upper Bound', line=dict(dash='dash', color='red')
            )
            # 'tonexty' shades the area between this trace and the one added
            # just before it (the upper bound).
            fig.add_scatter(
                x=df_predictions['Week'],
                y=df_predictions['Predicted Sales ($)'] - df_predictions['Uncertainty ($)'],
                mode='lines', name='Lower Bound', line=dict(dash='dash', color='red'),
                fill='tonexty', fillcolor='rgba(255, 0, 0, 0.1)'
            )
            st.plotly_chart(fig)
        else:
            # Say what is actually wrong before restating the requirements.
            problems = []
            if missing_columns:
                problems.append(f"missing columns: {', '.join(missing_columns)}")
            if len(data) != expected_rows:
                problems.append(f"found {len(data)} rows")
            st.error(
                f"Invalid CSV ({'; '.join(problems)}). "
                f"Ensure {expected_rows} rows and columns: {', '.join(feature_names)}"
            )
    except Exception as e:
        st.error(f"Error processing CSV or making prediction: {str(e)}")