Spaces:

sukhmani1303
/

store-sales-ts-forecasting

Running

App Files Files Community

store-sales-ts-forecasting / app.py

sukhmani1303

Upload app.py with huggingface_hub

d49ff4a verified 6 months ago

raw

history blame

7.03 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import os
	import plotly.express as px
	from io import StringIO
	from datetime import datetime, timedelta

	# Debug: report, for each artifact the app relies on, whether a file with
	# that name exists relative to the current working directory.
	st.write("Debug: Checking file paths...")
	files_to_check = [
	    "inference.py",
	    "new_best_improved_model.pth",
	    "scaler.pkl",
	    "feature_names.json",
	]
	for artifact in files_to_check:
	    if not os.path.exists(artifact):
	        # A missing file is only reported here; execution continues.
	        st.error(f"{artifact} not found")
	        continue
	    st.write(f"{artifact} found")

	# The inference helpers are imported lazily inside a guard so that an
	# import failure is shown in the page instead of as a raw traceback;
	# st.stop() then ends this script run because nothing below can work
	# without them.
	try:
	    from inference import load_model_and_artifacts, forecast
	except Exception as import_error:
	    st.error(f"Error importing inference: {import_error}")
	    st.stop()

	st.title("Store Sales Forecasting")

	# Load the model, the scaler and the ordered feature-name list used by
	# every section below. On failure the error is shown and the script run
	# is stopped, so later code can rely on all three names being bound.
	try:
	    model, scaler, feature_names = load_model_and_artifacts()
	except Exception as load_error:
	    st.error(f"Error loading model or artifacts: {load_error}")
	    st.stop()
	else:
	    st.success("Model and artifacts loaded successfully")

	# Display model metrics
	# NOTE: these figures are constants embedded in the script; they are not
	# computed from the loaded model at runtime.
	st.header("Model Performance Metrics")
	metrics = dict(MAE=710.75, RMSE=1108.51, MAPE=7.14, R2=0.8633)
	metric_lines = (
	    f"- MAE: ${metrics['MAE']:.2f}",
	    f"- RMSE: ${metrics['RMSE']:.2f}",
	    f"- MAPE: {metrics['MAPE']:.2f}%",
	    f"- R² Score: {metrics['R2']:.4f}",
	)
	# One st.write call per metric, in the order listed above.
	for metric_line in metric_lines:
	    st.write(metric_line)

	# Synthetic data generation
	#
	# Streamlit re-executes this script on every widget interaction, and
	# st.button() returns True only during the run triggered by its own click.
	# The generated sample and its forecast are therefore stored in
	# st.session_state and rendered from there; otherwise clicking the download
	# button would rerun the script and make the preview, the download button
	# and the forecast disappear.
	st.header("Test with Synthetic Data")
	sequence_length = 21
	n_features = len(feature_names)

	if st.button("Generate Synthetic Sample Data"):
	    np.random.seed(42)
	    names = list(feature_names)
	    synthetic_data = np.zeros((sequence_length, n_features))
	    phase = np.linspace(0, 2 * np.pi, sequence_length)

	    # Lag features are shifted copies of the sales series, so locate that
	    # column by name (rather than assuming it is column 0) and fill it
	    # before any lag feature is derived from it.
	    sales_idx = names.index("sales") if "sales" in names else None
	    if sales_idx is not None:
	        # Mean=8954.97, std=3307.49
	        synthetic_data[:, sales_idx] = np.random.normal(8955, 3307, sequence_length)

	    # Generate realistic data based on training sales range (~$3,000–19,000).
	    # Branch order matters: exact names are tested before substring matches.
	    # Features matched by no branch keep their zero initialisation.
	    for i, feature in enumerate(names):
	        if feature == "sales":
	            continue  # already generated above
	        if feature == "onpromotion":
	            synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.8, 0.2])
	        elif feature in ("dayofweek_sin", "month_sin"):
	            synthetic_data[:, i] = np.sin(phase)
	        elif feature in ("dayofweek_cos", "month_cos"):
	            # Cosine, so each sin/cos pair describes one point on the unit circle.
	            synthetic_data[:, i] = np.cos(phase)
	        elif feature == "trend":
	            synthetic_data[:, i] = np.linspace(0, sequence_length, sequence_length)
	        elif feature == "is_weekend":
	            synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.7, 0.3])
	        elif feature == "quarter":
	            synthetic_data[:, i] = np.random.choice([1, 2, 3, 4], sequence_length)
	        elif "lag" in feature:
	            # The lag length is the trailing "_<n>" of the feature name. When
	            # it is not a number, or there is no sales column to shift, the
	            # column stays at zero instead of raising.
	            lag_suffix = feature.split('_')[-1]
	            if sales_idx is not None and lag_suffix.isdigit():
	                # np.roll wraps around: the first <n> rows receive values
	                # from the end of the series.
	                synthetic_data[:, i] = np.roll(synthetic_data[:, sales_idx], int(lag_suffix))
	        elif "ma" in feature or "std" in feature:
	            synthetic_data[:, i] = np.random.normal(8955, 1000, sequence_length)
	        elif "ratio" in feature:
	            synthetic_data[:, i] = np.random.normal(1, 0.2, sequence_length)
	        elif "promo" in feature:
	            synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.8, 0.2])
	        elif feature == "dcoilwtico":
	            synthetic_data[:, i] = np.random.normal(80, 10, sequence_length)
	        elif feature == "is_holiday":
	            synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.9, 0.1])

	    synthetic_df = pd.DataFrame(synthetic_data, columns=feature_names)
	    # Index the rows by the sequence_length consecutive days ending today.
	    end_date = datetime.now().date()
	    dates = [end_date - timedelta(days=x) for x in range(sequence_length-1, -1, -1)]
	    synthetic_df.index = dates
	    st.session_state["synthetic_df"] = synthetic_df

	    # Run the forecast once per generation and cache either the result
	    # table or the error text, so reruns redisplay the same numbers.
	    try:
	        sequences = synthetic_df[feature_names].values
	        sequences = scaler.transform(sequences)
	        sequences = sequences.reshape(-1, sequence_length, n_features)
	        predictions, uncertainties = forecast(model, scaler, sequences)

	        # One forecast step per week after end_date; the number of steps is
	        # taken from the forecast itself rather than assumed to be 13.
	        horizon = len(predictions[0])
	        forecast_dates = [end_date + timedelta(days=x*7) for x in range(1, horizon + 1)]
	        st.session_state["synthetic_forecast"] = pd.DataFrame({
	            'Date': forecast_dates,
	            'Predicted Sales ($)': predictions[0],
	            'Uncertainty ($)': uncertainties[0]
	        })
	        st.session_state["synthetic_forecast_error"] = None
	    except Exception as e:
	        st.session_state["synthetic_forecast"] = None
	        st.session_state["synthetic_forecast_error"] = str(e)

	if "synthetic_df" in st.session_state:
	    synthetic_df = st.session_state["synthetic_df"]

	    st.subheader("Synthetic Sample Data Preview")
	    st.write(synthetic_df.head())

	    st.download_button(
	        label="Download Synthetic Sample CSV",
	        data=synthetic_df.to_csv(),
	        file_name="synthetic_sample.csv",
	        mime="text/csv"
	    )

	    forecast_error = st.session_state.get("synthetic_forecast_error")
	    df_predictions = st.session_state.get("synthetic_forecast")
	    if forecast_error is not None:
	        st.error(f"Error generating forecast: {forecast_error}")
	    elif df_predictions is not None:
	        try:
	            st.subheader("Forecast Results")
	            st.table(df_predictions)

	            fig = px.line(
	                df_predictions, x='Date', y='Predicted Sales ($)',
	                title=f'{len(df_predictions)}-Week Sales Forecast'
	            )
	            fig.add_scatter(
	                x=df_predictions['Date'],
	                y=df_predictions['Predicted Sales ($)'] + df_predictions['Uncertainty ($)'],
	                mode='lines', name='Upper Bound', line=dict(dash='dash', color='red')
	            )
	            # 'tonexty' shades the area between this trace and the one
	            # added just before it (the upper bound).
	            fig.add_scatter(
	                x=df_predictions['Date'],
	                y=df_predictions['Predicted Sales ($)'] - df_predictions['Uncertainty ($)'],
	                mode='lines', name='Lower Bound', line=dict(dash='dash', color='red'),
	                fill='tonexty', fillcolor='rgba(255, 0, 0, 0.1)'
	            )
	            st.plotly_chart(fig)
	        except Exception as e:
	            st.error(f"Error generating forecast: {str(e)}")

	# Manual CSV upload
	#
	# Accepts a CSV that contains every column listed in feature_names and
	# exactly expected_rows rows, scales it with the loaded scaler, runs the
	# forecast and shows the result as a table and a chart with an
	# uncertainty band. Extra columns in the file are ignored.
	expected_rows = 21
	st.header("Upload Your Own Data")
	# The feature count is taken from the loaded artifacts so the help text
	# cannot drift from what the validation below actually requires.
	st.write(f"Upload a CSV with {expected_rows} timesteps and {len(feature_names)} features.")
	uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

	if uploaded_file is not None:
	    try:
	        data = pd.read_csv(uploaded_file)
	        if set(feature_names).issubset(data.columns) and len(data) == expected_rows:
	            # Reject text or missing cells up front with a clear message
	            # rather than passing them on to the scaler and the model.
	            features = data[feature_names].apply(pd.to_numeric, errors="coerce")
	            if features.isna().any().any():
	                raise ValueError("feature columns must contain only numeric, non-missing values")

	            sequences = features.values
	            sequences = scaler.transform(sequences)
	            sequences = sequences.reshape(-1, expected_rows, len(feature_names))
	            predictions, uncertainties = forecast(model, scaler, sequences)

	            # Number the weeks from the forecast length instead of
	            # assuming exactly 13 steps.
	            horizon = len(predictions[0])
	            df_predictions = pd.DataFrame({
	                'Week': range(1, horizon + 1),
	                'Predicted Sales ($)': predictions[0],
	                'Uncertainty ($)': uncertainties[0]
	            })
	            st.subheader("Forecast Results")
	            st.table(df_predictions)

	            fig = px.line(
	                df_predictions, x='Week', y='Predicted Sales ($)',
	                title=f'{horizon}-Week Sales Forecast'
	            )
	            fig.add_scatter(
	                x=df_predictions['Week'],
	                y=df_predictions['Predicted Sales ($)'] + df_predictions['Uncertainty ($)'],
	                mode='lines', name='Upper Bound', line=dict(dash='dash', color='red')
	            )
	            # 'tonexty' shades the area between this trace and the one
	            # added just before it (the upper bound).
	            fig.add_scatter(
	                x=df_predictions['Week'],
	                y=df_predictions['Predicted Sales ($)'] - df_predictions['Uncertainty ($)'],
	                mode='lines', name='Lower Bound', line=dict(dash='dash', color='red'),
	                fill='tonexty', fillcolor='rgba(255, 0, 0, 0.1)'
	            )
	            st.plotly_chart(fig)
	        else:
	            st.error(f"Invalid CSV. Ensure {expected_rows} rows and columns: {', '.join(feature_names)}")
	    except Exception as e:
	        st.error(f"Error processing CSV or making prediction: {str(e)}")