sukhmani1303 committed
Commit ada9349 · verified · 1 Parent(s): 5cdc794

Update app.py

Files changed (1)
  1. app.py +278 -13
app.py CHANGED
@@ -1,3 +1,236 @@
+ # import streamlit as st
+ # import pandas as pd
+ # import numpy as np
+ # import plotly.express as px
+ # from datetime import datetime, timedelta
+ # from io import StringIO
+ # import os
+ # import json
+
+ # # Debug: Verify file paths
+ # st.write("Debug: Checking file paths...")
+ # files_to_check = ["new_best_improved_model.pth", "scaler.pkl", "feature_names.json", "model_config.json"]
+ # for file in files_to_check:
+ # st.write(f"{file}: {'Found' if os.path.exists(file) else 'Missing'}")
+
+ # try:
+ # from inference import load_model_and_artifacts, predict
+ # except Exception as e:
+ # st.error(f"Error importing inference: {str(e)}")
+ # st.stop()
+
+ # st.title("Store Sales Time Series Forecasting")
+ # st.markdown("Forecast 13-week store sales using an LSTM model trained on Kaggle Store Sales data.")
+
+ # # Load model and artifacts
+ # try:
+ # st.write("Debug: Loading model and artifacts...")
+ # model, scaler, feature_names, config = load_model_and_artifacts()
+ # st.success("Model and artifacts loaded successfully")
+ # except Exception as e:
+ # st.error(f"Error loading model or artifacts: {str(e)}")
+ # st.stop()
+
+ # # Display model metrics
+ # st.header("Model Performance Metrics")
+ # metrics = {
+ # "MAE": 710.75,
+ # "RMSE": 1108.36,
+ # "MAPE": 7.16,
+ # "R2": 0.8633
+ # }
+ # st.markdown(f"""
+ # - **MAE**: ${metrics['MAE']:.2f}
+ # - **RMSE**: ${metrics['RMSE']:.2f}
+ # - **MAPE**: {metrics['MAPE']:.2f}%
+ # - **R² Score**: {metrics['R2']:.4f}
+ # """)
+
+ # # Model architecture summary
+ # st.header("Model Architecture")
+ # st.markdown(f"""
+ # - **Input Size**: {config['input_size']} features
+ # - **Hidden Size**: {config['hidden_size']}
+ # - **Number of Layers**: {config['num_layers']}
+ # - **Forecast Horizon**: {config['forecast_horizon']} weeks
+ # - **Dropout**: {config['dropout']}
+ # - **Attention**: {config['has_attention']}
+ # - **Input Projection**: {config['has_input_projection']}
+ # - **Parameters**: 227,441
+ # """)
+
+ # # Synthetic data generation
+ # st.header("Generate Synthetic Test Data")
+ # st.markdown("Create a sample dataset with 21 timesteps matching the training data distribution (sales ~$3,000–19,000).")
+ # if st.button("Generate Synthetic Data"):
+ # np.random.seed(42)
+ # sequence_length = 21
+ # n_features = len(feature_names)
+ # synthetic_data = np.zeros((sequence_length, n_features))
+
+ # # Generate features based on training data characteristics
+ # for i, feature in enumerate(feature_names):
+ # if feature == "sales":
+ # synthetic_data[:, i] = np.random.normal(8954.97, 3307.49, sequence_length) # Mean, std from verbose
+ # elif feature == "onpromotion":
+ # synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.8, 0.2])
+ # elif feature in ["dayofweek_sin", "dayofweek_cos"]:
+ # synthetic_data[:, i] = np.sin(np.linspace(0, 2 * np.pi, sequence_length)) if "sin" in feature else np.cos(np.linspace(0, 2 * np.pi, sequence_length))
+ # elif feature in ["month_sin", "month_cos"]:
+ # synthetic_data[:, i] = np.sin(np.linspace(0, 2 * np.pi * 12 / sequence_length, sequence_length)) if "sin" in feature else np.cos(np.linspace(0, 2 * np.pi * 12 / sequence_length, sequence_length))
+ # elif feature == "trend":
+ # synthetic_data[:, i] = np.linspace(0, sequence_length, sequence_length)
+ # elif feature == "is_weekend":
+ # synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.7, 0.3])
+ # elif feature == "quarter":
+ # synthetic_data[:, i] = np.random.choice([1, 2, 3, 4], sequence_length)
+ # elif "lag" in feature:
+ # lag = int(feature.split('_')[-1])
+ # synthetic_data[:, i] = np.roll(synthetic_data[:, 0], lag)
+ # if lag > 0:
+ # synthetic_data[:lag, i] = synthetic_data[:lag, 0]
+ # elif "ma" in feature:
+ # window = int(feature.split('_')[-1])
+ # synthetic_data[:, i] = pd.Series(synthetic_data[:, 0]).rolling(window=window, min_periods=1).mean().values
+ # elif "ratio" in feature:
+ # window = int(feature.split('_')[-1])
+ # ma = pd.Series(synthetic_data[:, 0]).rolling(window=window, min_periods=1).mean().values
+ # synthetic_data[:, i] = synthetic_data[:, 0] / (ma + 1e-8)
+ # elif "promo" in feature:
+ # synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.8, 0.2])
+ # elif feature == "dcoilwtico":
+ # synthetic_data[:, i] = np.random.normal(80, 10, sequence_length)
+ # elif feature == "is_holiday":
+ # synthetic_data[:, i] = np.random.choice([0, 1], sequence_length, p=[0.9, 0.1])
+
+ # # Create DataFrame with dates
+ # synthetic_df = pd.DataFrame(synthetic_data, columns=feature_names)
+ # end_date = datetime.now().date()
+ # dates = [end_date - timedelta(days=x) for x in range(sequence_length-1, -1, -1)]
+ # synthetic_df['Date'] = dates
+
+ # # Store in session state
+ # st.session_state["synthetic_df"] = synthetic_df
+
+ # st.subheader("Synthetic Data Preview")
+ # st.dataframe(synthetic_df.head())
+
+ # # Download synthetic data
+ # csv_buffer = StringIO()
+ # synthetic_df.to_csv(csv_buffer, index=False)
+ # st.download_button(
+ # label="Download Synthetic Data CSV",
+ # data=csv_buffer.getvalue(),
+ # file_name="synthetic_sales_data.csv",
+ # mime="text/csv"
+ # )
+
+ # # Generate forecast
+ # try:
+ # sequences = synthetic_df[feature_names].values.reshape(1, sequence_length, n_features)
+ # sequences_scaled = scaler.transform(sequences.reshape(-1, n_features)).reshape(1, sequence_length, n_features)
+ # predictions, uncertainties = predict(model, scaler, sequences_scaled)
+
+ # # Validate output shapes
+ # if predictions.shape != (1, 13) or uncertainties.shape != (1, 13):
+ # raise ValueError(f"Expected predictions and uncertainties of shape (1, 13), got {predictions.shape} and {uncertainties.shape}")
+
+ # # Create forecast DataFrame
+ # forecast_dates = [end_date + timedelta(days=x*7) for x in range(1, 14)]
+ # forecast_df = pd.DataFrame({
+ # 'Date': forecast_dates,
+ # 'Predicted Sales ($)': predictions[0],
+ # 'Uncertainty ($)': uncertainties[0]
+ # })
+
+ # st.subheader("13-Week Forecast")
+ # st.dataframe(forecast_df)
+
+ # # Plot forecast
+ # fig = px.line(forecast_df, x='Date', y='Predicted Sales ($)', title='13-Week Sales Forecast')
+ # fig.add_scatter(
+ # x=forecast_df['Date'],
+ # y=forecast_df['Predicted Sales ($)'] + forecast_df['Uncertainty ($)'],
+ # mode='lines', name='Upper Bound', line=dict(dash='dash', color='green')
+ # )
+ # fig.add_scatter(
+ # x=forecast_df['Date'],
+ # y=forecast_df['Predicted Sales ($)'] - forecast_df['Uncertainty ($)'],
+ # mode='lines', name='Lower Bound', line=dict(dash='dash', color='green'),
+ # fill='tonexty', fillcolor='rgba(0, 255, 0, 0.1)'
+ # )
+ # st.plotly_chart(fig)
+
+ # # Download forecast
+ # csv_buffer = StringIO()
+ # forecast_df.to_csv(csv_buffer, index=False)
+ # st.download_button(
+ # label="Download Forecast CSV",
+ # data=csv_buffer.getvalue(),
+ # file_name="forecast_results.csv",
+ # mime="text/csv"
+ # )
+ # except Exception as e:
+ # st.error(f"Error generating forecast: {str(e)}")
+
+ # # CSV upload for custom predictions
+ # st.header("Upload Custom Data")
+ # st.markdown("Upload a CSV with 21 timesteps and 20 features matching the feature names and format of the synthetic data.")
+ # uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+
+ # if uploaded_file is not None:
+ # try:
+ # data = pd.read_csv(uploaded_file)
+ # if set(feature_names).issubset(data.columns) and len(data) == 21:
+ # sequences = data[feature_names].values.reshape(1, 21, len(feature_names))
+ # sequences_scaled = scaler.transform(sequences.reshape(-1, len(feature_names))).reshape(1, 21, len(feature_names))
+ # predictions, uncertainties = predict(model, scaler, sequences_scaled)
+
+ # # Validate output shapes
+ # if predictions.shape != (1, 13) or uncertainties.shape != (1, 13):
+ # raise ValueError(f"Expected predictions and uncertainties of shape (1, 13), got {predictions.shape} and {uncertainties.shape}")
+
+ # # Create forecast DataFrame
+ # forecast_df = pd.DataFrame({
+ # 'Week': range(1, 14),
+ # 'Predicted Sales ($)': predictions[0],
+ # 'Uncertainty ($)': uncertainties[0]
+ # })
+
+ # st.subheader("13-Week Forecast")
+ # st.dataframe(forecast_df)
+
+ # # Plot forecast
+ # fig = px.line(forecast_df, x='Week', y='Predicted Sales ($)', title='13-Week Sales Forecast')
+ # fig.add_scatter(
+ # x=forecast_df['Week'],
+ # y=forecast_df['Predicted Sales ($)'] + forecast_df['Uncertainty ($)'],
+ # mode='lines', name='Upper Bound', line=dict(dash='dash', color='green')
+ # )
+ # fig.add_scatter(
+ # x=forecast_df['Week'],
+ # y=forecast_df['Predicted Sales ($)'] - forecast_df['Uncertainty ($)'],
+ # mode='lines', name='Lower Bound', line=dict(dash='dash', color='green'),
+ # fill='tonexty', fillcolor='rgba(0, 255, 0, 0.1)'
+ # )
+ # st.plotly_chart(fig)
+
+ # # Download forecast
+ # csv_buffer = StringIO()
+ # forecast_df.to_csv(csv_buffer, index=False)
+ # st.download_button(
+ # label="Download Forecast CSV",
+ # data=csv_buffer.getvalue(),
+ # file_name="custom_forecast_results.csv",
+ # mime="text/csv"
+ # )
+ # else:
+ # st.error(f"Invalid CSV. Expected 21 rows and columns including: {', '.join(feature_names)}")
+ # except Exception as e:
+ # st.error(f"Error processing CSV or generating forecast: {str(e)}")
+
+
+
 import streamlit as st
 import pandas as pd
 import numpy as np
@@ -14,7 +247,7 @@ for file in files_to_check:
 st.write(f"{file}: {'Found' if os.path.exists(file) else 'Missing'}")
 
 try:
- from inference import load_model_and_artifacts, predict
+ from inference import load_model_and_artifacts, predict, derive_features
 except Exception as e:
 st.error(f"Error importing inference: {str(e)}")
 st.stop()
@@ -113,11 +346,11 @@ if st.button("Generate Synthetic Data"):
 st.session_state["synthetic_df"] = synthetic_df
 
 st.subheader("Synthetic Data Preview")
- st.dataframe(synthetic_df.head())
+ st.dataframe(synthetic_df[["Date", "sales", "onpromotion", "dcoilwtico", "is_holiday"]].head())
 
 # Download synthetic data
 csv_buffer = StringIO()
- synthetic_df.to_csv(csv_buffer, index=False)
+ synthetic_df.rename(columns={"Date": "date"})[["date", "sales", "onpromotion", "dcoilwtico", "is_holiday"]].to_csv(csv_buffer, index=False)
 st.download_button(
 label="Download Synthetic Data CSV",
 data=csv_buffer.getvalue(),
@@ -173,16 +406,46 @@ if st.button("Generate Synthetic Data"):
 except Exception as e:
 st.error(f"Error generating forecast: {str(e)}")
 
- # CSV upload for custom predictions
+ # Sample CSV for user guidance
 st.header("Upload Custom Data")
- st.markdown("Upload a CSV with 21 timesteps and 20 features matching the feature names and format of the synthetic data.")
+ st.markdown("""
+ Upload a CSV with 21 timesteps containing the following columns:
+ - **date**: Date in YYYY-MM-DD format (e.g., 2025-06-22)
+ - **sales**: Weekly sales in USD (e.g., 3000 to 19372)
+ - **onpromotion**: 0 or 1 indicating if items are on promotion
+ - **dcoilwtico**: Oil price (e.g., 70 to 90)
+ - **is_holiday**: 0 or 1 indicating if the day is a holiday
+
+ The remaining features will be derived automatically. Download a sample CSV below to see the expected format.
+ """)
+
+ # Generate sample CSV
+ sample_data = pd.DataFrame({
+ "date": ["2025-06-22", "2025-06-15", "2025-06-08"],
+ "sales": [8954.97, 9500.00, 8000.00],
+ "onpromotion": [0, 1, 0],
+ "dcoilwtico": [80.0, 82.5, 78.0],
+ "is_holiday": [0, 0, 1]
+ })
+ csv_buffer = StringIO()
+ sample_data.to_csv(csv_buffer, index=False)
+ st.download_button(
+ label="Download Sample CSV",
+ data=csv_buffer.getvalue(),
+ file_name="sample_input.csv",
+ mime="text/csv"
+ )
+
+ # CSV upload for custom predictions
 uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
 
 if uploaded_file is not None:
 try:
 data = pd.read_csv(uploaded_file)
- if set(feature_names).issubset(data.columns) and len(data) == 21:
- sequences = data[feature_names].values.reshape(1, 21, len(feature_names))
+ required_columns = ["date", "sales", "onpromotion", "dcoilwtico", "is_holiday"]
+ if set(required_columns).issubset(data.columns) and len(data) == 21:
+ # Derive full feature set
+ sequences = derive_features(data, feature_names, sequence_length=21)
 sequences_scaled = scaler.transform(sequences.reshape(-1, len(feature_names))).reshape(1, 21, len(feature_names))
 predictions, uncertainties = predict(model, scaler, sequences_scaled)
 
@@ -191,8 +454,10 @@ if uploaded_file is not None:
 raise ValueError(f"Expected predictions and uncertainties of shape (1, 13), got {predictions.shape} and {uncertainties.shape}")
 
 # Create forecast DataFrame
+ end_date = pd.to_datetime(data["date"].iloc[0]).date()
+ forecast_dates = [end_date + timedelta(days=x*7) for x in range(1, 14)]
 forecast_df = pd.DataFrame({
- 'Week': range(1, 14),
+ 'Date': forecast_dates,
 'Predicted Sales ($)': predictions[0],
 'Uncertainty ($)': uncertainties[0]
 })
@@ -201,14 +466,14 @@ if uploaded_file is not None:
 st.dataframe(forecast_df)
 
 # Plot forecast
- fig = px.line(forecast_df, x='Week', y='Predicted Sales ($)', title='13-Week Sales Forecast')
+ fig = px.line(forecast_df, x='Date', y='Predicted Sales ($)', title='13-Week Sales Forecast')
 fig.add_scatter(
- x=forecast_df['Week'],
+ x=forecast_df['Date'],
 y=forecast_df['Predicted Sales ($)'] + forecast_df['Uncertainty ($)'],
 mode='lines', name='Upper Bound', line=dict(dash='dash', color='green')
 )
 fig.add_scatter(
- x=forecast_df['Week'],
+ x=forecast_df['Date'],
 y=forecast_df['Predicted Sales ($)'] - forecast_df['Uncertainty ($)'],
 mode='lines', name='Lower Bound', line=dict(dash='dash', color='green'),
 fill='tonexty', fillcolor='rgba(0, 255, 0, 0.1)'
@@ -225,6 +490,6 @@ if uploaded_file is not None:
 mime="text/csv"
 )
 else:
- st.error(f"Invalid CSV. Expected 21 rows and columns including: {', '.join(feature_names)}")
+ st.error(f"Invalid CSV. Expected 21 rows and columns: {', '.join(required_columns)}")
 except Exception as e:
- st.error(f"Error processing CSV or generating forecast: {str(e)}")
+ st.error(f"Error processing CSV or generating forecast: {str(e)}")