sukhmani1303 committed on
Commit
5cdc794
·
verified ·
1 Parent(s): 2ef9bc0

Update inference.py

Browse files
Files changed (1) hide show
  1. inference.py +144 -1
inference.py CHANGED
@@ -1,9 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import torch
2
  import torch.nn as nn
3
  import numpy as np
 
4
  import pickle
5
  import json
6
  import os
 
7
 
8
  class ImprovedCashFlowLSTM(nn.Module):
9
  def __init__(self, input_size, hidden_size=128, num_layers=2, forecast_horizon=13, dropout=0.2):
@@ -63,6 +164,48 @@ def load_model_and_artifacts(
63
  model.eval()
64
  return model, scaler, feature_names, config
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def predict(model, scaler, sequences):
67
  """Generate 13-week sales forecasts from input sequences."""
68
  device = torch.device("cpu")
@@ -93,4 +236,4 @@ def predict(model, scaler, sequences):
93
  uncertainties = np.repeat(uncertainties, rescaled.shape[1], axis=1) # Shape: (batch_size, 13)
94
  uncertainties = np.clip(uncertainties, 100, 1000) # Wider bounds to avoid constant clipping
95
 
96
- return rescaled, uncertainties
 
1
+ # import torch
2
+ # import torch.nn as nn
3
+ # import numpy as np
4
+ # import pickle
5
+ # import json
6
+ # import os
7
+
8
+ # class ImprovedCashFlowLSTM(nn.Module):
9
+ # def __init__(self, input_size, hidden_size=128, num_layers=2, forecast_horizon=13, dropout=0.2):
10
+ # super(ImprovedCashFlowLSTM, self).__init__()
11
+ # self.hidden_size = hidden_size
12
+ # self.num_layers = num_layers
13
+ # self.forecast_horizon = forecast_horizon
14
+ # self.lstm = nn.LSTM(
15
+ # input_size,
16
+ # hidden_size,
17
+ # num_layers,
18
+ # dropout=dropout if num_layers > 1 else 0,
19
+ # batch_first=True
20
+ # )
21
+ # self.output_layers = nn.Sequential(
22
+ # nn.Linear(hidden_size, hidden_size),
23
+ # nn.ReLU(),
24
+ # nn.Dropout(dropout),
25
+ # nn.Linear(hidden_size, forecast_horizon)
26
+ # )
27
+
28
+ # def forward(self, x):
29
+ # lstm_out, (hidden, cell) = self.lstm(x)
30
+ # last_hidden = lstm_out[:, -1, :]
31
+ # forecast = self.output_layers(last_hidden)
32
+ # return forecast
33
+
34
+ # def load_model_and_artifacts(
35
+ # model_path="new_best_improved_model.pth",
36
+ # scaler_path="scaler.pkl",
37
+ # feature_names_path="feature_names.json",
38
+ # config_path="model_config.json"
39
+ # ):
40
+ # """Load model, scaler, feature names, and configuration."""
41
+ # if not all(os.path.exists(path) for path in [model_path, scaler_path, feature_names_path, config_path]):
42
+ # missing = [path for path in [model_path, scaler_path, feature_names_path, config_path] if not os.path.exists(path)]
43
+ # raise FileNotFoundError(f"Missing files: {missing}")
44
+
45
+ # with open(config_path, "r") as f:
46
+ # config = json.load(f)
47
+
48
+ # with open(scaler_path, "rb") as f:
49
+ # scaler = pickle.load(f)
50
+
51
+ # with open(feature_names_path, "r") as f:
52
+ # feature_names = json.load(f)
53
+
54
+ # input_size = config["input_size"]
55
+ # model = ImprovedCashFlowLSTM(
56
+ # input_size=input_size,
57
+ # hidden_size=config["hidden_size"],
58
+ # num_layers=config["num_layers"],
59
+ # forecast_horizon=config["forecast_horizon"],
60
+ # dropout=config["dropout"]
61
+ # )
62
+ # model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
63
+ # model.eval()
64
+ # return model, scaler, feature_names, config
65
+
66
+ # def predict(model, scaler, sequences):
67
+ # """Generate 13-week sales forecasts from input sequences."""
68
+ # device = torch.device("cpu")
69
+ # model.to(device)
70
+
71
+ # # Validate input shape: (batch_size, sequence_length=21, n_features=20)
72
+ # if len(sequences.shape) != 3 or sequences.shape[1] != 21 or sequences.shape[2] != model.lstm.input_size:
73
+ # raise ValueError(f"Expected input shape (batch_size, 21, {model.lstm.input_size}), got {sequences.shape}")
74
+
75
+ # # Convert to tensor
76
+ # sequences = torch.tensor(sequences, dtype=torch.float32).to(device)
77
+
78
+ # # Generate predictions
79
+ # with torch.no_grad():
80
+ # predictions = model(sequences).cpu().numpy() # Shape: (batch_size, 13)
81
+
82
+ # # Inverse transform predictions (sales is first feature)
83
+ # dummy = np.zeros((predictions.shape[0] * predictions.shape[1], scaler.n_features_in_))
84
+ # dummy[:, 0] = predictions.flatten()
85
+ # rescaled = scaler.inverse_transform(dummy)[:, 0].reshape(predictions.shape)
86
+
87
+ # # Ensure non-negative predictions and clip to training range
88
+ # rescaled = np.maximum(rescaled, 0)
89
+ # rescaled = np.clip(rescaled, 3000, 19372) # Training sales range: $3069–19372
90
+
91
+ # # Estimate uncertainty: scaled std of predictions per sequence, repeated for each timestep
92
+ # uncertainties = 0.2 * np.std(rescaled, axis=1, keepdims=True) + 100 # Shape: (batch_size, 1)
93
+ # uncertainties = np.repeat(uncertainties, rescaled.shape[1], axis=1) # Shape: (batch_size, 13)
94
+ # uncertainties = np.clip(uncertainties, 100, 1000) # Wider bounds to avoid constant clipping
95
+
96
+ # return rescaled, uncertainties
97
+
98
+
99
+
100
  import torch
101
  import torch.nn as nn
102
  import numpy as np
103
+ import pandas as pd
104
  import pickle
105
  import json
106
  import os
107
+ from datetime import datetime
108
 
109
  class ImprovedCashFlowLSTM(nn.Module):
110
  def __init__(self, input_size, hidden_size=128, num_layers=2, forecast_horizon=13, dropout=0.2):
 
164
  model.eval()
165
  return model, scaler, feature_names, config
166
 
167
def derive_features(df, feature_names, sequence_length=21):
    """Build the model feature matrix from a minimal input frame.

    The input must provide the columns ``date``, ``sales``, ``onpromotion``,
    ``dcoilwtico`` and ``is_holiday``. From these the function derives trend,
    cyclical calendar encodings, quarter, weekend flag, sales lags (1-3),
    moving averages and sales/average ratios (7 and 14 rows), and promotion
    lags (1-2).

    The caller's ``df`` is not modified: dates are parsed into a local Series
    instead of being written back to ``df["date"]``.

    Args:
        df: DataFrame with exactly ``sequence_length`` rows and the required
            columns listed above.
        feature_names: Ordered column names of the returned matrix. Names
            that this function does not derive are left at 0.0; derived
            columns that are not listed are dropped.
        sequence_length: Required number of rows in ``df``.

    Returns:
        A 2-D NumPy array of shape ``(sequence_length, len(feature_names))``
        with columns ordered as in ``feature_names``.

    Raises:
        ValueError: If a required column is missing or the row count differs
            from ``sequence_length``.
    """
    required_columns = ["date", "sales", "onpromotion", "dcoilwtico", "is_holiday"]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"CSV must contain columns: {', '.join(required_columns)}")
    if len(df) != sequence_length:
        raise ValueError(f"CSV must have exactly {sequence_length} rows, got {len(df)}")

    # Parse dates into a local Series so the caller's frame stays untouched.
    dates = pd.to_datetime(df["date"])
    day_of_week = dates.dt.dayofweek
    month = dates.dt.month
    sales = df["sales"]
    promo = df["onpromotion"]

    # Start from zeros so any requested feature we do not derive is still present.
    feature_df = pd.DataFrame(0.0, index=df.index, columns=feature_names)

    # Pass-through input features.
    for col in ["sales", "onpromotion", "dcoilwtico", "is_holiday"]:
        feature_df[col] = df[col]

    # Temporal features: linear trend plus cyclical encodings of weekday/month.
    feature_df["trend"] = np.linspace(0, sequence_length - 1, sequence_length)
    feature_df["dayofweek_sin"] = np.sin(2 * np.pi * day_of_week / 7)
    feature_df["dayofweek_cos"] = np.cos(2 * np.pi * day_of_week / 7)
    feature_df["month_sin"] = np.sin(2 * np.pi * month / 12)
    feature_df["month_cos"] = np.cos(2 * np.pi * month / 12)
    feature_df["quarter"] = dates.dt.quarter
    feature_df["is_weekend"] = (day_of_week >= 5).astype(float)

    # Sales lags; rows without history are back-filled with the first sales value.
    for lag in [1, 2, 3]:
        feature_df[f"lag_{lag}"] = sales.shift(lag).fillna(sales.iloc[0])

    # Moving averages (partial windows allowed) and sales-to-average ratios.
    for window in [7, 14]:
        moving_avg = sales.rolling(window=window, min_periods=1).mean()
        feature_df[f"ma_{window}"] = moving_avg
        # Small epsilon guards against division by zero when the average is 0.
        feature_df[f"ratio_{window}"] = sales / (moving_avg + 1e-8)

    # Promotion lags; back-filled with the first promotion value.
    for lag in [1, 2]:
        feature_df[f"promo_lag_{lag}"] = promo.shift(lag).fillna(promo.iloc[0])

    # Select in the requested order and drop any extra derived columns.
    return feature_df[feature_names].values
208
+
209
  def predict(model, scaler, sequences):
210
  """Generate 13-week sales forecasts from input sequences."""
211
  device = torch.device("cpu")
 
236
  uncertainties = np.repeat(uncertainties, rescaled.shape[1], axis=1) # Shape: (batch_size, 13)
237
  uncertainties = np.clip(uncertainties, 100, 1000) # Wider bounds to avoid constant clipping
238
 
239
+ return rescaled, uncertainties