"""Loan approval prediction service.

FastAPI app that downloads a trained model and its preprocessing artifacts
(encoders, scaler, column lists) from the Hugging Face Hub at startup, then
serves predictions behind a simple API-key check while exporting Prometheus
metrics.
"""
from contextlib import asynccontextmanager
import os
import time

import joblib
import pandas as pd
import requests
from fastapi import FastAPI, Header, HTTPException
from fastapi.responses import Response
from huggingface_hub import hf_hub_download
from prometheus_client import (
    CONTENT_TYPE_LATEST,
    Counter,
    Gauge,
    Histogram,
    generate_latest,
)

# Global model artifacts, populated once by the lifespan handler.
model = None
encoders = {}
scaler = None
feature_columns = []
categorical_columns = []
boolean_columns = []
loaded = False

# Environment variables
API_KEY = os.getenv("API_KEY", "test-key-123")
HF_MODEL_REPO = os.getenv("HF_MODEL_REPO")
PROM_PUSHGATEWAY = os.getenv("PROM_PUSHGATEWAY")

# Prometheus metrics
REQS = Counter("pred_requests_total", "Total prediction requests")
LAT = Histogram("pred_request_latency_seconds", "Request latency")
LATEST = Gauge("latest_prediction", "Last predicted value")


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Download and load model artifacts on startup; log on shutdown.

    Sets the module-level ``loaded`` flag; any download/load failure leaves
    the app running in "mock mode" (``loaded = False``) rather than crashing.
    """
    # Startup
    global model, encoders, scaler, feature_columns, categorical_columns, boolean_columns, loaded
    print("🚀 Starting FastAPI application...")
    print(f"API_KEY: {'Set' if API_KEY else 'Not set'}")
    print(f"HF_MODEL_REPO: {HF_MODEL_REPO}")
    if not HF_MODEL_REPO:
        print("⚠️ WARNING: HF_MODEL_REPO not set. Using mock mode.")
        loaded = False
    else:
        try:
            print(f" Downloading model from {HF_MODEL_REPO}...")
            m = hf_hub_download(repo_id=HF_MODEL_REPO, filename="best_model.joblib")
            e = hf_hub_download(repo_id=HF_MODEL_REPO, filename="models/encoders.joblib")
            s = hf_hub_download(repo_id=HF_MODEL_REPO, filename="models/scaler.joblib")
            f = hf_hub_download(repo_id=HF_MODEL_REPO, filename="models/feature_columns.joblib")
            c = hf_hub_download(repo_id=HF_MODEL_REPO, filename="models/categorical_columns.joblib")
            b = hf_hub_download(repo_id=HF_MODEL_REPO, filename="models/boolean_columns.joblib")
            print(" Loading artifacts...")
            model = joblib.load(m)
            encoders = joblib.load(e)
            scaler = joblib.load(s)
            feature_columns = joblib.load(f)
            categorical_columns = joblib.load(c)
            boolean_columns = joblib.load(b)
            loaded = True
            print(" Model loaded successfully!")
            print(f" Features: {len(feature_columns)}")
        except Exception as ex:
            # Deliberately broad: any failure here means "serve in mock mode".
            print(f" Model load error: {ex}")
            loaded = False
    print(" FastAPI app initialized and ready!")
    yield  # Server runs here
    # Shutdown (cleanup if needed)
    print("🛑 Shutting down...")


app = FastAPI(title="Loan Approval API", version="1.0", lifespan=lifespan)


@app.get("/")
def health():
    """Health probe: report whether model artifacts are loaded."""
    return {
        "status": "ok",
        "model_loaded": loaded,
        "features": feature_columns if loaded else []
    }


@app.post("/predict")
def predict(payload: dict, x_api_key: str = Header(None)):
    """Score one loan application.

    Expects a flat JSON object of feature values; missing features default
    to 0. Requires the ``X-API-Key`` header to match ``API_KEY``.

    Raises:
        HTTPException: 401 on bad key, 503 if the model is not loaded,
            500 on any preprocessing/prediction error.
    """
    if x_api_key != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid API key")
    if not loaded:
        raise HTTPException(status_code=503, detail="Model not loaded")
    try:
        df = pd.DataFrame([payload])
        # Fill any missing features with 0 so the column selection below
        # never raises KeyError.
        for col in feature_columns:
            if col not in df.columns:
                df[col] = 0
        # Normalize boolean-ish columns (bool dtype or string "True"/"False")
        # to 0/1 integers; unrecognized values become 0.
        for col in boolean_columns:
            if col in df.columns:
                if df[col].dtype == bool:
                    df[col] = df[col].astype(int)
                elif df[col].dtype == 'object':
                    # NOTE: True == 1 and False == 0 in Python, so the bool
                    # and int keys collapse to the same dict entries; both
                    # spellings are kept for readability.
                    df[col] = df[col].map({
                        'True': 1, 'true': 1, True: 1, 1: 1,
                        'False': 0, 'false': 0, False: 0, 0: 0
                    }).fillna(0).astype(int)
        # Label-encode categorical columns; categories the encoder has never
        # seen fall back to 0 rather than failing the whole request.
        for col in categorical_columns:
            if col in df.columns and col in encoders:
                try:
                    df[col] = encoders[col].transform(df[col])
                except Exception:
                    df[col] = 0  # unseen category fallback
        df = df[feature_columns]
        df_scaled = scaler.transform(df)
        start = time.time()
        pred = model.predict(df_scaled)[0]
        latency = time.time() - start
        LAT.observe(latency)
        REQS.inc()
        LATEST.set(pred)
        # Best-effort push to a Prometheus Pushgateway; failures are ignored.
        # NOTE(review): prometheus_client.push_to_gateway is the canonical
        # client for this — confirm the gateway accepts a raw POST of the
        # text exposition format at this path.
        if PROM_PUSHGATEWAY:
            try:
                requests.post(
                    f"{PROM_PUSHGATEWAY}/metrics/job/loan_model",
                    data=generate_latest(),
                    timeout=2,
                )
            except Exception:
                pass
        return {
            "prediction": int(pred),
            "prediction_label": "Approved" if pred == 1 else "Rejected",
            "latency_seconds": round(latency, 4)
        }
    except Exception as e:
        # Chain the cause so the original traceback is preserved in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/metrics")
def metrics():
    """Expose Prometheus metrics in the text exposition format.

    Returns raw bytes with the Prometheus content type; the framework's
    default JSON response would mangle the exposition format and advertise
    the wrong Content-Type to scrapers.
    """
    return Response(content=generate_latest(), media_type=CONTENT_TYPE_LATEST)