File size: 2,938 Bytes

2969cfa

# Food Image Classifier Configuration
#
# Project metadata: identifier, version string, and a one-line description.
project:
  name: 'food_image_classifier'
  # Kept quoted so the version is always read as a string.
  version: '1.0.0'
  description: >-
    World-Class Food Image Classifier with Hybrid CNN-ViT Architecture

# Hardware / runtime settings
hardware:
  # Target device (author's note: RTX 5060 Laptop GPU).
  device: 'cuda'
  mixed_precision: true
  compile_model: true
  # NOTE(review): presumably data-loader options — confirm against the consumer.
  num_workers: 4
  pin_memory: true

# Dataset, split, and augmentation settings
data:
  image_size: 224
  # Reduced to avoid memory issues.
  batch_size: 32
  # Food101 dataset: 101 classes, 1000 images per class.
  num_classes: 101
  datasets:
    - name: 'food101'
      source: 'kaggle'
      path: 'data/raw/food101'
    # HuggingFace dataset temporarily disabled so that only Food101 is used.
    # - name: "food_images_hf"
    #   source: "huggingface"
    #   path: "data/raw/food_images_hf"

  # Split fractions; the three values sum to 1.0.
  train_ratio: 0.8
  val_ratio: 0.15
  test_ratio: 0.05

  # Augmentation parameters
  # NOTE(review): units are not stated here — horizontal_flip looks like a
  # probability and rotation like degrees; confirm against the consumer.
  augmentation:
    horizontal_flip: 0.5
    rotation: 15
    color_jitter:
      brightness: 0.2
      contrast: 0.2
      saturation: 0.2
      hue: 0.1
    # Per-channel statistics, three values each.
    # NOTE(review): these look like the usual ImageNet mean/std — confirm.
    normalize:
      mean:
        - 0.485
        - 0.456
        - 0.406
      std:
        - 0.229
        - 0.224
        - 0.225

# Model settings: two backbone branches, a fusion module, and a classifier head
model:
  architecture: 'hybrid_cnn_vit'

  # CNN branch (ResNet50)
  cnn:
    backbone: 'resnet50'
    pretrained: true
    freeze_early_layers: true
    dropout: 0.3

  # ViT branch (DeiT-Base)
  vit:
    model_name: 'facebook/deit-base-distilled-patch16-224'
    pretrained: true
    freeze_early_layers: true
    dropout: 0.1

  # Fusion module
  fusion:
    hidden_dim: 512
    num_heads: 8
    dropout: 0.2

  # Classification head
  head:
    hidden_dims:
      - 1024
      - 512
    dropout: 0.4

# Training Configuration
#
# Scientific-notation floats are written with an explicit decimal point
# (1.0e-4, not 1e-4). YAML 1.1 loaders such as PyYAML only resolve a scalar
# to a float when the mantissa contains a ".", so a bare 1e-4 is loaded as
# the string "1e-4" instead of a number. The 1.0e-4 form is a float under
# both YAML 1.1 and YAML 1.2 loaders.
training:
  epochs: 100  # Increased for comprehensive training with 101k images
  learning_rate: 1.0e-4
  weight_decay: 1.0e-5

  # Optimizer
  optimizer:
    type: "adamw"
    betas: [0.9, 0.999]
    eps: 1.0e-8

  # Learning Rate Scheduler
  # NOTE(review): if this maps to a warm-restart cosine schedule, T_0: 10 with
  # T_mult: 2 would reset the LR after epochs 10, 30 and 70 — confirm that
  # early_stopping.patience below is long enough to ride out a restart.
  scheduler:
    type: "cosine_annealing_warm_restarts"
    T_0: 10
    T_mult: 2
    eta_min: 1.0e-6

  # Loss Function
  loss:
    type: "label_smoothing_cross_entropy"
    smoothing: 0.1

  # Advanced Training Techniques
  ema:
    enabled: true
    decay: 0.9999

  gradient_clipping:
    enabled: true
    max_norm: 1.0

  early_stopping:
    enabled: true
    patience: 10
    min_delta: 0.001

# Evaluation settings: which metrics to report and which artifacts to save
evaluation:
  metrics:
    - 'accuracy'
    - 'top5_accuracy'
    - 'f1_score'
    - 'precision'
    - 'recall'

  # Report artifacts
  save_confusion_matrix: true
  save_classification_report: true

# Logging and checkpoint settings
logging:
  tensorboard:
    enabled: true
    log_dir: 'runs'

  wandb:
    # Set to true if you want to use wandb.
    enabled: false
    project: 'food_classifier'

  checkpoint:
    save_best: true
    save_last: true
    save_every_n_epochs: 10

# API keys (will be loaded from environment)
#
# The values are ${VAR} placeholders, not real credentials.
# NOTE(review): YAML itself does not expand ${...}; presumably the consumer
# substitutes these from the environment — confirm.
api_keys:
  kaggle_username: '${KAGGLE_USERNAME}'
  kaggle_key: '${KAGGLE_KEY}'
  huggingface_token: '${HF_TOKEN}'