---
# Food Image Classifier Configuration
#
# NOTE(review): this file had been collapsed onto a single comment line, so
# none of these settings were actually being parsed. The nesting below was
# reconstructed from the key names and section comments; confirm it against
# the code that reads this config.
project:
  name: "food_image_classifier"
  version: "1.0.0"
  description: "World-Class Food Image Classifier with Hybrid CNN-ViT Architecture"

# Hardware Configuration
hardware:
  device: "cuda"  # RTX 5060 Laptop GPU
  mixed_precision: true
  compile_model: true
  num_workers: 4
  pin_memory: true

# Data Configuration
data:
  image_size: 224
  batch_size: 32  # Reduced to avoid memory issues
  num_classes: 101  # Food101 dataset: 101 classes, 1000 images per class
  datasets:
    - name: "food101"
      source: "kaggle"
      path: "data/raw/food101"
    # Temporarily disabled HuggingFace dataset to use only Food101
    # - name: "food_images_hf"
    #   source: "huggingface"
    #   path: "data/raw/food_images_hf"

  # Data splits (the three ratios sum to 1.0)
  train_ratio: 0.8
  val_ratio: 0.15
  test_ratio: 0.05

  # Augmentation
  augmentation:
    horizontal_flip: 0.5
    rotation: 15
    color_jitter:
      brightness: 0.2
      contrast: 0.2
      saturation: 0.2
      hue: 0.1
    # NOTE(review): assumed to be nested under `augmentation`; move it up one
    # level if the loader reads `data.normalize` instead.
    normalize:
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]

# Model Configuration
model:
  architecture: "hybrid_cnn_vit"

  # CNN Branch (ResNet50)
  cnn:
    backbone: "resnet50"
    pretrained: true
    freeze_early_layers: true
    dropout: 0.3

  # ViT Branch (DeiT-Base)
  vit:
    model_name: "facebook/deit-base-distilled-patch16-224"
    pretrained: true
    freeze_early_layers: true
    dropout: 0.1

  # Fusion Module
  fusion:
    hidden_dim: 512
    num_heads: 8
    dropout: 0.2

  # Classification Head
  head:
    hidden_dims: [1024, 512]
    dropout: 0.4

# Training Configuration
#
# Exponent-form numbers are written with an explicit decimal point
# (1.0e-4, not 1e-4): YAML 1.1 loaders such as PyYAML resolve the dot-less
# form as a string rather than a float.
training:
  epochs: 100  # Increased for comprehensive training with 101k images
  learning_rate: 1.0e-4
  weight_decay: 1.0e-5

  # Optimizer
  optimizer:
    type: "adamw"
    betas: [0.9, 0.999]
    eps: 1.0e-8

  # Learning Rate Scheduler
  scheduler:
    type: "cosine_annealing_warm_restarts"
    T_0: 10
    T_mult: 2
    eta_min: 1.0e-6

  # Loss Function
  loss:
    type: "label_smoothing_cross_entropy"
    smoothing: 0.1

  # Advanced Training Techniques
  # NOTE(review): assumed to be nested under `training`; confirm with the
  # loader.
  ema:
    enabled: true
    decay: 0.9999

  gradient_clipping:
    enabled: true
    max_norm: 1.0

  early_stopping:
    enabled: true
    patience: 10
    min_delta: 0.001

# Evaluation Configuration
evaluation:
  # Metric names are passed through as strings.
  metrics:
    - "accuracy"
    - "top5_accuracy"
    - "f1_score"
    - "precision"
    - "recall"
  save_confusion_matrix: true
  save_classification_report: true

# Logging Configuration
logging:
  tensorboard:
    enabled: true
    log_dir: "runs"

  wandb:
    enabled: false  # Set to true if you want to use wandb
    project: "food_classifier"

  # NOTE(review): assumed to be nested under `logging`; move to the top level
  # if the loader reads `checkpoint` directly.
  checkpoint:
    save_best: true
    save_last: true
    save_every_n_epochs: 10

# API Keys (will be loaded from environment)
# YAML itself does not expand ${VAR}; presumably the config loader substitutes
# these placeholders from environment variables - confirm. Do not replace them
# with real credentials in this file.
api_keys:
  kaggle_username: "${KAGGLE_USERNAME}"
  kaggle_key: "${KAGGLE_KEY}"
  huggingface_token: "${HF_TOKEN}"