# File size: 2,938 Bytes
# 2969cfa
---
# Food Image Classifier Configuration
# Project identity metadata (name, version string, human-readable summary).
# The three fields are nested under `project` so that `name` does not collide
# with the dataset `name` key elsewhere in this file.
project:
  name: "food_image_classifier"
  version: "1.0.0"
  description: "World-Class Food Image Classifier with Hybrid CNN-ViT Architecture"
# Hardware Configuration
# Device selection plus data-loading and execution switches.
hardware:
  device: "cuda"  # RTX 5060 Laptop GPU
  mixed_precision: true
  compile_model: true
  num_workers: 4
  pin_memory: true
# Data Configuration
# Input size, batching, dataset sources, split ratios and augmentation.
# NOTE(review): nesting below is reconstructed from the section comments;
# confirm against the schema the config loader expects.
data:
  image_size: 224
  batch_size: 32  # Reduced to avoid memory issues
  num_classes: 101  # Food101 dataset: 101 classes, 1000 images per class

  datasets:
    - name: "food101"
      source: "kaggle"
      path: "data/raw/food101"
    # Temporarily disabled HuggingFace dataset to use only Food101
    # - name: "food_images_hf"
    #   source: "huggingface"
    #   path: "data/raw/food_images_hf"

  # Data splits (train + val + test = 0.8 + 0.15 + 0.05 = 1.0)
  train_ratio: 0.8
  val_ratio: 0.15
  test_ratio: 0.05

  # Augmentation
  augmentation:
    horizontal_flip: 0.5
    rotation: 15
    color_jitter:
      brightness: 0.2
      contrast: 0.2
      saturation: 0.2
      hue: 0.1
    # NOTE(review): `normalize` is placed under `augmentation` because it
    # follows `color_jitter` with no section comment of its own; verify.
    normalize:
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
# Model Configuration
# Each sub-section carries its own `dropout`; they must stay nested under
# their branch so the four values do not overwrite one another.
model:
  architecture: "hybrid_cnn_vit"

  # CNN Branch (ResNet50)
  cnn:
    backbone: "resnet50"
    pretrained: true
    freeze_early_layers: true
    dropout: 0.3

  # ViT Branch (DeiT-Base)
  vit:
    model_name: "facebook/deit-base-distilled-patch16-224"
    pretrained: true
    freeze_early_layers: true
    dropout: 0.1

  # Fusion Module
  fusion:
    hidden_dim: 512
    num_heads: 8
    dropout: 0.2

  # Classification Head
  head:
    hidden_dims: [1024, 512]
    dropout: 0.4
# Training Configuration
# Scientific-notation values are written with an explicit decimal point
# (1.0e-4, not 1e-4): YAML 1.1 resolvers such as PyYAML only recognise a
# float when the mantissa contains a ".", and would otherwise load the value
# as the string "1e-4".
training:
  epochs: 100  # Increased for comprehensive training with 101k images
  learning_rate: 1.0e-4
  weight_decay: 1.0e-5

  # Optimizer
  optimizer:
    type: "adamw"
    betas: [0.9, 0.999]
    eps: 1.0e-8

  # Learning Rate Scheduler
  scheduler:
    type: "cosine_annealing_warm_restarts"
    T_0: 10
    T_mult: 2
    eta_min: 1.0e-6

  # Loss Function
  loss:
    type: "label_smoothing_cross_entropy"
    smoothing: 0.1

  # Advanced Training Techniques
  # Each technique has its own `enabled` switch, so the sub-mappings must
  # stay nested to keep the switches independent.
  ema:
    enabled: true
    decay: 0.9999
  gradient_clipping:
    enabled: true
    max_norm: 1.0
  early_stopping:
    enabled: true
    patience: 10
    min_delta: 0.001
# Evaluation Configuration
# Metrics to compute and which evaluation artifacts to save.
evaluation:
  metrics:
    - "accuracy"
    - "top5_accuracy"
    - "f1_score"
    - "precision"
    - "recall"
  save_confusion_matrix: true
  save_classification_report: true
# Logging Configuration
# `wandb.project` must stay nested: at top level it would collide with the
# `project` metadata mapping at the start of this file.
logging:
  tensorboard:
    enabled: true
    log_dir: "runs"
  wandb:
    enabled: false  # Set to true if you want to use wandb
    project: "food_classifier"
  # NOTE(review): `checkpoint` is placed under `logging` because it has no
  # section header of its own; confirm against the config loader.
  checkpoint:
    save_best: true
    save_last: true
    save_every_n_epochs: 10
# API Keys (will be loaded from environment)
# The values are literal "${VAR}" placeholders, quoted so they stay plain
# strings. YAML itself performs no environment expansion, so substitution is
# presumably done by the config loader (verify). Never commit real
# credentials here.
api_keys:
  kaggle_username: "${KAGGLE_USERNAME}"
  kaggle_key: "${KAGGLE_KEY}"
  huggingface_token: "${HF_TOKEN}"