{
  "mbart50": {
    "args": {
      "warmup_steps": 500,
      "lr_scheduler_type": "cosine",
      "per_device_train_batch_size": 8,
      "per_device_eval_batch_size": 8,
      "num_train_epochs": 3,
      "weight_decay": 0.05,
      "max_len": 128,
      "id": null,
      "initial_learning_rate": 5e-5,
      "model_name": "facebook/mbart-large-50-many-to-many-mmt",
      "src_lang": "en_XX",
      "tgt_lang": "vi_VN",
      "wandb_project": "mbart50-lora-en-vi",
      "output_dir": "checkpoints"
    },
    "lora_config": {
      "r": 16,
      "lora_alpha": 32,
      "target_modules": [
        "q_proj",
        "v_proj",
        "k_proj",
| "o_proj" | |
      ],
      "lora_dropout": 0.2
    },
    "paths": {
      "checkpoint_path": "checkpoints/best_mbart50",
      "base_model_name": "facebook/mbart-large-50-many-to-many-mmt"
    }
  },
  "mt5": {
    "args": {
      "warmup_steps": 500,
      "lr_scheduler_type": "cosine",
      "per_device_train_batch_size": 8,
      "per_device_eval_batch_size": 8,
      "num_train_epochs": 3,
      "weight_decay": 0.05,
      "max_len": 128,
      "id": null,
      "initial_learning_rate": 5e-5,
      "prefix": "translate English to Vietnamese: ",
      "model_name": "google/mt5-base",
      "wandb_project": "mt5-lora-en-vi",
      "output_dir": "checkpoints"
    },
    "lora_config": {
      "r": 16,
      "lora_alpha": 32,
      "target_modules": [
        "q",
        "v",
        "k",
        "o"
      ],
      "lora_dropout": 0.2
    },
    "paths": {
      "checkpoint_path": "checkpoints/best_mt5",
      "base_model_name": "google/mt5-base"
    }
  },
  "metric_weights": {
    "bleu": 0.3,
    "rouge1": 0.15,
    "rouge2": 0.15,
    "rougeL": 0.1,
    "meteor": 0.1,
    "bertscore": 0.1,
    "comet": 0.1
  }
}
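
Usage note: the config itself carries no loading code, so the following is a minimal sketch of how a training script might consume it. The filename "config.json", the mapping of "initial_learning_rate" to transformers' learning_rate, and the seq2seq task type are assumptions, not something the config confirms.

import json

from peft import LoraConfig, TaskType
from transformers import Seq2SeqTrainingArguments

# Assumed filename: the config above saved as "config.json".
with open("config.json") as f:
    config = json.load(f)

model_cfg = config["mbart50"]  # or config["mt5"]

# Build a PEFT LoraConfig from the "lora_config" block.
lora_cfg = model_cfg["lora_config"]
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,  # assumed: both models are seq2seq
    r=lora_cfg["r"],
    lora_alpha=lora_cfg["lora_alpha"],
    target_modules=lora_cfg["target_modules"],
    lora_dropout=lora_cfg["lora_dropout"],
)

# Map the "args" block onto Seq2SeqTrainingArguments. Keys with no direct
# counterpart (max_len, id, prefix, wandb_project) are handled elsewhere
# in the training script and are omitted here.
args = model_cfg["args"]
training_args = Seq2SeqTrainingArguments(
    output_dir=args["output_dir"],
    learning_rate=args["initial_learning_rate"],  # assumed mapping
    warmup_steps=args["warmup_steps"],
    lr_scheduler_type=args["lr_scheduler_type"],
    per_device_train_batch_size=args["per_device_train_batch_size"],
    per_device_eval_batch_size=args["per_device_eval_batch_size"],
    num_train_epochs=args["num_train_epochs"],
    weight_decay=args["weight_decay"],
)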
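
The "metric_weights" values sum to 1.0, which suggests a weighted combination of the seven evaluation metrics into a single model-selection score. How the scores are normalized is not stated; the sketch below assumes every metric is already scaled to [0, 1] (note that BLEU is often reported on a 0-100 scale and would need dividing by 100 first), and the example metric values are hypothetical.

def combined_score(metrics: dict[str, float], weights: dict[str, float]) -> float:
    """Weighted sum of metric values, assuming all are normalized to [0, 1]."""
    return sum(weights[name] * metrics[name] for name in weights)

# Hypothetical evaluation results for one checkpoint:
example_metrics = {
    "bleu": 0.32,
    "rouge1": 0.55,
    "rouge2": 0.38,
    "rougeL": 0.51,
    "meteor": 0.48,
    "bertscore": 0.87,
    "comet": 0.79,
}
print(combined_score(example_metrics, config["metric_weights"]))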