{
  "mbart50": {
    "args": {
      "warmup_steps": 500,
      "lr_scheduler_type": "cosine",
      "per_device_train_batch_size": 8,
      "per_device_eval_batch_size": 8,
      "num_train_epochs": 3,
      "weight_decay": 0.05,
      "max_len": 128,
      "id": null,
      "initial_learning_rate": 5e-5,
      "model_name": "facebook/mbart-large-50-many-to-many-mmt",
      "src_lang": "en_XX",
      "tgt_lang": "vi_VN",
      "wandb_project": "mbart50-lora-en-vi",
      "output_dir": "checkpoints"
    },
    "lora_config": {
      "r": 16,
      "lora_alpha": 32,
      "target_modules": [
        "q_proj",
        "v_proj",
        "k_proj",
        "o_proj"
      ],
      "lora_dropout": 0.2
    },
    "paths": {
      "checkpoint_path": "checkpoints/best_mbart50",
      "base_model_name": "facebook/mbart-large-50-many-to-many-mmt"
    }
  },
  "mt5": {
    "args": {
      "warmup_steps": 500,
      "lr_scheduler_type": "cosine",
      "per_device_train_batch_size": 8,
      "per_device_eval_batch_size": 8,
      "num_train_epochs": 3,
      "weight_decay": 0.05,
      "max_len": 128,
      "id": null,
      "initial_learning_rate": 5e-5,
      "prefix": "translate English to Vietnamese: ",
      "model_name": "google/mt5-base",
      "wandb_project": "mt5-lora-en-vi",
      "output_dir": "checkpoints"
    },
    "lora_config": {
      "r": 16,
      "lora_alpha": 32,
      "target_modules": [
        "q",
        "v",
        "k",
        "o"
      ],
      "lora_dropout": 0.2
    },
    "paths": {
      "checkpoint_path": "checkpoints/best_mt5",
      "base_model_name": "google/mt5-base"
    }
  },
  "metric_weights": {
    "bleu": 0.3,
    "rouge1": 0.15,
    "rouge2": 0.15,
    "rougeL": 0.1,
    "meteor": 0.1,
    "bertscore": 0.1,
    "comet": 0.1
  }
}
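
Below is a minimal sketch of how this config could be consumed: building a `peft.LoraConfig` from the `mbart50` section and combining evaluation metrics with `metric_weights`. The file name `config.json`, the `SEQ_2_SEQ_LM` task type, and the assumption that all metric scores are pre-normalized to [0, 1] are illustrative choices, not taken from the repo's actual training code.

```python
import json

from peft import LoraConfig, TaskType

# Load the shared config file (path is an assumption; adjust to the repo layout).
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# Build a PEFT LoraConfig from the "mbart50" section of the config.
mbart_lora = config["mbart50"]["lora_config"]
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=mbart_lora["r"],
    lora_alpha=mbart_lora["lora_alpha"],
    target_modules=mbart_lora["target_modules"],
    lora_dropout=mbart_lora["lora_dropout"],
)

# Combine per-metric scores into one weighted score using "metric_weights".
# Assumes each score is already normalized to [0, 1] before weighting.
def weighted_score(scores: dict) -> float:
    weights = config["metric_weights"]
    return sum(weights[name] * scores.get(name, 0.0) for name in weights)
```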