{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.96,
"eval_steps": 500,
"global_step": 186,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 6.012967586517334,
"learning_rate": 4.9910902453260824e-05,
"loss": 3.1744,
"num_input_tokens_seen": 4544,
"step": 5
},
{
"epoch": 0.16,
"grad_norm": 3.4645135402679443,
"learning_rate": 4.964424488287009e-05,
"loss": 2.264,
"num_input_tokens_seen": 9104,
"step": 10
},
{
"epoch": 0.24,
"grad_norm": 3.6127007007598877,
"learning_rate": 4.920192797165511e-05,
"loss": 1.5631,
"num_input_tokens_seen": 13504,
"step": 15
},
{
"epoch": 0.32,
"grad_norm": 2.5983505249023438,
"learning_rate": 4.858710446774951e-05,
"loss": 1.0369,
"num_input_tokens_seen": 17808,
"step": 20
},
{
"epoch": 0.4,
"grad_norm": 1.702121615409851,
"learning_rate": 4.780415671242334e-05,
"loss": 0.8575,
"num_input_tokens_seen": 22464,
"step": 25
},
{
"epoch": 0.48,
"grad_norm": 1.5292010307312012,
"learning_rate": 4.685866540361456e-05,
"loss": 0.6605,
"num_input_tokens_seen": 26848,
"step": 30
},
{
"epoch": 0.56,
"grad_norm": 1.8130377531051636,
"learning_rate": 4.5757369817809415e-05,
"loss": 0.6076,
"num_input_tokens_seen": 31296,
"step": 35
},
{
"epoch": 0.64,
"grad_norm": 1.726243019104004,
"learning_rate": 4.45081197738023e-05,
"loss": 0.5067,
"num_input_tokens_seen": 35648,
"step": 40
},
{
"epoch": 0.72,
"grad_norm": 1.7013319730758667,
"learning_rate": 4.3119819680728e-05,
"loss": 0.5202,
"num_input_tokens_seen": 40160,
"step": 45
},
{
"epoch": 0.8,
"grad_norm": 1.4300320148468018,
"learning_rate": 4.160236506918098e-05,
"loss": 0.484,
"num_input_tokens_seen": 44608,
"step": 50
},
{
"epoch": 0.88,
"grad_norm": 1.562301754951477,
"learning_rate": 3.9966572057815373e-05,
"loss": 0.5275,
"num_input_tokens_seen": 49488,
"step": 55
},
{
"epoch": 0.96,
"grad_norm": 1.4365646839141846,
"learning_rate": 3.822410025817406e-05,
"loss": 0.4158,
"num_input_tokens_seen": 53888,
"step": 60
},
{
"epoch": 1.032,
"grad_norm": 1.6914381980895996,
"learning_rate": 3.638736966726585e-05,
"loss": 0.3571,
"num_input_tokens_seen": 57952,
"step": 65
},
{
"epoch": 1.112,
"grad_norm": 1.6642335653305054,
"learning_rate": 3.44694721402644e-05,
"loss": 0.4535,
"num_input_tokens_seen": 62304,
"step": 70
},
{
"epoch": 1.192,
"grad_norm": 1.2602016925811768,
"learning_rate": 3.2484078074333954e-05,
"loss": 0.3905,
"num_input_tokens_seen": 66736,
"step": 75
},
{
"epoch": 1.272,
"grad_norm": 1.4347716569900513,
"learning_rate": 3.0445338968721287e-05,
"loss": 0.42,
"num_input_tokens_seen": 71248,
"step": 80
},
{
"epoch": 1.3519999999999999,
"grad_norm": 1.617989420890808,
"learning_rate": 2.836778655564653e-05,
"loss": 0.3955,
"num_input_tokens_seen": 75776,
"step": 85
},
{
"epoch": 1.432,
"grad_norm": 1.752974033355713,
"learning_rate": 2.6266229220967818e-05,
"loss": 0.4066,
"num_input_tokens_seen": 80032,
"step": 90
},
{
"epoch": 1.512,
"grad_norm": 1.70826256275177,
"learning_rate": 2.4155646452913296e-05,
"loss": 0.4497,
"num_input_tokens_seen": 84816,
"step": 95
},
{
"epoch": 1.592,
"grad_norm": 1.4035143852233887,
"learning_rate": 2.2051082071228854e-05,
"loss": 0.4116,
"num_input_tokens_seen": 89360,
"step": 100
},
{
"epoch": 1.6720000000000002,
"grad_norm": 1.484985113143921,
"learning_rate": 1.9967536997783494e-05,
"loss": 0.4653,
"num_input_tokens_seen": 93920,
"step": 105
},
{
"epoch": 1.752,
"grad_norm": 1.2968859672546387,
"learning_rate": 1.79198623329424e-05,
"loss": 0.405,
"num_input_tokens_seen": 98368,
"step": 110
},
{
"epoch": 1.8319999999999999,
"grad_norm": 0.9588320255279541,
"learning_rate": 1.5922653499838137e-05,
"loss": 0.4061,
"num_input_tokens_seen": 102992,
"step": 115
},
{
"epoch": 1.912,
"grad_norm": 1.1992862224578857,
"learning_rate": 1.399014621105914e-05,
"loss": 0.3665,
"num_input_tokens_seen": 107472,
"step": 120
},
{
"epoch": 1.992,
"grad_norm": 1.4413576126098633,
"learning_rate": 1.2136114999284288e-05,
"loss": 0.3508,
"num_input_tokens_seen": 111776,
"step": 125
},
{
"epoch": 2.064,
"grad_norm": 1.225965976715088,
"learning_rate": 1.0373775035117305e-05,
"loss": 0.3255,
"num_input_tokens_seen": 115824,
"step": 130
},
{
"epoch": 2.144,
"grad_norm": 1.2995370626449585,
"learning_rate": 8.715687931944449e-06,
"loss": 0.4355,
"num_input_tokens_seen": 120544,
"step": 135
},
{
"epoch": 2.224,
"grad_norm": 1.6566274166107178,
"learning_rate": 7.173672209219495e-06,
"loss": 0.3769,
"num_input_tokens_seen": 125104,
"step": 140
},
{
"epoch": 2.304,
"grad_norm": 1.2420896291732788,
"learning_rate": 5.758719052376693e-06,
"loss": 0.3056,
"num_input_tokens_seen": 129360,
"step": 145
},
{
"epoch": 2.384,
"grad_norm": 1.6003204584121704,
"learning_rate": 4.480913969818098e-06,
"loss": 0.3677,
"num_input_tokens_seen": 133856,
"step": 150
},
{
"epoch": 2.464,
"grad_norm": 1.551696538925171,
"learning_rate": 3.3493649053890326e-06,
"loss": 0.3446,
"num_input_tokens_seen": 138240,
"step": 155
},
{
"epoch": 2.544,
"grad_norm": 1.4812240600585938,
"learning_rate": 2.372137318741968e-06,
"loss": 0.3474,
"num_input_tokens_seen": 142784,
"step": 160
},
{
"epoch": 2.624,
"grad_norm": 1.4917963743209839,
"learning_rate": 1.5561966963229924e-06,
"loss": 0.4203,
"num_input_tokens_seen": 147344,
"step": 165
},
{
"epoch": 2.7039999999999997,
"grad_norm": 1.5821081399917603,
"learning_rate": 9.073589027514789e-07,
"loss": 0.3472,
"num_input_tokens_seen": 151792,
"step": 170
},
{
"epoch": 2.784,
"grad_norm": 1.8335837125778198,
"learning_rate": 4.302487264785521e-07,
"loss": 0.3478,
"num_input_tokens_seen": 156304,
"step": 175
},
{
"epoch": 2.864,
"grad_norm": 1.3951280117034912,
"learning_rate": 1.2826691520262114e-07,
"loss": 0.3708,
"num_input_tokens_seen": 160864,
"step": 180
},
{
"epoch": 2.944,
"grad_norm": 1.2953612804412842,
"learning_rate": 3.565936007254855e-09,
"loss": 0.4196,
"num_input_tokens_seen": 165376,
"step": 185
},
{
"epoch": 2.96,
"num_input_tokens_seen": 166272,
"step": 186,
"total_flos": 976464451731456.0,
"train_loss": 0.6014698924877311,
"train_runtime": 123.0139,
"train_samples_per_second": 24.387,
"train_steps_per_second": 1.512
}
],
"logging_steps": 5,
"max_steps": 186,
"num_input_tokens_seen": 166272,
"num_train_epochs": 3,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 976464451731456.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}