File size: 3,760 Bytes
5943cb1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1673,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05977286312014345,
"grad_norm": 0.18261943757534027,
"learning_rate": 6.666666666666667e-06,
"loss": 2.3555,
"step": 100
},
{
"epoch": 0.1195457262402869,
"grad_norm": 0.2138003557920456,
"learning_rate": 1.3333333333333333e-05,
"loss": 2.3285,
"step": 200
},
{
"epoch": 0.17931858936043035,
"grad_norm": 0.3002658188343048,
"learning_rate": 2e-05,
"loss": 2.2489,
"step": 300
},
{
"epoch": 0.2390914524805738,
"grad_norm": 0.4061976671218872,
"learning_rate": 1.9739364808281564e-05,
"loss": 2.1912,
"step": 400
},
{
"epoch": 0.2988643156007173,
"grad_norm": 0.5177231431007385,
"learning_rate": 1.8971045373758673e-05,
"loss": 2.1328,
"step": 500
},
{
"epoch": 0.3586371787208607,
"grad_norm": 0.6728057861328125,
"learning_rate": 1.7735091913054898e-05,
"loss": 2.062,
"step": 600
},
{
"epoch": 0.41841004184100417,
"grad_norm": 0.6207203269004822,
"learning_rate": 1.6095931019607367e-05,
"loss": 2.0053,
"step": 700
},
{
"epoch": 0.4781829049611476,
"grad_norm": 0.6931679248809814,
"learning_rate": 1.4139007296160285e-05,
"loss": 2.0062,
"step": 800
},
{
"epoch": 0.5379557680812911,
"grad_norm": 0.6681210994720459,
"learning_rate": 1.1966329380681454e-05,
"loss": 1.9427,
"step": 900
},
{
"epoch": 0.5977286312014346,
"grad_norm": 0.8678436279296875,
"learning_rate": 9.691152538179525e-06,
"loss": 1.9019,
"step": 1000
},
{
"epoch": 0.657501494321578,
"grad_norm": 1.0380738973617554,
"learning_rate": 7.432074999162258e-06,
"loss": 1.9017,
"step": 1100
},
{
"epoch": 0.7172743574417214,
"grad_norm": 0.8856862783432007,
"learning_rate": 5.306855785127376e-06,
"loss": 1.8821,
"step": 1200
},
{
"epoch": 0.7770472205618649,
"grad_norm": 1.1661518812179565,
"learning_rate": 3.426276279533615e-06,
"loss": 1.8826,
"step": 1300
},
{
"epoch": 0.8368200836820083,
"grad_norm": 0.8435269594192505,
"learning_rate": 1.8883655223774121e-06,
"loss": 1.8919,
"step": 1400
},
{
"epoch": 0.8965929468021518,
"grad_norm": 0.7680924534797668,
"learning_rate": 7.732902466662218e-07,
"loss": 1.9126,
"step": 1500
},
{
"epoch": 0.9563658099222953,
"grad_norm": 0.9946077466011047,
"learning_rate": 1.3917602405313812e-07,
"loss": 1.8853,
"step": 1600
},
{
"epoch": 1.0,
"step": 1673,
"total_flos": 3.0394718060544e+16,
"train_loss": 2.029054084226035,
"train_runtime": 519.4686,
"train_samples_per_second": 6.439,
"train_steps_per_second": 3.221
}
],
"logging_steps": 100,
"max_steps": 1673,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.0394718060544e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|