| { | |
| "best_metric": 1.1622273921966553, | |
| "best_model_checkpoint": "./outputs/instruct-lora-8b-aplly_chat_template-land/checkpoint-740", | |
| "epoch": 1.0652463382157125, | |
| "eval_steps": 20, | |
| "global_step": 800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0013315579227696406, | |
| "eval_loss": 1.4733461141586304, | |
| "eval_runtime": 59.4361, | |
| "eval_samples_per_second": 22.461, | |
| "eval_steps_per_second": 5.619, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.02663115845539281, | |
| "grad_norm": 0.7614122629165649, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 1.4194, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02663115845539281, | |
| "eval_loss": 1.471280813217163, | |
| "eval_runtime": 57.1574, | |
| "eval_samples_per_second": 23.357, | |
| "eval_steps_per_second": 5.844, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05326231691078562, | |
| "grad_norm": 0.7800308465957642, | |
| "learning_rate": 5.333333333333334e-06, | |
| "loss": 1.376, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05326231691078562, | |
| "eval_loss": 1.4474345445632935, | |
| "eval_runtime": 57.2352, | |
| "eval_samples_per_second": 23.325, | |
| "eval_steps_per_second": 5.836, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.07989347536617843, | |
| "grad_norm": 0.8508164286613464, | |
| "learning_rate": 8e-06, | |
| "loss": 1.3563, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07989347536617843, | |
| "eval_loss": 1.3645858764648438, | |
| "eval_runtime": 57.1364, | |
| "eval_samples_per_second": 23.365, | |
| "eval_steps_per_second": 5.846, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.10652463382157124, | |
| "grad_norm": 0.8896499276161194, | |
| "learning_rate": 1.0666666666666667e-05, | |
| "loss": 1.2653, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10652463382157124, | |
| "eval_loss": 1.303858757019043, | |
| "eval_runtime": 57.1088, | |
| "eval_samples_per_second": 23.376, | |
| "eval_steps_per_second": 5.848, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.13315579227696406, | |
| "grad_norm": 0.9267684817314148, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 1.2094, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13315579227696406, | |
| "eval_loss": 1.279226541519165, | |
| "eval_runtime": 59.6928, | |
| "eval_samples_per_second": 22.365, | |
| "eval_steps_per_second": 5.595, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.15978695073235685, | |
| "grad_norm": 1.0457453727722168, | |
| "learning_rate": 1.6e-05, | |
| "loss": 1.1917, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.15978695073235685, | |
| "eval_loss": 1.2594722509384155, | |
| "eval_runtime": 57.1101, | |
| "eval_samples_per_second": 23.376, | |
| "eval_steps_per_second": 5.848, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.18641810918774968, | |
| "grad_norm": 1.1883381605148315, | |
| "learning_rate": 1.866666666666667e-05, | |
| "loss": 1.2034, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.18641810918774968, | |
| "eval_loss": 1.2453105449676514, | |
| "eval_runtime": 57.085, | |
| "eval_samples_per_second": 23.386, | |
| "eval_steps_per_second": 5.851, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.21304926764314247, | |
| "grad_norm": 1.2522987127304077, | |
| "learning_rate": 2.1333333333333335e-05, | |
| "loss": 1.1147, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.21304926764314247, | |
| "eval_loss": 1.2352497577667236, | |
| "eval_runtime": 59.6977, | |
| "eval_samples_per_second": 22.363, | |
| "eval_steps_per_second": 5.595, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2396804260985353, | |
| "grad_norm": 1.3950749635696411, | |
| "learning_rate": 2.4e-05, | |
| "loss": 1.1172, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2396804260985353, | |
| "eval_loss": 1.2247178554534912, | |
| "eval_runtime": 57.1298, | |
| "eval_samples_per_second": 23.368, | |
| "eval_steps_per_second": 5.846, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2663115845539281, | |
| "grad_norm": 1.3889997005462646, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 1.1148, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2663115845539281, | |
| "eval_loss": 1.2236417531967163, | |
| "eval_runtime": 57.1101, | |
| "eval_samples_per_second": 23.376, | |
| "eval_steps_per_second": 5.848, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2929427430093209, | |
| "grad_norm": 1.4289050102233887, | |
| "learning_rate": 2.9333333333333333e-05, | |
| "loss": 1.0828, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.2929427430093209, | |
| "eval_loss": 1.217771291732788, | |
| "eval_runtime": 57.09, | |
| "eval_samples_per_second": 23.384, | |
| "eval_steps_per_second": 5.85, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3195739014647137, | |
| "grad_norm": 1.48817777633667, | |
| "learning_rate": 2.9995950624188135e-05, | |
| "loss": 1.0756, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3195739014647137, | |
| "eval_loss": 1.2135677337646484, | |
| "eval_runtime": 57.0597, | |
| "eval_samples_per_second": 23.397, | |
| "eval_steps_per_second": 5.854, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.34620505992010653, | |
| "grad_norm": 1.4912829399108887, | |
| "learning_rate": 2.9977957806883764e-05, | |
| "loss": 1.0463, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.34620505992010653, | |
| "eval_loss": 1.207130789756775, | |
| "eval_runtime": 57.0489, | |
| "eval_samples_per_second": 23.401, | |
| "eval_steps_per_second": 5.855, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.37283621837549935, | |
| "grad_norm": 1.4056388139724731, | |
| "learning_rate": 2.99455888692835e-05, | |
| "loss": 1.0452, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.37283621837549935, | |
| "eval_loss": 1.2046023607254028, | |
| "eval_runtime": 57.0853, | |
| "eval_samples_per_second": 23.386, | |
| "eval_steps_per_second": 5.851, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3994673768308921, | |
| "grad_norm": 1.4942606687545776, | |
| "learning_rate": 2.989887487969095e-05, | |
| "loss": 1.0261, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3994673768308921, | |
| "eval_loss": 1.1982561349868774, | |
| "eval_runtime": 57.1051, | |
| "eval_samples_per_second": 23.378, | |
| "eval_steps_per_second": 5.849, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.42609853528628494, | |
| "grad_norm": 1.6378928422927856, | |
| "learning_rate": 2.983786067505537e-05, | |
| "loss": 1.0198, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.42609853528628494, | |
| "eval_loss": 1.197502851486206, | |
| "eval_runtime": 59.5901, | |
| "eval_samples_per_second": 22.403, | |
| "eval_steps_per_second": 5.605, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.45272969374167776, | |
| "grad_norm": 1.569143533706665, | |
| "learning_rate": 2.9762604817936267e-05, | |
| "loss": 1.0101, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.45272969374167776, | |
| "eval_loss": 1.197273850440979, | |
| "eval_runtime": 57.1144, | |
| "eval_samples_per_second": 23.374, | |
| "eval_steps_per_second": 5.848, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4793608521970706, | |
| "grad_norm": 1.6125699281692505, | |
| "learning_rate": 2.9673179540294035e-05, | |
| "loss": 1.0121, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.4793608521970706, | |
| "eval_loss": 1.1948621273040771, | |
| "eval_runtime": 57.1203, | |
| "eval_samples_per_second": 23.372, | |
| "eval_steps_per_second": 5.847, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5059920106524634, | |
| "grad_norm": 1.5121594667434692, | |
| "learning_rate": 2.9569670674160343e-05, | |
| "loss": 1.0169, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5059920106524634, | |
| "eval_loss": 1.1911152601242065, | |
| "eval_runtime": 60.0674, | |
| "eval_samples_per_second": 22.225, | |
| "eval_steps_per_second": 5.56, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5326231691078562, | |
| "grad_norm": 1.5439465045928955, | |
| "learning_rate": 2.945217756925498e-05, | |
| "loss": 0.9799, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5326231691078562, | |
| "eval_loss": 1.1894199848175049, | |
| "eval_runtime": 57.1247, | |
| "eval_samples_per_second": 23.37, | |
| "eval_steps_per_second": 5.847, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.559254327563249, | |
| "grad_norm": 1.857911229133606, | |
| "learning_rate": 2.9320812997628184e-05, | |
| "loss": 0.9872, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.559254327563249, | |
| "eval_loss": 1.1862047910690308, | |
| "eval_runtime": 57.1282, | |
| "eval_samples_per_second": 23.368, | |
| "eval_steps_per_second": 5.847, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5858854860186418, | |
| "grad_norm": 1.6074450016021729, | |
| "learning_rate": 2.9175703045419906e-05, | |
| "loss": 0.988, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5858854860186418, | |
| "eval_loss": 1.184722661972046, | |
| "eval_runtime": 57.1666, | |
| "eval_samples_per_second": 23.353, | |
| "eval_steps_per_second": 5.843, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6125166444740346, | |
| "grad_norm": 1.587011456489563, | |
| "learning_rate": 2.9016986991840035e-05, | |
| "loss": 0.9861, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6125166444740346, | |
| "eval_loss": 1.1814427375793457, | |
| "eval_runtime": 57.1111, | |
| "eval_samples_per_second": 23.375, | |
| "eval_steps_per_second": 5.848, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6391478029294274, | |
| "grad_norm": 1.6503058671951294, | |
| "learning_rate": 2.8844817175485628e-05, | |
| "loss": 0.9997, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6391478029294274, | |
| "eval_loss": 1.1827510595321655, | |
| "eval_runtime": 59.6344, | |
| "eval_samples_per_second": 22.386, | |
| "eval_steps_per_second": 5.601, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6657789613848203, | |
| "grad_norm": 1.4606473445892334, | |
| "learning_rate": 2.865935884812353e-05, | |
| "loss": 0.9756, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6657789613848203, | |
| "eval_loss": 1.177931785583496, | |
| "eval_runtime": 57.1613, | |
| "eval_samples_per_second": 23.355, | |
| "eval_steps_per_second": 5.843, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6924101198402131, | |
| "grad_norm": 1.6386032104492188, | |
| "learning_rate": 2.8460790016078664e-05, | |
| "loss": 0.9704, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6924101198402131, | |
| "eval_loss": 1.1767512559890747, | |
| "eval_runtime": 57.128, | |
| "eval_samples_per_second": 23.369, | |
| "eval_steps_per_second": 5.847, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.7190412782956058, | |
| "grad_norm": 1.5629956722259521, | |
| "learning_rate": 2.824930126938027e-05, | |
| "loss": 0.9575, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7190412782956058, | |
| "eval_loss": 1.1756982803344727, | |
| "eval_runtime": 59.3596, | |
| "eval_samples_per_second": 22.49, | |
| "eval_steps_per_second": 5.627, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7456724367509987, | |
| "grad_norm": 1.9192149639129639, | |
| "learning_rate": 2.8025095598830108e-05, | |
| "loss": 0.9845, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7456724367509987, | |
| "eval_loss": 1.1744287014007568, | |
| "eval_runtime": 57.1096, | |
| "eval_samples_per_second": 23.376, | |
| "eval_steps_per_second": 5.848, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7723035952063915, | |
| "grad_norm": 1.5297322273254395, | |
| "learning_rate": 2.7788388201168096e-05, | |
| "loss": 0.9635, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7723035952063915, | |
| "eval_loss": 1.1726077795028687, | |
| "eval_runtime": 57.1106, | |
| "eval_samples_per_second": 23.376, | |
| "eval_steps_per_second": 5.848, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.7989347536617842, | |
| "grad_norm": 1.5995993614196777, | |
| "learning_rate": 2.7539406272522557e-05, | |
| "loss": 1.0019, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7989347536617842, | |
| "eval_loss": 1.1684755086898804, | |
| "eval_runtime": 59.1165, | |
| "eval_samples_per_second": 22.583, | |
| "eval_steps_per_second": 5.65, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8255659121171771, | |
| "grad_norm": 1.989475131034851, | |
| "learning_rate": 2.7278388790343133e-05, | |
| "loss": 0.965, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8255659121171771, | |
| "eval_loss": 1.16959547996521, | |
| "eval_runtime": 57.5389, | |
| "eval_samples_per_second": 23.202, | |
| "eval_steps_per_second": 5.805, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8521970705725699, | |
| "grad_norm": 1.581007719039917, | |
| "learning_rate": 2.7005586284025857e-05, | |
| "loss": 0.9521, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.8521970705725699, | |
| "eval_loss": 1.1685765981674194, | |
| "eval_runtime": 57.0994, | |
| "eval_samples_per_second": 23.38, | |
| "eval_steps_per_second": 5.849, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.8788282290279628, | |
| "grad_norm": 1.8926242589950562, | |
| "learning_rate": 2.6721260594450408e-05, | |
| "loss": 0.9714, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.8788282290279628, | |
| "eval_loss": 1.1654787063598633, | |
| "eval_runtime": 57.0989, | |
| "eval_samples_per_second": 23.38, | |
| "eval_steps_per_second": 5.849, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.9054593874833555, | |
| "grad_norm": 1.7182027101516724, | |
| "learning_rate": 2.6425684622660387e-05, | |
| "loss": 0.9893, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9054593874833555, | |
| "eval_loss": 1.1642155647277832, | |
| "eval_runtime": 57.0492, | |
| "eval_samples_per_second": 23.401, | |
| "eval_steps_per_second": 5.855, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9320905459387483, | |
| "grad_norm": 1.7494959831237793, | |
| "learning_rate": 2.6119142067927872e-05, | |
| "loss": 0.9581, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9320905459387483, | |
| "eval_loss": 1.164635419845581, | |
| "eval_runtime": 59.4597, | |
| "eval_samples_per_second": 22.452, | |
| "eval_steps_per_second": 5.617, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9587217043941412, | |
| "grad_norm": 1.9605196714401245, | |
| "learning_rate": 2.5801927155453614e-05, | |
| "loss": 0.9165, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9587217043941412, | |
| "eval_loss": 1.164476752281189, | |
| "eval_runtime": 59.4987, | |
| "eval_samples_per_second": 22.437, | |
| "eval_steps_per_second": 5.614, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9853528628495339, | |
| "grad_norm": 1.636960744857788, | |
| "learning_rate": 2.5474344353964275e-05, | |
| "loss": 0.9849, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.9853528628495339, | |
| "eval_loss": 1.1622273921966553, | |
| "eval_runtime": 57.4882, | |
| "eval_samples_per_second": 23.222, | |
| "eval_steps_per_second": 5.81, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.0119840213049267, | |
| "grad_norm": 1.6740643978118896, | |
| "learning_rate": 2.513670808347771e-05, | |
| "loss": 0.905, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.0119840213049267, | |
| "eval_loss": 1.1645617485046387, | |
| "eval_runtime": 57.4263, | |
| "eval_samples_per_second": 23.247, | |
| "eval_steps_per_second": 5.816, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.0386151797603196, | |
| "grad_norm": 1.7723573446273804, | |
| "learning_rate": 2.4789342413516838e-05, | |
| "loss": 0.8868, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.0386151797603196, | |
| "eval_loss": 1.1635513305664062, | |
| "eval_runtime": 57.091, | |
| "eval_samples_per_second": 23.384, | |
| "eval_steps_per_second": 5.85, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.0652463382157125, | |
| "grad_norm": 1.7861186265945435, | |
| "learning_rate": 2.4432580752061735e-05, | |
| "loss": 0.8853, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.0652463382157125, | |
| "eval_loss": 1.1627150774002075, | |
| "eval_runtime": 57.0672, | |
| "eval_samples_per_second": 23.393, | |
| "eval_steps_per_second": 5.853, | |
| "step": 800 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 2253, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 3 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.1795547152069427e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |