{
  "best_metric": 0.9470136761665344,
  "best_model_checkpoint": "./trained-race/checkpoint-9000",
  "epoch": 1.8443378827001107,
  "eval_steps": 1000,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 0.000199907783105865,
      "loss": 7.8876,
      "step": 10
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.00019981556621172998,
      "loss": 6.9577,
      "step": 20
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.000199723349317595,
      "loss": 6.1368,
      "step": 30
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019963113242345997,
      "loss": 5.5562,
      "step": 40
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019953891552932499,
      "loss": 4.6469,
      "step": 50
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019944669863518997,
      "loss": 3.999,
      "step": 60
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019935448174105498,
      "loss": 3.5516,
      "step": 70
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00019926226484691997,
      "loss": 3.2196,
      "step": 80
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019917004795278495,
      "loss": 3.0321,
      "step": 90
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019907783105864993,
      "loss": 2.7039,
      "step": 100
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019898561416451495,
      "loss": 2.6115,
      "step": 110
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019889339727037993,
      "loss": 2.4644,
      "step": 120
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00019880118037624494,
      "loss": 2.3589,
      "step": 130
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019870896348210993,
      "loss": 2.3657,
      "step": 140
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019861674658797494,
      "loss": 2.1013,
      "step": 150
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.00019852452969383992,
      "loss": 2.0926,
      "step": 160
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0001984323127997049,
      "loss": 2.1673,
      "step": 170
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0001983400959055699,
      "loss": 1.9943,
      "step": 180
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001982478790114349,
      "loss": 1.9654,
      "step": 190
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001981556621172999,
      "loss": 2.0219,
      "step": 200
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001980634452231649,
      "loss": 2.2223,
      "step": 210
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019797122832902988,
      "loss": 1.9974,
      "step": 220
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001978790114348949,
      "loss": 1.9526,
      "step": 230
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.00019778679454075988,
      "loss": 2.0874,
      "step": 240
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019769457764662486,
      "loss": 2.0008,
      "step": 250
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019760236075248985,
      "loss": 1.9077,
      "step": 260
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019751014385835486,
      "loss": 1.9373,
      "step": 270
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019741792696421984,
      "loss": 1.9091,
      "step": 280
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.00019732571007008486,
      "loss": 1.9555,
      "step": 290
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019723349317594984,
      "loss": 1.8459,
      "step": 300
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019714127628181485,
      "loss": 1.9188,
      "step": 310
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019704905938767984,
      "loss": 1.8289,
      "step": 320
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019695684249354482,
      "loss": 1.8993,
      "step": 330
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0001968646255994098,
      "loss": 1.8261,
      "step": 340
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00019677240870527482,
      "loss": 1.8536,
      "step": 350
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001966801918111398,
      "loss": 1.8515,
      "step": 360
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001965879749170048,
      "loss": 1.9419,
      "step": 370
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001964957580228698,
      "loss": 1.8086,
      "step": 380
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001964035411287348,
      "loss": 1.7413,
      "step": 390
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001963113242345998,
      "loss": 1.7866,
      "step": 400
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019621910734046478,
      "loss": 1.8727,
      "step": 410
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019612689044632976,
      "loss": 1.8292,
      "step": 420
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019603467355219477,
      "loss": 1.7826,
      "step": 430
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019594245665805976,
      "loss": 1.8574,
      "step": 440
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019585023976392477,
      "loss": 1.7725,
      "step": 450
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00019575802286978975,
      "loss": 1.7879,
      "step": 460
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019566580597565476,
      "loss": 1.6836,
      "step": 470
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019557358908151975,
      "loss": 1.7871,
      "step": 480
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019548137218738473,
      "loss": 1.7409,
      "step": 490
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019538915529324972,
      "loss": 1.6154,
      "step": 500
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019529693839911473,
      "loss": 1.6699,
      "step": 510
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019520472150497971,
      "loss": 1.7106,
      "step": 520
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019511250461084473,
      "loss": 1.5808,
      "step": 530
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0001950202877167097,
      "loss": 1.7415,
      "step": 540
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00019492807082257472,
      "loss": 1.7023,
      "step": 550
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0001948358539284397,
      "loss": 1.6189,
      "step": 560
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0001947436370343047,
      "loss": 1.7952,
      "step": 570
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019465142014016967,
      "loss": 1.7501,
      "step": 580
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019455920324603469,
      "loss": 1.7323,
      "step": 590
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019446698635189967,
      "loss": 1.6671,
      "step": 600
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019437476945776468,
      "loss": 1.7774,
      "step": 610
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00019428255256362967,
      "loss": 1.8237,
      "step": 620
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019419033566949468,
      "loss": 1.6837,
      "step": 630
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019409811877535966,
      "loss": 1.7524,
      "step": 640
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019400590188122465,
      "loss": 1.617,
      "step": 650
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019391368498708963,
      "loss": 1.6385,
      "step": 660
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00019382146809295462,
      "loss": 1.7298,
      "step": 670
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019372925119881963,
      "loss": 1.8203,
      "step": 680
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019363703430468464,
      "loss": 1.6564,
      "step": 690
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019354481741054962,
      "loss": 1.6086,
      "step": 700
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0001934526005164146,
      "loss": 1.7182,
      "step": 710
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00019336038362227962,
      "loss": 1.8396,
      "step": 720
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0001932681667281446,
      "loss": 1.6539,
      "step": 730
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0001931759498340096,
      "loss": 1.6745,
      "step": 740
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019308373293987457,
      "loss": 1.7868,
      "step": 750
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019299151604573958,
      "loss": 1.6292,
      "step": 760
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0001928992991516046,
      "loss": 1.5977,
      "step": 770
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00019280708225746958,
      "loss": 1.6717,
      "step": 780
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019271486536333456,
      "loss": 1.712,
      "step": 790
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019262264846919958,
      "loss": 1.6402,
      "step": 800
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019253043157506456,
      "loss": 1.6647,
      "step": 810
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019243821468092954,
      "loss": 1.6568,
      "step": 820
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019234599778679453,
      "loss": 1.5253,
      "step": 830
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00019225378089265954,
      "loss": 1.646,
      "step": 840
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019216156399852455,
      "loss": 1.7057,
      "step": 850
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019206934710438954,
      "loss": 1.5224,
      "step": 860
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019197713021025452,
      "loss": 1.6126,
      "step": 870
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019188491331611953,
      "loss": 1.5545,
      "step": 880
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00019179269642198452,
      "loss": 1.6877,
      "step": 890
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001917004795278495,
      "loss": 1.6015,
      "step": 900
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00019160826263371449,
      "loss": 1.6627,
      "step": 910
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001915160457395795,
      "loss": 1.5924,
      "step": 920
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001914238288454445,
      "loss": 1.5444,
      "step": 930
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0001913316119513095,
      "loss": 1.5869,
      "step": 940
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019123939505717448,
      "loss": 1.5678,
      "step": 950
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0001911471781630395,
      "loss": 1.5879,
      "step": 960
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019105496126890447,
      "loss": 1.5347,
      "step": 970
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019096274437476946,
      "loss": 1.6076,
      "step": 980
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019087052748063444,
      "loss": 1.5945,
      "step": 990
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00019077831058649945,
      "loss": 1.4995,
      "step": 1000
    },
    {
      "epoch": 0.18,
      "eval_accuracy": 0.4135475625342341,
      "eval_loss": 1.513289451599121,
      "eval_runtime": 120.4122,
      "eval_samples_per_second": 90.971,
      "eval_steps_per_second": 11.378,
      "step": 1000
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019068609369236446,
      "loss": 1.513,
      "step": 1010
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019059387679822945,
      "loss": 1.6145,
      "step": 1020
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019050165990409443,
      "loss": 1.6611,
      "step": 1030
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019040944300995944,
      "loss": 1.5784,
      "step": 1040
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00019031722611582443,
      "loss": 1.5153,
      "step": 1050
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00019022500922168941,
      "loss": 1.5148,
      "step": 1060
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001901327923275544,
      "loss": 1.6189,
      "step": 1070
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001900405754334194,
      "loss": 1.6063,
      "step": 1080
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00018994835853928442,
      "loss": 1.5173,
      "step": 1090
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001898561416451494,
      "loss": 1.4855,
      "step": 1100
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0001897639247510144,
      "loss": 1.6097,
      "step": 1110
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0001896717078568794,
      "loss": 1.6251,
      "step": 1120
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00018957949096274439,
      "loss": 1.5885,
      "step": 1130
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00018948727406860937,
      "loss": 1.5966,
      "step": 1140
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00018939505717447435,
      "loss": 1.5552,
      "step": 1150
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00018930284028033937,
      "loss": 1.6476,
      "step": 1160
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018921062338620438,
      "loss": 1.492,
      "step": 1170
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018911840649206936,
      "loss": 1.4321,
      "step": 1180
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018902618959793435,
      "loss": 1.5384,
      "step": 1190
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018893397270379936,
      "loss": 1.5005,
      "step": 1200
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00018884175580966434,
      "loss": 1.4933,
      "step": 1210
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018874953891552933,
      "loss": 1.5114,
      "step": 1220
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0001886573220213943,
      "loss": 1.5656,
      "step": 1230
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018856510512725932,
      "loss": 1.4009,
      "step": 1240
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018847288823312433,
      "loss": 1.4853,
      "step": 1250
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00018838067133898932,
      "loss": 1.5312,
      "step": 1260
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0001882884544448543,
      "loss": 1.5566,
      "step": 1270
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0001881962375507193,
      "loss": 1.4965,
      "step": 1280
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0001881040206565843,
      "loss": 1.5982,
      "step": 1290
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00018801180376244928,
      "loss": 1.4838,
      "step": 1300
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00018791958686831427,
      "loss": 1.4434,
      "step": 1310
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00018782736997417928,
      "loss": 1.4311,
      "step": 1320
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0001877351530800443,
      "loss": 1.4896,
      "step": 1330
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018764293618590928,
      "loss": 1.4999,
      "step": 1340
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018755071929177426,
      "loss": 1.3174,
      "step": 1350
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018745850239763924,
      "loss": 1.3892,
      "step": 1360
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018736628550350426,
      "loss": 1.4754,
      "step": 1370
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00018727406860936924,
      "loss": 1.4074,
      "step": 1380
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018718185171523422,
      "loss": 1.526,
      "step": 1390
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018708963482109924,
      "loss": 1.3958,
      "step": 1400
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018699741792696425,
      "loss": 1.473,
      "step": 1410
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018690520103282923,
      "loss": 1.3931,
      "step": 1420
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.00018681298413869422,
      "loss": 1.4689,
      "step": 1430
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001867207672445592,
      "loss": 1.4804,
      "step": 1440
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001866285503504242,
      "loss": 1.4743,
      "step": 1450
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001865363334562892,
      "loss": 1.386,
      "step": 1460
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00018644411656215418,
      "loss": 1.4222,
      "step": 1470
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001863518996680192,
      "loss": 1.3938,
      "step": 1480
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0001862596827738842,
      "loss": 1.5226,
      "step": 1490
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0001861674658797492,
      "loss": 1.4203,
      "step": 1500
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00018607524898561417,
      "loss": 1.5243,
      "step": 1510
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00018598303209147916,
      "loss": 1.4287,
      "step": 1520
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00018589081519734417,
      "loss": 1.2989,
      "step": 1530
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00018579859830320915,
      "loss": 1.4642,
      "step": 1540
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00018570638140907414,
      "loss": 1.5333,
      "step": 1550
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00018561416451493915,
      "loss": 1.4577,
      "step": 1560
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00018552194762080416,
      "loss": 1.409,
      "step": 1570
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00018542973072666915,
      "loss": 1.4131,
      "step": 1580
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00018533751383253413,
      "loss": 1.4178,
      "step": 1590
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00018524529693839911,
      "loss": 1.4121,
      "step": 1600
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00018515308004426413,
      "loss": 1.4046,
      "step": 1610
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0001850608631501291,
      "loss": 1.4579,
      "step": 1620
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0001849686462559941,
      "loss": 1.2591,
      "step": 1630
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0001848764293618591,
      "loss": 1.3416,
      "step": 1640
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00018478421246772412,
      "loss": 1.3919,
      "step": 1650
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0001846919955735891,
      "loss": 1.5162,
      "step": 1660
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00018459977867945409,
      "loss": 1.32,
      "step": 1670
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00018450756178531907,
      "loss": 1.4586,
      "step": 1680
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00018441534489118408,
      "loss": 1.2827,
      "step": 1690
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00018432312799704907,
      "loss": 1.4172,
      "step": 1700
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018423091110291405,
      "loss": 1.3311,
      "step": 1710
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018413869420877906,
      "loss": 1.3658,
      "step": 1720
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018404647731464407,
      "loss": 1.5157,
      "step": 1730
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018395426042050906,
      "loss": 1.4216,
      "step": 1740
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018386204352637404,
      "loss": 1.3597,
      "step": 1750
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00018376982663223903,
      "loss": 1.3921,
      "step": 1760
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00018367760973810404,
      "loss": 1.3468,
      "step": 1770
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00018358539284396902,
      "loss": 1.3247,
      "step": 1780
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.000183493175949834,
      "loss": 1.2005,
      "step": 1790
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00018340095905569902,
      "loss": 1.3875,
      "step": 1800
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00018330874216156403,
      "loss": 1.4271,
      "step": 1810
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00018321652526742901,
      "loss": 1.3405,
      "step": 1820
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.000183124308373294,
      "loss": 1.3398,
      "step": 1830
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00018303209147915898,
      "loss": 1.4113,
      "step": 1840
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.000182939874585024,
      "loss": 1.3913,
      "step": 1850
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00018284765769088898,
      "loss": 1.4525,
      "step": 1860
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00018275544079675396,
      "loss": 1.3711,
      "step": 1870
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00018266322390261898,
      "loss": 1.3856,
      "step": 1880
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.000182571007008484,
      "loss": 1.4341,
      "step": 1890
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00018247879011434897,
      "loss": 1.2842,
      "step": 1900
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00018238657322021396,
      "loss": 1.3662,
      "step": 1910
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.00018229435632607894,
      "loss": 1.3707,
      "step": 1920
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00018220213943194392,
      "loss": 1.3719,
      "step": 1930
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00018210992253780894,
      "loss": 1.2909,
      "step": 1940
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00018201770564367392,
      "loss": 1.424,
      "step": 1950
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00018192548874953893,
      "loss": 1.291,
      "step": 1960
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00018183327185540392,
      "loss": 1.4188,
      "step": 1970
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00018174105496126893,
      "loss": 1.2462,
      "step": 1980
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001816488380671339,
      "loss": 1.4223,
      "step": 1990
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001815566211729989,
      "loss": 1.3103,
      "step": 2000
    },
    {
      "epoch": 0.37,
      "eval_accuracy": 0.5044732517801717,
      "eval_loss": 1.2796891927719116,
      "eval_runtime": 119.305,
      "eval_samples_per_second": 91.815,
      "eval_steps_per_second": 11.483,
      "step": 2000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00018146440427886388,
      "loss": 1.4283,
      "step": 2010
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0001813721873847289,
      "loss": 1.3649,
      "step": 2020
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00018127997049059388,
      "loss": 1.3352,
      "step": 2030
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0001811877535964589,
      "loss": 1.4464,
      "step": 2040
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00018109553670232387,
      "loss": 1.2952,
      "step": 2050
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00018100331980818888,
      "loss": 1.3101,
      "step": 2060
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00018091110291405387,
      "loss": 1.4286,
      "step": 2070
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.00018081888601991885,
      "loss": 1.1986,
      "step": 2080
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018072666912578384,
      "loss": 1.2121,
      "step": 2090
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018063445223164885,
      "loss": 1.2574,
      "step": 2100
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018054223533751383,
      "loss": 1.2758,
      "step": 2110
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018045001844337885,
      "loss": 1.2478,
      "step": 2120
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018035780154924383,
      "loss": 1.365,
      "step": 2130
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00018026558465510884,
      "loss": 1.4186,
      "step": 2140
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00018017336776097383,
      "loss": 1.326,
      "step": 2150
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001800811508668388,
      "loss": 1.2639,
      "step": 2160
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001799889339727038,
      "loss": 1.2415,
      "step": 2170
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001798967170785688,
      "loss": 1.2523,
      "step": 2180
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0001798045001844338,
      "loss": 1.3794,
      "step": 2190
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0001797122832902988,
      "loss": 1.2596,
      "step": 2200
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00017962006639616379,
      "loss": 1.2908,
      "step": 2210
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0001795278495020288,
      "loss": 1.4256,
      "step": 2220
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00017943563260789378,
      "loss": 1.4267,
      "step": 2230
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00017934341571375877,
      "loss": 1.2581,
      "step": 2240
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00017925119881962375,
      "loss": 1.3161,
      "step": 2250
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017915898192548876,
      "loss": 1.3534,
      "step": 2260
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017906676503135375,
      "loss": 1.3234,
      "step": 2270
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017897454813721876,
      "loss": 1.3659,
      "step": 2280
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017888233124308374,
      "loss": 1.3122,
      "step": 2290
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00017879011434894875,
      "loss": 1.2602,
      "step": 2300
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00017869789745481374,
      "loss": 1.3454,
      "step": 2310
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00017860568056067872,
      "loss": 1.2308,
      "step": 2320
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0001785134636665437,
      "loss": 1.3802,
      "step": 2330
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.00017842124677240872,
      "loss": 1.328,
      "step": 2340
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0001783290298782737,
      "loss": 1.2514,
      "step": 2350
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00017823681298413871,
      "loss": 1.3682,
      "step": 2360
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001781445960900037,
      "loss": 1.2913,
      "step": 2370
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001780523791958687,
      "loss": 1.2392,
      "step": 2380
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0001779601623017337,
      "loss": 1.2229,
      "step": 2390
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00017786794540759868,
      "loss": 1.2137,
      "step": 2400
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00017777572851346366,
      "loss": 1.3575,
      "step": 2410
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00017768351161932868,
      "loss": 1.2537,
      "step": 2420
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00017759129472519366,
      "loss": 1.2323,
      "step": 2430
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00017749907783105864,
      "loss": 1.3776,
      "step": 2440
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00017740686093692366,
      "loss": 1.257,
      "step": 2450
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00017731464404278867,
      "loss": 1.2321,
      "step": 2460
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00017722242714865365,
      "loss": 1.3849,
      "step": 2470
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00017713021025451864,
      "loss": 1.2232,
      "step": 2480
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00017703799336038362,
      "loss": 1.2499,
      "step": 2490
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00017694577646624863,
      "loss": 1.2204,
      "step": 2500
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00017685355957211362,
      "loss": 1.3772,
      "step": 2510
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0001767613426779786,
      "loss": 1.2534,
      "step": 2520
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001766691257838436,
      "loss": 1.2529,
      "step": 2530
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001765769088897086,
      "loss": 1.3371,
      "step": 2540
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001764846919955736,
      "loss": 1.2988,
      "step": 2550
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0001763924751014386,
      "loss": 1.2163,
      "step": 2560
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00017630025820730358,
      "loss": 1.3794,
      "step": 2570
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017620804131316856,
      "loss": 1.1544,
      "step": 2580
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017611582441903357,
      "loss": 1.1822,
      "step": 2590
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017602360752489856,
      "loss": 1.0888,
      "step": 2600
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017593139063076357,
      "loss": 1.2588,
      "step": 2610
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00017583917373662855,
      "loss": 1.3141,
      "step": 2620
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017574695684249356,
      "loss": 1.1886,
      "step": 2630
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017565473994835855,
      "loss": 1.3152,
      "step": 2640
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017556252305422353,
      "loss": 1.2039,
      "step": 2650
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017547030616008852,
      "loss": 1.2983,
      "step": 2660
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017537808926595353,
      "loss": 1.2921,
      "step": 2670
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.00017528587237181851,
      "loss": 1.2147,
      "step": 2680
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00017519365547768353,
      "loss": 1.053,
      "step": 2690
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001751014385835485,
      "loss": 1.2289,
      "step": 2700
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.00017500922168941352,
      "loss": 1.2486,
      "step": 2710
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001749170047952785,
      "loss": 1.164,
      "step": 2720
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0001748247879011435,
      "loss": 1.3237,
      "step": 2730
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017473257100700847,
      "loss": 1.2586,
      "step": 2740
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017464035411287349,
      "loss": 1.2166,
      "step": 2750
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017454813721873847,
      "loss": 1.25,
      "step": 2760
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017445592032460348,
      "loss": 1.1968,
      "step": 2770
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017436370343046847,
      "loss": 1.2486,
      "step": 2780
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017427148653633348,
      "loss": 1.2165,
      "step": 2790
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00017417926964219846,
      "loss": 1.2314,
      "step": 2800
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00017408705274806345,
      "loss": 1.1307,
      "step": 2810
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00017399483585392843,
      "loss": 1.2748,
      "step": 2820
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00017390261895979344,
      "loss": 1.2688,
      "step": 2830
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.00017381040206565843,
      "loss": 1.3272,
      "step": 2840
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00017371818517152344,
      "loss": 1.3796,
      "step": 2850
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00017362596827738842,
      "loss": 1.2804,
      "step": 2860
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00017353375138325343,
      "loss": 1.2676,
      "step": 2870
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00017344153448911842,
      "loss": 1.2247,
      "step": 2880
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0001733493175949834,
      "loss": 1.2221,
      "step": 2890
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0001732571007008484,
      "loss": 1.1982,
      "step": 2900
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0001731648838067134,
      "loss": 1.1594,
      "step": 2910
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00017307266691257838,
      "loss": 1.0483,
      "step": 2920
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0001729804500184434,
      "loss": 1.2851,
      "step": 2930
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.00017288823312430838,
      "loss": 1.234,
      "step": 2940
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0001727960162301734,
      "loss": 1.237,
      "step": 2950
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00017270379933603838,
      "loss": 1.2682,
      "step": 2960
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00017261158244190336,
      "loss": 1.2149,
      "step": 2970
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00017251936554776834,
      "loss": 1.2921,
      "step": 2980
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00017242714865363336,
      "loss": 1.2349,
      "step": 2990
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00017233493175949834,
      "loss": 1.2417,
      "step": 3000
    },
    {
      "epoch": 0.55,
      "eval_accuracy": 0.5450976812123425,
      "eval_loss": 1.1583396196365356,
      "eval_runtime": 118.0159,
      "eval_samples_per_second": 92.818,
      "eval_steps_per_second": 11.609,
      "step": 3000
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017224271486536335,
      "loss": 1.2374,
      "step": 3010
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017215049797122834,
      "loss": 1.2474,
      "step": 3020
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017205828107709335,
      "loss": 1.2253,
      "step": 3030
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017196606418295833,
      "loss": 1.2369,
      "step": 3040
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.00017187384728882332,
      "loss": 1.1092,
      "step": 3050
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.0001717816303946883,
      "loss": 1.2028,
      "step": 3060
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0001716894135005533,
      "loss": 1.2553,
      "step": 3070
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0001715971966064183,
      "loss": 1.067,
      "step": 3080
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0001715049797122833,
      "loss": 1.0817,
      "step": 3090
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0001714127628181483,
      "loss": 1.179,
      "step": 3100
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0001713205459240133,
      "loss": 1.251,
      "step": 3110
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0001712283290298783,
      "loss": 1.2271,
      "step": 3120
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017113611213574327,
      "loss": 1.1765,
      "step": 3130
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017104389524160826,
      "loss": 1.1913,
      "step": 3140
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017095167834747327,
      "loss": 1.361,
      "step": 3150
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017085946145333825,
      "loss": 1.1744,
      "step": 3160
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.00017076724455920326,
      "loss": 1.249,
      "step": 3170
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017067502766506825,
      "loss": 1.1168,
      "step": 3180
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017058281077093323,
      "loss": 1.0437,
      "step": 3190
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017049059387679825,
      "loss": 1.1651,
      "step": 3200
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017039837698266323,
      "loss": 1.1997,
      "step": 3210
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.00017030616008852821,
      "loss": 1.0417,
      "step": 3220
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001702139431943932,
      "loss": 1.0223,
      "step": 3230
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001701217263002582,
      "loss": 1.1061,
      "step": 3240
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00017002950940612322,
      "loss": 1.3041,
      "step": 3250
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001699372925119882,
      "loss": 1.2808,
      "step": 3260
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001698450756178532,
      "loss": 1.3155,
      "step": 3270
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0001697528587237182,
      "loss": 1.1121,
      "step": 3280
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00016966064182958319,
      "loss": 1.1627,
      "step": 3290
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00016956842493544817,
      "loss": 1.2635,
      "step": 3300
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00016947620804131316,
      "loss": 1.2897,
      "step": 3310
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00016938399114717817,
      "loss": 1.1851,
      "step": 3320
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.00016929177425304318,
      "loss": 1.1192,
      "step": 3330
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00016919955735890816,
      "loss": 1.3427,
      "step": 3340
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00016910734046477315,
      "loss": 1.278,
      "step": 3350
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00016901512357063816,
      "loss": 1.0677,
      "step": 3360
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00016892290667650314,
      "loss": 1.1406,
      "step": 3370
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.00016883068978236813,
      "loss": 1.0977,
      "step": 3380
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0001687384728882331,
      "loss": 1.1641,
      "step": 3390
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016864625599409812,
      "loss": 1.1614,
      "step": 3400
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016855403909996313,
      "loss": 1.1798,
      "step": 3410
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016846182220582812,
      "loss": 1.0466,
      "step": 3420
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0001683696053116931,
      "loss": 1.1568,
      "step": 3430
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.00016827738841755812,
      "loss": 1.1289,
      "step": 3440
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0001681851715234231,
      "loss": 1.1635,
      "step": 3450
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00016809295462928808,
      "loss": 1.2475,
      "step": 3460
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00016800073773515307,
      "loss": 1.1312,
      "step": 3470
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00016790852084101808,
      "loss": 1.0037,
      "step": 3480
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0001678163039468831,
      "loss": 1.2798,
      "step": 3490
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016772408705274808,
      "loss": 1.1405,
      "step": 3500
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016763187015861306,
      "loss": 1.239,
      "step": 3510
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016753965326447807,
      "loss": 1.2206,
      "step": 3520
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016744743637034306,
      "loss": 1.1298,
      "step": 3530
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016735521947620804,
      "loss": 1.2235,
      "step": 3540
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.00016726300258207302,
      "loss": 1.2435,
      "step": 3550
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00016717078568793804,
      "loss": 0.9712,
      "step": 3560
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00016707856879380305,
      "loss": 1.0133,
      "step": 3570
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00016698635189966803,
      "loss": 1.2278,
      "step": 3580
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00016689413500553302,
      "loss": 1.1089,
      "step": 3590
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.00016680191811139803,
      "loss": 1.1549,
      "step": 3600
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.000166709701217263,
      "loss": 1.0782,
      "step": 3610
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.000166617484323128,
      "loss": 1.0607,
      "step": 3620
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.00016652526742899298,
      "loss": 1.0517,
      "step": 3630
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.000166433050534858,
      "loss": 1.1832,
      "step": 3640
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.000166340833640723,
      "loss": 1.1528,
      "step": 3650
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.000166248616746588,
      "loss": 1.0889,
      "step": 3660
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00016615639985245297,
      "loss": 1.2566,
      "step": 3670
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00016606418295831798,
      "loss": 1.0122,
      "step": 3680
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00016597196606418297,
      "loss": 1.1981,
      "step": 3690
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00016587974917004795,
      "loss": 1.3029,
      "step": 3700
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.00016578753227591294,
      "loss": 1.0953,
      "step": 3710
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00016569531538177795,
      "loss": 1.243,
      "step": 3720
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00016560309848764296,
      "loss": 1.2896,
      "step": 3730
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00016551088159350795,
      "loss": 1.281,
      "step": 3740
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00016541866469937293,
      "loss": 1.2114,
      "step": 3750
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00016532644780523794,
      "loss": 1.1415,
      "step": 3760
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.00016523423091110293,
      "loss": 1.0854,
      "step": 3770
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0001651420140169679,
      "loss": 1.2067,
      "step": 3780
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0001650497971228329,
      "loss": 1.1454,
      "step": 3790
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0001649575802286979,
      "loss": 1.227,
      "step": 3800
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.00016486536333456292,
      "loss": 1.0913,
      "step": 3810
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0001647731464404279,
      "loss": 1.2574,
      "step": 3820
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00016468092954629289,
      "loss": 1.2291,
      "step": 3830
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00016458871265215787,
      "loss": 1.2255,
      "step": 3840
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00016449649575802288,
      "loss": 1.2167,
      "step": 3850
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00016440427886388787,
      "loss": 1.0863,
      "step": 3860
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.00016431206196975285,
      "loss": 1.1154,
      "step": 3870
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016421984507561786,
      "loss": 1.2457,
      "step": 3880
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016412762818148287,
      "loss": 1.1383,
      "step": 3890
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016403541128734786,
      "loss": 1.0618,
      "step": 3900
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016394319439321284,
      "loss": 1.1089,
      "step": 3910
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016385097749907783,
      "loss": 1.1526,
      "step": 3920
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00016375876060494284,
      "loss": 1.155,
      "step": 3930
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00016366654371080782,
      "loss": 1.1888,
      "step": 3940
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0001635743268166728,
      "loss": 1.0237,
      "step": 3950
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00016348210992253782,
      "loss": 1.006,
      "step": 3960
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00016338989302840283,
      "loss": 1.0144,
      "step": 3970
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.00016329767613426782,
      "loss": 1.2536,
      "step": 3980
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.0001632054592401328,
      "loss": 1.1196,
      "step": 3990
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00016311324234599778,
      "loss": 1.2142,
      "step": 4000
    },
    {
      "epoch": 0.74,
      "eval_accuracy": 0.5779623881687055,
      "eval_loss": 1.0952427387237549,
      "eval_runtime": 117.9515,
      "eval_samples_per_second": 92.869,
      "eval_steps_per_second": 11.615,
      "step": 4000
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.0001630210254518628,
      "loss": 1.2744,
      "step": 4010
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00016292880855772778,
      "loss": 1.1747,
      "step": 4020
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.00016283659166359276,
      "loss": 1.211,
      "step": 4030
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00016274437476945778,
      "loss": 1.1793,
      "step": 4040
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.0001626521578753228,
      "loss": 1.1115,
      "step": 4050
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00016255994098118777,
      "loss": 1.0671,
      "step": 4060
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00016246772408705276,
      "loss": 1.1405,
      "step": 4070
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00016237550719291774,
      "loss": 1.0318,
      "step": 4080
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00016228329029878275,
      "loss": 1.091,
      "step": 4090
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00016219107340464774,
      "loss": 1.1943,
      "step": 4100
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00016209885651051272,
      "loss": 1.0205,
      "step": 4110
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00016200663961637773,
      "loss": 1.1909,
      "step": 4120
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00016191442272224274,
      "loss": 1.3308,
      "step": 4130
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.00016182220582810773,
      "loss": 1.2898,
      "step": 4140
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0001617299889339727,
      "loss": 1.2433,
      "step": 4150
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0001616377720398377,
      "loss": 1.1034,
      "step": 4160
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0001615455551457027,
      "loss": 1.1534,
      "step": 4170
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0001614533382515677,
      "loss": 1.1877,
      "step": 4180
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.00016136112135743268,
      "loss": 1.2006,
      "step": 4190
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.0001612689044632977,
      "loss": 1.1875,
      "step": 4200
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0001611766875691627,
      "loss": 1.1954,
      "step": 4210
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00016108447067502768,
      "loss": 1.0619,
      "step": 4220
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00016099225378089267,
      "loss": 1.1578,
      "step": 4230
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00016090003688675765,
      "loss": 1.0976,
      "step": 4240
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00016080781999262267,
      "loss": 1.0075,
      "step": 4250
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016071560309848765,
      "loss": 1.1182,
      "step": 4260
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016062338620435263,
      "loss": 1.0065,
      "step": 4270
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016053116931021765,
      "loss": 1.0983,
      "step": 4280
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016043895241608266,
      "loss": 0.988,
      "step": 4290
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016034673552194764,
      "loss": 1.1844,
      "step": 4300
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.00016025451862781263,
      "loss": 1.1004,
      "step": 4310
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0001601623017336776,
      "loss": 1.1675,
      "step": 4320
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.00016007008483954262,
      "loss": 1.0223,
      "step": 4330
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0001599778679454076,
      "loss": 1.1491,
      "step": 4340
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0001598856510512726,
      "loss": 1.0693,
      "step": 4350
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0001597934341571376,
      "loss": 1.2653,
      "step": 4360
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0001597012172630026,
      "loss": 1.1791,
      "step": 4370
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.0001596090003688676,
      "loss": 1.0327,
      "step": 4380
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00015951678347473258,
      "loss": 1.1773,
      "step": 4390
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00015942456658059757,
      "loss": 1.2394,
      "step": 4400
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.00015933234968646258,
      "loss": 1.1458,
      "step": 4410
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015924013279232756,
      "loss": 1.0744,
      "step": 4420
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015914791589819255,
      "loss": 1.2287,
      "step": 4430
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015905569900405756,
      "loss": 1.0857,
      "step": 4440
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015896348210992254,
      "loss": 1.2088,
      "step": 4450
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015887126521578755,
      "loss": 1.0082,
      "step": 4460
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015877904832165254,
      "loss": 1.054,
      "step": 4470
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00015868683142751752,
      "loss": 1.2285,
      "step": 4480
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.0001585946145333825,
      "loss": 1.0143,
      "step": 4490
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00015850239763924752,
      "loss": 1.0714,
      "step": 4500
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.0001584101807451125,
      "loss": 1.0882,
      "step": 4510
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.00015831796385097752,
      "loss": 1.1059,
      "step": 4520
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0001582257469568425,
      "loss": 1.0658,
      "step": 4530
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0001581335300627075,
      "loss": 1.1272,
      "step": 4540
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0001580413131685725,
      "loss": 1.0791,
      "step": 4550
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00015794909627443748,
      "loss": 1.1533,
      "step": 4560
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00015785687938030246,
      "loss": 1.1378,
      "step": 4570
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.00015776466248616748,
      "loss": 1.1286,
      "step": 4580
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00015767244559203246,
      "loss": 1.1265,
      "step": 4590
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00015758022869789747,
      "loss": 0.9253,
      "step": 4600
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00015748801180376246,
      "loss": 1.0376,
      "step": 4610
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00015739579490962747,
      "loss": 1.1497,
      "step": 4620
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.00015730357801549245,
      "loss": 1.1336,
      "step": 4630
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00015721136112135744,
      "loss": 1.2066,
      "step": 4640
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00015711914422722242,
      "loss": 1.1973,
      "step": 4650
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00015702692733308743,
      "loss": 1.1119,
      "step": 4660
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00015693471043895242,
      "loss": 1.044,
      "step": 4670
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00015684249354481743,
      "loss": 1.1936,
      "step": 4680
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.0001567502766506824,
      "loss": 0.9883,
      "step": 4690
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.00015665805975654742,
      "loss": 1.0117,
      "step": 4700
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0001565658428624124,
      "loss": 1.0789,
      "step": 4710
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0001564736259682774,
      "loss": 1.1306,
      "step": 4720
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00015638140907414238, | |
| "loss": 1.1597, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0001562891921800074, | |
| "loss": 1.0788, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00015619697528587237, | |
| "loss": 1.0904, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00015610475839173736, | |
| "loss": 1.155, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00015601254149760237, | |
| "loss": 1.0715, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00015592032460346738, | |
| "loss": 1.0461, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00015582810770933237, | |
| "loss": 1.1844, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015573589081519735, | |
| "loss": 1.2037, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015564367392106233, | |
| "loss": 1.0438, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015555145702692735, | |
| "loss": 1.1288, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015545924013279233, | |
| "loss": 1.0254, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015536702323865731, | |
| "loss": 1.0471, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00015527480634452233, | |
| "loss": 1.1056, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00015518258945038734, | |
| "loss": 0.9244, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00015509037255625232, | |
| "loss": 1.0792, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0001549981556621173, | |
| "loss": 1.1597, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0001549059387679823, | |
| "loss": 1.2357, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0001548137218738473, | |
| "loss": 1.0048, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0001547215049797123, | |
| "loss": 1.1648, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00015462928808557727, | |
| "loss": 0.9634, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00015453707119144228, | |
| "loss": 1.1526, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0001544448542973073, | |
| "loss": 1.0899, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00015435263740317228, | |
| "loss": 1.1109, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00015426042050903726, | |
| "loss": 1.0795, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00015416820361490225, | |
| "loss": 0.9967, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00015407598672076726, | |
| "loss": 1.0689, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00015398376982663224, | |
| "loss": 1.0041, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00015389155293249723, | |
| "loss": 1.0633, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.597316048931897, | |
| "eval_loss": 1.0508341789245605, | |
| "eval_runtime": 119.8159, | |
| "eval_samples_per_second": 91.424, | |
| "eval_steps_per_second": 11.434, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00015379933603836224, | |
| "loss": 1.0456, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00015370711914422725, | |
| "loss": 1.1123, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00015361490225009223, | |
| "loss": 1.1132, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00015352268535595722, | |
| "loss": 1.0581, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0001534304684618222, | |
| "loss": 1.0916, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0001533382515676872, | |
| "loss": 1.2104, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0001532460346735522, | |
| "loss": 1.0765, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00015315381777941718, | |
| "loss": 1.0358, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0001530616008852822, | |
| "loss": 1.253, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00015296938399114718, | |
| "loss": 1.0404, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0001528771670970122, | |
| "loss": 1.1361, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00015278495020287718, | |
| "loss": 1.0879, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00015269273330874216, | |
| "loss": 1.0397, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00015260051641460714, | |
| "loss": 1.1521, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00015250829952047216, | |
| "loss": 1.009, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00015241608262633714, | |
| "loss": 1.0276, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00015232386573220215, | |
| "loss": 1.2377, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00015223164883806714, | |
| "loss": 1.24, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00015213943194393215, | |
| "loss": 1.1269, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00015204721504979713, | |
| "loss": 1.0245, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00015195499815566212, | |
| "loss": 0.9721, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0001518627812615271, | |
| "loss": 1.1268, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0001517705643673921, | |
| "loss": 1.0673, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001516783474732571, | |
| "loss": 0.9425, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001515861305791221, | |
| "loss": 1.1363, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001514939136849871, | |
| "loss": 1.1446, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001514016967908521, | |
| "loss": 1.1441, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0001513094798967171, | |
| "loss": 1.1669, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015121726300258207, | |
| "loss": 1.0665, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015112504610844706, | |
| "loss": 0.9422, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015103282921431207, | |
| "loss": 1.0679, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015094061232017705, | |
| "loss": 1.1073, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015084839542604207, | |
| "loss": 1.042, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00015075617853190705, | |
| "loss": 1.1157, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00015066396163777206, | |
| "loss": 1.1154, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00015057174474363705, | |
| "loss": 1.2048, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00015047952784950203, | |
| "loss": 1.0115, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00015038731095536701, | |
| "loss": 1.1561, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00015029509406123203, | |
| "loss": 1.0331, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000150202877167097, | |
| "loss": 1.0371, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00015011066027296202, | |
| "loss": 0.9979, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000150018443378827, | |
| "loss": 1.1012, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00014992622648469202, | |
| "loss": 1.0519, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000149834009590557, | |
| "loss": 0.9354, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.000149741792696422, | |
| "loss": 0.9012, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00014964957580228697, | |
| "loss": 1.0494, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00014955735890815198, | |
| "loss": 0.9693, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00014946514201401697, | |
| "loss": 1.1344, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00014937292511988198, | |
| "loss": 1.0195, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00014928070822574696, | |
| "loss": 0.8604, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00014918849133161197, | |
| "loss": 1.045, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00014909627443747696, | |
| "loss": 0.9119, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00014900405754334194, | |
| "loss": 1.0778, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00014891184064920693, | |
| "loss": 0.9471, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00014881962375507194, | |
| "loss": 1.0131, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00014872740686093692, | |
| "loss": 1.0563, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00014863518996680193, | |
| "loss": 0.9712, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00014854297307266692, | |
| "loss": 0.9751, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00014845075617853193, | |
| "loss": 0.9858, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00014835853928439692, | |
| "loss": 0.8813, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.0001482663223902619, | |
| "loss": 0.9845, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00014817410549612688, | |
| "loss": 0.8825, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0001480818886019919, | |
| "loss": 1.1025, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00014798967170785688, | |
| "loss": 1.0646, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0001478974548137219, | |
| "loss": 1.0291, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00014780523791958688, | |
| "loss": 0.9306, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0001477130210254519, | |
| "loss": 1.0153, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00014762080413131687, | |
| "loss": 0.9032, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00014752858723718186, | |
| "loss": 1.011, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00014743637034304684, | |
| "loss": 0.9371, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00014734415344891183, | |
| "loss": 1.0678, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00014725193655477684, | |
| "loss": 1.0736, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00014715971966064185, | |
| "loss": 1.0615, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00014706750276650683, | |
| "loss": 0.9433, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00014697528587237182, | |
| "loss": 0.8013, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00014688306897823683, | |
| "loss": 1.0051, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.0001467908520841018, | |
| "loss": 0.9543, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0001466986351899668, | |
| "loss": 0.918, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00014660641829583178, | |
| "loss": 0.9612, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0001465142014016968, | |
| "loss": 0.9229, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0001464219845075618, | |
| "loss": 1.0071, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0001463297676134268, | |
| "loss": 1.0156, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014623755071929177, | |
| "loss": 0.9041, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014614533382515679, | |
| "loss": 0.8998, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014605311693102177, | |
| "loss": 1.0525, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014596090003688675, | |
| "loss": 1.0265, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014586868314275174, | |
| "loss": 0.8741, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00014577646624861675, | |
| "loss": 1.0066, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00014568424935448176, | |
| "loss": 1.1394, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00014559203246034675, | |
| "loss": 0.9974, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00014549981556621173, | |
| "loss": 0.9108, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00014540759867207674, | |
| "loss": 0.9731, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00014531538177794173, | |
| "loss": 0.914, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0001452231648838067, | |
| "loss": 1.1119, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0001451309479896717, | |
| "loss": 1.0655, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0001450387310955367, | |
| "loss": 0.982, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00014494651420140172, | |
| "loss": 1.0214, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0001448542973072667, | |
| "loss": 0.9889, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0001447620804131317, | |
| "loss": 1.1089, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.0001446698635189967, | |
| "loss": 0.9559, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_accuracy": 0.6106445134197553, | |
| "eval_loss": 1.017890453338623, | |
| "eval_runtime": 117.4367, | |
| "eval_samples_per_second": 93.276, | |
| "eval_steps_per_second": 11.666, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00014457764662486168, | |
| "loss": 0.9921, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00014448542973072667, | |
| "loss": 1.0913, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00014439321283659165, | |
| "loss": 0.9651, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00014430099594245666, | |
| "loss": 0.9044, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00014420877904832167, | |
| "loss": 0.9205, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00014411656215418666, | |
| "loss": 0.8574, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00014402434526005164, | |
| "loss": 0.9738, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00014393212836591665, | |
| "loss": 0.9524, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00014383991147178164, | |
| "loss": 1.068, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00014374769457764662, | |
| "loss": 0.9719, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.0001436554776835116, | |
| "loss": 0.8633, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00014356326078937662, | |
| "loss": 1.1786, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00014347104389524163, | |
| "loss": 0.9662, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00014337882700110662, | |
| "loss": 1.0083, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.0001432866101069716, | |
| "loss": 0.9786, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.0001431943932128366, | |
| "loss": 1.0162, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.0001431021763187016, | |
| "loss": 1.1466, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00014300995942456658, | |
| "loss": 0.9591, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00014291774253043156, | |
| "loss": 0.9515, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00014282552563629658, | |
| "loss": 0.9676, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0001427333087421616, | |
| "loss": 1.0068, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00014264109184802657, | |
| "loss": 1.0432, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00014254887495389156, | |
| "loss": 1.0162, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00014245665805975657, | |
| "loss": 0.9487, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00014236444116562155, | |
| "loss": 0.8694, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00014227222427148654, | |
| "loss": 0.8753, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00014218000737735152, | |
| "loss": 0.9522, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00014208779048321653, | |
| "loss": 1.0287, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00014199557358908154, | |
| "loss": 0.9371, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00014190335669494653, | |
| "loss": 1.0457, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.0001418111398008115, | |
| "loss": 0.9603, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0001417189229066765, | |
| "loss": 0.9226, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0001416267060125415, | |
| "loss": 0.9346, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0001415344891184065, | |
| "loss": 0.9726, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.00014144227222427148, | |
| "loss": 0.885, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0001413500553301365, | |
| "loss": 0.8587, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0001412578384360015, | |
| "loss": 1.0051, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00014116562154186649, | |
| "loss": 1.1279, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00014107340464773147, | |
| "loss": 0.9938, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00014098118775359645, | |
| "loss": 0.884, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00014088897085946147, | |
| "loss": 0.9564, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00014079675396532645, | |
| "loss": 1.0019, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00014070453707119143, | |
| "loss": 1.0467, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00014061232017705645, | |
| "loss": 0.9473, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00014052010328292146, | |
| "loss": 1.0038, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00014042788638878644, | |
| "loss": 0.881, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00014033566949465143, | |
| "loss": 0.9928, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0001402434526005164, | |
| "loss": 1.028, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.00014015123570638142, | |
| "loss": 0.8758, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0001400590188122464, | |
| "loss": 1.0071, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0001399668019181114, | |
| "loss": 0.9646, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0001398745850239764, | |
| "loss": 0.9039, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.00013978236812984141, | |
| "loss": 1.0133, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.0001396901512357064, | |
| "loss": 0.9122, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00013959793434157138, | |
| "loss": 0.9466, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00013950571744743637, | |
| "loss": 1.0843, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00013941350055330138, | |
| "loss": 0.9617, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00013932128365916636, | |
| "loss": 0.9372, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00013922906676503135, | |
| "loss": 0.8564, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00013913684987089636, | |
| "loss": 1.0391, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00013904463297676137, | |
| "loss": 1.0507, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00013895241608262635, | |
| "loss": 1.116, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00013886019918849134, | |
| "loss": 0.9702, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00013876798229435632, | |
| "loss": 0.9306, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00013867576540022134, | |
| "loss": 0.8996, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00013858354850608632, | |
| "loss": 1.1706, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.0001384913316119513, | |
| "loss": 1.0423, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00013839911471781632, | |
| "loss": 1.0197, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 0.00013830689782368133, | |
| "loss": 0.885, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.0001382146809295463, | |
| "loss": 1.0354, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.0001381224640354113, | |
| "loss": 1.0241, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00013803024714127628, | |
| "loss": 1.0995, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.0001379380302471413, | |
| "loss": 0.9009, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00013784581335300628, | |
| "loss": 1.0854, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00013775359645887126, | |
| "loss": 1.0316, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.00013766137956473627, | |
| "loss": 0.9286, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.00013756916267060128, | |
| "loss": 0.7782, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.00013747694577646627, | |
| "loss": 1.0035, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.00013738472888233125, | |
| "loss": 1.0167, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.00013729251198819624, | |
| "loss": 0.8368, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00013720029509406125, | |
| "loss": 0.9972, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00013710807819992623, | |
| "loss": 0.958, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00013701586130579122, | |
| "loss": 1.0516, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00013692364441165623, | |
| "loss": 0.8795, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00013683142751752124, | |
| "loss": 1.0168, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.00013673921062338622, | |
| "loss": 0.9276, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.0001366469937292512, | |
| "loss": 1.1021, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.0001365547768351162, | |
| "loss": 0.975, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.0001364625599409812, | |
| "loss": 0.8585, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.0001363703430468462, | |
| "loss": 1.0498, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 0.00013627812615271117, | |
| "loss": 1.0257, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00013618590925857619, | |
| "loss": 0.9268, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.0001360936923644412, | |
| "loss": 1.0438, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00013600147547030618, | |
| "loss": 0.8607, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00013590925857617117, | |
| "loss": 0.9124, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.00013581704168203615, | |
| "loss": 0.9851, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00013572482478790113, | |
| "loss": 0.9737, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00013563260789376615, | |
| "loss": 0.8973, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00013554039099963113, | |
| "loss": 1.0597, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00013544817410549614, | |
| "loss": 0.9984, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_accuracy": 0.6178564907796239, | |
| "eval_loss": 0.9958341121673584, | |
| "eval_runtime": 116.6094, | |
| "eval_samples_per_second": 93.938, | |
| "eval_steps_per_second": 11.749, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00013535595721136113, | |
| "loss": 0.9331, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 0.00013526374031722614, | |
| "loss": 0.9406, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.00013517152342309112, | |
| "loss": 1.072, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.0001350793065289561, | |
| "loss": 0.9313, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.0001349870896348211, | |
| "loss": 0.9284, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.0001348948727406861, | |
| "loss": 1.0917, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.0001348026558465511, | |
| "loss": 0.9193, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.0001347104389524161, | |
| "loss": 0.9261, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00013461822205828108, | |
| "loss": 0.9229, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.0001345260051641461, | |
| "loss": 0.8888, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00013443378827001108, | |
| "loss": 1.0265, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00013434157137587606, | |
| "loss": 0.913, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00013424935448174105, | |
| "loss": 0.8745, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00013415713758760606, | |
| "loss": 1.0292, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00013406492069347104, | |
| "loss": 0.9958, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00013397270379933603, | |
| "loss": 0.8609, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00013388048690520104, | |
| "loss": 1.0622, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00013378827001106605, | |
| "loss": 0.959, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00013369605311693104, | |
| "loss": 1.009, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00013360383622279602, | |
| "loss": 0.8563, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.000133511619328661, | |
| "loss": 0.9683, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00013341940243452602, | |
| "loss": 0.9321, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.000133327185540391, | |
| "loss": 0.836, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00013323496864625598, | |
| "loss": 1.0297, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.000133142751752121, | |
| "loss": 0.8333, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.000133050534857986, | |
| "loss": 0.8996, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.000132958317963851, | |
| "loss": 0.8785, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00013286610106971598, | |
| "loss": 0.9904, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.00013277388417558096, | |
| "loss": 1.0555, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00013268166728144597, | |
| "loss": 1.0595, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00013258945038731096, | |
| "loss": 0.9628, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00013249723349317594, | |
| "loss": 0.9414, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00013240501659904095, | |
| "loss": 1.0532, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 0.00013231279970490596, | |
| "loss": 0.796, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00013222058281077095, | |
| "loss": 0.8783, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00013212836591663593, | |
| "loss": 0.9517, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00013203614902250092, | |
| "loss": 1.0121, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.00013194393212836593, | |
| "loss": 0.8807, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.0001318517152342309, | |
| "loss": 0.9829, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.0001317594983400959, | |
| "loss": 1.1037, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.0001316672814459609, | |
| "loss": 0.9892, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.00013157506455182592, | |
| "loss": 0.8818, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.0001314828476576909, | |
| "loss": 0.8415, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.0001313906307635559, | |
| "loss": 0.9181, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.00013129841386942087, | |
| "loss": 1.0715, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00013120619697528589, | |
| "loss": 0.9243, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00013111398008115087, | |
| "loss": 0.911, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00013102176318701585, | |
| "loss": 0.8901, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00013092954629288087, | |
| "loss": 1.0062, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00013083732939874588, | |
| "loss": 0.9856, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00013074511250461086, | |
| "loss": 1.0214, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00013065289561047585, | |
| "loss": 0.8438, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00013056067871634083, | |
| "loss": 0.8056, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00013046846182220584, | |
| "loss": 1.0268, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00013037624492807083, | |
| "loss": 0.9718, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.0001302840280339358, | |
| "loss": 1.0429, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00013019181113980082, | |
| "loss": 0.9091, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.0001300995942456658, | |
| "loss": 0.9045, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00013000737735153082, | |
| "loss": 1.0823, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.0001299151604573958, | |
| "loss": 0.8333, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.0001298229435632608, | |
| "loss": 0.8417, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00012973072666912577, | |
| "loss": 0.9994, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00012963850977499078, | |
| "loss": 1.0543, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00012954629288085577, | |
| "loss": 0.9443, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00012945407598672078, | |
| "loss": 0.9332, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00012936185909258576, | |
| "loss": 1.0525, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 0.00012926964219845077, | |
| "loss": 1.0072, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00012917742530431576, | |
| "loss": 0.8867, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00012908520841018074, | |
| "loss": 1.0054, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00012899299151604573, | |
| "loss": 1.0029, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00012890077462191074, | |
| "loss": 1.1104, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.00012880855772777572, | |
| "loss": 1.0245, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00012871634083364074, | |
| "loss": 0.8354, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00012862412393950572, | |
| "loss": 0.8816, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00012853190704537073, | |
| "loss": 0.8503, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00012843969015123572, | |
| "loss": 0.9247, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.0001283474732571007, | |
| "loss": 0.9184, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00012825525636296568, | |
| "loss": 0.9498, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.0001281630394688307, | |
| "loss": 0.8573, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.00012807082257469568, | |
| "loss": 0.9359, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.0001279786056805607, | |
| "loss": 0.8813, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.00012788638878642568, | |
| "loss": 0.9125, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.0001277941718922907, | |
| "loss": 0.8091, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00012770195499815567, | |
| "loss": 0.8828, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00012760973810402066, | |
| "loss": 0.8689, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00012751752120988564, | |
| "loss": 0.9063, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00012742530431575065, | |
| "loss": 0.9397, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.00012733308742161564, | |
| "loss": 0.8055, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00012724087052748065, | |
| "loss": 1.0647, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00012714865363334563, | |
| "loss": 0.9543, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00012705643673921064, | |
| "loss": 0.9021, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00012696421984507563, | |
| "loss": 0.8933, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.0001268720029509406, | |
| "loss": 0.9597, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.0001267797860568056, | |
| "loss": 0.8972, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.0001266875691626706, | |
| "loss": 0.8196, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.0001265953522685356, | |
| "loss": 0.9674, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.0001265031353744006, | |
| "loss": 0.9939, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.0001264109184802656, | |
| "loss": 0.9279, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.0001263187015861306, | |
| "loss": 0.9121, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.00012622648469199559, | |
| "loss": 0.9843, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_accuracy": 0.6314588278254519, | |
| "eval_loss": 0.978008508682251, | |
| "eval_runtime": 119.0456, | |
| "eval_samples_per_second": 92.015, | |
| "eval_steps_per_second": 11.508, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.00012613426779786057, | |
| "loss": 1.0928, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.00012604205090372555, | |
| "loss": 1.008, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.00012594983400959057, | |
| "loss": 0.8691, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.00012585761711545555, | |
| "loss": 0.9296, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.00012576540022132056, | |
| "loss": 0.9163, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00012567318332718555, | |
| "loss": 0.8996, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00012558096643305056, | |
| "loss": 0.8988, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00012548874953891554, | |
| "loss": 1.0093, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.00012539653264478053, | |
| "loss": 0.9081, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 0.0001253043157506455, | |
| "loss": 0.9128, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.00012521209885651052, | |
| "loss": 0.8395, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.0001251198819623755, | |
| "loss": 0.9784, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.00012502766506824052, | |
| "loss": 0.9919, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.0001249354481741055, | |
| "loss": 1.0478, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.00012484323127997051, | |
| "loss": 1.0693, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.0001247510143858355, | |
| "loss": 0.9146, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00012465879749170048, | |
| "loss": 1.0575, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00012456658059756547, | |
| "loss": 0.9678, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00012447436370343048, | |
| "loss": 0.9004, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00012438214680929546, | |
| "loss": 0.9842, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00012428992991516047, | |
| "loss": 0.9282, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.00012419771302102546, | |
| "loss": 0.9722, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.00012410549612689044, | |
| "loss": 1.1208, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.00012401327923275546, | |
| "loss": 0.8511, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.00012392106233862044, | |
| "loss": 0.9338, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.00012382884544448542, | |
| "loss": 0.8577, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.0001237366285503504, | |
| "loss": 0.9919, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.00012364441165621542, | |
| "loss": 0.9534, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.00012355219476208043, | |
| "loss": 0.9393, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.00012345997786794542, | |
| "loss": 0.8468, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.0001233677609738104, | |
| "loss": 0.8686, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 0.0001232755440796754, | |
| "loss": 1.0077, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.0001231833271855404, | |
| "loss": 0.8607, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.00012309111029140538, | |
| "loss": 1.1106, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.00012299889339727036, | |
| "loss": 1.0332, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.00012290667650313538, | |
| "loss": 0.8054, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.0001228144596090004, | |
| "loss": 0.8144, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00012272224271486537, | |
| "loss": 0.8902, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00012263002582073036, | |
| "loss": 0.9698, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00012253780892659537, | |
| "loss": 0.919, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00012244559203246035, | |
| "loss": 0.8431, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00012235337513832534, | |
| "loss": 1.0242, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.00012226115824419032, | |
| "loss": 0.9033, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00012216894135005533, | |
| "loss": 0.9966, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00012207672445592034, | |
| "loss": 0.8997, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00012198450756178532, | |
| "loss": 0.9247, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00012189229066765031, | |
| "loss": 0.7221, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00012180007377351532, | |
| "loss": 0.8302, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.00012170785687938031, | |
| "loss": 1.0828, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.0001216156399852453, | |
| "loss": 0.9364, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.00012152342309111029, | |
| "loss": 0.927, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.0001214312061969753, | |
| "loss": 1.0354, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 0.00012133898930284029, | |
| "loss": 0.9749, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00012124677240870527, | |
| "loss": 0.8811, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00012115455551457027, | |
| "loss": 1.0251, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00012106233862043528, | |
| "loss": 0.9448, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00012097012172630027, | |
| "loss": 0.9655, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00012087790483216525, | |
| "loss": 0.9447, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00012078568793803025, | |
| "loss": 0.9376, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00012069347104389526, | |
| "loss": 0.9657, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00012060125414976024, | |
| "loss": 0.8257, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00012050903725562523, | |
| "loss": 0.9139, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00012041682036149023, | |
| "loss": 0.8469, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00012032460346735524, | |
| "loss": 0.8781, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.00012023238657322022, | |
| "loss": 0.9082, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.0001201401696790852, | |
| "loss": 0.9517, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.0001200479527849502, | |
| "loss": 0.8731, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.00011995573589081522, | |
| "loss": 1.0299, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.0001198635189966802, | |
| "loss": 0.8816, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.00011977130210254518, | |
| "loss": 0.8483, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00011967908520841018, | |
| "loss": 0.865, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.0001195868683142752, | |
| "loss": 0.8243, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00011949465142014018, | |
| "loss": 0.8385, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00011940243452600516, | |
| "loss": 1.0068, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 0.00011931021763187016, | |
| "loss": 0.8506, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00011921800073773517, | |
| "loss": 0.9249, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00011912578384360016, | |
| "loss": 1.1078, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00011903356694946514, | |
| "loss": 1.0514, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00011894135005533014, | |
| "loss": 0.9401, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00011884913316119515, | |
| "loss": 0.8352, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.00011875691626706014, | |
| "loss": 0.9655, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.00011866469937292512, | |
| "loss": 1.1827, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.00011857248247879012, | |
| "loss": 0.9401, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.0001184802655846551, | |
| "loss": 0.9116, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.00011838804869052011, | |
| "loss": 0.8191, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.0001182958317963851, | |
| "loss": 0.8648, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.0001182036149022501, | |
| "loss": 0.9634, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00011811139800811508, | |
| "loss": 0.981, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00011801918111398009, | |
| "loss": 0.8884, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00011792696421984508, | |
| "loss": 0.8395, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00011783474732571007, | |
| "loss": 0.9688, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.00011774253043157506, | |
| "loss": 0.9953, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.00011765031353744007, | |
| "loss": 0.9366, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.00011755809664330505, | |
| "loss": 1.0527, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.00011746587974917005, | |
| "loss": 0.9375, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.00011737366285503504, | |
| "loss": 0.8931, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 0.00011728144596090005, | |
| "loss": 0.9988, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00011718922906676503, | |
| "loss": 0.7768, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00011709701217263003, | |
| "loss": 0.8878, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00011700479527849502, | |
| "loss": 0.8943, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_accuracy": 0.6385795143326639, | |
| "eval_loss": 0.9470136761665344, | |
| "eval_runtime": 115.3232, | |
| "eval_samples_per_second": 94.985, | |
| "eval_steps_per_second": 11.88, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00011691257838436003, | |
| "loss": 0.8918, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00011682036149022501, | |
| "loss": 0.7877, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00011672814459609001, | |
| "loss": 0.7868, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.000116635927701955, | |
| "loss": 0.9623, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00011654371080782, | |
| "loss": 0.9734, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00011645149391368499, | |
| "loss": 0.9686, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00011635927701954999, | |
| "loss": 0.9404, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00011626706012541497, | |
| "loss": 0.9221, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00011617484323127998, | |
| "loss": 0.9959, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00011608262633714497, | |
| "loss": 0.8569, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00011599040944300997, | |
| "loss": 0.8794, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00011589819254887495, | |
| "loss": 0.8572, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00011580597565473996, | |
| "loss": 0.9671, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00011571375876060495, | |
| "loss": 0.953, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00011562154186646994, | |
| "loss": 0.9218, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00011552932497233493, | |
| "loss": 1.1192, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00011543710807819994, | |
| "loss": 0.9314, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00011534489118406492, | |
| "loss": 0.9793, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 0.00011525267428992992, | |
| "loss": 0.9013, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.0001151604573957949, | |
| "loss": 0.9303, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.00011506824050165992, | |
| "loss": 0.868, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.0001149760236075249, | |
| "loss": 0.9062, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.0001148838067133899, | |
| "loss": 1.021, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.00011479158981925489, | |
| "loss": 0.8758, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.0001146993729251199, | |
| "loss": 0.9029, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00011460715603098488, | |
| "loss": 0.9457, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00011451493913684988, | |
| "loss": 0.9978, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00011442272224271486, | |
| "loss": 0.9076, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00011433050534857987, | |
| "loss": 0.996, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00011423828845444486, | |
| "loss": 0.8963, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00011414607156030986, | |
| "loss": 0.913, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00011405385466617484, | |
| "loss": 1.0293, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00011396163777203985, | |
| "loss": 0.9292, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00011386942087790484, | |
| "loss": 0.8293, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.00011377720398376984, | |
| "loss": 1.0578, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.00011368498708963482, | |
| "loss": 0.9555, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.00011359277019549983, | |
| "loss": 0.7886, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.00011350055330136482, | |
| "loss": 0.8831, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.00011340833640722981, | |
| "loss": 0.9281, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 0.0001133161195130948, | |
| "loss": 0.8645, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00011322390261895981, | |
| "loss": 0.9587, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.0001131316857248248, | |
| "loss": 0.9112, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00011303946883068979, | |
| "loss": 0.8635, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00011294725193655478, | |
| "loss": 1.012, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00011285503504241979, | |
| "loss": 0.9985, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00011276281814828477, | |
| "loss": 0.9016, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00011267060125414977, | |
| "loss": 1.0043, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00011257838436001475, | |
| "loss": 0.8155, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00011248616746587974, | |
| "loss": 0.9456, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00011239395057174475, | |
| "loss": 0.8511, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.00011230173367760975, | |
| "loss": 0.8481, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00011220951678347473, | |
| "loss": 0.9624, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00011211729988933972, | |
| "loss": 1.0411, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00011202508299520473, | |
| "loss": 0.8652, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00011193286610106973, | |
| "loss": 0.968, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.00011184064920693471, | |
| "loss": 0.8184, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.0001117484323127997, | |
| "loss": 0.9174, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00011165621541866471, | |
| "loss": 1.0584, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.0001115639985245297, | |
| "loss": 0.9099, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00011147178163039469, | |
| "loss": 0.8901, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00011137956473625967, | |
| "loss": 0.8803, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.00011128734784212469, | |
| "loss": 0.9376, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00011119513094798968, | |
| "loss": 1.0229, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00011110291405385467, | |
| "loss": 0.8243, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00011101069715971965, | |
| "loss": 1.0061, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00011091848026558466, | |
| "loss": 0.9275, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00011082626337144966, | |
| "loss": 0.8948, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00011073404647731465, | |
| "loss": 1.0331, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00011064182958317963, | |
| "loss": 0.9624, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00011054961268904464, | |
| "loss": 0.9445, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00011045739579490964, | |
| "loss": 0.88, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00011036517890077462, | |
| "loss": 0.9094, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00011027296200663961, | |
| "loss": 1.03, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00011018074511250462, | |
| "loss": 1.0257, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00011008852821836962, | |
| "loss": 0.9672, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.0001099963113242346, | |
| "loss": 0.9712, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00010990409443009959, | |
| "loss": 0.9725, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.0001098118775359646, | |
| "loss": 0.8735, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.0001097196606418296, | |
| "loss": 0.8384, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00010962744374769458, | |
| "loss": 0.9615, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00010953522685355957, | |
| "loss": 0.99, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00010944300995942458, | |
| "loss": 0.867, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00010935079306528957, | |
| "loss": 0.9244, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 0.00010925857617115456, | |
| "loss": 0.8202, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.00010916635927701954, | |
| "loss": 0.905, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.00010907414238288456, | |
| "loss": 0.8523, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.00010898192548874955, | |
| "loss": 1.079, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.00010888970859461454, | |
| "loss": 0.9162, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.00010879749170047952, | |
| "loss": 0.9556, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00010870527480634453, | |
| "loss": 0.9201, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00010861305791220953, | |
| "loss": 0.8199, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00010852084101807452, | |
| "loss": 0.9409, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.0001084286241239395, | |
| "loss": 0.9418, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00010833640722980451, | |
| "loss": 0.9588, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00010824419033566951, | |
| "loss": 0.9613, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.0001081519734415345, | |
| "loss": 0.9023, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00010805975654739948, | |
| "loss": 0.841, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00010796753965326449, | |
| "loss": 0.9361, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00010787532275912949, | |
| "loss": 0.7094, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00010778310586499447, | |
| "loss": 0.9941, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "eval_accuracy": 0.634380135110462, | |
| "eval_loss": 0.9495174288749695, | |
| "eval_runtime": 113.8999, | |
| "eval_samples_per_second": 96.172, | |
| "eval_steps_per_second": 12.028, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "step": 10000, | |
| "total_flos": 3.692982298032341e+18, | |
| "train_loss": 1.1900101467609405, | |
| "train_runtime": 3440.7653, | |
| "train_samples_per_second": 100.843, | |
| "train_steps_per_second": 6.303 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 21688, | |
| "num_train_epochs": 4, | |
| "save_steps": 1000, | |
| "total_flos": 3.692982298032341e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |