{ "best_metric": 0.13660724461078644, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 3.1739130434782608, "eval_steps": 25, "global_step": 39, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07729468599033816, "grad_norm": 0.43662938475608826, "learning_rate": 5e-05, "loss": 0.4704, "step": 1 }, { "epoch": 0.07729468599033816, "eval_loss": 0.5291202068328857, "eval_runtime": 1.828, "eval_samples_per_second": 27.352, "eval_steps_per_second": 7.111, "step": 1 }, { "epoch": 0.15458937198067632, "grad_norm": 0.5451412200927734, "learning_rate": 0.0001, "loss": 0.5141, "step": 2 }, { "epoch": 0.2318840579710145, "grad_norm": 0.6303209066390991, "learning_rate": 9.983788698441369e-05, "loss": 0.614, "step": 3 }, { "epoch": 0.30917874396135264, "grad_norm": 0.33306458592414856, "learning_rate": 9.935271596564688e-05, "loss": 0.4303, "step": 4 }, { "epoch": 0.3864734299516908, "grad_norm": 0.3506397306919098, "learning_rate": 9.854798261200746e-05, "loss": 0.3543, "step": 5 }, { "epoch": 0.463768115942029, "grad_norm": 0.4276319146156311, "learning_rate": 9.74294850457488e-05, "loss": 0.3801, "step": 6 }, { "epoch": 0.5410628019323671, "grad_norm": 0.22507691383361816, "learning_rate": 9.600528206746612e-05, "loss": 0.2902, "step": 7 }, { "epoch": 0.6183574879227053, "grad_norm": 0.24348624050617218, "learning_rate": 9.428563509225347e-05, "loss": 0.2535, "step": 8 }, { "epoch": 0.6956521739130435, "grad_norm": 0.4209447205066681, "learning_rate": 9.22829342159729e-05, "loss": 0.2811, "step": 9 }, { "epoch": 0.7729468599033816, "grad_norm": 0.24877655506134033, "learning_rate": 9.001160894432978e-05, "loss": 0.2272, "step": 10 }, { "epoch": 0.8502415458937198, "grad_norm": 0.520237147808075, "learning_rate": 8.74880242279536e-05, "loss": 0.2048, "step": 11 }, { "epoch": 0.927536231884058, "grad_norm": 0.9614755511283875, "learning_rate": 8.473036255255366e-05, "loss": 0.1937, "step": 12 }, { "epoch": 1.0579710144927537, "grad_norm": 0.8174208998680115, "learning_rate": 8.175849293369291e-05, "loss": 0.3853, "step": 13 }, { "epoch": 1.1352657004830917, "grad_norm": 0.21609851717948914, "learning_rate": 7.859382776007543e-05, "loss": 0.1514, "step": 14 }, { "epoch": 1.21256038647343, "grad_norm": 0.2834426462650299, "learning_rate": 7.525916851679529e-05, "loss": 0.151, "step": 15 }, { "epoch": 1.289855072463768, "grad_norm": 0.1985989809036255, "learning_rate": 7.177854150011389e-05, "loss": 0.1643, "step": 16 }, { "epoch": 1.3671497584541064, "grad_norm": 0.17753714323043823, "learning_rate": 6.817702470744477e-05, "loss": 0.1461, "step": 17 }, { "epoch": 1.4444444444444444, "grad_norm": 0.2179577499628067, "learning_rate": 6.448056714980767e-05, "loss": 0.1508, "step": 18 }, { "epoch": 1.5217391304347827, "grad_norm": 0.1527913361787796, "learning_rate": 6.071580188860955e-05, "loss": 0.2133, "step": 19 }, { "epoch": 1.5990338164251208, "grad_norm": 0.11120735108852386, "learning_rate": 5.690985414382668e-05, "loss": 0.1437, "step": 20 }, { "epoch": 1.6763285024154588, "grad_norm": 0.15228553116321564, "learning_rate": 5.3090145856173346e-05, "loss": 0.1414, "step": 21 }, { "epoch": 1.7536231884057971, "grad_norm": 0.11549022793769836, "learning_rate": 4.9284198111390456e-05, "loss": 0.2019, "step": 22 }, { "epoch": 1.8309178743961354, "grad_norm": 0.12989747524261475, "learning_rate": 4.551943285019234e-05, "loss": 0.137, "step": 23 }, { "epoch": 1.9082125603864735, "grad_norm": 0.10218170285224915, "learning_rate": 4.182297529255525e-05, "loss": 0.1316, "step": 24 }, { "epoch": 2.0386473429951693, "grad_norm": 0.2697076201438904, "learning_rate": 3.822145849988612e-05, "loss": 0.344, "step": 25 }, { "epoch": 2.0386473429951693, "eval_loss": 0.13660724461078644, "eval_runtime": 1.1451, "eval_samples_per_second": 43.663, "eval_steps_per_second": 11.352, "step": 25 }, { "epoch": 2.1159420289855073, "grad_norm": 0.1018509790301323, "learning_rate": 3.474083148320469e-05, "loss": 0.1343, "step": 26 }, { "epoch": 2.1932367149758454, "grad_norm": 0.12204479426145554, "learning_rate": 3.1406172239924584e-05, "loss": 0.1308, "step": 27 }, { "epoch": 2.2705314009661834, "grad_norm": 0.1276244819164276, "learning_rate": 2.8241507066307104e-05, "loss": 0.1798, "step": 28 }, { "epoch": 2.3478260869565215, "grad_norm": 0.07243148982524872, "learning_rate": 2.5269637447446348e-05, "loss": 0.1243, "step": 29 }, { "epoch": 2.42512077294686, "grad_norm": 0.12894168496131897, "learning_rate": 2.2511975772046403e-05, "loss": 0.1309, "step": 30 }, { "epoch": 2.502415458937198, "grad_norm": 0.09854254871606827, "learning_rate": 1.9988391055670233e-05, "loss": 0.1694, "step": 31 }, { "epoch": 2.579710144927536, "grad_norm": 0.08287820219993591, "learning_rate": 1.771706578402711e-05, "loss": 0.1285, "step": 32 }, { "epoch": 2.6570048309178746, "grad_norm": 0.10731519758701324, "learning_rate": 1.5714364907746536e-05, "loss": 0.1231, "step": 33 }, { "epoch": 2.7342995169082127, "grad_norm": 0.09050889313220978, "learning_rate": 1.3994717932533891e-05, "loss": 0.1906, "step": 34 }, { "epoch": 2.8115942028985508, "grad_norm": 0.10606944561004639, "learning_rate": 1.257051495425121e-05, "loss": 0.1276, "step": 35 }, { "epoch": 2.888888888888889, "grad_norm": 0.0904989019036293, "learning_rate": 1.1452017387992552e-05, "loss": 0.1243, "step": 36 }, { "epoch": 3.0193236714975846, "grad_norm": 0.182935893535614, "learning_rate": 1.064728403435312e-05, "loss": 0.3068, "step": 37 }, { "epoch": 3.0966183574879227, "grad_norm": 0.11063341051340103, "learning_rate": 1.0162113015586309e-05, "loss": 0.1229, "step": 38 }, { "epoch": 3.1739130434782608, "grad_norm": 0.0827033594250679, "learning_rate": 1e-05, "loss": 0.1225, "step": 39 } ], "logging_steps": 1, "max_steps": 39, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7527496374432563e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }