{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.032703785463167365,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0001635189273158368,
      "eval_loss": 2.2701847553253174,
      "eval_runtime": 45.6108,
      "eval_samples_per_second": 56.456,
      "eval_steps_per_second": 28.239,
      "step": 1
    },
    {
      "epoch": 0.001635189273158368,
      "grad_norm": 1.6498826742172241,
      "learning_rate": 0.00019967573081342103,
      "loss": 1.8962,
      "step": 10
    },
    {
      "epoch": 0.003270378546316736,
      "grad_norm": 2.2788889408111572,
      "learning_rate": 0.0001970941817426052,
      "loss": 1.6468,
      "step": 20
    },
    {
      "epoch": 0.0049055678194751045,
      "grad_norm": 1.7382475137710571,
      "learning_rate": 0.00019199794436588243,
      "loss": 1.7137,
      "step": 30
    },
    {
      "epoch": 0.006540757092633472,
      "grad_norm": 1.3833692073822021,
      "learning_rate": 0.0001845190085543795,
      "loss": 1.7509,
      "step": 40
    },
    {
      "epoch": 0.008175946365791841,
      "grad_norm": 1.2240101099014282,
      "learning_rate": 0.00017485107481711012,
      "loss": 1.6796,
      "step": 50
    },
    {
      "epoch": 0.008175946365791841,
      "eval_loss": 1.7598543167114258,
      "eval_runtime": 44.6389,
      "eval_samples_per_second": 57.685,
      "eval_steps_per_second": 28.854,
      "step": 50
    },
    {
      "epoch": 0.009811135638950209,
      "grad_norm": 1.3060485124588013,
      "learning_rate": 0.00016324453755953773,
      "loss": 1.5988,
      "step": 60
    },
    {
      "epoch": 0.011446324912108577,
      "grad_norm": 1.2081083059310913,
      "learning_rate": 0.00015000000000000001,
      "loss": 1.6312,
      "step": 70
    },
    {
      "epoch": 0.013081514185266945,
      "grad_norm": 1.0915815830230713,
      "learning_rate": 0.00013546048870425356,
      "loss": 1.6993,
      "step": 80
    },
    {
      "epoch": 0.014716703458425313,
      "grad_norm": 1.5986297130584717,
      "learning_rate": 0.00012000256937760445,
      "loss": 1.6293,
      "step": 90
    },
    {
      "epoch": 0.016351892731583682,
      "grad_norm": 1.1840400695800781,
      "learning_rate": 0.00010402659401094152,
      "loss": 1.5559,
      "step": 100
    },
    {
      "epoch": 0.016351892731583682,
      "eval_loss": 1.7379602193832397,
      "eval_runtime": 44.7506,
      "eval_samples_per_second": 57.541,
      "eval_steps_per_second": 28.782,
      "step": 100
    },
    {
      "epoch": 0.01798708200474205,
      "grad_norm": 0.999516487121582,
      "learning_rate": 8.79463319744677e-05,
      "loss": 1.6158,
      "step": 110
    },
    {
      "epoch": 0.019622271277900418,
      "grad_norm": 1.1924983263015747,
      "learning_rate": 7.217825360835473e-05,
      "loss": 1.7679,
      "step": 120
    },
    {
      "epoch": 0.021257460551058784,
      "grad_norm": 1.1168382167816162,
      "learning_rate": 5.713074385969457e-05,
      "loss": 1.7005,
      "step": 130
    },
    {
      "epoch": 0.022892649824217154,
      "grad_norm": 0.9874293208122253,
      "learning_rate": 4.3193525326884435e-05,
      "loss": 1.7217,
      "step": 140
    },
    {
      "epoch": 0.02452783909737552,
      "grad_norm": 1.4432034492492676,
      "learning_rate": 3.072756464904006e-05,
      "loss": 1.6991,
      "step": 150
    },
    {
      "epoch": 0.02452783909737552,
      "eval_loss": 1.7249417304992676,
      "eval_runtime": 45.3211,
      "eval_samples_per_second": 56.817,
      "eval_steps_per_second": 28.419,
      "step": 150
    },
    {
      "epoch": 0.02616302837053389,
      "grad_norm": 1.6720688343048096,
      "learning_rate": 2.0055723659649904e-05,
      "loss": 1.8257,
      "step": 160
    },
    {
      "epoch": 0.027798217643692256,
      "grad_norm": 1.2190316915512085,
      "learning_rate": 1.1454397434679021e-05,
      "loss": 1.4768,
      "step": 170
    },
    {
      "epoch": 0.029433406916850625,
      "grad_norm": 1.2517452239990234,
      "learning_rate": 5.146355805285452e-06,
      "loss": 1.6874,
      "step": 180
    },
    {
      "epoch": 0.031068596190008995,
      "grad_norm": 1.924039602279663,
      "learning_rate": 1.2949737362087156e-06,
      "loss": 1.5098,
      "step": 190
    },
    {
      "epoch": 0.032703785463167365,
      "grad_norm": 1.1937552690505981,
      "learning_rate": 0.0,
      "loss": 1.5143,
      "step": 200
    },
    {
      "epoch": 0.032703785463167365,
      "eval_loss": 1.723392367362976,
      "eval_runtime": 44.894,
      "eval_samples_per_second": 57.357,
      "eval_steps_per_second": 28.69,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1782989230178304.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}