{
  "best_metric": 1.0790818929672241,
  "best_model_checkpoint": "./outputs/instruct-lora-8b-alpaca-capital/checkpoint-580",
  "epoch": 1.0169348010160881,
  "eval_steps": 20,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001693480101608806,
      "eval_loss": 1.4240059852600098,
      "eval_runtime": 47.5545,
      "eval_samples_per_second": 22.08,
      "eval_steps_per_second": 5.53,
      "step": 1
    },
    {
      "epoch": 0.03386960203217612,
      "grad_norm": 0.7761115431785583,
      "learning_rate": 3.3898305084745763e-06,
      "loss": 1.5037,
      "step": 20
    },
    {
      "epoch": 0.03386960203217612,
      "eval_loss": 1.4208382368087769,
      "eval_runtime": 49.6595,
      "eval_samples_per_second": 21.144,
      "eval_steps_per_second": 5.296,
      "step": 20
    },
    {
      "epoch": 0.06773920406435224,
      "grad_norm": 0.7378177046775818,
      "learning_rate": 6.779661016949153e-06,
      "loss": 1.4229,
      "step": 40
    },
    {
      "epoch": 0.06773920406435224,
      "eval_loss": 1.385835886001587,
      "eval_runtime": 47.5969,
      "eval_samples_per_second": 22.06,
      "eval_steps_per_second": 5.526,
      "step": 40
    },
    {
      "epoch": 0.10160880609652836,
      "grad_norm": 0.8430055379867554,
      "learning_rate": 1.016949152542373e-05,
      "loss": 1.3768,
      "step": 60
    },
    {
      "epoch": 0.10160880609652836,
      "eval_loss": 1.279646873474121,
      "eval_runtime": 47.5556,
      "eval_samples_per_second": 22.079,
      "eval_steps_per_second": 5.53,
      "step": 60
    },
    {
      "epoch": 0.1354784081287045,
      "grad_norm": 0.8471567630767822,
      "learning_rate": 1.3559322033898305e-05,
      "loss": 1.2716,
      "step": 80
    },
    {
      "epoch": 0.1354784081287045,
      "eval_loss": 1.2302038669586182,
      "eval_runtime": 49.9002,
      "eval_samples_per_second": 21.042,
      "eval_steps_per_second": 5.271,
      "step": 80
    },
    {
      "epoch": 0.1693480101608806,
      "grad_norm": 1.0591727495193481,
      "learning_rate": 1.694915254237288e-05,
      "loss": 1.2258,
      "step": 100
    },
    {
      "epoch": 0.1693480101608806,
      "eval_loss": 1.204331874847412,
      "eval_runtime": 47.5296,
      "eval_samples_per_second": 22.092,
      "eval_steps_per_second": 5.533,
      "step": 100
    },
    {
      "epoch": 0.20321761219305673,
      "grad_norm": 1.0976836681365967,
      "learning_rate": 2.033898305084746e-05,
      "loss": 1.2218,
      "step": 120
    },
    {
      "epoch": 0.20321761219305673,
      "eval_loss": 1.1849374771118164,
      "eval_runtime": 47.5346,
      "eval_samples_per_second": 22.089,
      "eval_steps_per_second": 5.533,
      "step": 120
    },
    {
      "epoch": 0.23708721422523285,
      "grad_norm": 1.0816129446029663,
      "learning_rate": 2.3728813559322036e-05,
      "loss": 1.1868,
      "step": 140
    },
    {
      "epoch": 0.23708721422523285,
      "eval_loss": 1.1752324104309082,
      "eval_runtime": 49.5795,
      "eval_samples_per_second": 21.178,
      "eval_steps_per_second": 5.305,
      "step": 140
    },
    {
      "epoch": 0.270956816257409,
      "grad_norm": 1.1880944967269897,
      "learning_rate": 2.711864406779661e-05,
      "loss": 1.1179,
      "step": 160
    },
    {
      "epoch": 0.270956816257409,
      "eval_loss": 1.163883924484253,
      "eval_runtime": 47.5388,
      "eval_samples_per_second": 22.087,
      "eval_steps_per_second": 5.532,
      "step": 160
    },
    {
      "epoch": 0.3048264182895851,
      "grad_norm": 1.2941898107528687,
      "learning_rate": 2.9999737474980266e-05,
      "loss": 1.1075,
      "step": 180
    },
    {
      "epoch": 0.3048264182895851,
      "eval_loss": 1.152719259262085,
      "eval_runtime": 47.5403,
      "eval_samples_per_second": 22.087,
      "eval_steps_per_second": 5.532,
      "step": 180
    },
    {
      "epoch": 0.3386960203217612,
      "grad_norm": 1.2686594724655151,
      "learning_rate": 2.998457196315866e-05,
      "loss": 1.1267,
      "step": 200
    },
    {
      "epoch": 0.3386960203217612,
      "eval_loss": 1.1428076028823853,
      "eval_runtime": 49.7957,
      "eval_samples_per_second": 21.086,
      "eval_steps_per_second": 5.282,
      "step": 200
    },
    {
      "epoch": 0.37256562235393736,
      "grad_norm": 1.3804370164871216,
      "learning_rate": 2.9946097849501546e-05,
      "loss": 1.108,
      "step": 220
    },
    {
      "epoch": 0.37256562235393736,
      "eval_loss": 1.1391258239746094,
      "eval_runtime": 47.54,
      "eval_samples_per_second": 22.087,
      "eval_steps_per_second": 5.532,
      "step": 220
    },
    {
      "epoch": 0.40643522438611346,
      "grad_norm": 1.444969892501831,
      "learning_rate": 2.988437498074987e-05,
      "loss": 1.1118,
      "step": 240
    },
    {
      "epoch": 0.40643522438611346,
      "eval_loss": 1.1317951679229736,
      "eval_runtime": 47.5438,
      "eval_samples_per_second": 22.085,
      "eval_steps_per_second": 5.532,
      "step": 240
    },
    {
      "epoch": 0.4403048264182896,
      "grad_norm": 1.5110204219818115,
      "learning_rate": 2.9799499367238472e-05,
      "loss": 1.1333,
      "step": 260
    },
    {
      "epoch": 0.4403048264182896,
      "eval_loss": 1.1260956525802612,
      "eval_runtime": 48.973,
      "eval_samples_per_second": 21.44,
      "eval_steps_per_second": 5.37,
      "step": 260
    },
    {
      "epoch": 0.4741744284504657,
      "grad_norm": 1.361185908317566,
      "learning_rate": 2.969160303355143e-05,
      "loss": 1.0918,
      "step": 280
    },
    {
      "epoch": 0.4741744284504657,
      "eval_loss": 1.119767665863037,
      "eval_runtime": 47.5362,
      "eval_samples_per_second": 22.088,
      "eval_steps_per_second": 5.533,
      "step": 280
    },
    {
      "epoch": 0.5080440304826418,
      "grad_norm": 1.462750792503357,
      "learning_rate": 2.95608538131569e-05,
      "loss": 1.0926,
      "step": 300
    },
    {
      "epoch": 0.5080440304826418,
      "eval_loss": 1.1143876314163208,
      "eval_runtime": 47.5395,
      "eval_samples_per_second": 22.087,
      "eval_steps_per_second": 5.532,
      "step": 300
    },
    {
      "epoch": 0.541913632514818,
      "grad_norm": 1.5854558944702148,
      "learning_rate": 2.940745508734104e-05,
      "loss": 1.0953,
      "step": 320
    },
    {
      "epoch": 0.541913632514818,
      "eval_loss": 1.1123721599578857,
      "eval_runtime": 47.5446,
      "eval_samples_per_second": 22.085,
      "eval_steps_per_second": 5.532,
      "step": 320
    },
    {
      "epoch": 0.5757832345469941,
      "grad_norm": 1.592598557472229,
      "learning_rate": 2.9231645468847078e-05,
      "loss": 1.1153,
      "step": 340
    },
    {
      "epoch": 0.5757832345469941,
      "eval_loss": 1.1083550453186035,
      "eval_runtime": 47.5373,
      "eval_samples_per_second": 22.088,
      "eval_steps_per_second": 5.532,
      "step": 340
    },
    {
      "epoch": 0.6096528365791702,
      "grad_norm": 1.5559636354446411,
      "learning_rate": 2.903369843071157e-05,
      "loss": 1.0646,
      "step": 360
    },
    {
      "epoch": 0.6096528365791702,
      "eval_loss": 1.1024267673492432,
      "eval_runtime": 47.5356,
      "eval_samples_per_second": 22.089,
      "eval_steps_per_second": 5.533,
      "step": 360
    },
    {
      "epoch": 0.6435224386113463,
      "grad_norm": 1.5940817594528198,
      "learning_rate": 2.881392188087528e-05,
      "loss": 1.0949,
      "step": 380
    },
    {
      "epoch": 0.6435224386113463,
      "eval_loss": 1.1020222902297974,
      "eval_runtime": 47.5276,
      "eval_samples_per_second": 22.092,
      "eval_steps_per_second": 5.534,
      "step": 380
    },
    {
      "epoch": 0.6773920406435224,
      "grad_norm": 1.5781282186508179,
      "learning_rate": 2.8572657683230322e-05,
      "loss": 1.0683,
      "step": 400
    },
    {
      "epoch": 0.6773920406435224,
      "eval_loss": 1.097179651260376,
      "eval_runtime": 47.547,
      "eval_samples_per_second": 22.083,
      "eval_steps_per_second": 5.531,
      "step": 400
    },
    {
      "epoch": 0.7112616426756986,
      "grad_norm": 1.5103650093078613,
      "learning_rate": 2.8310281125848574e-05,
      "loss": 1.082,
      "step": 420
    },
    {
      "epoch": 0.7112616426756986,
      "eval_loss": 1.0919424295425415,
      "eval_runtime": 47.5343,
      "eval_samples_per_second": 22.089,
      "eval_steps_per_second": 5.533,
      "step": 420
    },
    {
      "epoch": 0.7451312447078747,
      "grad_norm": 1.6828149557113647,
      "learning_rate": 2.80272003372186e-05,
      "loss": 1.0589,
      "step": 440
    },
    {
      "epoch": 0.7451312447078747,
      "eval_loss": 1.0919417142868042,
      "eval_runtime": 47.5415,
      "eval_samples_per_second": 22.086,
      "eval_steps_per_second": 5.532,
      "step": 440
    },
    {
      "epoch": 0.7790008467400508,
      "grad_norm": 1.5400513410568237,
      "learning_rate": 2.7723855651399027e-05,
      "loss": 1.0484,
      "step": 460
    },
    {
      "epoch": 0.7790008467400508,
      "eval_loss": 1.0858741998672485,
      "eval_runtime": 47.5385,
      "eval_samples_per_second": 22.087,
      "eval_steps_per_second": 5.532,
      "step": 460
    },
    {
      "epoch": 0.8128704487722269,
      "grad_norm": 1.5118318796157837,
      "learning_rate": 2.7400718923076004e-05,
      "loss": 1.0645,
      "step": 480
    },
    {
      "epoch": 0.8128704487722269,
      "eval_loss": 1.083242416381836,
      "eval_runtime": 47.5448,
      "eval_samples_per_second": 22.084,
      "eval_steps_per_second": 5.532,
      "step": 480
    },
    {
      "epoch": 0.8467400508044031,
      "grad_norm": 1.6155920028686523,
      "learning_rate": 2.7058292793590064e-05,
      "loss": 1.061,
      "step": 500
    },
    {
      "epoch": 0.8467400508044031,
      "eval_loss": 1.0831594467163086,
      "eval_runtime": 47.5286,
      "eval_samples_per_second": 22.092,
      "eval_steps_per_second": 5.534,
      "step": 500
    },
    {
      "epoch": 0.8806096528365792,
      "grad_norm": 1.7232460975646973,
      "learning_rate": 2.6697109909074174e-05,
      "loss": 1.0411,
      "step": 520
    },
    {
      "epoch": 0.8806096528365792,
      "eval_loss": 1.0841459035873413,
      "eval_runtime": 47.5262,
      "eval_samples_per_second": 22.093,
      "eval_steps_per_second": 5.534,
      "step": 520
    },
    {
      "epoch": 0.9144792548687553,
      "grad_norm": 1.6081137657165527,
      "learning_rate": 2.6317732091919095e-05,
      "loss": 1.0353,
      "step": 540
    },
    {
      "epoch": 0.9144792548687553,
      "eval_loss": 1.082026720046997,
      "eval_runtime": 47.5296,
      "eval_samples_per_second": 22.091,
      "eval_steps_per_second": 5.533,
      "step": 540
    },
    {
      "epoch": 0.9483488569009314,
      "grad_norm": 1.5575566291809082,
      "learning_rate": 2.5920749466854923e-05,
      "loss": 1.0144,
      "step": 560
    },
    {
      "epoch": 0.9483488569009314,
      "eval_loss": 1.0793519020080566,
      "eval_runtime": 47.5254,
      "eval_samples_per_second": 22.093,
      "eval_steps_per_second": 5.534,
      "step": 560
    },
    {
      "epoch": 0.9822184589331076,
      "grad_norm": 1.6414557695388794,
      "learning_rate": 2.550677954300811e-05,
      "loss": 1.0277,
      "step": 580
    },
    {
      "epoch": 0.9822184589331076,
      "eval_loss": 1.0790818929672241,
      "eval_runtime": 47.526,
      "eval_samples_per_second": 22.093,
      "eval_steps_per_second": 5.534,
      "step": 580
    },
    {
      "epoch": 1.0169348010160881,
      "grad_norm": 1.8802568912506104,
      "learning_rate": 2.5076466253361893e-05,
      "loss": 1.067,
      "step": 600
    },
    {
      "epoch": 1.0169348010160881,
      "eval_loss": 1.0800410509109497,
      "eval_runtime": 47.5397,
      "eval_samples_per_second": 22.087,
      "eval_steps_per_second": 5.532,
      "step": 600
    }
  ],
  "logging_steps": 20,
  "max_steps": 1770,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 200,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.563527450103644e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}