{ "best_metric": 1.0790818929672241, "best_model_checkpoint": "./outputs/instruct-lora-8b-alpaca-capital/checkpoint-580", "epoch": 1.0169348010160881, "eval_steps": 20, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001693480101608806, "eval_loss": 1.4240059852600098, "eval_runtime": 47.5545, "eval_samples_per_second": 22.08, "eval_steps_per_second": 5.53, "step": 1 }, { "epoch": 0.03386960203217612, "grad_norm": 0.7761115431785583, "learning_rate": 3.3898305084745763e-06, "loss": 1.5037, "step": 20 }, { "epoch": 0.03386960203217612, "eval_loss": 1.4208382368087769, "eval_runtime": 49.6595, "eval_samples_per_second": 21.144, "eval_steps_per_second": 5.296, "step": 20 }, { "epoch": 0.06773920406435224, "grad_norm": 0.7378177046775818, "learning_rate": 6.779661016949153e-06, "loss": 1.4229, "step": 40 }, { "epoch": 0.06773920406435224, "eval_loss": 1.385835886001587, "eval_runtime": 47.5969, "eval_samples_per_second": 22.06, "eval_steps_per_second": 5.526, "step": 40 }, { "epoch": 0.10160880609652836, "grad_norm": 0.8430055379867554, "learning_rate": 1.016949152542373e-05, "loss": 1.3768, "step": 60 }, { "epoch": 0.10160880609652836, "eval_loss": 1.279646873474121, "eval_runtime": 47.5556, "eval_samples_per_second": 22.079, "eval_steps_per_second": 5.53, "step": 60 }, { "epoch": 0.1354784081287045, "grad_norm": 0.8471567630767822, "learning_rate": 1.3559322033898305e-05, "loss": 1.2716, "step": 80 }, { "epoch": 0.1354784081287045, "eval_loss": 1.2302038669586182, "eval_runtime": 49.9002, "eval_samples_per_second": 21.042, "eval_steps_per_second": 5.271, "step": 80 }, { "epoch": 0.1693480101608806, "grad_norm": 1.0591727495193481, "learning_rate": 1.694915254237288e-05, "loss": 1.2258, "step": 100 }, { "epoch": 0.1693480101608806, "eval_loss": 1.204331874847412, "eval_runtime": 47.5296, "eval_samples_per_second": 22.092, "eval_steps_per_second": 5.533, "step": 100 }, { "epoch": 0.20321761219305673, "grad_norm": 1.0976836681365967, "learning_rate": 2.033898305084746e-05, "loss": 1.2218, "step": 120 }, { "epoch": 0.20321761219305673, "eval_loss": 1.1849374771118164, "eval_runtime": 47.5346, "eval_samples_per_second": 22.089, "eval_steps_per_second": 5.533, "step": 120 }, { "epoch": 0.23708721422523285, "grad_norm": 1.0816129446029663, "learning_rate": 2.3728813559322036e-05, "loss": 1.1868, "step": 140 }, { "epoch": 0.23708721422523285, "eval_loss": 1.1752324104309082, "eval_runtime": 49.5795, "eval_samples_per_second": 21.178, "eval_steps_per_second": 5.305, "step": 140 }, { "epoch": 0.270956816257409, "grad_norm": 1.1880944967269897, "learning_rate": 2.711864406779661e-05, "loss": 1.1179, "step": 160 }, { "epoch": 0.270956816257409, "eval_loss": 1.163883924484253, "eval_runtime": 47.5388, "eval_samples_per_second": 22.087, "eval_steps_per_second": 5.532, "step": 160 }, { "epoch": 0.3048264182895851, "grad_norm": 1.2941898107528687, "learning_rate": 2.9999737474980266e-05, "loss": 1.1075, "step": 180 }, { "epoch": 0.3048264182895851, "eval_loss": 1.152719259262085, "eval_runtime": 47.5403, "eval_samples_per_second": 22.087, "eval_steps_per_second": 5.532, "step": 180 }, { "epoch": 0.3386960203217612, "grad_norm": 1.2686594724655151, "learning_rate": 2.998457196315866e-05, "loss": 1.1267, "step": 200 }, { "epoch": 0.3386960203217612, "eval_loss": 1.1428076028823853, "eval_runtime": 49.7957, "eval_samples_per_second": 21.086, "eval_steps_per_second": 5.282, "step": 200 }, { "epoch": 0.37256562235393736, "grad_norm": 1.3804370164871216, "learning_rate": 2.9946097849501546e-05, "loss": 1.108, "step": 220 }, { "epoch": 0.37256562235393736, "eval_loss": 1.1391258239746094, "eval_runtime": 47.54, "eval_samples_per_second": 22.087, "eval_steps_per_second": 5.532, "step": 220 }, { "epoch": 0.40643522438611346, "grad_norm": 1.444969892501831, "learning_rate": 2.988437498074987e-05, "loss": 1.1118, "step": 240 }, { "epoch": 0.40643522438611346, "eval_loss": 1.1317951679229736, "eval_runtime": 47.5438, "eval_samples_per_second": 22.085, "eval_steps_per_second": 5.532, "step": 240 }, { "epoch": 0.4403048264182896, "grad_norm": 1.5110204219818115, "learning_rate": 2.9799499367238472e-05, "loss": 1.1333, "step": 260 }, { "epoch": 0.4403048264182896, "eval_loss": 1.1260956525802612, "eval_runtime": 48.973, "eval_samples_per_second": 21.44, "eval_steps_per_second": 5.37, "step": 260 }, { "epoch": 0.4741744284504657, "grad_norm": 1.361185908317566, "learning_rate": 2.969160303355143e-05, "loss": 1.0918, "step": 280 }, { "epoch": 0.4741744284504657, "eval_loss": 1.119767665863037, "eval_runtime": 47.5362, "eval_samples_per_second": 22.088, "eval_steps_per_second": 5.533, "step": 280 }, { "epoch": 0.5080440304826418, "grad_norm": 1.462750792503357, "learning_rate": 2.95608538131569e-05, "loss": 1.0926, "step": 300 }, { "epoch": 0.5080440304826418, "eval_loss": 1.1143876314163208, "eval_runtime": 47.5395, "eval_samples_per_second": 22.087, "eval_steps_per_second": 5.532, "step": 300 }, { "epoch": 0.541913632514818, "grad_norm": 1.5854558944702148, "learning_rate": 2.940745508734104e-05, "loss": 1.0953, "step": 320 }, { "epoch": 0.541913632514818, "eval_loss": 1.1123721599578857, "eval_runtime": 47.5446, "eval_samples_per_second": 22.085, "eval_steps_per_second": 5.532, "step": 320 }, { "epoch": 0.5757832345469941, "grad_norm": 1.592598557472229, "learning_rate": 2.9231645468847078e-05, "loss": 1.1153, "step": 340 }, { "epoch": 0.5757832345469941, "eval_loss": 1.1083550453186035, "eval_runtime": 47.5373, "eval_samples_per_second": 22.088, "eval_steps_per_second": 5.532, "step": 340 }, { "epoch": 0.6096528365791702, "grad_norm": 1.5559636354446411, "learning_rate": 2.903369843071157e-05, "loss": 1.0646, "step": 360 }, { "epoch": 0.6096528365791702, "eval_loss": 1.1024267673492432, "eval_runtime": 47.5356, "eval_samples_per_second": 22.089, "eval_steps_per_second": 5.533, "step": 360 }, { "epoch": 0.6435224386113463, "grad_norm": 1.5940817594528198, "learning_rate": 2.881392188087528e-05, "loss": 1.0949, "step": 380 }, { "epoch": 0.6435224386113463, "eval_loss": 1.1020222902297974, "eval_runtime": 47.5276, "eval_samples_per_second": 22.092, "eval_steps_per_second": 5.534, "step": 380 }, { "epoch": 0.6773920406435224, "grad_norm": 1.5781282186508179, "learning_rate": 2.8572657683230322e-05, "loss": 1.0683, "step": 400 }, { "epoch": 0.6773920406435224, "eval_loss": 1.097179651260376, "eval_runtime": 47.547, "eval_samples_per_second": 22.083, "eval_steps_per_second": 5.531, "step": 400 }, { "epoch": 0.7112616426756986, "grad_norm": 1.5103650093078613, "learning_rate": 2.8310281125848574e-05, "loss": 1.082, "step": 420 }, { "epoch": 0.7112616426756986, "eval_loss": 1.0919424295425415, "eval_runtime": 47.5343, "eval_samples_per_second": 22.089, "eval_steps_per_second": 5.533, "step": 420 }, { "epoch": 0.7451312447078747, "grad_norm": 1.6828149557113647, "learning_rate": 2.80272003372186e-05, "loss": 1.0589, "step": 440 }, { "epoch": 0.7451312447078747, "eval_loss": 1.0919417142868042, "eval_runtime": 47.5415, "eval_samples_per_second": 22.086, "eval_steps_per_second": 5.532, "step": 440 }, { "epoch": 0.7790008467400508, "grad_norm": 1.5400513410568237, "learning_rate": 2.7723855651399027e-05, "loss": 1.0484, "step": 460 }, { "epoch": 0.7790008467400508, "eval_loss": 1.0858741998672485, "eval_runtime": 47.5385, "eval_samples_per_second": 22.087, "eval_steps_per_second": 5.532, "step": 460 }, { "epoch": 0.8128704487722269, "grad_norm": 1.5118318796157837, "learning_rate": 2.7400718923076004e-05, "loss": 1.0645, "step": 480 }, { "epoch": 0.8128704487722269, "eval_loss": 1.083242416381836, "eval_runtime": 47.5448, "eval_samples_per_second": 22.084, "eval_steps_per_second": 5.532, "step": 480 }, { "epoch": 0.8467400508044031, "grad_norm": 1.6155920028686523, "learning_rate": 2.7058292793590064e-05, "loss": 1.061, "step": 500 }, { "epoch": 0.8467400508044031, "eval_loss": 1.0831594467163086, "eval_runtime": 47.5286, "eval_samples_per_second": 22.092, "eval_steps_per_second": 5.534, "step": 500 }, { "epoch": 0.8806096528365792, "grad_norm": 1.7232460975646973, "learning_rate": 2.6697109909074174e-05, "loss": 1.0411, "step": 520 }, { "epoch": 0.8806096528365792, "eval_loss": 1.0841459035873413, "eval_runtime": 47.5262, "eval_samples_per_second": 22.093, "eval_steps_per_second": 5.534, "step": 520 }, { "epoch": 0.9144792548687553, "grad_norm": 1.6081137657165527, "learning_rate": 2.6317732091919095e-05, "loss": 1.0353, "step": 540 }, { "epoch": 0.9144792548687553, "eval_loss": 1.082026720046997, "eval_runtime": 47.5296, "eval_samples_per_second": 22.091, "eval_steps_per_second": 5.533, "step": 540 }, { "epoch": 0.9483488569009314, "grad_norm": 1.5575566291809082, "learning_rate": 2.5920749466854923e-05, "loss": 1.0144, "step": 560 }, { "epoch": 0.9483488569009314, "eval_loss": 1.0793519020080566, "eval_runtime": 47.5254, "eval_samples_per_second": 22.093, "eval_steps_per_second": 5.534, "step": 560 }, { "epoch": 0.9822184589331076, "grad_norm": 1.6414557695388794, "learning_rate": 2.550677954300811e-05, "loss": 1.0277, "step": 580 }, { "epoch": 0.9822184589331076, "eval_loss": 1.0790818929672241, "eval_runtime": 47.526, "eval_samples_per_second": 22.093, "eval_steps_per_second": 5.534, "step": 580 }, { "epoch": 1.0169348010160881, "grad_norm": 1.8802568912506104, "learning_rate": 2.5076466253361893e-05, "loss": 1.067, "step": 600 }, { "epoch": 1.0169348010160881, "eval_loss": 1.0800410509109497, "eval_runtime": 47.5397, "eval_samples_per_second": 22.087, "eval_steps_per_second": 5.532, "step": 600 } ], "logging_steps": 20, "max_steps": 1770, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.563527450103644e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }