Qwen2.5-7B-gen-dpo-10k / trainer_state.json
AmberYifan's picture
Model save
06301e9 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 200,
"global_step": 78,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01282051282051282,
"grad_norm": 101.98915917164486,
"learning_rate": 6.25e-08,
"logits/generated": -1.0679720640182495,
"logits/real": -0.826486349105835,
"logps/generated": -273.1225891113281,
"logps/real": -293.10797119140625,
"loss": 0.9699,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.1282051282051282,
"grad_norm": 80.52500100713164,
"learning_rate": 4.857142857142857e-07,
"logits/generated": -0.8668110966682434,
"logits/real": -0.644612193107605,
"logps/generated": -273.2930908203125,
"logps/real": -273.6876220703125,
"loss": 0.907,
"rewards/accuracies": 0.5277777910232544,
"rewards/generated": 0.20040562748908997,
"rewards/margins": 0.11361115425825119,
"rewards/real": 0.31401681900024414,
"step": 10
},
{
"epoch": 0.2564102564102564,
"grad_norm": 61.621972969126524,
"learning_rate": 4.142857142857143e-07,
"logits/generated": -1.0656638145446777,
"logits/real": -0.7032974362373352,
"logps/generated": -260.37921142578125,
"logps/real": -255.3711395263672,
"loss": 0.7653,
"rewards/accuracies": 0.737500011920929,
"rewards/generated": 0.9999414682388306,
"rewards/margins": 0.8540604710578918,
"rewards/real": 1.8540016412734985,
"step": 20
},
{
"epoch": 0.38461538461538464,
"grad_norm": 59.21144986605617,
"learning_rate": 3.4285714285714286e-07,
"logits/generated": -0.9283930063247681,
"logits/real": -0.6714473962783813,
"logps/generated": -261.3749084472656,
"logps/real": -249.65054321289062,
"loss": 0.7105,
"rewards/accuracies": 0.75,
"rewards/generated": 1.842218041419983,
"rewards/margins": 0.8659162521362305,
"rewards/real": 2.708134412765503,
"step": 30
},
{
"epoch": 0.5128205128205128,
"grad_norm": 62.53453913007679,
"learning_rate": 2.714285714285714e-07,
"logits/generated": -0.9731477499008179,
"logits/real": -0.6548992395401001,
"logps/generated": -253.58157348632812,
"logps/real": -250.48837280273438,
"loss": 0.7065,
"rewards/accuracies": 0.737500011920929,
"rewards/generated": 1.789442777633667,
"rewards/margins": 1.1298000812530518,
"rewards/real": 2.919243097305298,
"step": 40
},
{
"epoch": 0.6410256410256411,
"grad_norm": 63.255829698141724,
"learning_rate": 2e-07,
"logits/generated": -0.8815720677375793,
"logits/real": -0.7497758269309998,
"logps/generated": -253.22402954101562,
"logps/real": -238.9423370361328,
"loss": 0.6918,
"rewards/accuracies": 0.800000011920929,
"rewards/generated": 2.1875,
"rewards/margins": 0.8109487295150757,
"rewards/real": 2.998448610305786,
"step": 50
},
{
"epoch": 0.7692307692307693,
"grad_norm": 77.61305253400987,
"learning_rate": 1.2857142857142855e-07,
"logits/generated": -1.0364863872528076,
"logits/real": -0.6802612543106079,
"logps/generated": -244.5989227294922,
"logps/real": -247.1646270751953,
"loss": 0.7152,
"rewards/accuracies": 0.8500000238418579,
"rewards/generated": 1.9323114156723022,
"rewards/margins": 1.2057563066482544,
"rewards/real": 3.1380679607391357,
"step": 60
},
{
"epoch": 0.8974358974358975,
"grad_norm": 53.32977807319664,
"learning_rate": 5.714285714285714e-08,
"logits/generated": -1.086891531944275,
"logits/real": -0.7703801989555359,
"logps/generated": -244.48086547851562,
"logps/real": -236.3184814453125,
"loss": 0.7159,
"rewards/accuracies": 0.7749999761581421,
"rewards/generated": 1.834351897239685,
"rewards/margins": 1.2538334131240845,
"rewards/real": 3.0881857872009277,
"step": 70
},
{
"epoch": 1.0,
"step": 78,
"total_flos": 0.0,
"train_loss": 0.7437324760816036,
"train_runtime": 1103.4698,
"train_samples_per_second": 2.251,
"train_steps_per_second": 0.071
}
],
"logging_steps": 10,
"max_steps": 78,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}