{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 100.0, "global_step": 4166, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.0, "completions/mean_length": 188.1875, "completions/min_length": 13.0, "epoch": 0.00048007681228996637, "frac_reward_zero_std": 0.625, "grad_norm": 0.25846678018569946, "kl": 0.0, "learning_rate": 4.784688995215311e-08, "loss": 7.450580596923828e-09, "memory(GiB)": 22.98, "reward": -0.44574999809265137, "reward_std": 0.21814244985580444, "rewards/MMContentORM/mean": -0.8299999833106995, "rewards/MMContentORM/std": 0.5199999809265137, "rewards/MMFormatORM/mean": 0.12187499552965164, "rewards/MMFormatORM/std": 0.262023389339447, "rewards/MMRubricORM/mean": -0.8125, "rewards/MMRubricORM/std": 0.40311288833618164, "step": 1, "train_speed(iter/s)": 0.047076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 365.0, "completions/mean_length": 166.34375, "completions/min_length": 10.25, "epoch": 0.002400384061449832, "frac_reward_zero_std": 0.65625, "grad_norm": 0.5505234599113464, "kl": 0.0014767646789550781, "learning_rate": 2.3923444976076555e-07, "loss": 5.9054447774542496e-05, "memory(GiB)": 23.69, "reward": -0.40181251987814903, "reward_std": 0.2141649704426527, "rewards/MMContentORM/mean": -0.7381249889731407, "rewards/MMContentORM/std": 0.5696750730276108, "rewards/MMFormatORM/mean": 0.1320312451571226, "rewards/MMFormatORM/std": 0.25288669392466545, "rewards/MMRubricORM/mean": -0.796875, "rewards/MMRubricORM/std": 0.3890564441680908, "step": 5, "train_speed(iter/s)": 0.069193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 496.2, "completions/mean_length": 196.9625, "completions/min_length": 12.6, "epoch": 0.004800768122899664, "frac_reward_zero_std": 0.625, "grad_norm": 0.19138023257255554, "kl": 0.003989028930664063, "learning_rate": 4.784688995215311e-07, "loss": 0.0001598534407094121, "memory(GiB)": 23.69, "reward": -0.35715001821517944, "reward_std": 0.25731615722179413, "rewards/MMContentORM/mean": -0.6659999847412109, "rewards/MMContentORM/std": 0.6672868490219116, "rewards/MMFormatORM/mean": 0.15437499880790712, "rewards/MMFormatORM/std": 0.2739557534456253, "rewards/MMRubricORM/mean": -0.7625, "rewards/MMRubricORM/std": 0.42147040367126465, "step": 10, "train_speed(iter/s)": 0.066915 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.4, "completions/mean_length": 175.4625, "completions/min_length": 10.6, "epoch": 0.007201152184349496, "frac_reward_zero_std": 0.675, "grad_norm": 0.1383572220802307, "kl": 0.0017303466796875, "learning_rate": 7.177033492822967e-07, "loss": 6.930254749022424e-05, "memory(GiB)": 23.69, "reward": -0.43625002503395083, "reward_std": 0.23157747238874435, "rewards/MMContentORM/mean": -0.7774999976158142, "rewards/MMContentORM/std": 0.5322124093770981, "rewards/MMFormatORM/mean": 0.1056249976158142, "rewards/MMFormatORM/std": 0.23594435155391694, "rewards/MMRubricORM/mean": -0.8375, "rewards/MMRubricORM/std": 0.36299130916595457, "step": 15, "train_speed(iter/s)": 0.072542 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.0, "completions/mean_length": 180.6, "completions/min_length": 11.2, "epoch": 0.009601536245799328, "frac_reward_zero_std": 0.625, "grad_norm": 0.6662951707839966, "kl": 0.0012393951416015624, "learning_rate": 9.569377990430622e-07, "loss": 4.959976649843156e-05, "memory(GiB)": 23.69, "reward": -0.40305001139640806, "reward_std": 0.25151788890361787, "rewards/MMContentORM/mean": -0.7519999861717224, "rewards/MMContentORM/std": 0.5646255791187287, "rewards/MMFormatORM/mean": 0.13812499791383742, "rewards/MMFormatORM/std": 0.2656771123409271, "rewards/MMRubricORM/mean": -0.7875, "rewards/MMRubricORM/std": 0.4087340235710144, "step": 20, "train_speed(iter/s)": 0.075755 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.6, "completions/mean_length": 165.925, "completions/min_length": 10.6, "epoch": 0.01200192030724916, "frac_reward_zero_std": 0.575, "grad_norm": 1.417803406715393, "kl": 0.00196990966796875, "learning_rate": 1.196172248803828e-06, "loss": 7.890671258792281e-05, "memory(GiB)": 24.1, "reward": -0.3179000109434128, "reward_std": 0.2674277901649475, "rewards/MMContentORM/mean": -0.6160000085830688, "rewards/MMContentORM/std": 0.691703325510025, "rewards/MMFormatORM/mean": 0.17749999463558197, "rewards/MMFormatORM/std": 0.28609572947025297, "rewards/MMRubricORM/mean": -0.7125, "rewards/MMRubricORM/std": 0.45950802564620974, "step": 25, "train_speed(iter/s)": 0.077874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 372.0, "completions/mean_length": 176.1375, "completions/min_length": 10.0, "epoch": 0.014402304368698993, "frac_reward_zero_std": 0.55, "grad_norm": 0.6226190328598022, "kl": 0.001837921142578125, "learning_rate": 1.4354066985645934e-06, "loss": 7.35294190235436e-05, "memory(GiB)": 24.1, "reward": -0.3812000215053558, "reward_std": 0.28241844177246095, "rewards/MMContentORM/mean": -0.7405000030994415, "rewards/MMContentORM/std": 0.5169497162103653, "rewards/MMFormatORM/mean": 0.16249999552965164, "rewards/MMFormatORM/std": 0.28217866122722624, "rewards/MMRubricORM/mean": -0.75, "rewards/MMRubricORM/std": 0.43412102460861207, "step": 30, "train_speed(iter/s)": 0.07745 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.6, "completions/mean_length": 159.9625, "completions/min_length": 12.2, "epoch": 0.016802688430148822, "frac_reward_zero_std": 0.65, "grad_norm": 0.14647576212882996, "kl": 0.0017627716064453126, "learning_rate": 1.6746411483253591e-06, "loss": 7.049270207062363e-05, "memory(GiB)": 24.1, "reward": -0.41890002489089967, "reward_std": 0.22358716428279876, "rewards/MMContentORM/mean": -0.7735000014305115, "rewards/MMContentORM/std": 0.48501716256141664, "rewards/MMFormatORM/mean": 0.12624999843537807, "rewards/MMFormatORM/std": 0.23877365738153458, "rewards/MMRubricORM/mean": -0.8, "rewards/MMRubricORM/std": 0.3904210150241852, "step": 35, "train_speed(iter/s)": 0.078437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.8, "completions/mean_length": 173.625, "completions/min_length": 13.2, "epoch": 0.019203072491598656, "frac_reward_zero_std": 0.55, "grad_norm": 0.4236033260822296, "kl": 0.00177764892578125, "learning_rate": 1.9138755980861244e-06, "loss": 7.109665311872959e-05, "memory(GiB)": 24.1, "reward": -0.3528000235557556, "reward_std": 0.28255987763404844, "rewards/MMContentORM/mean": -0.6695000052452087, "rewards/MMContentORM/std": 0.6945539474487304, "rewards/MMFormatORM/mean": 0.16249999552965164, "rewards/MMFormatORM/std": 0.28217866122722624, "rewards/MMRubricORM/mean": -0.75, "rewards/MMRubricORM/std": 0.43412102460861207, "step": 40, "train_speed(iter/s)": 0.079708 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.8, "completions/mean_length": 168.5875, "completions/min_length": 11.4, "epoch": 0.02160345655304849, "frac_reward_zero_std": 0.625, "grad_norm": 0.08703174442052841, "kl": 0.00247039794921875, "learning_rate": 2.15311004784689e-06, "loss": 9.898855350911617e-05, "memory(GiB)": 24.1, "reward": -0.44075002074241637, "reward_std": 0.22521351724863053, "rewards/MMContentORM/mean": -0.8174999952316284, "rewards/MMContentORM/std": 0.4400706171989441, "rewards/MMFormatORM/mean": 0.12187499701976776, "rewards/MMFormatORM/std": 0.25194679796695707, "rewards/MMRubricORM/mean": -0.8125, "rewards/MMRubricORM/std": 0.3876104474067688, "step": 45, "train_speed(iter/s)": 0.080264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.2, "completions/mean_length": 170.0875, "completions/min_length": 11.6, "epoch": 0.02400384061449832, "frac_reward_zero_std": 0.65, "grad_norm": 0.3127768933773041, "kl": 0.0022594451904296873, "learning_rate": 2.392344497607656e-06, "loss": 9.052451932802796e-05, "memory(GiB)": 24.1, "reward": -0.4175000250339508, "reward_std": 0.20364675521850586, "rewards/MMContentORM/mean": -0.7824999928474426, "rewards/MMContentORM/std": 0.4635924696922302, "rewards/MMFormatORM/mean": 0.132499997317791, "rewards/MMFormatORM/std": 0.26173200011253356, "rewards/MMRubricORM/mean": -0.7875, "rewards/MMRubricORM/std": 0.4147436022758484, "step": 50, "train_speed(iter/s)": 0.080866 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 401.2, "completions/mean_length": 197.925, "completions/min_length": 14.6, "epoch": 0.026404224675948152, "frac_reward_zero_std": 0.55, "grad_norm": 0.17192748188972473, "kl": 0.0035968780517578124, "learning_rate": 2.631578947368421e-06, "loss": 0.0001437270431779325, "memory(GiB)": 24.55, "reward": -0.3665000259876251, "reward_std": 0.28623682260513306, "rewards/MMContentORM/mean": -0.6874999880790711, "rewards/MMContentORM/std": 0.6308155179023742, "rewards/MMFormatORM/mean": 0.15249999463558198, "rewards/MMFormatORM/std": 0.27434429824352263, "rewards/MMRubricORM/mean": -0.7625, "rewards/MMRubricORM/std": 0.4253008782863617, "step": 55, "train_speed(iter/s)": 0.080304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 543.4, "completions/mean_length": 194.5625, "completions/min_length": 11.6, "epoch": 0.028804608737397985, "frac_reward_zero_std": 0.7, "grad_norm": 0.14923764765262604, "kl": 0.004736709594726563, "learning_rate": 2.870813397129187e-06, "loss": 0.00018961232854053378, "memory(GiB)": 24.55, "reward": -0.48050001859664915, "reward_std": 0.1689985252916813, "rewards/MMContentORM/mean": -0.8699999928474427, "rewards/MMContentORM/std": 0.3361044704914093, "rewards/MMFormatORM/mean": 0.09374999701976776, "rewards/MMFormatORM/std": 0.22575461566448213, "rewards/MMRubricORM/mean": -0.85, "rewards/MMRubricORM/std": 0.3601807415485382, "step": 60, "train_speed(iter/s)": 0.077823 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.4, "completions/mean_length": 165.6375, "completions/min_length": 12.2, "epoch": 0.031204992798847815, "frac_reward_zero_std": 0.6, "grad_norm": 0.9673174023628235, "kl": 0.014190292358398438, "learning_rate": 3.1100478468899525e-06, "loss": 0.000567801995202899, "memory(GiB)": 24.55, "reward": -0.3531000196933746, "reward_std": 0.21906168013811111, "rewards/MMContentORM/mean": -0.6989999890327454, "rewards/MMContentORM/std": 0.570724368095398, "rewards/MMFormatORM/mean": 0.17874999791383744, "rewards/MMFormatORM/std": 0.29591297507286074, "rewards/MMRubricORM/mean": -0.725, "rewards/MMRubricORM/std": 0.45525074005126953, "step": 65, "train_speed(iter/s)": 0.078973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 356.2, "completions/mean_length": 165.6125, "completions/min_length": 13.6, "epoch": 0.033605376860297645, "frac_reward_zero_std": 0.625, "grad_norm": 0.19349712133407593, "kl": 0.01680755615234375, "learning_rate": 3.3492822966507182e-06, "loss": 0.0006725039333105087, "memory(GiB)": 24.55, "reward": -0.41095001697540284, "reward_std": 0.22394072413444518, "rewards/MMContentORM/mean": -0.7554999947547912, "rewards/MMContentORM/std": 0.5833674430847168, "rewards/MMFormatORM/mean": 0.12812499552965165, "rewards/MMFormatORM/std": 0.25294241309165955, "rewards/MMRubricORM/mean": -0.8, "rewards/MMRubricORM/std": 0.39294117093086245, "step": 70, "train_speed(iter/s)": 0.079238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 529.2, "completions/mean_length": 185.7875, "completions/min_length": 11.2, "epoch": 0.03600576092174748, "frac_reward_zero_std": 0.625, "grad_norm": 0.9495518803596497, "kl": 0.05297927856445313, "learning_rate": 3.5885167464114835e-06, "loss": 0.0021199073642492296, "memory(GiB)": 24.55, "reward": -0.3796000242233276, "reward_std": 0.2705390602350235, "rewards/MMContentORM/mean": -0.6914999961853028, "rewards/MMContentORM/std": 0.5877212882041931, "rewards/MMFormatORM/mean": 0.13624999821186065, "rewards/MMFormatORM/std": 0.2286323994398117, "rewards/MMRubricORM/mean": -0.7875, "rewards/MMRubricORM/std": 0.3555411517620087, "step": 75, "train_speed(iter/s)": 0.077333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.4, "completions/mean_length": 157.9625, "completions/min_length": 10.0, "epoch": 0.03840614498319731, "frac_reward_zero_std": 0.6, "grad_norm": 0.26823142170906067, "kl": 0.060577392578125, "learning_rate": 3.827751196172249e-06, "loss": 0.002423027902841568, "memory(GiB)": 24.55, "reward": -0.387850022315979, "reward_std": 0.27301393151283265, "rewards/MMContentORM/mean": -0.706499969959259, "rewards/MMContentORM/std": 0.6382155597209931, "rewards/MMFormatORM/mean": 0.13062499612569808, "rewards/MMFormatORM/std": 0.25994100272655485, "rewards/MMRubricORM/mean": -0.7875, "rewards/MMRubricORM/std": 0.41726375818252565, "step": 80, "train_speed(iter/s)": 0.077772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.2, "completions/mean_length": 165.1375, "completions/min_length": 11.2, "epoch": 0.04080652904464714, "frac_reward_zero_std": 0.575, "grad_norm": 1.0264372825622559, "kl": 0.06542510986328125, "learning_rate": 4.066985645933015e-06, "loss": 0.002620968222618103, "memory(GiB)": 24.55, "reward": -0.3707500219345093, "reward_std": 0.27471098899841306, "rewards/MMContentORM/mean": -0.6924999833106995, "rewards/MMContentORM/std": 0.6197769522666932, "rewards/MMFormatORM/mean": 0.14687499552965164, "rewards/MMFormatORM/std": 0.2623553693294525, "rewards/MMRubricORM/mean": -0.7625, "rewards/MMRubricORM/std": 0.41832817196846006, "step": 85, "train_speed(iter/s)": 0.078137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.4, "completions/mean_length": 179.8625, "completions/min_length": 11.8, "epoch": 0.04320691310609698, "frac_reward_zero_std": 0.65, "grad_norm": 0.17547202110290527, "kl": 0.02538909912109375, "learning_rate": 4.30622009569378e-06, "loss": 0.0010150117799639703, "memory(GiB)": 24.55, "reward": -0.4253000199794769, "reward_std": 0.2470631130039692, "rewards/MMContentORM/mean": -0.7644999980926513, "rewards/MMContentORM/std": 0.5263380289077759, "rewards/MMFormatORM/mean": 0.11374999582767487, "rewards/MMFormatORM/std": 0.24558367133140563, "rewards/MMRubricORM/mean": -0.825, "rewards/MMRubricORM/std": 0.3778210341930389, "step": 90, "train_speed(iter/s)": 0.078557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 353.8, "completions/mean_length": 198.0625, "completions/min_length": 10.0, "epoch": 0.04560729716754681, "frac_reward_zero_std": 0.525, "grad_norm": 0.3727310597896576, "kl": 0.06304931640625, "learning_rate": 4.5454545454545455e-06, "loss": 0.0025252360850572587, "memory(GiB)": 24.55, "reward": -0.35370001196861267, "reward_std": 0.32187501192092893, "rewards/MMContentORM/mean": -0.6679999887943268, "rewards/MMContentORM/std": 0.599762350320816, "rewards/MMFormatORM/mean": 0.15874999314546584, "rewards/MMFormatORM/std": 0.2781087428331375, "rewards/MMRubricORM/mean": -0.75, "rewards/MMRubricORM/std": 0.43412102460861207, "step": 95, "train_speed(iter/s)": 0.078826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.2, "completions/mean_length": 178.225, "completions/min_length": 10.6, "epoch": 0.04800768122899664, "frac_reward_zero_std": 0.625, "grad_norm": 0.5053905248641968, "kl": 0.058896636962890624, "learning_rate": 4.784688995215312e-06, "loss": 0.0023545216768980025, "memory(GiB)": 24.55, "reward": -0.366600027680397, "reward_std": 0.25964961051940916, "rewards/MMContentORM/mean": -0.6715000033378601, "rewards/MMContentORM/std": 0.6754477977752685, "rewards/MMFormatORM/mean": 0.1424999937415123, "rewards/MMFormatORM/std": 0.2643744289875031, "rewards/MMRubricORM/mean": -0.775, "rewards/MMRubricORM/std": 0.4129913091659546, "step": 100, "train_speed(iter/s)": 0.079147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.6, "completions/mean_length": 183.025, "completions/min_length": 38.8, "epoch": 0.050408065290446474, "frac_reward_zero_std": 0.675, "grad_norm": 0.7233589887619019, "kl": 0.03177032470703125, "learning_rate": 5.023923444976077e-06, "loss": 0.0012724055908620358, "memory(GiB)": 24.55, "reward": -0.4313500225543976, "reward_std": 0.21149563789367676, "rewards/MMContentORM/mean": -0.793999993801117, "rewards/MMContentORM/std": 0.48824622631073, "rewards/MMFormatORM/mean": 0.12187499701976776, "rewards/MMFormatORM/std": 0.255849027633667, "rewards/MMRubricORM/mean": -0.8125, "rewards/MMRubricORM/std": 0.39361388683319093, "step": 105, "train_speed(iter/s)": 0.078492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 350.4, "completions/mean_length": 205.525, "completions/min_length": 40.4, "epoch": 0.052808449351896304, "frac_reward_zero_std": 0.575, "grad_norm": 0.656991720199585, "kl": 0.00919189453125, "learning_rate": 5.263157894736842e-06, "loss": 0.00036728212144225835, "memory(GiB)": 24.55, "reward": -0.3993500292301178, "reward_std": 0.2595789015293121, "rewards/MMContentORM/mean": -0.7514999866485595, "rewards/MMContentORM/std": 0.5262986779212951, "rewards/MMFormatORM/mean": 0.14062499403953552, "rewards/MMFormatORM/std": 0.26713907420635224, "rewards/MMRubricORM/mean": -0.775, "rewards/MMRubricORM/std": 0.4210435926914215, "step": 110, "train_speed(iter/s)": 0.078426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 378.8, "completions/mean_length": 198.6875, "completions/min_length": 40.8, "epoch": 0.055208833413346134, "frac_reward_zero_std": 0.7, "grad_norm": 0.16790097951889038, "kl": 0.0282012939453125, "learning_rate": 5.502392344497608e-06, "loss": 0.0011287719011306764, "memory(GiB)": 24.55, "reward": -0.36190002262592313, "reward_std": 0.12529932260513305, "rewards/MMContentORM/mean": -0.7134999990463257, "rewards/MMContentORM/std": 0.5213123708963394, "rewards/MMFormatORM/mean": 0.17124999314546585, "rewards/MMFormatORM/std": 0.2684005439281464, "rewards/MMRubricORM/mean": -0.725, "rewards/MMRubricORM/std": 0.42883480787277223, "step": 115, "train_speed(iter/s)": 0.078525 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.8, "completions/mean_length": 191.825, "completions/min_length": 18.8, "epoch": 0.05760921747479597, "frac_reward_zero_std": 0.425, "grad_norm": 0.7576951384544373, "kl": 0.027909088134765624, "learning_rate": 5.741626794258374e-06, "loss": 0.001117511186748743, "memory(GiB)": 24.55, "reward": -0.26640002727508544, "reward_std": 0.39357563853263855, "rewards/MMContentORM/mean": -0.5684999823570251, "rewards/MMContentORM/std": 0.7017473936080932, "rewards/MMFormatORM/mean": 0.22749999165534973, "rewards/MMFormatORM/std": 0.3158136546611786, "rewards/MMRubricORM/mean": -0.65, "rewards/MMRubricORM/std": 0.4858671844005585, "step": 120, "train_speed(iter/s)": 0.079177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 331.4, "completions/mean_length": 199.3375, "completions/min_length": 21.6, "epoch": 0.0600096015362458, "frac_reward_zero_std": 0.65, "grad_norm": 0.35645198822021484, "kl": 0.01103973388671875, "learning_rate": 5.98086124401914e-06, "loss": 0.0004417818039655685, "memory(GiB)": 24.59, "reward": -0.3517500251531601, "reward_std": 0.2542048916220665, "rewards/MMContentORM/mean": -0.6524999976158142, "rewards/MMContentORM/std": 0.6953619718551636, "rewards/MMFormatORM/mean": 0.15437499582767486, "rewards/MMFormatORM/std": 0.25867260694503785, "rewards/MMRubricORM/mean": -0.7625, "rewards/MMRubricORM/std": 0.397957855463028, "step": 125, "train_speed(iter/s)": 0.079515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 460.8, "completions/mean_length": 206.5125, "completions/min_length": 23.6, "epoch": 0.06240998559769563, "frac_reward_zero_std": 0.65, "grad_norm": 0.7836592197418213, "kl": 0.046865081787109374, "learning_rate": 6.220095693779905e-06, "loss": 0.0018781695514917373, "memory(GiB)": 24.59, "reward": -0.4076500177383423, "reward_std": 0.17062486261129378, "rewards/MMContentORM/mean": -0.7634999871253967, "rewards/MMContentORM/std": 0.5306057691574096, "rewards/MMFormatORM/mean": 0.1381249964237213, "rewards/MMFormatORM/std": 0.26958334147930146, "rewards/MMRubricORM/mean": -0.7875, "rewards/MMRubricORM/std": 0.4147436022758484, "step": 130, "train_speed(iter/s)": 0.079017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 485.4, "completions/mean_length": 211.1625, "completions/min_length": 60.6, "epoch": 0.06481036965914547, "frac_reward_zero_std": 0.425, "grad_norm": 0.19905580580234528, "kl": 0.051471710205078125, "learning_rate": 6.459330143540671e-06, "loss": 0.0020540472120046615, "memory(GiB)": 24.59, "reward": -0.2741000235080719, "reward_std": 0.37745361328125, "rewards/MMContentORM/mean": -0.5589999914169311, "rewards/MMContentORM/std": 0.7385274767875671, "rewards/MMFormatORM/mean": 0.2112499952316284, "rewards/MMFormatORM/std": 0.3126032888889313, "rewards/MMRubricORM/mean": -0.675, "rewards/MMRubricORM/std": 0.4809281527996063, "step": 135, "train_speed(iter/s)": 0.078216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 450.8, "completions/mean_length": 202.375, "completions/min_length": 13.4, "epoch": 0.06721075372059529, "frac_reward_zero_std": 0.5, "grad_norm": 0.833666205406189, "kl": 0.051274871826171874, "learning_rate": 6.6985645933014365e-06, "loss": 0.002046111598610878, "memory(GiB)": 24.59, "reward": -0.2925000175833702, "reward_std": 0.2917522594332695, "rewards/MMContentORM/mean": -0.5849999874830246, "rewards/MMContentORM/std": 0.7148973345756531, "rewards/MMFormatORM/mean": 0.19749999642372132, "rewards/MMFormatORM/std": 0.2828102707862854, "rewards/MMRubricORM/mean": -0.6875, "rewards/MMRubricORM/std": 0.44643059372901917, "step": 140, "train_speed(iter/s)": 0.077881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.0, "completions/mean_length": 165.1875, "completions/min_length": 37.8, "epoch": 0.06961113778204513, "frac_reward_zero_std": 0.625, "grad_norm": 0.9144070148468018, "kl": 0.05808563232421875, "learning_rate": 6.937799043062201e-06, "loss": 0.0023345451802015303, "memory(GiB)": 24.59, "reward": -0.39740002155303955, "reward_std": 0.22047589719295502, "rewards/MMContentORM/mean": -0.7284999907016754, "rewards/MMContentORM/std": 0.6163743019104004, "rewards/MMFormatORM/mean": 0.1287499949336052, "rewards/MMFormatORM/std": 0.24967178106307983, "rewards/MMRubricORM/mean": -0.7875, "rewards/MMRubricORM/std": 0.40525074005126954, "step": 145, "train_speed(iter/s)": 0.078142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.0, "completions/mean_length": 189.1125, "completions/min_length": 38.2, "epoch": 0.07201152184349496, "frac_reward_zero_std": 0.5, "grad_norm": 0.23582716286182404, "kl": 0.036444091796875, "learning_rate": 7.177033492822967e-06, "loss": 0.001458549778908491, "memory(GiB)": 24.59, "reward": -0.29195002317428587, "reward_std": 0.23907281160354615, "rewards/MMContentORM/mean": -0.6305000007152557, "rewards/MMContentORM/std": 0.6437041282653808, "rewards/MMFormatORM/mean": 0.22562499046325685, "rewards/MMFormatORM/std": 0.3097758531570435, "rewards/MMRubricORM/mean": -0.65, "rewards/MMRubricORM/std": 0.48037723302841184, "step": 150, "train_speed(iter/s)": 0.078351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025, "completions/max_length": 485.6, "completions/mean_length": 236.125, "completions/min_length": 72.2, "epoch": 0.07441190590494479, "frac_reward_zero_std": 0.45, "grad_norm": 0.1894054263830185, "kl": 0.00907440185546875, "learning_rate": 7.416267942583732e-06, "loss": 0.00036348355934023857, "memory(GiB)": 24.59, "reward": -0.29550001621246336, "reward_std": 0.35963451862335205, "rewards/MMContentORM/mean": -0.6124999940395355, "rewards/MMContentORM/std": 0.6703195393085479, "rewards/MMFormatORM/mean": 0.21124999225139618, "rewards/MMFormatORM/std": 0.3112755298614502, "rewards/MMRubricORM/mean": -0.675, "rewards/MMRubricORM/std": 0.47888544797897337, "step": 155, "train_speed(iter/s)": 0.077809 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 330.2, "completions/mean_length": 194.05, "completions/min_length": 17.0, "epoch": 0.07681228996639462, "frac_reward_zero_std": 0.5, "grad_norm": 0.16335317492485046, "kl": 0.023895263671875, "learning_rate": 7.655502392344498e-06, "loss": 0.0009563345462083817, "memory(GiB)": 24.59, "reward": -0.24430001378059388, "reward_std": 0.26785205602645873, "rewards/MMContentORM/mean": -0.56700000166893, "rewards/MMContentORM/std": 0.6720305800437927, "rewards/MMFormatORM/mean": 0.2562499925494194, "rewards/MMFormatORM/std": 0.31278570294380187, "rewards/MMRubricORM/mean": -0.6, "rewards/MMRubricORM/std": 0.486371648311615, "step": 160, "train_speed(iter/s)": 0.078087 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.0, "completions/mean_length": 211.7875, "completions/min_length": 60.6, "epoch": 0.07921267402784446, "frac_reward_zero_std": 0.375, "grad_norm": 0.5669417381286621, "kl": 0.015301513671875, "learning_rate": 7.894736842105265e-06, "loss": 0.0006132687442004681, "memory(GiB)": 24.59, "reward": -0.22980001866817473, "reward_std": 0.2708218902349472, "rewards/MMContentORM/mean": -0.584499990940094, "rewards/MMContentORM/std": 0.6051075398921967, "rewards/MMFormatORM/mean": 0.2849999874830246, "rewards/MMFormatORM/std": 0.32614828944206237, "rewards/MMRubricORM/mean": -0.55, "rewards/MMRubricORM/std": 0.5098386645317078, "step": 165, "train_speed(iter/s)": 0.077789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 350.8, "completions/mean_length": 204.05, "completions/min_length": 56.0, "epoch": 0.08161305808929428, "frac_reward_zero_std": 0.375, "grad_norm": 0.23664671182632446, "kl": 0.054852294921875, "learning_rate": 8.13397129186603e-06, "loss": 0.002194448187947273, "memory(GiB)": 24.59, "reward": -0.17945002168416976, "reward_std": 0.2834791004657745, "rewards/MMContentORM/mean": -0.48049999326467513, "rewards/MMContentORM/std": 0.6853928565979004, "rewards/MMFormatORM/mean": 0.3006249964237213, "rewards/MMFormatORM/std": 0.3232024133205414, "rewards/MMRubricORM/mean": -0.5375, "rewards/MMRubricORM/std": 0.49723449945449827, "step": 170, "train_speed(iter/s)": 0.077962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.4, "completions/mean_length": 180.9375, "completions/min_length": 19.4, "epoch": 0.08401344215074412, "frac_reward_zero_std": 0.475, "grad_norm": 0.41499343514442444, "kl": 0.0380462646484375, "learning_rate": 8.373205741626795e-06, "loss": 0.0015261590480804444, "memory(GiB)": 24.67, "reward": -0.2555500268936157, "reward_std": 0.271882563829422, "rewards/MMContentORM/mean": -0.5519999861717224, "rewards/MMContentORM/std": 0.7190791845321656, "rewards/MMFormatORM/mean": 0.2318749874830246, "rewards/MMFormatORM/std": 0.31184685230255127, "rewards/MMRubricORM/mean": -0.6375, "rewards/MMRubricORM/std": 0.48463451862335205, "step": 175, "train_speed(iter/s)": 0.078217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.8, "completions/mean_length": 192.8875, "completions/min_length": 40.8, "epoch": 0.08641382621219396, "frac_reward_zero_std": 0.3, "grad_norm": 0.4990822970867157, "kl": 0.023101806640625, "learning_rate": 8.61244019138756e-06, "loss": 0.000923317763954401, "memory(GiB)": 24.67, "reward": -0.20000003054738044, "reward_std": 0.35242201685905455, "rewards/MMContentORM/mean": -0.5174999952316284, "rewards/MMContentORM/std": 0.6820277512073517, "rewards/MMFormatORM/mean": 0.29249999225139617, "rewards/MMFormatORM/std": 0.32704830169677734, "rewards/MMRubricORM/mean": -0.55, "rewards/MMRubricORM/std": 0.5031512618064881, "step": 180, "train_speed(iter/s)": 0.078339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.8, "completions/mean_length": 203.6, "completions/min_length": 40.0, "epoch": 0.08881421027364378, "frac_reward_zero_std": 0.225, "grad_norm": 0.43722620606422424, "kl": 0.02008056640625, "learning_rate": 8.851674641148326e-06, "loss": 0.0008031532168388366, "memory(GiB)": 24.67, "reward": -0.12450002208352089, "reward_std": 0.3882016271352768, "rewards/MMContentORM/mean": -0.41499999687075617, "rewards/MMContentORM/std": 0.637773585319519, "rewards/MMFormatORM/mean": 0.3412499904632568, "rewards/MMFormatORM/std": 0.3158136546611786, "rewards/MMRubricORM/mean": -0.475, "rewards/MMRubricORM/std": 0.4858671844005585, "step": 185, "train_speed(iter/s)": 0.078522 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.6, "completions/mean_length": 192.125, "completions/min_length": 35.8, "epoch": 0.09121459433509362, "frac_reward_zero_std": 0.4, "grad_norm": 0.2395554482936859, "kl": 0.04338836669921875, "learning_rate": 9.090909090909091e-06, "loss": 0.0017313847318291664, "memory(GiB)": 24.67, "reward": -0.1971000224351883, "reward_std": 0.31183409988880156, "rewards/MMContentORM/mean": -0.5064999997615814, "rewards/MMContentORM/std": 0.7074662327766419, "rewards/MMFormatORM/mean": 0.2887499928474426, "rewards/MMFormatORM/std": 0.32622864842414856, "rewards/MMRubricORM/mean": -0.55, "rewards/MMRubricORM/std": 0.5080508947372436, "step": 190, "train_speed(iter/s)": 0.078772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.8, "completions/mean_length": 195.0, "completions/min_length": 90.4, "epoch": 0.09361497839654345, "frac_reward_zero_std": 0.2, "grad_norm": 0.25950706005096436, "kl": 0.0434844970703125, "learning_rate": 9.330143540669856e-06, "loss": 0.001740964502096176, "memory(GiB)": 24.67, "reward": -0.04445001631975174, "reward_std": 0.4817518353462219, "rewards/MMContentORM/mean": -0.27049999833106997, "rewards/MMContentORM/std": 0.8492022752761841, "rewards/MMFormatORM/mean": 0.3718749940395355, "rewards/MMFormatORM/std": 0.3288069784641266, "rewards/MMRubricORM/mean": -0.425, "rewards/MMRubricORM/std": 0.5082185864448547, "step": 195, "train_speed(iter/s)": 0.079029 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.0, "completions/mean_length": 196.225, "completions/min_length": 99.2, "epoch": 0.09601536245799328, "frac_reward_zero_std": 0.3, "grad_norm": 0.26332148909568787, "kl": 0.057666015625, "learning_rate": 9.569377990430623e-06, "loss": 0.002305997908115387, "memory(GiB)": 24.67, "reward": -0.13745001405477525, "reward_std": 0.4148595631122589, "rewards/MMContentORM/mean": -0.38799999952316283, "rewards/MMContentORM/std": 0.7763695597648621, "rewards/MMFormatORM/mean": 0.30687499344348906, "rewards/MMFormatORM/std": 0.320049911737442, "rewards/MMRubricORM/mean": -0.525, "rewards/MMRubricORM/std": 0.49438175559043884, "step": 200, "train_speed(iter/s)": 0.079255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 436.8, "completions/mean_length": 201.1375, "completions/min_length": 57.4, "epoch": 0.09841574651944311, "frac_reward_zero_std": 0.3, "grad_norm": 0.20934279263019562, "kl": 0.0318572998046875, "learning_rate": 9.808612440191389e-06, "loss": 0.0012754278257489204, "memory(GiB)": 25.29, "reward": -0.09840002059936523, "reward_std": 0.3428053617477417, "rewards/MMContentORM/mean": -0.3834999889135361, "rewards/MMContentORM/std": 0.7341944694519043, "rewards/MMFormatORM/mean": 0.3562499850988388, "rewards/MMFormatORM/std": 0.3010324537754059, "rewards/MMRubricORM/mean": -0.4375, "rewards/MMRubricORM/std": 0.4770470380783081, "step": 205, "train_speed(iter/s)": 0.078583 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.6, "completions/mean_length": 185.1375, "completions/min_length": 86.8, "epoch": 0.10081613058089295, "frac_reward_zero_std": 0.225, "grad_norm": 0.22751331329345703, "kl": 0.0099822998046875, "learning_rate": 9.99999842417629e-06, "loss": 0.00039928192272782327, "memory(GiB)": 25.29, "reward": -0.013300008326768874, "reward_std": 0.367412693798542, "rewards/MMContentORM/mean": -0.2519999980926514, "rewards/MMContentORM/std": 0.6957788646221161, "rewards/MMFormatORM/mean": 0.40624999105930326, "rewards/MMFormatORM/std": 0.29864728450775146, "rewards/MMRubricORM/mean": -0.375, "rewards/MMRubricORM/std": 0.45945738554000853, "step": 210, "train_speed(iter/s)": 0.078979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 372.0, "completions/mean_length": 203.25, "completions/min_length": 119.4, "epoch": 0.10321651464234277, "frac_reward_zero_std": 0.25, "grad_norm": 0.2543454170227051, "kl": 0.01162109375, "learning_rate": 9.999943270450725e-06, "loss": 0.00046498142182826996, "memory(GiB)": 25.29, "reward": -0.09350001960992813, "reward_std": 0.3010860651731491, "rewards/MMContentORM/mean": -0.4375, "rewards/MMContentORM/std": 0.6415534257888794, "rewards/MMFormatORM/mean": 0.39124998450279236, "rewards/MMFormatORM/std": 0.30987287759780885, "rewards/MMRubricORM/mean": -0.375, "rewards/MMRubricORM/std": 0.4858671844005585, "step": 215, "train_speed(iter/s)": 0.078963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.2, "completions/mean_length": 192.45, "completions/min_length": 76.8, "epoch": 0.10561689870379261, "frac_reward_zero_std": 0.175, "grad_norm": 0.5995835661888123, "kl": 0.02928466796875, "learning_rate": 9.999809326532929e-06, "loss": 0.0011718601919710637, "memory(GiB)": 25.29, "reward": 0.18744998872280122, "reward_std": 0.29281292855739594, "rewards/MMContentORM/mean": 0.08300001323223113, "rewards/MMContentORM/std": 0.809050726890564, "rewards/MMFormatORM/mean": 0.4981249749660492, "rewards/MMFormatORM/std": 0.2565573215484619, "rewards/MMRubricORM/mean": -0.225, "rewards/MMRubricORM/std": 0.3916578650474548, "step": 220, "train_speed(iter/s)": 0.079297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.2, "completions/mean_length": 191.175, "completions/min_length": 101.8, "epoch": 0.10801728276524244, "frac_reward_zero_std": 0.25, "grad_norm": 0.2489888072013855, "kl": 0.01142578125, "learning_rate": 9.99959659453362e-06, "loss": 0.0004573634825646877, "memory(GiB)": 25.29, "reward": 0.07849998809397221, "reward_std": 0.29528780579566954, "rewards/MMContentORM/mean": -0.17499998956918716, "rewards/MMContentORM/std": 0.7530360221862793, "rewards/MMFormatORM/mean": 0.4899999797344208, "rewards/MMFormatORM/std": 0.23433216214179992, "rewards/MMRubricORM/mean": -0.2375, "rewards/MMRubricORM/std": 0.36226795315742494, "step": 225, "train_speed(iter/s)": 0.079619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 327.0, "completions/mean_length": 197.075, "completions/min_length": 113.0, "epoch": 0.11041766682669227, "frac_reward_zero_std": 0.15, "grad_norm": 0.27176177501678467, "kl": 0.012841796875, "learning_rate": 9.999305077805077e-06, "loss": 0.0005132704041898251, "memory(GiB)": 25.29, "reward": -0.02635001763701439, "reward_std": 0.40764704942703245, "rewards/MMContentORM/mean": -0.2990000039339066, "rewards/MMContentORM/std": 0.7697360038757324, "rewards/MMFormatORM/mean": 0.41437498331069944, "rewards/MMFormatORM/std": 0.31634018421173093, "rewards/MMRubricORM/mean": -0.3625, "rewards/MMRubricORM/std": 0.486677223443985, "step": 230, "train_speed(iter/s)": 0.079753 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.4, "completions/mean_length": 189.825, "completions/min_length": 85.0, "epoch": 0.1128180508881421, "frac_reward_zero_std": 0.25, "grad_norm": 0.3836219310760498, "kl": 0.0144561767578125, "learning_rate": 9.99893478094108e-06, "loss": 0.0005777373909950257, "memory(GiB)": 25.29, "reward": 0.07359998375177383, "reward_std": 0.32229926288127897, "rewards/MMContentORM/mean": -0.14599999487400056, "rewards/MMContentORM/std": 0.7706803798675537, "rewards/MMFormatORM/mean": 0.4674999833106995, "rewards/MMFormatORM/std": 0.28921514451503755, "rewards/MMRubricORM/mean": -0.275, "rewards/MMRubricORM/std": 0.4454106867313385, "step": 235, "train_speed(iter/s)": 0.079954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.6, "completions/mean_length": 192.375, "completions/min_length": 102.6, "epoch": 0.11521843494959194, "frac_reward_zero_std": 0.1, "grad_norm": 0.3361359238624573, "kl": 0.01556396484375, "learning_rate": 9.99848570977685e-06, "loss": 0.0006220159120857716, "memory(GiB)": 25.29, "reward": 0.0773499846458435, "reward_std": 0.34245182275772096, "rewards/MMContentORM/mean": -0.15099999755620958, "rewards/MMContentORM/std": 0.7413495063781739, "rewards/MMFormatORM/mean": 0.47562498450279234, "rewards/MMFormatORM/std": 0.2913523316383362, "rewards/MMRubricORM/mean": -0.2625, "rewards/MMRubricORM/std": 0.4509934544563293, "step": 240, "train_speed(iter/s)": 0.079982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.2, "completions/mean_length": 196.525, "completions/min_length": 130.8, "epoch": 0.11761881901104176, "frac_reward_zero_std": 0.175, "grad_norm": 0.23139292001724243, "kl": 0.0150390625, "learning_rate": 9.997957871388948e-06, "loss": 0.0006011344958096743, "memory(GiB)": 25.29, "reward": 0.22974997647106649, "reward_std": 0.27442815005779264, "rewards/MMContentORM/mean": 0.13999999761581422, "rewards/MMContentORM/std": 0.7938369989395142, "rewards/MMFormatORM/mean": 0.5281249761581421, "rewards/MMFormatORM/std": 0.22127365171909333, "rewards/MMRubricORM/mean": -0.1875, "rewards/MMRubricORM/std": 0.3404210150241852, "step": 245, "train_speed(iter/s)": 0.0802 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.8, "completions/mean_length": 190.8875, "completions/min_length": 96.8, "epoch": 0.1200192030724916, "frac_reward_zero_std": 0.275, "grad_norm": 0.24103079736232758, "kl": 0.058306884765625, "learning_rate": 9.997351274095165e-06, "loss": 0.002327635698020458, "memory(GiB)": 25.29, "reward": 0.14489998891949654, "reward_std": 0.31650099754333494, "rewards/MMContentORM/mean": -0.028999996185302735, "rewards/MMContentORM/std": 0.7646125912666321, "rewards/MMFormatORM/mean": 0.5037499785423278, "rewards/MMFormatORM/std": 0.2736783236265182, "rewards/MMRubricORM/mean": -0.225, "rewards/MMRubricORM/std": 0.4210435926914215, "step": 250, "train_speed(iter/s)": 0.080485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.2, "completions/mean_length": 196.875, "completions/min_length": 126.4, "epoch": 0.12241958713394142, "frac_reward_zero_std": 0.125, "grad_norm": 0.2732993960380554, "kl": 0.0129638671875, "learning_rate": 9.996665927454393e-06, "loss": 0.0005180831998586654, "memory(GiB)": 25.29, "reward": 0.13094998374581338, "reward_std": 0.3363707005977631, "rewards/MMContentORM/mean": -0.10699999555945397, "rewards/MMContentORM/std": 0.732841408252716, "rewards/MMFormatORM/mean": 0.5281249821186066, "rewards/MMFormatORM/std": 0.2597552388906479, "rewards/MMRubricORM/mean": -0.1875, "rewards/MMRubricORM/std": 0.3996234655380249, "step": 255, "train_speed(iter/s)": 0.080761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.0, "completions/mean_length": 192.6625, "completions/min_length": 101.6, "epoch": 0.12481997119539126, "frac_reward_zero_std": 0.2, "grad_norm": 0.2951659858226776, "kl": 0.03148193359375, "learning_rate": 9.995901842266476e-06, "loss": 0.0012587737292051315, "memory(GiB)": 25.29, "reward": 0.1078499898314476, "reward_std": 0.3215214520692825, "rewards/MMContentORM/mean": -0.1535000056028366, "rewards/MMContentORM/std": 0.7672501325607299, "rewards/MMFormatORM/mean": 0.5168749809265136, "rewards/MMFormatORM/std": 0.25745911300182345, "rewards/MMRubricORM/mean": -0.1875, "rewards/MMRubricORM/std": 0.3996234655380249, "step": 260, "train_speed(iter/s)": 0.081034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.4, "completions/mean_length": 199.075, "completions/min_length": 118.2, "epoch": 0.12722035525684108, "frac_reward_zero_std": 0.275, "grad_norm": 0.23530304431915283, "kl": 0.0120758056640625, "learning_rate": 9.99505903057203e-06, "loss": 0.00048267128877341745, "memory(GiB)": 25.29, "reward": 0.20024997591972352, "reward_std": 0.22662772685289384, "rewards/MMContentORM/mean": 0.012500005960464477, "rewards/MMContentORM/std": 0.6804582595825195, "rewards/MMFormatORM/mean": 0.5568749785423279, "rewards/MMFormatORM/std": 0.19259803593158722, "rewards/MMRubricORM/mean": -0.1375, "rewards/MMRubricORM/std": 0.29467830061912537, "step": 265, "train_speed(iter/s)": 0.081248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.6, "completions/mean_length": 188.1125, "completions/min_length": 134.8, "epoch": 0.12962073931829093, "frac_reward_zero_std": 0.175, "grad_norm": 0.2173798680305481, "kl": 0.013104248046875, "learning_rate": 9.994137505652267e-06, "loss": 0.0005250374786555767, "memory(GiB)": 25.29, "reward": 0.21479999721050264, "reward_std": 0.28807530701160433, "rewards/MMContentORM/mean": 0.05450000464916229, "rewards/MMContentORM/std": 0.7564670324325562, "rewards/MMFormatORM/mean": 0.5512499809265137, "rewards/MMFormatORM/std": 0.22316529154777526, "rewards/MMRubricORM/mean": -0.1375, "rewards/MMRubricORM/std": 0.34438174962997437, "step": 270, "train_speed(iter/s)": 0.081407 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.2, "completions/mean_length": 189.925, "completions/min_length": 126.8, "epoch": 0.13202112337974076, "frac_reward_zero_std": 0.3, "grad_norm": 0.23718853294849396, "kl": 0.015252685546875, "learning_rate": 9.993137282028777e-06, "loss": 0.0006098361685872078, "memory(GiB)": 25.29, "reward": 0.22029998302459716, "reward_std": 0.29967186152935027, "rewards/MMContentORM/mean": 0.12200000137090683, "rewards/MMContentORM/std": 0.8326894640922546, "rewards/MMFormatORM/mean": 0.5224999785423279, "rewards/MMFormatORM/std": 0.25347527861595154, "rewards/MMRubricORM/mean": -0.1875, "rewards/MMRubricORM/std": 0.39013060331344607, "step": 275, "train_speed(iter/s)": 0.081663 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 467.6, "completions/mean_length": 205.3, "completions/min_length": 126.8, "epoch": 0.13442150744119058, "frac_reward_zero_std": 0.2, "grad_norm": 0.27481362223625183, "kl": 0.013482666015625, "learning_rate": 9.992058375463302e-06, "loss": 0.0005398368928581476, "memory(GiB)": 25.29, "reward": 0.15504998862743377, "reward_std": 0.3667762905359268, "rewards/MMContentORM/mean": -0.03049999326467514, "rewards/MMContentORM/std": 0.7381291270256043, "rewards/MMFormatORM/mean": 0.5181249678134918, "rewards/MMFormatORM/std": 0.2611870527267456, "rewards/MMRubricORM/mean": -0.2, "rewards/MMRubricORM/std": 0.4024340331554413, "step": 280, "train_speed(iter/s)": 0.081362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.4, "completions/mean_length": 183.0, "completions/min_length": 125.6, "epoch": 0.13682189150264043, "frac_reward_zero_std": 0.25, "grad_norm": 0.33459168672561646, "kl": 0.013397216796875, "learning_rate": 9.990900802957484e-06, "loss": 0.0005357235670089722, "memory(GiB)": 25.29, "reward": 0.17229999005794525, "reward_std": 0.3010860651731491, "rewards/MMContentORM/mean": -0.07550000250339509, "rewards/MMContentORM/std": 0.6900161981582642, "rewards/MMFormatORM/mean": 0.568749976158142, "rewards/MMFormatORM/std": 0.1936162531375885, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.2978711724281311, "step": 285, "train_speed(iter/s)": 0.081635 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.8, "completions/mean_length": 193.6875, "completions/min_length": 125.6, "epoch": 0.13922227556409025, "frac_reward_zero_std": 0.25, "grad_norm": 0.22289112210273743, "kl": 0.015679931640625, "learning_rate": 9.989664582752603e-06, "loss": 0.0006269993260502815, "memory(GiB)": 25.29, "reward": 0.20594998747110366, "reward_std": 0.2612759530544281, "rewards/MMContentORM/mean": 0.055499997735023496, "rewards/MMContentORM/std": 0.7748138785362244, "rewards/MMFormatORM/mean": 0.5406249761581421, "rewards/MMFormatORM/std": 0.2212115779519081, "rewards/MMRubricORM/mean": -0.1625, "rewards/MMRubricORM/std": 0.3172485947608948, "step": 290, "train_speed(iter/s)": 0.081907 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 327.2, "completions/mean_length": 200.9875, "completions/min_length": 134.4, "epoch": 0.14162265962554008, "frac_reward_zero_std": 0.1, "grad_norm": 0.26251521706581116, "kl": 0.015625, "learning_rate": 9.988349734329284e-06, "loss": 0.0006249185651540756, "memory(GiB)": 25.29, "reward": 0.2513999938964844, "reward_std": 0.24013345837593078, "rewards/MMContentORM/mean": 0.08850000128149986, "rewards/MMContentORM/std": 0.7766122579574585, "rewards/MMFormatORM/mean": 0.5837499737739563, "rewards/MMFormatORM/std": 0.16351408362388611, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 295, "train_speed(iter/s)": 0.08196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 367.0, "completions/mean_length": 191.7, "completions/min_length": 116.6, "epoch": 0.14402304368698993, "frac_reward_zero_std": 0.125, "grad_norm": 0.28246110677719116, "kl": 0.017596435546875, "learning_rate": 9.986956278407198e-06, "loss": 0.0007036954164505004, "memory(GiB)": 25.29, "reward": 0.13574998527765275, "reward_std": 0.30907638669013976, "rewards/MMContentORM/mean": -0.049999994784593584, "rewards/MMContentORM/std": 0.7848005771636963, "rewards/MMFormatORM/mean": 0.5018749833106995, "rewards/MMFormatORM/std": 0.2665435582399368, "rewards/MMRubricORM/mean": -0.225, "rewards/MMRubricORM/std": 0.4095080256462097, "step": 300, "train_speed(iter/s)": 0.081952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.6, "completions/mean_length": 182.3, "completions/min_length": 105.6, "epoch": 0.14642342774843975, "frac_reward_zero_std": 0.2, "grad_norm": 0.6548066139221191, "kl": 0.02274169921875, "learning_rate": 9.985484236944723e-06, "loss": 0.0009119081310927868, "memory(GiB)": 25.29, "reward": 0.2615999788045883, "reward_std": 0.2737917542457581, "rewards/MMContentORM/mean": 0.12650000676512718, "rewards/MMContentORM/std": 0.7763458490371704, "rewards/MMFormatORM/mean": 0.5774999856948853, "rewards/MMFormatORM/std": 0.19905767738819122, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2989355862140656, "step": 305, "train_speed(iter/s)": 0.081951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.2, "completions/mean_length": 189.0875, "completions/min_length": 117.0, "epoch": 0.14882381180988957, "frac_reward_zero_std": 0.125, "grad_norm": 0.8391085863113403, "kl": 0.0144775390625, "learning_rate": 9.983933633138607e-06, "loss": 0.000579320639371872, "memory(GiB)": 25.29, "reward": 0.2854999780654907, "reward_std": 0.23037539422512054, "rewards/MMContentORM/mean": 0.14999999850988388, "rewards/MMContentORM/std": 0.6774580955505372, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 310, "train_speed(iter/s)": 0.082155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.4, "completions/mean_length": 205.75, "completions/min_length": 138.6, "epoch": 0.15122419587133942, "frac_reward_zero_std": 0.175, "grad_norm": 0.4532962143421173, "kl": 0.014453125, "learning_rate": 9.982304491423607e-06, "loss": 0.0005786891095340251, "memory(GiB)": 25.29, "reward": 0.2162499874830246, "reward_std": 0.26664996445178984, "rewards/MMContentORM/mean": 0.019999995827674866, "rewards/MMContentORM/std": 0.7253228902816773, "rewards/MMFormatORM/mean": 0.5768749833106994, "rewards/MMFormatORM/std": 0.2062115788459778, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.3172485947608948, "step": 315, "train_speed(iter/s)": 0.082254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.2, "completions/mean_length": 186.875, "completions/min_length": 129.8, "epoch": 0.15362457993278925, "frac_reward_zero_std": 0.25, "grad_norm": 0.3340102732181549, "kl": 0.017840576171875, "learning_rate": 9.980596837472085e-06, "loss": 0.000713213300332427, "memory(GiB)": 25.29, "reward": 0.2187499910593033, "reward_std": 0.20993999540805816, "rewards/MMContentORM/mean": 0.030000004172325134, "rewards/MMContentORM/std": 0.716671884059906, "rewards/MMFormatORM/mean": 0.5731249749660492, "rewards/MMFormatORM/std": 0.19444467574357988, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.27606874108314516, "step": 320, "train_speed(iter/s)": 0.082482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.2, "completions/mean_length": 190.875, "completions/min_length": 119.0, "epoch": 0.15602496399423907, "frac_reward_zero_std": 0.175, "grad_norm": 0.297242671251297, "kl": 0.016180419921875, "learning_rate": 9.978810698193628e-06, "loss": 0.0006479379255324603, "memory(GiB)": 25.29, "reward": 0.36794998943805696, "reward_std": 0.2478409305214882, "rewards/MMContentORM/mean": 0.37049999833106995, "rewards/MMContentORM/std": 0.6727034986019135, "rewards/MMFormatORM/mean": 0.5931249797344208, "rewards/MMFormatORM/std": 0.1350412219762802, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2077557325363159, "step": 325, "train_speed(iter/s)": 0.082645 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.4, "completions/mean_length": 188.0875, "completions/min_length": 105.4, "epoch": 0.15842534805568892, "frac_reward_zero_std": 0.25, "grad_norm": 0.25825080275535583, "kl": 0.016131591796875, "learning_rate": 9.976946101734607e-06, "loss": 0.0006450886372476816, "memory(GiB)": 25.29, "reward": 0.32744998335838316, "reward_std": 0.2085257887840271, "rewards/MMContentORM/mean": 0.2405000112950802, "rewards/MMContentORM/std": 0.7213131546974182, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 330, "train_speed(iter/s)": 0.082761 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.2, "completions/mean_length": 188.65, "completions/min_length": 133.8, "epoch": 0.16082573211713874, "frac_reward_zero_std": 0.15, "grad_norm": 0.2867213189601898, "kl": 0.01766357421875, "learning_rate": 9.975003077477733e-06, "loss": 0.0007068701088428497, "memory(GiB)": 25.29, "reward": 0.3085499942302704, "reward_std": 0.19721208810806273, "rewards/MMContentORM/mean": 0.22200000584125518, "rewards/MMContentORM/std": 0.6932164669036865, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.16571036279201506, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 335, "train_speed(iter/s)": 0.082968 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.0, "completions/mean_length": 188.6, "completions/min_length": 118.8, "epoch": 0.16322611617858857, "frac_reward_zero_std": 0.35, "grad_norm": 0.21676421165466309, "kl": 0.0164306640625, "learning_rate": 9.97298165604161e-06, "loss": 0.0006582758855074644, "memory(GiB)": 25.29, "reward": 0.2879999876022339, "reward_std": 0.1766352742910385, "rewards/MMContentORM/mean": 0.1849999964237213, "rewards/MMContentORM/std": 0.7507146120071411, "rewards/MMFormatORM/mean": 0.5849999904632568, "rewards/MMFormatORM/std": 0.19430812299251557, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2989355862140656, "step": 340, "train_speed(iter/s)": 0.083168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.4, "completions/mean_length": 185.2, "completions/min_length": 111.8, "epoch": 0.16562650024003842, "frac_reward_zero_std": 0.2, "grad_norm": 0.21741819381713867, "kl": 0.017755126953125, "learning_rate": 9.970881869280231e-06, "loss": 0.0007105268072336912, "memory(GiB)": 25.29, "reward": 0.31114999651908876, "reward_std": 0.21616254448890687, "rewards/MMContentORM/mean": 0.22849999815225602, "rewards/MMContentORM/std": 0.7147838234901428, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.16571036279201506, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 345, "train_speed(iter/s)": 0.08331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.8, "completions/mean_length": 193.775, "completions/min_length": 107.8, "epoch": 0.16802688430148824, "frac_reward_zero_std": 0.25, "grad_norm": 0.25716090202331543, "kl": 0.01529541015625, "learning_rate": 9.968703750282498e-06, "loss": 0.0006120001431554556, "memory(GiB)": 25.29, "reward": 0.3012999713420868, "reward_std": 0.24819448292255403, "rewards/MMContentORM/mean": 0.18950000554323196, "rewards/MMContentORM/std": 0.7667634725570679, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.17440344989299775, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.2683130085468292, "step": 350, "train_speed(iter/s)": 0.083431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.2, "completions/mean_length": 199.3625, "completions/min_length": 131.2, "epoch": 0.17042726836293806, "frac_reward_zero_std": 0.125, "grad_norm": 0.29177579283714294, "kl": 0.016534423828125, "learning_rate": 9.966447333371679e-06, "loss": 0.0006617675069719553, "memory(GiB)": 25.29, "reward": 0.3361999988555908, "reward_std": 0.2548412889242172, "rewards/MMContentORM/mean": 0.2804999977350235, "rewards/MMContentORM/std": 0.7203467965126038, "rewards/MMFormatORM/mean": 0.5974999785423278, "rewards/MMFormatORM/std": 0.1303652733564377, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 355, "train_speed(iter/s)": 0.083425 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.0, "completions/mean_length": 194.0125, "completions/min_length": 119.0, "epoch": 0.1728276524243879, "frac_reward_zero_std": 0.275, "grad_norm": 0.22960415482521057, "kl": 0.015142822265625, "learning_rate": 9.964112654104881e-06, "loss": 0.0006059727631509304, "memory(GiB)": 25.29, "reward": 0.18509998098015784, "reward_std": 0.19671710431575776, "rewards/MMContentORM/mean": -0.043500003218650815, "rewards/MMContentORM/std": 0.6924860835075378, "rewards/MMFormatORM/mean": 0.5687499880790711, "rewards/MMFormatORM/std": 0.17440344989299775, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.2683130085468292, "step": 360, "train_speed(iter/s)": 0.083524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 339.6, "completions/mean_length": 199.275, "completions/min_length": 126.8, "epoch": 0.17522803648583773, "frac_reward_zero_std": 0.2, "grad_norm": 0.22960397601127625, "kl": 0.014129638671875, "learning_rate": 9.961699749272491e-06, "loss": 0.00056455098092556, "memory(GiB)": 25.29, "reward": 0.28239999413490297, "reward_std": 0.15443212017416955, "rewards/MMContentORM/mean": 0.13350000753998756, "rewards/MMContentORM/std": 0.694654929637909, "rewards/MMFormatORM/mean": 0.6037499666213989, "rewards/MMFormatORM/std": 0.1227274090051651, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 365, "train_speed(iter/s)": 0.083611 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 420.8, "completions/mean_length": 195.0375, "completions/min_length": 104.8, "epoch": 0.17762842054728756, "frac_reward_zero_std": 0.275, "grad_norm": 0.247096449136734, "kl": 0.0181640625, "learning_rate": 9.959208656897584e-06, "loss": 0.000726937735453248, "memory(GiB)": 25.29, "reward": 0.3057999789714813, "reward_std": 0.21651609390974044, "rewards/MMContentORM/mean": 0.22949999421834946, "rewards/MMContentORM/std": 0.7364683985710144, "rewards/MMFormatORM/mean": 0.5849999845027923, "rewards/MMFormatORM/std": 0.16754122078418732, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2577557325363159, "step": 370, "train_speed(iter/s)": 0.08347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.6, "completions/mean_length": 181.1, "completions/min_length": 111.0, "epoch": 0.1800288046087374, "frac_reward_zero_std": 0.35, "grad_norm": 0.2202872931957245, "kl": 0.01561279296875, "learning_rate": 9.956639416235337e-06, "loss": 0.0006248470395803452, "memory(GiB)": 25.29, "reward": 0.3359999775886536, "reward_std": 0.12784490436315538, "rewards/MMContentORM/mean": 0.2475000001490116, "rewards/MMContentORM/std": 0.6804847836494445, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 375, "train_speed(iter/s)": 0.083633 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.0, "completions/mean_length": 188.625, "completions/min_length": 126.0, "epoch": 0.18242918867018723, "frac_reward_zero_std": 0.1, "grad_norm": 0.3163186311721802, "kl": 0.02537841796875, "learning_rate": 9.953992067772402e-06, "loss": 0.0010158225893974304, "memory(GiB)": 25.29, "reward": 0.3191999852657318, "reward_std": 0.2754888117313385, "rewards/MMContentORM/mean": 0.2705000042915344, "rewards/MMContentORM/std": 0.7479893922805786, "rewards/MMFormatORM/mean": 0.5774999737739563, "rewards/MMFormatORM/std": 0.19774004817008972, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2989355862140656, "step": 380, "train_speed(iter/s)": 0.083784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.0, "completions/mean_length": 196.3625, "completions/min_length": 104.0, "epoch": 0.18482957273163705, "frac_reward_zero_std": 0.075, "grad_norm": 0.2652575373649597, "kl": 0.025384521484375, "learning_rate": 9.95126665322627e-06, "loss": 0.001015142910182476, "memory(GiB)": 25.29, "reward": 0.33874998092651365, "reward_std": 0.2068287432193756, "rewards/MMContentORM/mean": 0.28500000238418577, "rewards/MMContentORM/std": 0.7165241241455078, "rewards/MMFormatORM/mean": 0.5993749856948852, "rewards/MMFormatORM/std": 0.16130690574645995, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 385, "train_speed(iter/s)": 0.083924 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.8, "completions/mean_length": 184.35, "completions/min_length": 107.2, "epoch": 0.1872299567930869, "frac_reward_zero_std": 0.175, "grad_norm": 0.25149068236351013, "kl": 0.024176025390625, "learning_rate": 9.948463215544617e-06, "loss": 0.0009666066616773605, "memory(GiB)": 25.29, "reward": 0.36579999327659607, "reward_std": 0.18809040486812592, "rewards/MMContentORM/mean": 0.32200001180171967, "rewards/MMContentORM/std": 0.6846403241157532, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 390, "train_speed(iter/s)": 0.083997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.0, "completions/mean_length": 171.7125, "completions/min_length": 89.8, "epoch": 0.18963034085453673, "frac_reward_zero_std": 0.225, "grad_norm": 0.2679647207260132, "kl": 0.023345947265625, "learning_rate": 9.945581798904623e-06, "loss": 0.0009329639375209809, "memory(GiB)": 25.29, "reward": 0.2854999899864197, "reward_std": 0.24409326910972595, "rewards/MMContentORM/mean": 0.18250000327825547, "rewards/MMContentORM/std": 0.7319893360137939, "rewards/MMFormatORM/mean": 0.5812499761581421, "rewards/MMFormatORM/std": 0.1992675095796585, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.3049390256404877, "step": 395, "train_speed(iter/s)": 0.084235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 182.8, "completions/min_length": 118.4, "epoch": 0.19203072491598655, "frac_reward_zero_std": 0.3, "grad_norm": 0.283568799495697, "kl": 0.022515869140625, "learning_rate": 9.942622448712276e-06, "loss": 0.0009008722379803657, "memory(GiB)": 25.29, "reward": 0.3700499892234802, "reward_std": 0.1973535120487213, "rewards/MMContentORM/mean": 0.34700000286102295, "rewards/MMContentORM/std": 0.7341425061225891, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 400, "train_speed(iter/s)": 0.0845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.8, "completions/mean_length": 192.4875, "completions/min_length": 123.8, "epoch": 0.1944311089774364, "frac_reward_zero_std": 0.225, "grad_norm": 0.23822158575057983, "kl": 0.020452880859375, "learning_rate": 9.93958521160166e-06, "loss": 0.0008180794306099415, "memory(GiB)": 25.29, "reward": 0.3346499800682068, "reward_std": 0.20442457497119904, "rewards/MMContentORM/mean": 0.2585000067949295, "rewards/MMContentORM/std": 0.7207361459732056, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 405, "train_speed(iter/s)": 0.084282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.0, "completions/mean_length": 183.3375, "completions/min_length": 130.0, "epoch": 0.19683149303888622, "frac_reward_zero_std": 0.325, "grad_norm": 0.21869473159313202, "kl": 0.017327880859375, "learning_rate": 9.936470135434219e-06, "loss": 0.000694124260917306, "memory(GiB)": 25.29, "reward": 0.3697000026702881, "reward_std": 0.18314065933227539, "rewards/MMContentORM/mean": 0.3479999989271164, "rewards/MMContentORM/std": 0.7096795082092285, "rewards/MMFormatORM/mean": 0.6074999928474426, "rewards/MMFormatORM/std": 0.12490466833114625, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 410, "train_speed(iter/s)": 0.084451 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.4, "completions/mean_length": 188.8625, "completions/min_length": 132.2, "epoch": 0.19923187710033605, "frac_reward_zero_std": 0.25, "grad_norm": 0.26058775186538696, "kl": 0.019140625, "learning_rate": 9.933277269297995e-06, "loss": 0.0007644101046025753, "memory(GiB)": 25.29, "reward": 0.3062999933958054, "reward_std": 0.23659793436527252, "rewards/MMContentORM/mean": 0.20200001299381257, "rewards/MMContentORM/std": 0.7170636773109436, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 415, "train_speed(iter/s)": 0.084554 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.0, "completions/mean_length": 182.9875, "completions/min_length": 107.2, "epoch": 0.2016322611617859, "frac_reward_zero_std": 0.275, "grad_norm": 0.2722574472427368, "kl": 0.024627685546875, "learning_rate": 9.930006663506872e-06, "loss": 0.0009830674156546594, "memory(GiB)": 25.29, "reward": 0.393399977684021, "reward_std": 0.2146776258945465, "rewards/MMContentORM/mean": 0.3910000085830688, "rewards/MMContentORM/std": 0.68272864818573, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 420, "train_speed(iter/s)": 0.084714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.6, "completions/mean_length": 192.0, "completions/min_length": 112.0, "epoch": 0.20403264522323572, "frac_reward_zero_std": 0.325, "grad_norm": 0.25141969323158264, "kl": 0.017279052734375, "learning_rate": 9.926658369599761e-06, "loss": 0.0006905121728777886, "memory(GiB)": 25.29, "reward": 0.3258499801158905, "reward_std": 0.20725299715995787, "rewards/MMContentORM/mean": 0.23650000989437103, "rewards/MMContentORM/std": 0.7136297464370728, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 425, "train_speed(iter/s)": 0.084795 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.8, "completions/mean_length": 189.6625, "completions/min_length": 128.2, "epoch": 0.20643302928468554, "frac_reward_zero_std": 0.325, "grad_norm": 0.23711970448493958, "kl": 0.022698974609375, "learning_rate": 9.923232440339811e-06, "loss": 0.0009088035672903061, "memory(GiB)": 25.29, "reward": 0.40454998016357424, "reward_std": 0.1615738956257701, "rewards/MMContentORM/mean": 0.4044999837875366, "rewards/MMContentORM/std": 0.6727037191390991, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 430, "train_speed(iter/s)": 0.084935 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.4, "completions/mean_length": 196.9, "completions/min_length": 134.6, "epoch": 0.2088334133461354, "frac_reward_zero_std": 0.3, "grad_norm": 0.2575157582759857, "kl": 0.021795654296875, "learning_rate": 9.919728929713555e-06, "loss": 0.0008713678456842899, "memory(GiB)": 25.29, "reward": 0.3168999910354614, "reward_std": 0.22896117568016053, "rewards/MMContentORM/mean": 0.2859999895095825, "rewards/MMContentORM/std": 0.7667613625526428, "rewards/MMFormatORM/mean": 0.5687499880790711, "rewards/MMFormatORM/std": 0.18971401453018188, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.291867733001709, "step": 435, "train_speed(iter/s)": 0.085105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.6, "completions/mean_length": 203.775, "completions/min_length": 126.6, "epoch": 0.21123379740758522, "frac_reward_zero_std": 0.325, "grad_norm": 0.19185078144073486, "kl": 0.014251708984375, "learning_rate": 9.916147892930075e-06, "loss": 0.0005701377056539058, "memory(GiB)": 25.29, "reward": 0.3422499805688858, "reward_std": 0.19254517406225205, "rewards/MMContentORM/mean": 0.27750000208616254, "rewards/MMContentORM/std": 0.7132205009460449, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 440, "train_speed(iter/s)": 0.085168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 464.2, "completions/mean_length": 220.8625, "completions/min_length": 117.4, "epoch": 0.21363418146903504, "frac_reward_zero_std": 0.35, "grad_norm": 0.3216782808303833, "kl": 0.016217041015625, "learning_rate": 9.912489386420127e-06, "loss": 0.0006480277515947819, "memory(GiB)": 25.29, "reward": 0.38044998943805697, "reward_std": 0.19636355340480804, "rewards/MMContentORM/mean": 0.4055000066757202, "rewards/MMContentORM/std": 0.6969575762748719, "rewards/MMFormatORM/mean": 0.5893749892711639, "rewards/MMFormatORM/std": 0.17063776403665543, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.23944272398948668, "step": 445, "train_speed(iter/s)": 0.084893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.8, "completions/mean_length": 193.45, "completions/min_length": 100.2, "epoch": 0.2160345655304849, "frac_reward_zero_std": 0.325, "grad_norm": 0.2631596326828003, "kl": 0.01585693359375, "learning_rate": 9.908753467835252e-06, "loss": 0.000633768830448389, "memory(GiB)": 25.29, "reward": 0.31024998873472215, "reward_std": 0.13823937475681305, "rewards/MMContentORM/mean": 0.19750000834465026, "rewards/MMContentORM/std": 0.6008442759513855, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 450, "train_speed(iter/s)": 0.084948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.2, "completions/mean_length": 197.2625, "completions/min_length": 127.0, "epoch": 0.2184349495919347, "frac_reward_zero_std": 0.25, "grad_norm": 0.2569175660610199, "kl": 0.01932373046875, "learning_rate": 9.904940196046867e-06, "loss": 0.0007727490272372961, "memory(GiB)": 25.29, "reward": 0.3388499915599823, "reward_std": 0.20187898278236388, "rewards/MMContentORM/mean": 0.326500004529953, "rewards/MMContentORM/std": 0.7351299285888672, "rewards/MMFormatORM/mean": 0.5768749713897705, "rewards/MMFormatORM/std": 0.1856150358915329, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.285561603307724, "step": 455, "train_speed(iter/s)": 0.085103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.6, "completions/mean_length": 192.1625, "completions/min_length": 112.2, "epoch": 0.22083533365338454, "frac_reward_zero_std": 0.225, "grad_norm": 0.295173704624176, "kl": 0.023602294921875, "learning_rate": 9.901049631145336e-06, "loss": 0.0009442863985896111, "memory(GiB)": 25.29, "reward": 0.3670499801635742, "reward_std": 0.22988041043281554, "rewards/MMContentORM/mean": 0.33950000405311587, "rewards/MMContentORM/std": 0.7248481631278991, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 460, "train_speed(iter/s)": 0.085231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.6, "completions/mean_length": 198.8625, "completions/min_length": 134.2, "epoch": 0.2232357177148344, "frac_reward_zero_std": 0.425, "grad_norm": 0.18804140388965607, "kl": 0.01673583984375, "learning_rate": 9.897081834439026e-06, "loss": 0.0006706462241709233, "memory(GiB)": 25.29, "reward": 0.3739499866962433, "reward_std": 0.16638222634792327, "rewards/MMContentORM/mean": 0.3279999911785126, "rewards/MMContentORM/std": 0.6578051447868347, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 465, "train_speed(iter/s)": 0.085203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 437.2, "completions/mean_length": 207.525, "completions/min_length": 127.8, "epoch": 0.2256361017762842, "frac_reward_zero_std": 0.325, "grad_norm": 0.2759953439235687, "kl": 0.017724609375, "learning_rate": 9.89303686845334e-06, "loss": 0.0007088197395205498, "memory(GiB)": 25.29, "reward": 0.33759998679161074, "reward_std": 0.26276087909936907, "rewards/MMContentORM/mean": 0.35399999022483825, "rewards/MMContentORM/std": 0.7571944236755371, "rewards/MMFormatORM/mean": 0.5587499856948852, "rewards/MMFormatORM/std": 0.2141388863325119, "rewards/MMRubricORM/mean": -0.1375, "rewards/MMRubricORM/std": 0.3288854479789734, "step": 470, "train_speed(iter/s)": 0.084976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.0, "completions/mean_length": 202.15, "completions/min_length": 133.0, "epoch": 0.22803648583773403, "frac_reward_zero_std": 0.3, "grad_norm": 0.24971622228622437, "kl": 0.016107177734375, "learning_rate": 9.888914796929732e-06, "loss": 0.000644554104655981, "memory(GiB)": 25.29, "reward": 0.42414999604225156, "reward_std": 0.18872679471969606, "rewards/MMContentORM/mean": 0.45350001454353334, "rewards/MMContentORM/std": 0.6859857916831971, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 475, "train_speed(iter/s)": 0.085055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.4, "completions/mean_length": 203.1875, "completions/min_length": 129.2, "epoch": 0.23043686989918388, "frac_reward_zero_std": 0.4, "grad_norm": 0.1769513338804245, "kl": 0.0209716796875, "learning_rate": 9.884715684824698e-06, "loss": 0.000839579850435257, "memory(GiB)": 25.29, "reward": 0.38434997797012327, "reward_std": 0.15351288318634032, "rewards/MMContentORM/mean": 0.39899998605251313, "rewards/MMContentORM/std": 0.7158730506896973, "rewards/MMFormatORM/mean": 0.5993749916553497, "rewards/MMFormatORM/std": 0.1306377649307251, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.1894427239894867, "step": 480, "train_speed(iter/s)": 0.085104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.6, "completions/mean_length": 194.1875, "completions/min_length": 132.0, "epoch": 0.2328372539606337, "frac_reward_zero_std": 0.25, "grad_norm": 0.23804667592048645, "kl": 0.014935302734375, "learning_rate": 9.880439598308759e-06, "loss": 0.0005985048599541187, "memory(GiB)": 25.29, "reward": 0.44059998989105226, "reward_std": 0.19487863630056382, "rewards/MMContentORM/mean": 0.5090000003576278, "rewards/MMContentORM/std": 0.6222364962100982, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 485, "train_speed(iter/s)": 0.085257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.2, "completions/mean_length": 205.2375, "completions/min_length": 136.2, "epoch": 0.23523763802208353, "frac_reward_zero_std": 0.45, "grad_norm": 0.2561754882335663, "kl": 0.013427734375, "learning_rate": 9.876086604765416e-06, "loss": 0.0005371436476707458, "memory(GiB)": 25.29, "reward": 0.36444997787475586, "reward_std": 0.1216930739581585, "rewards/MMContentORM/mean": 0.3330000042915344, "rewards/MMContentORM/std": 0.6553688704967499, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 490, "train_speed(iter/s)": 0.085298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.2, "completions/mean_length": 195.65, "completions/min_length": 130.2, "epoch": 0.23763802208353338, "frac_reward_zero_std": 0.4, "grad_norm": 0.14273209869861603, "kl": 0.019012451171875, "learning_rate": 9.871656772790088e-06, "loss": 0.0007593894377350807, "memory(GiB)": 25.29, "reward": 0.3240499943494797, "reward_std": 0.17599887698888778, "rewards/MMContentORM/mean": 0.26449999809265134, "rewards/MMContentORM/std": 0.7485530972480774, "rewards/MMFormatORM/mean": 0.5893749833106995, "rewards/MMFormatORM/std": 0.14441428184509278, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.21124515533447266, "step": 495, "train_speed(iter/s)": 0.085438 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.0, "completions/mean_length": 206.625, "completions/min_length": 114.4, "epoch": 0.2400384061449832, "frac_reward_zero_std": 0.4, "grad_norm": 0.1166425347328186, "kl": 0.02442626953125, "learning_rate": 9.86715017218903e-06, "loss": 0.0009763010777533055, "memory(GiB)": 25.29, "reward": 0.41664999127388, "reward_std": 0.15973542779684066, "rewards/MMContentORM/mean": 0.4634999930858612, "rewards/MMContentORM/std": 0.678302276134491, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 500, "train_speed(iter/s)": 0.085421 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.0, "completions/mean_length": 203.2125, "completions/min_length": 135.0, "epoch": 0.24243879020643302, "frac_reward_zero_std": 0.375, "grad_norm": 0.1965927928686142, "kl": 0.01986083984375, "learning_rate": 9.862566873978227e-06, "loss": 0.000794212706387043, "memory(GiB)": 25.29, "reward": 0.4267499804496765, "reward_std": 0.12225875928997994, "rewards/MMContentORM/mean": 0.4600000023841858, "rewards/MMContentORM/std": 0.6565978765487671, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 505, "train_speed(iter/s)": 0.085301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.2, "completions/mean_length": 196.475, "completions/min_length": 123.0, "epoch": 0.24483917426788285, "frac_reward_zero_std": 0.35, "grad_norm": 0.19395841658115387, "kl": 0.017010498046875, "learning_rate": 9.857906950382297e-06, "loss": 0.0006808775477111339, "memory(GiB)": 25.29, "reward": 0.3696999788284302, "reward_std": 0.15372501760721208, "rewards/MMContentORM/mean": 0.36050000339746474, "rewards/MMContentORM/std": 0.6960474014282226, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 510, "train_speed(iter/s)": 0.085386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.0, "completions/mean_length": 211.9125, "completions/min_length": 141.8, "epoch": 0.2472395583293327, "frac_reward_zero_std": 0.375, "grad_norm": 0.3381326496601105, "kl": 0.027838134765625, "learning_rate": 9.853170474833323e-06, "loss": 0.0011151479557156563, "memory(GiB)": 25.29, "reward": 0.37864998877048495, "reward_std": 0.23044609874486924, "rewards/MMContentORM/mean": 0.4135000079870224, "rewards/MMContentORM/std": 0.7175926685333252, "rewards/MMFormatORM/mean": 0.5831249773502349, "rewards/MMFormatORM/std": 0.16790457367897033, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2577557325363159, "step": 515, "train_speed(iter/s)": 0.085432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.0, "completions/mean_length": 208.3, "completions/min_length": 109.2, "epoch": 0.24963994239078252, "frac_reward_zero_std": 0.4, "grad_norm": 0.20441797375679016, "kl": 0.021405029296875, "learning_rate": 9.848357521969716e-06, "loss": 0.0008581820875406265, "memory(GiB)": 25.29, "reward": 0.36869998276233673, "reward_std": 0.2739331744611263, "rewards/MMContentORM/mean": 0.41549999415874483, "rewards/MMContentORM/std": 0.7344144463539124, "rewards/MMFormatORM/mean": 0.568749976158142, "rewards/MMFormatORM/std": 0.1936162531375885, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.2978711724281311, "step": 520, "train_speed(iter/s)": 0.085467 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.2, "completions/mean_length": 201.225, "completions/min_length": 113.4, "epoch": 0.25204032645223234, "frac_reward_zero_std": 0.375, "grad_norm": 0.18100592494010925, "kl": 0.030682373046875, "learning_rate": 9.843468167635034e-06, "loss": 0.0012254069559276104, "memory(GiB)": 25.63, "reward": 0.3865999817848206, "reward_std": 0.2231628954410553, "rewards/MMContentORM/mean": 0.41899999380111697, "rewards/MMContentORM/std": 0.7231726169586181, "rewards/MMFormatORM/mean": 0.5912499785423279, "rewards/MMFormatORM/std": 0.18667025864124298, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 525, "train_speed(iter/s)": 0.085558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.0, "completions/mean_length": 192.2625, "completions/min_length": 113.8, "epoch": 0.25444071051368217, "frac_reward_zero_std": 0.55, "grad_norm": 0.21060362458229065, "kl": 0.02144775390625, "learning_rate": 9.838502488876785e-06, "loss": 0.0008578533306717873, "memory(GiB)": 25.63, "reward": 0.38974998593330384, "reward_std": 0.11646047895774245, "rewards/MMContentORM/mean": 0.425, "rewards/MMContentORM/std": 0.7169785857200622, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.13730934262275696, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.21124515533447266, "step": 530, "train_speed(iter/s)": 0.085629 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.6, "completions/mean_length": 196.6625, "completions/min_length": 139.8, "epoch": 0.25684109457513205, "frac_reward_zero_std": 0.425, "grad_norm": 0.215934157371521, "kl": 0.017657470703125, "learning_rate": 9.833460563945213e-06, "loss": 0.0007070350926369429, "memory(GiB)": 25.63, "reward": 0.4114499926567078, "reward_std": 0.18292852416634559, "rewards/MMContentORM/mean": 0.45050002038478854, "rewards/MMContentORM/std": 0.679290497303009, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 535, "train_speed(iter/s)": 0.085733 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.6, "completions/mean_length": 199.0875, "completions/min_length": 125.4, "epoch": 0.25924147863658187, "frac_reward_zero_std": 0.375, "grad_norm": 0.3002188503742218, "kl": 0.01728515625, "learning_rate": 9.828342472292063e-06, "loss": 0.0006916997022926808, "memory(GiB)": 25.63, "reward": 0.3853999853134155, "reward_std": 0.1479267368093133, "rewards/MMContentORM/mean": 0.37100000232458114, "rewards/MMContentORM/std": 0.6368636965751648, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 540, "train_speed(iter/s)": 0.085766 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.4, "completions/mean_length": 202.4125, "completions/min_length": 129.8, "epoch": 0.2616418626980317, "frac_reward_zero_std": 0.325, "grad_norm": 0.1911313533782959, "kl": 0.019146728515625, "learning_rate": 9.823148294569342e-06, "loss": 0.0007662074174731969, "memory(GiB)": 25.63, "reward": 0.31649998128414153, "reward_std": 0.2351837173104286, "rewards/MMContentORM/mean": 0.25999999046325684, "rewards/MMContentORM/std": 0.7403302311897277, "rewards/MMFormatORM/mean": 0.5812499761581421, "rewards/MMFormatORM/std": 0.17399703860282897, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2672485947608948, "step": 545, "train_speed(iter/s)": 0.085865 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 440.2, "completions/mean_length": 210.1125, "completions/min_length": 121.8, "epoch": 0.2640422467594815, "frac_reward_zero_std": 0.575, "grad_norm": 0.13542525470256805, "kl": 0.019854736328125, "learning_rate": 9.817878112628026e-06, "loss": 0.0007948323152959346, "memory(GiB)": 25.63, "reward": 0.4270999848842621, "reward_std": 0.14835099875926971, "rewards/MMContentORM/mean": 0.5040000081062317, "rewards/MMContentORM/std": 0.6937057256698609, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 550, "train_speed(iter/s)": 0.08564 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.2, "completions/mean_length": 191.6125, "completions/min_length": 107.2, "epoch": 0.26644263082093134, "frac_reward_zero_std": 0.375, "grad_norm": 0.24546104669570923, "kl": 0.0204833984375, "learning_rate": 9.812532009516787e-06, "loss": 0.000820968858897686, "memory(GiB)": 25.63, "reward": 0.4259999990463257, "reward_std": 0.14651251956820488, "rewards/MMContentORM/mean": 0.47249999046325686, "rewards/MMContentORM/std": 0.6199936449527741, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 555, "train_speed(iter/s)": 0.085693 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.0, "completions/mean_length": 201.3375, "completions/min_length": 147.8, "epoch": 0.26884301488238116, "frac_reward_zero_std": 0.475, "grad_norm": 0.19194868206977844, "kl": 0.0193115234375, "learning_rate": 9.807110069480682e-06, "loss": 0.0007728527300059796, "memory(GiB)": 25.63, "reward": 0.4393999844789505, "reward_std": 0.15315932929515838, "rewards/MMContentORM/mean": 0.5384999871253967, "rewards/MMContentORM/std": 0.678757655620575, "rewards/MMFormatORM/mean": 0.5974999785423278, "rewards/MMFormatORM/std": 0.10973276048898697, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.14574271440505981, "step": 560, "train_speed(iter/s)": 0.085797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.8, "completions/mean_length": 198.1875, "completions/min_length": 138.0, "epoch": 0.27124339894383104, "frac_reward_zero_std": 0.35, "grad_norm": 0.2646057605743408, "kl": 0.02037353515625, "learning_rate": 9.801612377959817e-06, "loss": 0.0008142871782183647, "memory(GiB)": 25.63, "reward": 0.49279999136924746, "reward_std": 0.07127636531367898, "rewards/MMContentORM/mean": 0.5819999873638153, "rewards/MMContentORM/std": 0.6154716610908508, "rewards/MMFormatORM/mean": 0.6499999761581421, "rewards/MMFormatORM/std": 0.0, "rewards/MMRubricORM/mean": 0.0, "rewards/MMRubricORM/std": 0.0, "step": 565, "train_speed(iter/s)": 0.085862 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.8, "completions/mean_length": 194.2625, "completions/min_length": 130.8, "epoch": 0.27364378300528086, "frac_reward_zero_std": 0.45, "grad_norm": 0.25992026925086975, "kl": 0.020379638671875, "learning_rate": 9.796039021588011e-06, "loss": 0.0008148624561727047, "memory(GiB)": 25.63, "reward": 0.3938499867916107, "reward_std": 0.1806657761335373, "rewards/MMContentORM/mean": 0.40650000274181364, "rewards/MMContentORM/std": 0.6918909192085266, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 570, "train_speed(iter/s)": 0.085903 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 435.8, "completions/mean_length": 207.45, "completions/min_length": 110.0, "epoch": 0.2760441670667307, "frac_reward_zero_std": 0.325, "grad_norm": 0.19565346837043762, "kl": 0.13974609375, "learning_rate": 9.790390088191423e-06, "loss": 0.005578663945198059, "memory(GiB)": 25.63, "reward": 0.36904999017715456, "reward_std": 0.21177847310900688, "rewards/MMContentORM/mean": 0.40199999809265136, "rewards/MMContentORM/std": 0.7141570091247559, "rewards/MMFormatORM/mean": 0.5768749713897705, "rewards/MMFormatORM/std": 0.1856150358915329, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.285561603307724, "step": 575, "train_speed(iter/s)": 0.085739 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 444.2, "completions/mean_length": 208.9625, "completions/min_length": 132.0, "epoch": 0.2784445511281805, "frac_reward_zero_std": 0.45, "grad_norm": 0.1956750750541687, "kl": 0.026104736328125, "learning_rate": 9.784665666787176e-06, "loss": 0.0010431693866848946, "memory(GiB)": 25.63, "reward": 0.5113999903202057, "reward_std": 0.13548165708780288, "rewards/MMContentORM/mean": 0.6860000014305114, "rewards/MMContentORM/std": 0.5733676970005035, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 580, "train_speed(iter/s)": 0.085557 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.2, "completions/mean_length": 198.375, "completions/min_length": 128.6, "epoch": 0.28084493518963033, "frac_reward_zero_std": 0.5, "grad_norm": 0.1519889384508133, "kl": 0.02601318359375, "learning_rate": 9.778865847581941e-06, "loss": 0.0010399827733635902, "memory(GiB)": 25.63, "reward": 0.3574499785900116, "reward_std": 0.22040518671274184, "rewards/MMContentORM/mean": 0.3730000019073486, "rewards/MMContentORM/std": 0.7398205995559692, "rewards/MMFormatORM/mean": 0.5768749952316284, "rewards/MMFormatORM/std": 0.19223275780677795, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.29574271440505984, "step": 585, "train_speed(iter/s)": 0.085641 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.2, "completions/mean_length": 198.475, "completions/min_length": 120.0, "epoch": 0.28324531925108015, "frac_reward_zero_std": 0.475, "grad_norm": 0.3285408318042755, "kl": 0.02108154296875, "learning_rate": 9.772990721970534e-06, "loss": 0.0008435861207544803, "memory(GiB)": 25.63, "reward": 0.4222499907016754, "reward_std": 0.10656098783947528, "rewards/MMContentORM/mean": 0.4525000065565109, "rewards/MMContentORM/std": 0.6456537485122681, "rewards/MMFormatORM/mean": 0.621874988079071, "rewards/MMFormatORM/std": 0.11249999552965165, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 590, "train_speed(iter/s)": 0.085706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.8, "completions/mean_length": 194.6, "completions/min_length": 110.8, "epoch": 0.28564570331253003, "frac_reward_zero_std": 0.55, "grad_norm": 0.26459255814552307, "kl": 0.02178955078125, "learning_rate": 9.767040382534456e-06, "loss": 0.000872167106717825, "memory(GiB)": 25.63, "reward": 0.4603499710559845, "reward_std": 0.0990656575653702, "rewards/MMContentORM/mean": 0.5440000176429749, "rewards/MMContentORM/std": 0.6181544482707977, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 595, "train_speed(iter/s)": 0.085788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.8, "completions/mean_length": 206.9, "completions/min_length": 118.2, "epoch": 0.28804608737397985, "frac_reward_zero_std": 0.5, "grad_norm": 0.24745135009288788, "kl": 0.018096923828125, "learning_rate": 9.761014923040453e-06, "loss": 0.0007242465391755104, "memory(GiB)": 25.63, "reward": 0.45274998545646666, "reward_std": 0.096237235609442, "rewards/MMContentORM/mean": 0.5249999940395356, "rewards/MMContentORM/std": 0.6533244967460632, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 600, "train_speed(iter/s)": 0.085872 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.8, "completions/mean_length": 201.075, "completions/min_length": 126.2, "epoch": 0.2904464714354297, "frac_reward_zero_std": 0.35, "grad_norm": 0.27756911516189575, "kl": 0.0199951171875, "learning_rate": 9.754914438439021e-06, "loss": 0.0007998712360858917, "memory(GiB)": 25.63, "reward": 0.4426999866962433, "reward_std": 0.14665394686162472, "rewards/MMContentORM/mean": 0.5430000066757202, "rewards/MMContentORM/std": 0.6288729965686798, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 605, "train_speed(iter/s)": 0.085721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.2, "completions/mean_length": 209.825, "completions/min_length": 141.2, "epoch": 0.2928468554968795, "frac_reward_zero_std": 0.4, "grad_norm": 0.24841666221618652, "kl": 0.01837158203125, "learning_rate": 9.748739024862923e-06, "loss": 0.0007352313958108425, "memory(GiB)": 25.63, "reward": 0.4437499940395355, "reward_std": 0.1993333987891674, "rewards/MMContentORM/mean": 0.5800000041723251, "rewards/MMContentORM/std": 0.6180064260959626, "rewards/MMFormatORM/mean": 0.5793749749660492, "rewards/MMFormatORM/std": 0.11520133018493653, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.17888544797897338, "step": 610, "train_speed(iter/s)": 0.085709 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.0, "completions/mean_length": 214.075, "completions/min_length": 147.6, "epoch": 0.2952472395583293, "frac_reward_zero_std": 0.425, "grad_norm": 0.31791460514068604, "kl": 0.0159912109375, "learning_rate": 9.74248877962567e-06, "loss": 0.0006397653836756944, "memory(GiB)": 25.63, "reward": 0.40454997420310973, "reward_std": 0.10670241061598063, "rewards/MMContentORM/mean": 0.40449999570846557, "rewards/MMContentORM/std": 0.6732450246810913, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 615, "train_speed(iter/s)": 0.085735 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 474.4, "completions/mean_length": 219.6375, "completions/min_length": 134.8, "epoch": 0.29764762361977914, "frac_reward_zero_std": 0.3, "grad_norm": 0.252468466758728, "kl": 0.019781494140625, "learning_rate": 9.73616380121998e-06, "loss": 0.0007909733802080154, "memory(GiB)": 25.63, "reward": 0.3432499796152115, "reward_std": 0.24713381975889206, "rewards/MMContentORM/mean": 0.3699999928474426, "rewards/MMContentORM/std": 0.7536191344261169, "rewards/MMFormatORM/mean": 0.5568749785423279, "rewards/MMFormatORM/std": 0.2235463410615921, "rewards/MMRubricORM/mean": -0.1375, "rewards/MMRubricORM/std": 0.34438174962997437, "step": 620, "train_speed(iter/s)": 0.085518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.8, "completions/mean_length": 212.825, "completions/min_length": 125.0, "epoch": 0.300048007681229, "frac_reward_zero_std": 0.375, "grad_norm": 0.21043799817562103, "kl": 0.017236328125, "learning_rate": 9.729764189316239e-06, "loss": 0.0006894416641443968, "memory(GiB)": 25.63, "reward": 0.46269998550415037, "reward_std": 0.17041273787617683, "rewards/MMContentORM/mean": 0.5930000126361847, "rewards/MMContentORM/std": 0.6541426777839661, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 625, "train_speed(iter/s)": 0.085515 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.0, "completions/mean_length": 209.8125, "completions/min_length": 139.8, "epoch": 0.30244839174267885, "frac_reward_zero_std": 0.375, "grad_norm": 0.18214058876037598, "kl": 0.01575927734375, "learning_rate": 9.72329004476092e-06, "loss": 0.0006303795147687197, "memory(GiB)": 25.63, "reward": 0.4604499816894531, "reward_std": 0.13300678343512118, "rewards/MMContentORM/mean": 0.5730000138282776, "rewards/MMContentORM/std": 0.6502374410629272, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 630, "train_speed(iter/s)": 0.085502 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 438.6, "completions/mean_length": 216.5875, "completions/min_length": 127.6, "epoch": 0.30484877580412867, "frac_reward_zero_std": 0.35, "grad_norm": 0.538560152053833, "kl": 0.01651611328125, "learning_rate": 9.716741469575003e-06, "loss": 0.00066067217849195, "memory(GiB)": 25.63, "reward": 0.3206499844789505, "reward_std": 0.21290984600782395, "rewards/MMContentORM/mean": 0.2810000032186508, "rewards/MMContentORM/std": 0.7222515106201172, "rewards/MMFormatORM/mean": 0.576874977350235, "rewards/MMFormatORM/std": 0.17944467663764954, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.27606874108314516, "step": 635, "train_speed(iter/s)": 0.085343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.0, "completions/mean_length": 209.8875, "completions/min_length": 135.0, "epoch": 0.3072491598655785, "frac_reward_zero_std": 0.35, "grad_norm": 0.22361965477466583, "kl": 0.017071533203125, "learning_rate": 9.710118566952355e-06, "loss": 0.0006829463876783848, "memory(GiB)": 25.63, "reward": 0.34789999127388, "reward_std": 0.1711198389530182, "rewards/MMContentORM/mean": 0.36350000500679014, "rewards/MMContentORM/std": 0.7551613569259643, "rewards/MMFormatORM/mean": 0.5687499821186066, "rewards/MMFormatORM/std": 0.18744589388370514, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.28837831020355226, "step": 640, "train_speed(iter/s)": 0.085374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.8, "completions/mean_length": 200.7875, "completions/min_length": 137.6, "epoch": 0.3096495439270283, "frac_reward_zero_std": 0.35, "grad_norm": 0.2611067295074463, "kl": 0.017279052734375, "learning_rate": 9.703421441258116e-06, "loss": 0.0006911037024110555, "memory(GiB)": 25.63, "reward": 0.5186999857425689, "reward_std": 0.09008539766073227, "rewards/MMContentORM/mean": 0.6755000114440918, "rewards/MMContentORM/std": 0.5361402273178101, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 645, "train_speed(iter/s)": 0.085458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.2, "completions/mean_length": 209.5625, "completions/min_length": 123.0, "epoch": 0.31204992798847814, "frac_reward_zero_std": 0.425, "grad_norm": 0.217108815908432, "kl": 0.01778564453125, "learning_rate": 9.696650198027045e-06, "loss": 0.0007126822136342525, "memory(GiB)": 25.63, "reward": 0.35569998621940613, "reward_std": 0.15895759630948306, "rewards/MMContentORM/mean": 0.3830000050365925, "rewards/MMContentORM/std": 0.7334277391433716, "rewards/MMFormatORM/mean": 0.568749976158142, "rewards/MMFormatORM/std": 0.1936162531375885, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.2978711724281311, "step": 650, "train_speed(iter/s)": 0.085492 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.0, "completions/mean_length": 217.1375, "completions/min_length": 142.2, "epoch": 0.314450312049928, "frac_reward_zero_std": 0.325, "grad_norm": 0.20862624049186707, "kl": 0.016278076171875, "learning_rate": 9.689804943961868e-06, "loss": 0.0006509024649858474, "memory(GiB)": 25.63, "reward": 0.3771999955177307, "reward_std": 0.2166575163602829, "rewards/MMContentORM/mean": 0.4080000162124634, "rewards/MMContentORM/std": 0.7419367551803588, "rewards/MMFormatORM/mean": 0.5849999785423279, "rewards/MMFormatORM/std": 0.19821036159992217, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.3049390256404877, "step": 655, "train_speed(iter/s)": 0.085518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.4, "completions/mean_length": 214.8125, "completions/min_length": 146.4, "epoch": 0.31685069611137784, "frac_reward_zero_std": 0.5, "grad_norm": 0.10907348245382309, "kl": 0.01588134765625, "learning_rate": 9.682885786931581e-06, "loss": 0.0006347180809825659, "memory(GiB)": 25.63, "reward": 0.481499981880188, "reward_std": 0.08216580778826028, "rewards/MMContentORM/mean": 0.5825000107288361, "rewards/MMContentORM/std": 0.592711991071701, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 660, "train_speed(iter/s)": 0.085548 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.8, "completions/mean_length": 199.2, "completions/min_length": 142.8, "epoch": 0.31925108017282766, "frac_reward_zero_std": 0.35, "grad_norm": 0.24605858325958252, "kl": 0.01693115234375, "learning_rate": 9.675892835969767e-06, "loss": 0.0006764709949493408, "memory(GiB)": 25.63, "reward": 0.45569999814033507, "reward_std": 0.10677312165498734, "rewards/MMContentORM/mean": 0.5180000066757202, "rewards/MMContentORM/std": 0.6405679821968079, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 665, "train_speed(iter/s)": 0.08562 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.2, "completions/mean_length": 206.3625, "completions/min_length": 142.4, "epoch": 0.3216514642342775, "frac_reward_zero_std": 0.55, "grad_norm": 0.13116636872291565, "kl": 0.014251708984375, "learning_rate": 9.668826201272866e-06, "loss": 0.0005692524835467338, "memory(GiB)": 25.63, "reward": 0.5005499720573425, "reward_std": 0.08916615936905145, "rewards/MMContentORM/mean": 0.6444999992847442, "rewards/MMContentORM/std": 0.4851596847176552, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 670, "train_speed(iter/s)": 0.085665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.8, "completions/mean_length": 200.4875, "completions/min_length": 114.8, "epoch": 0.3240518482957273, "frac_reward_zero_std": 0.5, "grad_norm": 0.21502132713794708, "kl": 0.01915283203125, "learning_rate": 9.66168599419844e-06, "loss": 0.0007658099755644798, "memory(GiB)": 25.63, "reward": 0.4354999840259552, "reward_std": 0.0936209331266582, "rewards/MMContentORM/mean": 0.5250000029802322, "rewards/MMContentORM/std": 0.6535530805587768, "rewards/MMFormatORM/mean": 0.6012499868869782, "rewards/MMFormatORM/std": 0.12313776612281799, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.1894427239894867, "step": 675, "train_speed(iter/s)": 0.08572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.8, "completions/mean_length": 201.55, "completions/min_length": 131.2, "epoch": 0.32645223235717713, "frac_reward_zero_std": 0.4, "grad_norm": 0.1893157660961151, "kl": 0.01766357421875, "learning_rate": 9.654472327263426e-06, "loss": 0.0007074634078890086, "memory(GiB)": 25.63, "reward": 0.425249981880188, "reward_std": 0.1939593806862831, "rewards/MMContentORM/mean": 0.4850000083446503, "rewards/MMContentORM/std": 0.6584623217582702, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 680, "train_speed(iter/s)": 0.085754 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.0, "completions/mean_length": 213.975, "completions/min_length": 132.0, "epoch": 0.32885261641862695, "frac_reward_zero_std": 0.375, "grad_norm": 0.18459384143352509, "kl": 0.015380859375, "learning_rate": 9.647185314142354e-06, "loss": 0.0006157746538519859, "memory(GiB)": 25.63, "reward": 0.3858999848365784, "reward_std": 0.12006673291325569, "rewards/MMContentORM/mean": 0.34350000619888305, "rewards/MMContentORM/std": 0.6572001695632934, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 685, "train_speed(iter/s)": 0.085759 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.0, "completions/mean_length": 203.2625, "completions/min_length": 140.4, "epoch": 0.33125300048007683, "frac_reward_zero_std": 0.475, "grad_norm": 0.25200966000556946, "kl": 0.0184814453125, "learning_rate": 9.63982506966556e-06, "loss": 0.0007389162667095662, "memory(GiB)": 25.63, "reward": 0.43359999656677245, "reward_std": 0.13519881889224053, "rewards/MMContentORM/mean": 0.4915000021457672, "rewards/MMContentORM/std": 0.6082589268684387, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 690, "train_speed(iter/s)": 0.085756 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.8, "completions/mean_length": 196.825, "completions/min_length": 116.2, "epoch": 0.33365338454152665, "frac_reward_zero_std": 0.45, "grad_norm": 0.21117174625396729, "kl": 0.019537353515625, "learning_rate": 9.632391709817374e-06, "loss": 0.0007822229526937008, "memory(GiB)": 25.63, "reward": 0.4227499783039093, "reward_std": 0.15464425683021546, "rewards/MMContentORM/mean": 0.5075000166893006, "rewards/MMContentORM/std": 0.7225377321243286, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 695, "train_speed(iter/s)": 0.085769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.8, "completions/mean_length": 199.0875, "completions/min_length": 121.6, "epoch": 0.3360537686029765, "frac_reward_zero_std": 0.4, "grad_norm": 0.17211079597473145, "kl": 0.018890380859375, "learning_rate": 9.624885351734296e-06, "loss": 0.000755119789391756, "memory(GiB)": 25.63, "reward": 0.503549975156784, "reward_std": 0.1313097208738327, "rewards/MMContentORM/mean": 0.6520000100135803, "rewards/MMContentORM/std": 0.5663350522518158, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 700, "train_speed(iter/s)": 0.085818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.6, "completions/mean_length": 184.925, "completions/min_length": 108.6, "epoch": 0.3384541526644263, "frac_reward_zero_std": 0.375, "grad_norm": 0.20221319794654846, "kl": 0.023406982421875, "learning_rate": 9.617306113703148e-06, "loss": 0.0009360792115330696, "memory(GiB)": 25.63, "reward": 0.4147499859333038, "reward_std": 0.1776959329843521, "rewards/MMContentORM/mean": 0.46249999701976774, "rewards/MMContentORM/std": 0.6942957758903503, "rewards/MMFormatORM/mean": 0.6056249737739563, "rewards/MMFormatORM/std": 0.12368168532848359, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 705, "train_speed(iter/s)": 0.085714 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.6, "completions/mean_length": 193.7125, "completions/min_length": 135.2, "epoch": 0.3408545367258761, "frac_reward_zero_std": 0.475, "grad_norm": 0.18443672358989716, "kl": 0.02154541015625, "learning_rate": 9.60965411515921e-06, "loss": 0.0008609195239841938, "memory(GiB)": 25.63, "reward": 0.41414997577667234, "reward_std": 0.08068087929859757, "rewards/MMContentORM/mean": 0.42850000858306886, "rewards/MMContentORM/std": 0.6322312831878663, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 710, "train_speed(iter/s)": 0.085788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 424.2, "completions/mean_length": 213.925, "completions/min_length": 115.0, "epoch": 0.34325492078732595, "frac_reward_zero_std": 0.375, "grad_norm": 0.25778675079345703, "kl": 0.037451171875, "learning_rate": 9.601929476684335e-06, "loss": 0.0014980776235461236, "memory(GiB)": 25.63, "reward": 0.37259999513626096, "reward_std": 0.20449528098106384, "rewards/MMContentORM/mean": 0.3964999854564667, "rewards/MMContentORM/std": 0.737775981426239, "rewards/MMFormatORM/mean": 0.5849999785423279, "rewards/MMFormatORM/std": 0.19821036159992217, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.3049390256404877, "step": 715, "train_speed(iter/s)": 0.085686 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.6, "completions/mean_length": 198.0375, "completions/min_length": 122.4, "epoch": 0.3456553048487758, "frac_reward_zero_std": 0.625, "grad_norm": 0.21450744569301605, "kl": 0.01873779296875, "learning_rate": 9.594132320005056e-06, "loss": 0.0007497821934521198, "memory(GiB)": 25.63, "reward": 0.4351499855518341, "reward_std": 0.11151074110530317, "rewards/MMContentORM/mean": 0.48100000619888306, "rewards/MMContentORM/std": 0.6345597028732299, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 720, "train_speed(iter/s)": 0.08572 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.6, "completions/mean_length": 203.5, "completions/min_length": 127.2, "epoch": 0.34805568891022565, "frac_reward_zero_std": 0.6, "grad_norm": 0.17188891768455505, "kl": 0.017034912109375, "learning_rate": 9.58626276799066e-06, "loss": 0.0006807168014347553, "memory(GiB)": 25.63, "reward": 0.46594999432563783, "reward_std": 0.10245977491140365, "rewards/MMContentORM/mean": 0.5580000162124634, "rewards/MMContentORM/std": 0.5856799840927124, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 725, "train_speed(iter/s)": 0.085717 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.8, "completions/mean_length": 207.4625, "completions/min_length": 142.6, "epoch": 0.35045607297167547, "frac_reward_zero_std": 0.4, "grad_norm": 0.2247178703546524, "kl": 0.01895751953125, "learning_rate": 9.57832094465126e-06, "loss": 0.000757955340668559, "memory(GiB)": 25.63, "reward": 0.4785999894142151, "reward_std": 0.1292591169476509, "rewards/MMContentORM/mean": 0.6039999961853028, "rewards/MMContentORM/std": 0.6318910479545593, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 730, "train_speed(iter/s)": 0.085751 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.4, "completions/mean_length": 204.5125, "completions/min_length": 130.6, "epoch": 0.3528564570331253, "frac_reward_zero_std": 0.425, "grad_norm": 0.2592087686061859, "kl": 0.02066650390625, "learning_rate": 9.57030697513583e-06, "loss": 0.0008267030119895935, "memory(GiB)": 25.63, "reward": 0.4224999785423279, "reward_std": 0.15726054804399608, "rewards/MMContentORM/mean": 0.49250001907348634, "rewards/MMContentORM/std": 0.6467598974704742, "rewards/MMFormatORM/mean": 0.6012499868869782, "rewards/MMFormatORM/std": 0.12313776612281799, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.1894427239894867, "step": 735, "train_speed(iter/s)": 0.085797 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.2, "completions/mean_length": 204.025, "completions/min_length": 127.4, "epoch": 0.3552568410945751, "frac_reward_zero_std": 0.55, "grad_norm": 0.1708785593509674, "kl": 0.0177734375, "learning_rate": 9.562220985730246e-06, "loss": 0.0007100693415850401, "memory(GiB)": 25.63, "reward": 0.4913999915122986, "reward_std": 0.11002581561915577, "rewards/MMContentORM/mean": 0.6110000014305115, "rewards/MMContentORM/std": 0.6001002073287964, "rewards/MMFormatORM/mean": 0.6299999833106995, "rewards/MMFormatORM/std": 0.07999999672174454, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 740, "train_speed(iter/s)": 0.0858 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 371.4, "completions/mean_length": 213.3375, "completions/min_length": 134.6, "epoch": 0.35765722515602494, "frac_reward_zero_std": 0.525, "grad_norm": 0.13782000541687012, "kl": 0.01622314453125, "learning_rate": 9.554063103855285e-06, "loss": 0.0006494280882179737, "memory(GiB)": 25.63, "reward": 0.48459997177124026, "reward_std": 0.1038032690063119, "rewards/MMContentORM/mean": 0.5940000116825104, "rewards/MMContentORM/std": 0.5801396131515503, "rewards/MMFormatORM/mean": 0.6299999833106995, "rewards/MMFormatORM/std": 0.06737477481365203, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 745, "train_speed(iter/s)": 0.085736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 459.6, "completions/mean_length": 222.775, "completions/min_length": 120.6, "epoch": 0.3600576092174748, "frac_reward_zero_std": 0.525, "grad_norm": 0.1946718990802765, "kl": 0.0191650390625, "learning_rate": 9.54583345806462e-06, "loss": 0.0007668656297028064, "memory(GiB)": 25.63, "reward": 0.4473499894142151, "reward_std": 0.10330830663442611, "rewards/MMContentORM/mean": 0.5115000009536743, "rewards/MMContentORM/std": 0.6476596593856812, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 750, "train_speed(iter/s)": 0.085561 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 450.4, "completions/mean_length": 226.075, "completions/min_length": 133.0, "epoch": 0.36245799327892464, "frac_reward_zero_std": 0.375, "grad_norm": 0.18249185383319855, "kl": 0.019720458984375, "learning_rate": 9.537532178042796e-06, "loss": 0.0007876944728195667, "memory(GiB)": 25.63, "reward": 0.38464999198913574, "reward_std": 0.1914138063788414, "rewards/MMContentORM/mean": 0.41600000858306885, "rewards/MMContentORM/std": 0.7192264080047608, "rewards/MMFormatORM/mean": 0.5893749833106995, "rewards/MMFormatORM/std": 0.1641829013824463, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 755, "train_speed(iter/s)": 0.085399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 504.0, "completions/mean_length": 236.925, "completions/min_length": 153.0, "epoch": 0.36485837734037446, "frac_reward_zero_std": 0.45, "grad_norm": 0.09791433811187744, "kl": 0.01590576171875, "learning_rate": 9.529159394603192e-06, "loss": 0.0006361880339682102, "memory(GiB)": 25.63, "reward": 0.46654998064041137, "reward_std": 0.18250426054000854, "rewards/MMContentORM/mean": 0.6170000016689301, "rewards/MMContentORM/std": 0.6278144896030426, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.18630690574645997, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 760, "train_speed(iter/s)": 0.085164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.0, "completions/mean_length": 210.0375, "completions/min_length": 142.8, "epoch": 0.3672587614018243, "frac_reward_zero_std": 0.6, "grad_norm": 0.20352379977703094, "kl": 0.01798095703125, "learning_rate": 9.520715239685943e-06, "loss": 0.0007194386795163155, "memory(GiB)": 25.63, "reward": 0.441599977016449, "reward_std": 0.09956062764395028, "rewards/MMContentORM/mean": 0.5115000009536743, "rewards/MMContentORM/std": 0.6773199915885926, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 765, "train_speed(iter/s)": 0.085188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.2, "completions/mean_length": 214.0125, "completions/min_length": 154.8, "epoch": 0.3696591454632741, "frac_reward_zero_std": 0.575, "grad_norm": 0.19353719055652618, "kl": 0.01456298828125, "learning_rate": 9.512199846355879e-06, "loss": 0.0005822981242090463, "memory(GiB)": 25.63, "reward": 0.4994999825954437, "reward_std": 0.11610694080591202, "rewards/MMContentORM/mean": 0.6275000095367431, "rewards/MMContentORM/std": 0.5707884192466736, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 770, "train_speed(iter/s)": 0.085248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.0, "completions/mean_length": 222.7, "completions/min_length": 154.4, "epoch": 0.37205952952472393, "frac_reward_zero_std": 0.55, "grad_norm": 0.1961638182401657, "kl": 0.01519775390625, "learning_rate": 9.503613348800418e-06, "loss": 0.0006085673347115516, "memory(GiB)": 25.63, "reward": 0.44324998259544374, "reward_std": 0.14813887765631079, "rewards/MMContentORM/mean": 0.5300000160932541, "rewards/MMContentORM/std": 0.637897276878357, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 775, "train_speed(iter/s)": 0.085193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 497.0, "completions/mean_length": 242.0875, "completions/min_length": 145.8, "epoch": 0.3744599135861738, "frac_reward_zero_std": 0.5, "grad_norm": 0.21977363526821136, "kl": 0.01630859375, "learning_rate": 9.494955882327455e-06, "loss": 0.0006526447832584381, "memory(GiB)": 25.63, "reward": 0.42324999570846555, "reward_std": 0.11490485058166086, "rewards/MMContentORM/mean": 0.4800000011920929, "rewards/MMContentORM/std": 0.5872390195727348, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 780, "train_speed(iter/s)": 0.084974 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.2, "completions/mean_length": 211.6, "completions/min_length": 129.8, "epoch": 0.37686029764762363, "frac_reward_zero_std": 0.55, "grad_norm": 0.18094761669635773, "kl": 0.019195556640625, "learning_rate": 9.486227583363225e-06, "loss": 0.0007680790033191443, "memory(GiB)": 25.63, "reward": 0.49619998335838317, "reward_std": 0.1111571803689003, "rewards/MMContentORM/mean": 0.6480000019073486, "rewards/MMContentORM/std": 0.6220699548721313, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 785, "train_speed(iter/s)": 0.084985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.4, "completions/mean_length": 212.1875, "completions/min_length": 132.8, "epoch": 0.37926068170907346, "frac_reward_zero_std": 0.5, "grad_norm": 0.2051294595003128, "kl": 0.019140625, "learning_rate": 9.47742858945016e-06, "loss": 0.0007654055021703243, "memory(GiB)": 25.63, "reward": 0.42904997169971465, "reward_std": 0.13145114853978157, "rewards/MMContentORM/mean": 0.49449999928474425, "rewards/MMContentORM/std": 0.6315191209316253, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 790, "train_speed(iter/s)": 0.084986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025, "completions/max_length": 616.6, "completions/mean_length": 243.05, "completions/min_length": 138.0, "epoch": 0.3816610657705233, "frac_reward_zero_std": 0.4, "grad_norm": 0.14649176597595215, "kl": 0.022198486328125, "learning_rate": 9.468559039244718e-06, "loss": 0.000887654721736908, "memory(GiB)": 25.63, "reward": 0.46064999103546145, "reward_std": 0.15181582197546958, "rewards/MMContentORM/mean": 0.5735000014305115, "rewards/MMContentORM/std": 0.6020529091358184, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 795, "train_speed(iter/s)": 0.084659 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.4, "completions/mean_length": 213.4625, "completions/min_length": 121.0, "epoch": 0.3840614498319731, "frac_reward_zero_std": 0.575, "grad_norm": 0.17191362380981445, "kl": 0.01591796875, "learning_rate": 9.459619072515196e-06, "loss": 0.0006367039866745472, "memory(GiB)": 25.63, "reward": 0.46714999675750735, "reward_std": 0.07474118582904339, "rewards/MMContentORM/mean": 0.5484999895095826, "rewards/MMContentORM/std": 0.654664158821106, "rewards/MMFormatORM/mean": 0.6318749904632568, "rewards/MMFormatORM/std": 0.07249999642372132, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 800, "train_speed(iter/s)": 0.084706 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.0, "completions/mean_length": 216.2375, "completions/min_length": 132.4, "epoch": 0.3864618338934229, "frac_reward_zero_std": 0.35, "grad_norm": 0.18298013508319855, "kl": 0.018414306640625, "learning_rate": 9.450608830139537e-06, "loss": 0.0007364887278527021, "memory(GiB)": 25.63, "reward": 0.4408999800682068, "reward_std": 0.16447303146123887, "rewards/MMContentORM/mean": 0.5385000050067902, "rewards/MMContentORM/std": 0.686086630821228, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.17440344989299775, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.2683130085468292, "step": 805, "train_speed(iter/s)": 0.084608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 457.8, "completions/mean_length": 218.7375, "completions/min_length": 117.6, "epoch": 0.3888622179548728, "frac_reward_zero_std": 0.525, "grad_norm": 0.09511148929595947, "kl": 0.022802734375, "learning_rate": 9.44152845410309e-06, "loss": 0.0009122312068939209, "memory(GiB)": 25.63, "reward": 0.43959996700286863, "reward_std": 0.11992530548013747, "rewards/MMContentORM/mean": 0.5065000057220459, "rewards/MMContentORM/std": 0.6312320232391357, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 810, "train_speed(iter/s)": 0.084476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.4, "completions/mean_length": 215.85, "completions/min_length": 147.6, "epoch": 0.3912626020163226, "frac_reward_zero_std": 0.6, "grad_norm": 0.13439743220806122, "kl": 0.0202392578125, "learning_rate": 9.4323780874964e-06, "loss": 0.0008096899837255478, "memory(GiB)": 25.63, "reward": 0.40035000443458557, "reward_std": 0.16298811305314304, "rewards/MMContentORM/mean": 0.451500004529953, "rewards/MMContentORM/std": 0.6973459839820861, "rewards/MMFormatORM/mean": 0.5931249916553497, "rewards/MMFormatORM/std": 0.1556377649307251, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.23944272398948668, "step": 815, "train_speed(iter/s)": 0.084482 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.6, "completions/mean_length": 211.5375, "completions/min_length": 136.8, "epoch": 0.39366298607777245, "frac_reward_zero_std": 0.55, "grad_norm": 0.21310864388942719, "kl": 0.017193603515625, "learning_rate": 9.42315787451293e-06, "loss": 0.0006876428611576557, "memory(GiB)": 25.63, "reward": 0.4637999773025513, "reward_std": 0.11851109359413385, "rewards/MMContentORM/mean": 0.5670000195503235, "rewards/MMContentORM/std": 0.6650908708572387, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 820, "train_speed(iter/s)": 0.084493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025, "completions/max_length": 609.6, "completions/mean_length": 235.6875, "completions/min_length": 147.2, "epoch": 0.39606337013922227, "frac_reward_zero_std": 0.45, "grad_norm": 0.1284942924976349, "kl": 0.016168212890625, "learning_rate": 9.413867960446796e-06, "loss": 0.0006466972175985575, "memory(GiB)": 25.63, "reward": 0.4465499848127365, "reward_std": 0.12763277366757392, "rewards/MMContentORM/mean": 0.5420000106096268, "rewards/MMContentORM/std": 0.6290184378623962, "rewards/MMFormatORM/mean": 0.6056249856948852, "rewards/MMFormatORM/std": 0.11977944672107696, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 825, "train_speed(iter/s)": 0.084165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.8, "completions/mean_length": 207.575, "completions/min_length": 131.4, "epoch": 0.3984637542006721, "frac_reward_zero_std": 0.65, "grad_norm": 0.1951877474784851, "kl": 0.015869140625, "learning_rate": 9.404508491690484e-06, "loss": 0.0006350751966238022, "memory(GiB)": 25.63, "reward": 0.5316999852657318, "reward_std": 0.07452905047684907, "rewards/MMContentORM/mean": 0.7080000102519989, "rewards/MMContentORM/std": 0.4912826240062714, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 830, "train_speed(iter/s)": 0.084201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.0, "completions/mean_length": 213.35, "completions/min_length": 149.4, "epoch": 0.4008641382621219, "frac_reward_zero_std": 0.6, "grad_norm": 0.17311322689056396, "kl": 0.0205078125, "learning_rate": 9.395079615732539e-06, "loss": 0.0008202603086829186, "memory(GiB)": 25.63, "reward": 0.47449998259544374, "reward_std": 0.10154052944853902, "rewards/MMContentORM/mean": 0.5689999997615814, "rewards/MMContentORM/std": 0.6329957902431488, "rewards/MMFormatORM/mean": 0.6299999833106995, "rewards/MMFormatORM/std": 0.09440345466136932, "rewards/MMRubricORM/mean": -0.025500000268220902, "rewards/MMRubricORM/std": 0.16631300747394562, "step": 835, "train_speed(iter/s)": 0.084227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.0, "completions/mean_length": 199.025, "completions/min_length": 104.8, "epoch": 0.4032645223235718, "frac_reward_zero_std": 0.625, "grad_norm": 0.14815396070480347, "kl": 0.022406005859375, "learning_rate": 9.385581481155233e-06, "loss": 0.0008968940936028957, "memory(GiB)": 25.63, "reward": 0.4544999897480011, "reward_std": 0.09220672622323037, "rewards/MMContentORM/mean": 0.5724999904632568, "rewards/MMContentORM/std": 0.6493830382823944, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 840, "train_speed(iter/s)": 0.08428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.4, "completions/mean_length": 212.925, "completions/min_length": 118.2, "epoch": 0.4056649063850216, "frac_reward_zero_std": 0.4, "grad_norm": 0.22879934310913086, "kl": 0.02987060546875, "learning_rate": 9.376014237632233e-06, "loss": 0.0011936011724174023, "memory(GiB)": 25.63, "reward": 0.43914997577667236, "reward_std": 0.1680792823433876, "rewards/MMContentORM/mean": 0.5485000014305115, "rewards/MMContentORM/std": 0.6784831821918488, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.18630690574645997, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 845, "train_speed(iter/s)": 0.084318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.0, "completions/mean_length": 213.9875, "completions/min_length": 133.4, "epoch": 0.40806529044647144, "frac_reward_zero_std": 0.575, "grad_norm": 0.0645090639591217, "kl": 0.016571044921875, "learning_rate": 9.366378035926244e-06, "loss": 0.0006628448609262705, "memory(GiB)": 25.63, "reward": 0.38159998059272765, "reward_std": 0.10861159779597074, "rewards/MMContentORM/mean": 0.3615000039339066, "rewards/MMContentORM/std": 0.677151370048523, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 850, "train_speed(iter/s)": 0.084327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.0, "completions/mean_length": 222.175, "completions/min_length": 147.2, "epoch": 0.41046567450792126, "frac_reward_zero_std": 0.375, "grad_norm": 0.2151261568069458, "kl": 0.015704345703125, "learning_rate": 9.356673027886624e-06, "loss": 0.0006273643113672734, "memory(GiB)": 25.63, "reward": 0.4602999806404114, "reward_std": 0.1168140321969986, "rewards/MMContentORM/mean": 0.5820000171661377, "rewards/MMContentORM/std": 0.683591103553772, "rewards/MMFormatORM/mean": 0.5999999880790711, "rewards/MMFormatORM/std": 0.12756490409374238, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 855, "train_speed(iter/s)": 0.084361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.2, "completions/mean_length": 218.55, "completions/min_length": 114.6, "epoch": 0.4128660585693711, "frac_reward_zero_std": 0.55, "grad_norm": 0.12868833541870117, "kl": 0.0200927734375, "learning_rate": 9.346899366447e-06, "loss": 0.0008026616647839546, "memory(GiB)": 25.63, "reward": 0.4429999828338623, "reward_std": 0.16150318831205368, "rewards/MMContentORM/mean": 0.5725000083446503, "rewards/MMContentORM/std": 0.7186736941337586, "rewards/MMFormatORM/mean": 0.5849999904632568, "rewards/MMFormatORM/std": 0.19430812299251557, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2989355862140656, "step": 860, "train_speed(iter/s)": 0.084399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.2, "completions/mean_length": 224.7875, "completions/min_length": 154.4, "epoch": 0.4152664426308209, "frac_reward_zero_std": 0.45, "grad_norm": 0.24704837799072266, "kl": 0.015057373046875, "learning_rate": 9.337057205622848e-06, "loss": 0.0006027618423104286, "memory(GiB)": 25.63, "reward": 0.4193499803543091, "reward_std": 0.1847669929265976, "rewards/MMContentORM/mean": 0.49900001287460327, "rewards/MMContentORM/std": 0.7176998615264892, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.18630690574645997, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 865, "train_speed(iter/s)": 0.084412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 449.6, "completions/mean_length": 232.175, "completions/min_length": 159.2, "epoch": 0.4176668266922708, "frac_reward_zero_std": 0.525, "grad_norm": 0.21097196638584137, "kl": 0.016717529296875, "learning_rate": 9.327146700509082e-06, "loss": 0.0006690716370940208, "memory(GiB)": 25.63, "reward": 0.46244998574256896, "reward_std": 0.14983592703938484, "rewards/MMContentORM/mean": 0.6105000078678131, "rewards/MMContentORM/std": 0.5413160175085068, "rewards/MMFormatORM/mean": 0.5893749892711639, "rewards/MMFormatORM/std": 0.17063776403665543, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.23944272398948668, "step": 870, "train_speed(iter/s)": 0.08432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 376.8, "completions/mean_length": 226.425, "completions/min_length": 158.4, "epoch": 0.4200672107537206, "frac_reward_zero_std": 0.625, "grad_norm": 0.11343076825141907, "kl": 0.01474609375, "learning_rate": 9.317168007277589e-06, "loss": 0.0005900030490010976, "memory(GiB)": 25.63, "reward": 0.38004998564720155, "reward_std": 0.11193500086665154, "rewards/MMContentORM/mean": 0.371999990940094, "rewards/MMContentORM/std": 0.7004570722579956, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 875, "train_speed(iter/s)": 0.084275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.0, "completions/mean_length": 211.6375, "completions/min_length": 137.8, "epoch": 0.42246759481517043, "frac_reward_zero_std": 0.55, "grad_norm": 0.17401404678821564, "kl": 0.01837158203125, "learning_rate": 9.307121283174788e-06, "loss": 0.0007351872511208058, "memory(GiB)": 25.63, "reward": 0.38199999928474426, "reward_std": 0.18780755996704102, "rewards/MMContentORM/mean": 0.42000000476837157, "rewards/MMContentORM/std": 0.7346604287624359, "rewards/MMFormatORM/mean": 0.5849999904632568, "rewards/MMFormatORM/std": 0.16980934143066406, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2612451553344727, "step": 880, "train_speed(iter/s)": 0.084282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 335.2, "completions/mean_length": 225.7625, "completions/min_length": 164.0, "epoch": 0.42486797887662026, "frac_reward_zero_std": 0.425, "grad_norm": 0.23540575802326202, "kl": 0.01390380859375, "learning_rate": 9.297006686519139e-06, "loss": 0.0005556363612413406, "memory(GiB)": 25.63, "reward": 0.40919997692108157, "reward_std": 0.18893892914056779, "rewards/MMContentORM/mean": 0.4755000114440918, "rewards/MMContentORM/std": 0.7272086381912232, "rewards/MMFormatORM/mean": 0.5912499725818634, "rewards/MMFormatORM/std": 0.13540457487106322, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2077557325363159, "step": 885, "train_speed(iter/s)": 0.084274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 536.0, "completions/mean_length": 235.975, "completions/min_length": 157.6, "epoch": 0.4272683629380701, "frac_reward_zero_std": 0.475, "grad_norm": 0.13821138441562653, "kl": 0.017333984375, "learning_rate": 9.286824376698653e-06, "loss": 0.0006932040210813284, "memory(GiB)": 27.09, "reward": 0.3673999786376953, "reward_std": 0.20407101474702358, "rewards/MMContentORM/mean": 0.4410000145435333, "rewards/MMContentORM/std": 0.7875102996826172, "rewards/MMFormatORM/mean": 0.5524999797344208, "rewards/MMFormatORM/std": 0.23184934854507447, "rewards/MMRubricORM/mean": -0.15, "rewards/MMRubricORM/std": 0.35669131875038146, "step": 890, "train_speed(iter/s)": 0.084032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 363.0, "completions/mean_length": 227.0, "completions/min_length": 132.4, "epoch": 0.4296687469995199, "frac_reward_zero_std": 0.325, "grad_norm": 0.22291657328605652, "kl": 0.020013427734375, "learning_rate": 9.276574514168382e-06, "loss": 0.000801488570868969, "memory(GiB)": 27.09, "reward": 0.4104499936103821, "reward_std": 0.21955665349960327, "rewards/MMContentORM/mean": 0.4930000126361847, "rewards/MMContentORM/std": 0.7277546286582947, "rewards/MMFormatORM/mean": 0.5831249833106995, "rewards/MMFormatORM/std": 0.19391306340694428, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2989355862140656, "step": 895, "train_speed(iter/s)": 0.084014 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.2, "completions/mean_length": 227.45, "completions/min_length": 148.0, "epoch": 0.4320691310609698, "frac_reward_zero_std": 0.475, "grad_norm": 0.1956370770931244, "kl": 0.01566162109375, "learning_rate": 9.266257260447883e-06, "loss": 0.0006269800476729869, "memory(GiB)": 27.09, "reward": 0.41879996508359907, "reward_std": 0.08725697756744921, "rewards/MMContentORM/mean": 0.4420000076293945, "rewards/MMContentORM/std": 0.6016733646392822, "rewards/MMFormatORM/mean": 0.6237499833106994, "rewards/MMFormatORM/std": 0.05990467071533203, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.08062257766723632, "step": 900, "train_speed(iter/s)": 0.084041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.8, "completions/mean_length": 223.0875, "completions/min_length": 156.8, "epoch": 0.4344695151224196, "frac_reward_zero_std": 0.6, "grad_norm": 0.2335289567708969, "kl": 0.017236328125, "learning_rate": 9.255872778118686e-06, "loss": 0.0006896716542541981, "memory(GiB)": 27.09, "reward": 0.4828499794006348, "reward_std": 0.12324871122837067, "rewards/MMContentORM/mean": 0.628999999165535, "rewards/MMContentORM/std": 0.5791173458099366, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 905, "train_speed(iter/s)": 0.083925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 331.6, "completions/mean_length": 228.55, "completions/min_length": 155.0, "epoch": 0.4368698991838694, "frac_reward_zero_std": 0.65, "grad_norm": 0.11608735471963882, "kl": 0.0136962890625, "learning_rate": 9.245421230821717e-06, "loss": 0.0005476945545524359, "memory(GiB)": 27.09, "reward": 0.4916999876499176, "reward_std": 0.06264965860173106, "rewards/MMContentORM/mean": 0.6080000042915344, "rewards/MMContentORM/std": 0.5947914302349091, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 910, "train_speed(iter/s)": 0.083928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 437.0, "completions/mean_length": 233.6375, "completions/min_length": 158.6, "epoch": 0.43927028324531925, "frac_reward_zero_std": 0.45, "grad_norm": 0.19499173760414124, "kl": 0.013690185546875, "learning_rate": 9.234902783254726e-06, "loss": 0.0005476208403706551, "memory(GiB)": 27.09, "reward": 0.46409996747970583, "reward_std": 0.11554124504327774, "rewards/MMContentORM/mean": 0.5715000033378601, "rewards/MMContentORM/std": 0.5974150598049164, "rewards/MMFormatORM/mean": 0.6137499809265137, "rewards/MMFormatORM/std": 0.11046060025691987, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 915, "train_speed(iter/s)": 0.083822 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.6, "completions/mean_length": 219.725, "completions/min_length": 143.0, "epoch": 0.44167066730676907, "frac_reward_zero_std": 0.675, "grad_norm": 0.07724174112081528, "kl": 0.01375732421875, "learning_rate": 9.224317601169699e-06, "loss": 0.000550596509128809, "memory(GiB)": 27.09, "reward": 0.5028999745845795, "reward_std": 0.06095260072033852, "rewards/MMContentORM/mean": 0.635999995470047, "rewards/MMContentORM/std": 0.5765919387340546, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 920, "train_speed(iter/s)": 0.083811 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.6, "completions/mean_length": 217.275, "completions/min_length": 134.8, "epoch": 0.4440710513682189, "frac_reward_zero_std": 0.55, "grad_norm": 0.19694066047668457, "kl": 0.01658935546875, "learning_rate": 9.213665851370232e-06, "loss": 0.0006623049266636372, "memory(GiB)": 27.09, "reward": 0.449649965763092, "reward_std": 0.1215516519267112, "rewards/MMContentORM/mean": 0.5460000038146973, "rewards/MMContentORM/std": 0.6028219342231751, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.09680812656879426, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.14893558621406555, "step": 925, "train_speed(iter/s)": 0.083789 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 440.8, "completions/mean_length": 213.4625, "completions/min_length": 139.2, "epoch": 0.4464714354296688, "frac_reward_zero_std": 0.575, "grad_norm": 0.16958890855312347, "kl": 0.012957763671875, "learning_rate": 9.202947701708915e-06, "loss": 0.000518304156139493, "memory(GiB)": 27.09, "reward": 0.4152499794960022, "reward_std": 0.15832121148705483, "rewards/MMContentORM/mean": 0.49250001907348634, "rewards/MMContentORM/std": 0.70787513256073, "rewards/MMFormatORM/mean": 0.5893749713897705, "rewards/MMFormatORM/std": 0.1807103618979454, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 930, "train_speed(iter/s)": 0.083692 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.8, "completions/mean_length": 208.15, "completions/min_length": 133.4, "epoch": 0.4488718194911186, "frac_reward_zero_std": 0.575, "grad_norm": 0.16020964086055756, "kl": 0.01607666015625, "learning_rate": 9.192163321084678e-06, "loss": 0.0006430365610867739, "memory(GiB)": 27.09, "reward": 0.4014999806880951, "reward_std": 0.1008334287442267, "rewards/MMContentORM/mean": 0.44000001847743986, "rewards/MMContentORM/std": 0.648642772436142, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 935, "train_speed(iter/s)": 0.083729 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.8, "completions/mean_length": 207.1375, "completions/min_length": 117.0, "epoch": 0.4512722035525684, "frac_reward_zero_std": 0.55, "grad_norm": 1.190772294998169, "kl": 0.0478271484375, "learning_rate": 9.181312879440129e-06, "loss": 0.0019131312146782875, "memory(GiB)": 27.09, "reward": 0.45269997119903566, "reward_std": 0.12133952155709267, "rewards/MMContentORM/mean": 0.5680000096559524, "rewards/MMContentORM/std": 0.6713850498199463, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 940, "train_speed(iter/s)": 0.083772 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.8, "completions/mean_length": 203.5625, "completions/min_length": 139.4, "epoch": 0.45367258761401824, "frac_reward_zero_std": 0.575, "grad_norm": 0.15727227926254272, "kl": 0.015093994140625, "learning_rate": 9.170396547758892e-06, "loss": 0.0006036899052560329, "memory(GiB)": 27.09, "reward": 0.4320499897003174, "reward_std": 0.10684382803738117, "rewards/MMContentORM/mean": 0.5020000040531158, "rewards/MMContentORM/std": 0.688554298877716, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 945, "train_speed(iter/s)": 0.083855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 349.8, "completions/mean_length": 222.275, "completions/min_length": 149.2, "epoch": 0.45607297167546806, "frac_reward_zero_std": 0.4, "grad_norm": 0.20384089648723602, "kl": 0.01500244140625, "learning_rate": 9.159414498062889e-06, "loss": 0.0005995483603328467, "memory(GiB)": 27.09, "reward": 0.3853499710559845, "reward_std": 0.2594374790787697, "rewards/MMContentORM/mean": 0.4714999973773956, "rewards/MMContentORM/std": 0.7658512353897095, "rewards/MMFormatORM/mean": 0.5606249809265137, "rewards/MMFormatORM/std": 0.22611625194549562, "rewards/MMRubricORM/mean": -0.1375, "rewards/MMRubricORM/std": 0.3478711724281311, "step": 950, "train_speed(iter/s)": 0.083831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.2, "completions/mean_length": 223.95, "completions/min_length": 147.4, "epoch": 0.4584733557369179, "frac_reward_zero_std": 0.725, "grad_norm": 0.17926473915576935, "kl": 0.014312744140625, "learning_rate": 9.148366903409645e-06, "loss": 0.0005721227265894413, "memory(GiB)": 27.09, "reward": 0.43774998784065244, "reward_std": 0.0767210841178894, "rewards/MMContentORM/mean": 0.4875000238418579, "rewards/MMContentORM/std": 0.6453867673873901, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 955, "train_speed(iter/s)": 0.08384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.6, "completions/mean_length": 209.1375, "completions/min_length": 123.0, "epoch": 0.46087373979836777, "frac_reward_zero_std": 0.55, "grad_norm": 0.17937743663787842, "kl": 0.015667724609375, "learning_rate": 9.137253937889556e-06, "loss": 0.0006268246099352837, "memory(GiB)": 27.09, "reward": 0.41344997882843015, "reward_std": 0.19254517555236816, "rewards/MMContentORM/mean": 0.5130000054836273, "rewards/MMContentORM/std": 0.7074776887893677, "rewards/MMFormatORM/mean": 0.5768749833106994, "rewards/MMFormatORM/std": 0.2062115788459778, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.3172485947608948, "step": 960, "train_speed(iter/s)": 0.083871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.2, "completions/mean_length": 208.1375, "completions/min_length": 128.8, "epoch": 0.4632741238598176, "frac_reward_zero_std": 0.625, "grad_norm": 0.15272051095962524, "kl": 0.016375732421875, "learning_rate": 9.12607577662315e-06, "loss": 0.0006551730446517467, "memory(GiB)": 27.09, "reward": 0.4290499806404114, "reward_std": 0.1823628380894661, "rewards/MMContentORM/mean": 0.5520000040531159, "rewards/MMContentORM/std": 0.6928182065486908, "rewards/MMFormatORM/mean": 0.5768749952316284, "rewards/MMFormatORM/std": 0.20230934023857117, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.31124515533447267, "step": 965, "train_speed(iter/s)": 0.083892 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.6, "completions/mean_length": 219.725, "completions/min_length": 142.0, "epoch": 0.4656745079212674, "frac_reward_zero_std": 0.55, "grad_norm": 0.20706000924110413, "kl": 0.01566162109375, "learning_rate": 9.114832595758315e-06, "loss": 0.0006271812599152327, "memory(GiB)": 27.09, "reward": 0.4845999896526337, "reward_std": 0.1179454043507576, "rewards/MMContentORM/mean": 0.6515000104904175, "rewards/MMContentORM/std": 0.6177137017250061, "rewards/MMFormatORM/mean": 0.5974999666213989, "rewards/MMFormatORM/std": 0.1561816841363907, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 970, "train_speed(iter/s)": 0.083908 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.6, "completions/mean_length": 220.25, "completions/min_length": 135.8, "epoch": 0.46807489198271723, "frac_reward_zero_std": 0.475, "grad_norm": 0.15420930087566376, "kl": 0.029974365234375, "learning_rate": 9.103524572467542e-06, "loss": 0.0012021941132843495, "memory(GiB)": 27.09, "reward": 0.39144999980926515, "reward_std": 0.18759542852640151, "rewards/MMContentORM/mean": 0.43499999344348905, "rewards/MMContentORM/std": 0.7306297183036804, "rewards/MMFormatORM/mean": 0.5893749713897705, "rewards/MMFormatORM/std": 0.20309771001338958, "rewards/MMRubricORM/mean": -0.09149999916553497, "rewards/MMRubricORM/std": 0.3158472299575806, "step": 975, "train_speed(iter/s)": 0.083941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 363.8, "completions/mean_length": 213.75, "completions/min_length": 141.8, "epoch": 0.47047527604416706, "frac_reward_zero_std": 0.725, "grad_norm": 0.15446196496486664, "kl": 0.016387939453125, "learning_rate": 9.092151884945117e-06, "loss": 0.0006551665253937244, "memory(GiB)": 27.09, "reward": 0.5428999841213227, "reward_std": 0.055295750661753115, "rewards/MMContentORM/mean": 0.735999995470047, "rewards/MMContentORM/std": 0.4234260804951191, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 980, "train_speed(iter/s)": 0.083929 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.6, "completions/mean_length": 211.8125, "completions/min_length": 134.4, "epoch": 0.4728756601056169, "frac_reward_zero_std": 0.575, "grad_norm": 0.10230781883001328, "kl": 0.017529296875, "learning_rate": 9.080714712404322e-06, "loss": 0.0007016819901764393, "memory(GiB)": 27.09, "reward": 0.3961999833583832, "reward_std": 0.22203153222799302, "rewards/MMContentORM/mean": 0.4880000114440918, "rewards/MMContentORM/std": 0.6864187598228455, "rewards/MMFormatORM/mean": 0.5649999797344207, "rewards/MMFormatORM/std": 0.18773135244846345, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.28837831020355226, "step": 985, "train_speed(iter/s)": 0.08397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 472.8, "completions/mean_length": 230.55, "completions/min_length": 149.6, "epoch": 0.47527604416706676, "frac_reward_zero_std": 0.6, "grad_norm": 0.14536498486995697, "kl": 0.014703369140625, "learning_rate": 9.069213235074606e-06, "loss": 0.0005882191471755505, "memory(GiB)": 27.09, "reward": 0.47779998779296873, "reward_std": 0.11624835301190614, "rewards/MMContentORM/mean": 0.6019999861717225, "rewards/MMContentORM/std": 0.646269428730011, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 990, "train_speed(iter/s)": 0.083826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.2, "completions/mean_length": 213.1, "completions/min_length": 143.2, "epoch": 0.4776764282285166, "frac_reward_zero_std": 0.6, "grad_norm": 0.22323811054229736, "kl": 0.01553955078125, "learning_rate": 9.057647634198745e-06, "loss": 0.0006211692001670599, "memory(GiB)": 27.09, "reward": 0.4356500029563904, "reward_std": 0.13456242978572847, "rewards/MMContentORM/mean": 0.511000007390976, "rewards/MMContentORM/std": 0.6877371788024902, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 995, "train_speed(iter/s)": 0.083869 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.4, "completions/mean_length": 207.8875, "completions/min_length": 126.6, "epoch": 0.4800768122899664, "frac_reward_zero_std": 0.65, "grad_norm": 0.12058038264513016, "kl": 0.01517333984375, "learning_rate": 9.046018092029991e-06, "loss": 0.0006069786846637726, "memory(GiB)": 27.09, "reward": 0.38174998164176943, "reward_std": 0.11306637614034117, "rewards/MMContentORM/mean": 0.40499998927116393, "rewards/MMContentORM/std": 0.7353096008300781, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 1000, "train_speed(iter/s)": 0.083877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.4, "completions/mean_length": 219.125, "completions/min_length": 153.2, "epoch": 0.4824771963514162, "frac_reward_zero_std": 0.7, "grad_norm": 0.1304665207862854, "kl": 0.0144775390625, "learning_rate": 9.034324791829198e-06, "loss": 0.0005794113036245108, "memory(GiB)": 27.09, "reward": 0.583199965953827, "reward_std": 0.028849952155724168, "rewards/MMContentORM/mean": 0.8079999923706055, "rewards/MMContentORM/std": 0.34920589849352834, "rewards/MMFormatORM/mean": 0.6499999761581421, "rewards/MMFormatORM/std": 0.0, "rewards/MMRubricORM/mean": 0.0, "rewards/MMRubricORM/std": 0.0, "step": 1005, "train_speed(iter/s)": 0.083816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.6, "completions/mean_length": 210.7125, "completions/min_length": 124.8, "epoch": 0.48487758041286605, "frac_reward_zero_std": 0.65, "grad_norm": 0.20664307475090027, "kl": 0.018109130859375, "learning_rate": 9.022567917861929e-06, "loss": 0.0007231380324810744, "memory(GiB)": 27.09, "reward": 0.36904996633529663, "reward_std": 0.08435783945024014, "rewards/MMContentORM/mean": 0.4019999861717224, "rewards/MMContentORM/std": 0.7406057715415955, "rewards/MMFormatORM/mean": 0.5768749713897705, "rewards/MMFormatORM/std": 0.1856150358915329, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.285561603307724, "step": 1010, "train_speed(iter/s)": 0.083848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.8, "completions/mean_length": 209.65, "completions/min_length": 125.8, "epoch": 0.48727796447431587, "frac_reward_zero_std": 0.55, "grad_norm": 0.18518085777759552, "kl": 0.014801025390625, "learning_rate": 9.010747655395558e-06, "loss": 0.0005913883913308382, "memory(GiB)": 27.09, "reward": 0.45489998161792755, "reward_std": 0.11144002974033355, "rewards/MMContentORM/mean": 0.5610000133514405, "rewards/MMContentORM/std": 0.6329753637313843, "rewards/MMFormatORM/mean": 0.6074999809265137, "rewards/MMFormatORM/std": 0.11700960993766785, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 1015, "train_speed(iter/s)": 0.083883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 442.2, "completions/mean_length": 222.925, "completions/min_length": 125.8, "epoch": 0.4896783485357657, "frac_reward_zero_std": 0.55, "grad_norm": 0.14311504364013672, "kl": 0.01640625, "learning_rate": 8.998864190696349e-06, "loss": 0.0006562491878867149, "memory(GiB)": 27.09, "reward": 0.4841999769210815, "reward_std": 0.12699637860059737, "rewards/MMContentORM/mean": 0.6180000066757202, "rewards/MMContentORM/std": 0.6347463011741639, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 1020, "train_speed(iter/s)": 0.083779 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.2, "completions/mean_length": 212.7125, "completions/min_length": 141.4, "epoch": 0.4920787325972156, "frac_reward_zero_std": 0.675, "grad_norm": 0.2222282737493515, "kl": 0.015057373046875, "learning_rate": 8.986917711026519e-06, "loss": 0.0006025471724569797, "memory(GiB)": 27.09, "reward": 0.5089999794960022, "reward_std": 0.12529932723846288, "rewards/MMContentORM/mean": 0.6800000011920929, "rewards/MMContentORM/std": 0.49175867438316345, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 1025, "train_speed(iter/s)": 0.083808 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 327.0, "completions/mean_length": 217.975, "completions/min_length": 131.0, "epoch": 0.4944791166586654, "frac_reward_zero_std": 0.65, "grad_norm": 0.17176951467990875, "kl": 0.01639404296875, "learning_rate": 8.974908404641294e-06, "loss": 0.0006549724377691746, "memory(GiB)": 27.09, "reward": 0.3853999853134155, "reward_std": 0.16772572994232177, "rewards/MMContentORM/mean": 0.4284999996423721, "rewards/MMContentORM/std": 0.7090068340301514, "rewards/MMFormatORM/mean": 0.5849999845027923, "rewards/MMFormatORM/std": 0.16754122078418732, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2577557325363159, "step": 1030, "train_speed(iter/s)": 0.083807 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.0, "completions/mean_length": 204.675, "completions/min_length": 144.2, "epoch": 0.4968795007201152, "frac_reward_zero_std": 0.65, "grad_norm": 0.1412537693977356, "kl": 0.016650390625, "learning_rate": 8.962836460785929e-06, "loss": 0.0006653706543147564, "memory(GiB)": 27.09, "reward": 0.48119999170303346, "reward_std": 0.09107534990180284, "rewards/MMContentORM/mean": 0.6105000078678131, "rewards/MMContentORM/std": 0.6093651533126831, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1035, "train_speed(iter/s)": 0.083855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 331.6, "completions/mean_length": 202.1125, "completions/min_length": 99.2, "epoch": 0.49927988478156504, "frac_reward_zero_std": 0.6, "grad_norm": 0.16436995565891266, "kl": 0.0226806640625, "learning_rate": 8.950702069692739e-06, "loss": 0.0009060959331691265, "memory(GiB)": 27.09, "reward": 0.46224998235702514, "reward_std": 0.11858180016279221, "rewards/MMContentORM/mean": 0.5775000125169754, "rewards/MMContentORM/std": 0.6317477941513061, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.09680812656879426, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.14893558621406555, "step": 1040, "train_speed(iter/s)": 0.083873 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.6, "completions/mean_length": 205.5125, "completions/min_length": 109.2, "epoch": 0.5016802688430149, "frac_reward_zero_std": 0.65, "grad_norm": 0.1451932191848755, "kl": 0.118011474609375, "learning_rate": 8.938505422578095e-06, "loss": 0.004709529504179954, "memory(GiB)": 27.09, "reward": 0.46809998750686643, "reward_std": 0.14071424752473832, "rewards/MMContentORM/mean": 0.5939999878406524, "rewards/MMContentORM/std": 0.6745672464370728, "rewards/MMFormatORM/mean": 0.6074999809265137, "rewards/MMFormatORM/std": 0.12880690693855285, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1045, "train_speed(iter/s)": 0.083921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 468.0, "completions/mean_length": 218.65, "completions/min_length": 135.8, "epoch": 0.5040806529044647, "frac_reward_zero_std": 0.55, "grad_norm": 0.24477499723434448, "kl": 0.019097900390625, "learning_rate": 8.92624671163941e-06, "loss": 0.0007639925926923752, "memory(GiB)": 27.09, "reward": 0.5066499829292297, "reward_std": 0.12579429522156715, "rewards/MMContentORM/mean": 0.6884999990463256, "rewards/MMContentORM/std": 0.60511314868927, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 1050, "train_speed(iter/s)": 0.08379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.2, "completions/mean_length": 196.3125, "completions/min_length": 116.0, "epoch": 0.5064810369659145, "frac_reward_zero_std": 0.7, "grad_norm": 0.09241645038127899, "kl": 0.020648193359375, "learning_rate": 8.913926130052116e-06, "loss": 0.0008254698477685452, "memory(GiB)": 27.09, "reward": 0.48494998812675477, "reward_std": 0.08577205466572195, "rewards/MMContentORM/mean": 0.6054999828338623, "rewards/MMContentORM/std": 0.5610681354999543, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1055, "train_speed(iter/s)": 0.083853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.2, "completions/mean_length": 206.4125, "completions/min_length": 138.6, "epoch": 0.5088814210273643, "frac_reward_zero_std": 0.5, "grad_norm": 0.1564904898405075, "kl": 0.016485595703125, "learning_rate": 8.901543871966614e-06, "loss": 0.0006593840662389994, "memory(GiB)": 27.09, "reward": 0.46794998049736025, "reward_std": 0.10472251027822495, "rewards/MMContentORM/mean": 0.5630000084638596, "rewards/MMContentORM/std": 0.6074943840503693, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1060, "train_speed(iter/s)": 0.083877 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.4, "completions/mean_length": 198.475, "completions/min_length": 135.0, "epoch": 0.5112818050888142, "frac_reward_zero_std": 0.7, "grad_norm": 0.16738565266132355, "kl": 0.016766357421875, "learning_rate": 8.889100132505217e-06, "loss": 0.0006704972125589848, "memory(GiB)": 27.09, "reward": 0.48009997606277466, "reward_std": 0.07410478852689266, "rewards/MMContentORM/mean": 0.6365000009536743, "rewards/MMContentORM/std": 0.5494045548141002, "rewards/MMFormatORM/mean": 0.6012499749660491, "rewards/MMFormatORM/std": 0.10254122316837311, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.15775573253631592, "step": 1065, "train_speed(iter/s)": 0.083925 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 457.8, "completions/mean_length": 219.9375, "completions/min_length": 134.6, "epoch": 0.5136821891502641, "frac_reward_zero_std": 0.725, "grad_norm": 0.19380617141723633, "kl": 0.014617919921875, "learning_rate": 8.876595107759075e-06, "loss": 0.000584835559129715, "memory(GiB)": 27.09, "reward": 0.5560999870300293, "reward_std": 0.0694378862157464, "rewards/MMContentORM/mean": 0.7690000057220459, "rewards/MMContentORM/std": 0.4652357272803783, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1070, "train_speed(iter/s)": 0.083827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.6, "completions/mean_length": 212.1625, "completions/min_length": 127.2, "epoch": 0.5160825732117139, "frac_reward_zero_std": 0.7, "grad_norm": 0.12439420074224472, "kl": 0.01439208984375, "learning_rate": 8.86402899478508e-06, "loss": 0.0005762668326497078, "memory(GiB)": 27.09, "reward": 0.5047999918460846, "reward_std": 0.0550129035487771, "rewards/MMContentORM/mean": 0.6694999992847442, "rewards/MMContentORM/std": 0.551321929693222, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1075, "train_speed(iter/s)": 0.083826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.8, "completions/mean_length": 200.025, "completions/min_length": 134.0, "epoch": 0.5184829572731637, "frac_reward_zero_std": 0.575, "grad_norm": 0.20139308273792267, "kl": 0.0151123046875, "learning_rate": 8.851401991602776e-06, "loss": 0.0006052942015230655, "memory(GiB)": 27.09, "reward": 0.49969996213912965, "reward_std": 0.06660945881158113, "rewards/MMContentORM/mean": 0.628000020980835, "rewards/MMContentORM/std": 0.5635871171951294, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1080, "train_speed(iter/s)": 0.083879 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.2, "completions/mean_length": 219.8, "completions/min_length": 130.8, "epoch": 0.5208833413346136, "frac_reward_zero_std": 0.7, "grad_norm": 0.1856120079755783, "kl": 0.015875244140625, "learning_rate": 8.838714297191222e-06, "loss": 0.0006359885912388564, "memory(GiB)": 27.09, "reward": 0.5098999857902526, "reward_std": 0.08160011963918805, "rewards/MMContentORM/mean": 0.6534999966621399, "rewards/MMContentORM/std": 0.5234884560108185, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1085, "train_speed(iter/s)": 0.083889 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 364.6, "completions/mean_length": 212.625, "completions/min_length": 128.8, "epoch": 0.5232837253960634, "frac_reward_zero_std": 0.85, "grad_norm": 0.07770657539367676, "kl": 0.0150390625, "learning_rate": 8.82596611148586e-06, "loss": 0.0006017507985234261, "memory(GiB)": 27.09, "reward": 0.4991499662399292, "reward_std": 0.03245619940571487, "rewards/MMContentORM/mean": 0.6410000085830688, "rewards/MMContentORM/std": 0.5666637182235718, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.05240467190742493, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.08062257766723632, "step": 1090, "train_speed(iter/s)": 0.083867 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.6, "completions/mean_length": 213.2, "completions/min_length": 132.2, "epoch": 0.5256841094575132, "frac_reward_zero_std": 0.65, "grad_norm": 0.14051133394241333, "kl": 0.017071533203125, "learning_rate": 8.81315763537537e-06, "loss": 0.000683901971206069, "memory(GiB)": 27.09, "reward": 0.39309998154640197, "reward_std": 0.13378459885716437, "rewards/MMContentORM/mean": 0.41899998784065245, "rewards/MMContentORM/std": 0.7018602132797241, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.14990466833114624, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23062257766723632, "step": 1095, "train_speed(iter/s)": 0.083859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 371.2, "completions/mean_length": 225.4375, "completions/min_length": 138.0, "epoch": 0.528084493518963, "frac_reward_zero_std": 0.55, "grad_norm": 0.1682557910680771, "kl": 0.01475830078125, "learning_rate": 8.8002890706985e-06, "loss": 0.000590839795768261, "memory(GiB)": 27.09, "reward": 0.46239997148513795, "reward_std": 0.13746155560947954, "rewards/MMContentORM/mean": 0.5634999990463256, "rewards/MMContentORM/std": 0.6250258028507233, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1100, "train_speed(iter/s)": 0.083827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.2, "completions/mean_length": 211.5, "completions/min_length": 137.2, "epoch": 0.5304848775804129, "frac_reward_zero_std": 0.6, "grad_norm": 0.10628636926412582, "kl": 0.020086669921875, "learning_rate": 8.787360620240891e-06, "loss": 0.0008035540580749512, "memory(GiB)": 27.09, "reward": 0.4173499792814255, "reward_std": 0.16496800733730196, "rewards/MMContentORM/mean": 0.4939999938011169, "rewards/MMContentORM/std": 0.6666546583175659, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.13730934262275696, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.21124515533447266, "step": 1105, "train_speed(iter/s)": 0.083764 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.8, "completions/mean_length": 221.6875, "completions/min_length": 120.4, "epoch": 0.5328852616418627, "frac_reward_zero_std": 0.6, "grad_norm": 0.2164710909128189, "kl": 0.01485595703125, "learning_rate": 8.77437248773187e-06, "loss": 0.0005937457084655762, "memory(GiB)": 27.09, "reward": 0.4448999762535095, "reward_std": 0.11822825372219085, "rewards/MMContentORM/mean": 0.5485000073909759, "rewards/MMContentORM/std": 0.6824892044067383, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.14990466833114624, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23062257766723632, "step": 1110, "train_speed(iter/s)": 0.08375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 333.2, "completions/mean_length": 226.6375, "completions/min_length": 122.2, "epoch": 0.5352856457033125, "frac_reward_zero_std": 0.45, "grad_norm": 0.17362850904464722, "kl": 0.020477294921875, "learning_rate": 8.761324877841254e-06, "loss": 0.0008191258646547794, "memory(GiB)": 27.09, "reward": 0.43039997220039367, "reward_std": 0.17041273415088654, "rewards/MMContentORM/mean": 0.5735000014305115, "rewards/MMContentORM/std": 0.7222278237342834, "rewards/MMFormatORM/mean": 0.5649999737739563, "rewards/MMFormatORM/std": 0.21917218267917632, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.335561603307724, "step": 1115, "train_speed(iter/s)": 0.083757 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.6, "completions/mean_length": 223.8625, "completions/min_length": 119.4, "epoch": 0.5376860297647623, "frac_reward_zero_std": 0.45, "grad_norm": 0.1994379311800003, "kl": 0.01700439453125, "learning_rate": 8.748217996176112e-06, "loss": 0.0006800967268645764, "memory(GiB)": 27.09, "reward": 0.39449998140335085, "reward_std": 0.26742778718471527, "rewards/MMContentORM/mean": 0.48000000715255736, "rewards/MMContentORM/std": 0.7163052916526794, "rewards/MMFormatORM/mean": 0.5687499821186066, "rewards/MMFormatORM/std": 0.18744589388370514, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.28837831020355226, "step": 1120, "train_speed(iter/s)": 0.08372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.4, "completions/mean_length": 216.15, "completions/min_length": 138.6, "epoch": 0.5400864138262121, "frac_reward_zero_std": 0.525, "grad_norm": 0.07599830627441406, "kl": 0.01529541015625, "learning_rate": 8.735052049277535e-06, "loss": 0.0006118299439549446, "memory(GiB)": 27.09, "reward": 0.42814998626708983, "reward_std": 0.18024151921272277, "rewards/MMContentORM/mean": 0.5210000097751617, "rewards/MMContentORM/std": 0.6903650641441346, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 1125, "train_speed(iter/s)": 0.083741 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 400.4, "completions/mean_length": 228.1375, "completions/min_length": 141.6, "epoch": 0.5424867978876621, "frac_reward_zero_std": 0.625, "grad_norm": 0.11949385702610016, "kl": 0.01396484375, "learning_rate": 8.721827244617371e-06, "loss": 0.000558951823040843, "memory(GiB)": 27.09, "reward": 0.4563499987125397, "reward_std": 0.1478560298681259, "rewards/MMContentORM/mean": 0.5789999961853027, "rewards/MMContentORM/std": 0.6697975873947144, "rewards/MMFormatORM/mean": 0.5993749856948852, "rewards/MMFormatORM/std": 0.15370826721191405, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 1130, "train_speed(iter/s)": 0.083701 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.4, "completions/mean_length": 216.7125, "completions/min_length": 152.6, "epoch": 0.5448871819491119, "frac_reward_zero_std": 0.7, "grad_norm": 0.1708482950925827, "kl": 0.0151611328125, "learning_rate": 8.708543790594966e-06, "loss": 0.0006066753529012203, "memory(GiB)": 27.09, "reward": 0.532749992609024, "reward_std": 0.06851864554919303, "rewards/MMContentORM/mean": 0.7250000238418579, "rewards/MMContentORM/std": 0.4237551301717758, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1135, "train_speed(iter/s)": 0.083719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 331.0, "completions/mean_length": 214.9625, "completions/min_length": 138.4, "epoch": 0.5472875660105617, "frac_reward_zero_std": 0.625, "grad_norm": 0.14022567868232727, "kl": 0.013623046875, "learning_rate": 8.695201896533875e-06, "loss": 0.0005450892262160778, "memory(GiB)": 27.09, "reward": 0.4738999783992767, "reward_std": 0.15061374502256514, "rewards/MMContentORM/mean": 0.6210000067949295, "rewards/MMContentORM/std": 0.549624501913786, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 1140, "train_speed(iter/s)": 0.083726 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.0, "completions/mean_length": 217.35, "completions/min_length": 123.6, "epoch": 0.5496879500720115, "frac_reward_zero_std": 0.6, "grad_norm": 0.1822585016489029, "kl": 0.018511962890625, "learning_rate": 8.681801772678564e-06, "loss": 0.0007403687573969364, "memory(GiB)": 27.09, "reward": 0.3406499683856964, "reward_std": 0.20272752242162823, "rewards/MMContentORM/mean": 0.33100001215934755, "rewards/MMContentORM/std": 0.737173342704773, "rewards/MMFormatORM/mean": 0.576874977350235, "rewards/MMFormatORM/std": 0.17944467663764954, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.27606874108314516, "step": 1145, "train_speed(iter/s)": 0.083719 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.0, "completions/mean_length": 212.4125, "completions/min_length": 141.6, "epoch": 0.5520883341334614, "frac_reward_zero_std": 0.6, "grad_norm": 0.16844458878040314, "kl": 0.0210693359375, "learning_rate": 8.668343630191094e-06, "loss": 0.0008432833477854728, "memory(GiB)": 27.09, "reward": 0.48644999265670774, "reward_std": 0.1396535847336054, "rewards/MMContentORM/mean": 0.6380000114440918, "rewards/MMContentORM/std": 0.5572850041091442, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1150, "train_speed(iter/s)": 0.08373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.4, "completions/mean_length": 211.1125, "completions/min_length": 141.8, "epoch": 0.5544887181949112, "frac_reward_zero_std": 0.5, "grad_norm": 0.17075812816619873, "kl": 0.01673583984375, "learning_rate": 8.654827681147798e-06, "loss": 0.0006688498891890049, "memory(GiB)": 27.09, "reward": 0.4780499696731567, "reward_std": 0.11589480005204678, "rewards/MMContentORM/mean": 0.57950000166893, "rewards/MMContentORM/std": 0.6060647606849671, "rewards/MMFormatORM/mean": 0.6281249761581421, "rewards/MMFormatORM/std": 0.06690345257520676, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 1155, "train_speed(iter/s)": 0.083765 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.8, "completions/mean_length": 212.925, "completions/min_length": 129.6, "epoch": 0.556889102256361, "frac_reward_zero_std": 0.625, "grad_norm": 0.18922355771064758, "kl": 0.015997314453125, "learning_rate": 8.641254138535937e-06, "loss": 0.0006405468098819256, "memory(GiB)": 27.09, "reward": 0.4417499840259552, "reward_std": 0.146866075694561, "rewards/MMContentORM/mean": 0.5550000250339509, "rewards/MMContentORM/std": 0.6111162975430489, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.14121158123016359, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.21724859476089478, "step": 1160, "train_speed(iter/s)": 0.08377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 466.2, "completions/mean_length": 219.0375, "completions/min_length": 125.6, "epoch": 0.5592894863178108, "frac_reward_zero_std": 0.625, "grad_norm": 0.09351535886526108, "kl": 0.014825439453125, "learning_rate": 8.627623216250345e-06, "loss": 0.0005931487306952476, "memory(GiB)": 27.09, "reward": 0.4562999814748764, "reward_std": 0.08669129339978099, "rewards/MMContentORM/mean": 0.5520000055432319, "rewards/MMContentORM/std": 0.5042887216433882, "rewards/MMFormatORM/mean": 0.6137499809265137, "rewards/MMFormatORM/std": 0.09990466982126237, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 1165, "train_speed(iter/s)": 0.083689 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.8, "completions/mean_length": 198.775, "completions/min_length": 119.8, "epoch": 0.5616898703792607, "frac_reward_zero_std": 0.725, "grad_norm": 0.14243614673614502, "kl": 0.0142822265625, "learning_rate": 8.613935129090055e-06, "loss": 0.0005715936422348022, "memory(GiB)": 27.09, "reward": 0.5254999697208405, "reward_std": 0.06462955782189965, "rewards/MMContentORM/mean": 0.6925000309944153, "rewards/MMContentORM/std": 0.53197683095932, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1170, "train_speed(iter/s)": 0.083721 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.0, "completions/mean_length": 208.5875, "completions/min_length": 140.6, "epoch": 0.5640902544407105, "frac_reward_zero_std": 0.775, "grad_norm": 0.08023513108491898, "kl": 0.016717529296875, "learning_rate": 8.60019009275492e-06, "loss": 0.000668759923428297, "memory(GiB)": 27.09, "reward": 0.4964999794960022, "reward_std": 0.054164377762936054, "rewards/MMContentORM/mean": 0.620000010728836, "rewards/MMContentORM/std": 0.5584413051605225, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1175, "train_speed(iter/s)": 0.083728 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 470.0, "completions/mean_length": 220.7125, "completions/min_length": 119.2, "epoch": 0.5664906385021603, "frac_reward_zero_std": 0.675, "grad_norm": 0.2373812049627304, "kl": 0.019677734375, "learning_rate": 8.586388323842207e-06, "loss": 0.0007880028337240219, "memory(GiB)": 27.09, "reward": 0.39134998321533204, "reward_std": 0.16157390028238297, "rewards/MMContentORM/mean": 0.4289999961853027, "rewards/MMContentORM/std": 0.7077797532081604, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.18630690574645997, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 1180, "train_speed(iter/s)": 0.083613 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.6, "completions/mean_length": 201.75, "completions/min_length": 114.4, "epoch": 0.5688910225636101, "frac_reward_zero_std": 0.725, "grad_norm": 0.23980411887168884, "kl": 0.01798095703125, "learning_rate": 8.57253003984319e-06, "loss": 0.0007191254291683436, "memory(GiB)": 27.09, "reward": 0.5243999719619751, "reward_std": 0.07297341881785542, "rewards/MMContentORM/mean": 0.7185000061988831, "rewards/MMContentORM/std": 0.5680613338947296, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1185, "train_speed(iter/s)": 0.083637 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.2, "completions/mean_length": 204.625, "completions/min_length": 149.6, "epoch": 0.5712914066250601, "frac_reward_zero_std": 0.725, "grad_norm": 0.12727871537208557, "kl": 0.014764404296875, "learning_rate": 8.558615459139717e-06, "loss": 0.0005905915051698685, "memory(GiB)": 27.09, "reward": 0.4597499847412109, "reward_std": 0.11066221240907907, "rewards/MMContentORM/mean": 0.5424999952316284, "rewards/MMContentORM/std": 0.6061853706836701, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1190, "train_speed(iter/s)": 0.083665 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.0, "completions/mean_length": 218.9125, "completions/min_length": 149.8, "epoch": 0.5736917906865099, "frac_reward_zero_std": 0.575, "grad_norm": 0.15900759398937225, "kl": 0.0161865234375, "learning_rate": 8.544644801000777e-06, "loss": 0.0006472207140177488, "memory(GiB)": 27.09, "reward": 0.409499979019165, "reward_std": 0.14325983561575412, "rewards/MMContentORM/mean": 0.46000003516674043, "rewards/MMContentORM/std": 0.5570301927626133, "rewards/MMFormatORM/mean": 0.6012499690055847, "rewards/MMFormatORM/std": 0.13321036398410796, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.20493902564048766, "step": 1195, "train_speed(iter/s)": 0.083651 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.4, "completions/mean_length": 206.2, "completions/min_length": 123.6, "epoch": 0.5760921747479597, "frac_reward_zero_std": 0.65, "grad_norm": 0.009572326205670834, "kl": 0.034039306640625, "learning_rate": 8.53061828557903e-06, "loss": 0.001363489031791687, "memory(GiB)": 27.09, "reward": 0.4724999785423279, "reward_std": 0.15542207062244415, "rewards/MMContentORM/mean": 0.6175000131130218, "rewards/MMContentORM/std": 0.6357908546924591, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 1200, "train_speed(iter/s)": 0.083674 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 449.6, "completions/mean_length": 214.8375, "completions/min_length": 136.4, "epoch": 0.5784925588094095, "frac_reward_zero_std": 0.775, "grad_norm": 0.08620696514844894, "kl": 0.015582275390625, "learning_rate": 8.51653613390736e-06, "loss": 0.0006235348992049694, "memory(GiB)": 27.09, "reward": 0.4412499874830246, "reward_std": 0.11023794980719685, "rewards/MMContentORM/mean": 0.5250000119209289, "rewards/MMContentORM/std": 0.6168273031711579, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 1205, "train_speed(iter/s)": 0.083513 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.6, "completions/mean_length": 208.2875, "completions/min_length": 119.8, "epoch": 0.5808929428708594, "frac_reward_zero_std": 0.7, "grad_norm": 0.25394561886787415, "kl": 0.0171142578125, "learning_rate": 8.502398567895369e-06, "loss": 0.0006845718715339899, "memory(GiB)": 27.09, "reward": 0.4345999777317047, "reward_std": 0.09871211070567369, "rewards/MMContentORM/mean": 0.4939999908208847, "rewards/MMContentORM/std": 0.6138741195201873, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 1210, "train_speed(iter/s)": 0.083519 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.4, "completions/mean_length": 209.825, "completions/min_length": 141.4, "epoch": 0.5832933269323092, "frac_reward_zero_std": 0.675, "grad_norm": 0.17071519792079926, "kl": 0.018353271484375, "learning_rate": 8.488205810325892e-06, "loss": 0.0007337843533605337, "memory(GiB)": 27.09, "reward": 0.49284998178482053, "reward_std": 0.13003694042563438, "rewards/MMContentORM/mean": 0.6540000021457673, "rewards/MMContentORM/std": 0.5871677160263061, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 1215, "train_speed(iter/s)": 0.083533 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.2, "completions/mean_length": 207.375, "completions/min_length": 124.6, "epoch": 0.585693710993759, "frac_reward_zero_std": 0.725, "grad_norm": 0.17592158913612366, "kl": 0.0169677734375, "learning_rate": 8.473958084851487e-06, "loss": 0.000678650476038456, "memory(GiB)": 27.09, "reward": 0.5602999925613403, "reward_std": 0.06406386941671371, "rewards/MMContentORM/mean": 0.7795000076293945, "rewards/MMContentORM/std": 0.4123713135719299, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1220, "train_speed(iter/s)": 0.083569 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.4, "completions/mean_length": 204.0375, "completions/min_length": 137.8, "epoch": 0.5880940950552088, "frac_reward_zero_std": 0.675, "grad_norm": 0.1302787959575653, "kl": 0.01573486328125, "learning_rate": 8.459655615990908e-06, "loss": 0.000629202276468277, "memory(GiB)": 27.09, "reward": 0.45289998650550845, "reward_std": 0.1441083623562008, "rewards/MMContentORM/mean": 0.5685000061988831, "rewards/MMContentORM/std": 0.6687065124511719, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 1225, "train_speed(iter/s)": 0.083593 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.6, "completions/mean_length": 209.125, "completions/min_length": 138.2, "epoch": 0.5904944791166586, "frac_reward_zero_std": 0.75, "grad_norm": 0.08660220354795456, "kl": 0.01583251953125, "learning_rate": 8.445298629125566e-06, "loss": 0.0006336371414363384, "memory(GiB)": 27.09, "reward": 0.5385999858379364, "reward_std": 0.09079250784125178, "rewards/MMContentORM/mean": 0.7539999961853028, "rewards/MMContentORM/std": 0.5569849014282227, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1230, "train_speed(iter/s)": 0.083624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.8, "completions/mean_length": 207.4375, "completions/min_length": 133.8, "epoch": 0.5928948631781085, "frac_reward_zero_std": 0.75, "grad_norm": 0.10941971838474274, "kl": 0.013848876953125, "learning_rate": 8.430887350495978e-06, "loss": 0.0005538208410143852, "memory(GiB)": 27.09, "reward": 0.5291499614715576, "reward_std": 0.0521137666888535, "rewards/MMContentORM/mean": 0.7159999907016754, "rewards/MMContentORM/std": 0.41987812891602516, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1235, "train_speed(iter/s)": 0.083672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.0, "completions/mean_length": 207.2, "completions/min_length": 133.4, "epoch": 0.5952952472395583, "frac_reward_zero_std": 0.65, "grad_norm": 0.17035551369190216, "kl": 0.016778564453125, "learning_rate": 8.416422007198204e-06, "loss": 0.0006709801964461803, "memory(GiB)": 27.09, "reward": 0.49859996438026427, "reward_std": 0.09135819533839822, "rewards/MMContentORM/mean": 0.6540000200271606, "rewards/MMContentORM/std": 0.6145297229290009, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1240, "train_speed(iter/s)": 0.083718 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 468.6, "completions/mean_length": 221.25, "completions/min_length": 141.2, "epoch": 0.5976956313010081, "frac_reward_zero_std": 0.575, "grad_norm": 0.16948705911636353, "kl": 0.016485595703125, "learning_rate": 8.401902827180267e-06, "loss": 0.0006599447224289179, "memory(GiB)": 27.09, "reward": 0.4580999851226807, "reward_std": 0.10960154831409455, "rewards/MMContentORM/mean": 0.5815000176429749, "rewards/MMContentORM/std": 0.6139387130737305, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 1245, "train_speed(iter/s)": 0.083619 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.2, "completions/mean_length": 225.325, "completions/min_length": 134.2, "epoch": 0.600096015362458, "frac_reward_zero_std": 0.5, "grad_norm": 0.10282408446073532, "kl": 0.0154541015625, "learning_rate": 8.387330039238558e-06, "loss": 0.000617855554446578, "memory(GiB)": 27.09, "reward": 0.38104998469352724, "reward_std": 0.2287490501999855, "rewards/MMContentORM/mean": 0.4320000112056732, "rewards/MMContentORM/std": 0.749615466594696, "rewards/MMFormatORM/mean": 0.5768749833106994, "rewards/MMFormatORM/std": 0.2062115788459778, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.3172485947608948, "step": 1250, "train_speed(iter/s)": 0.083602 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.0, "completions/mean_length": 237.125, "completions/min_length": 158.0, "epoch": 0.6024963994239079, "frac_reward_zero_std": 0.6, "grad_norm": 0.13665515184402466, "kl": 0.0227783203125, "learning_rate": 8.372703873014236e-06, "loss": 0.0009101461619138718, "memory(GiB)": 27.09, "reward": 0.4327999770641327, "reward_std": 0.1585333364084363, "rewards/MMContentORM/mean": 0.5470000147819519, "rewards/MMContentORM/std": 0.6171560496091842, "rewards/MMFormatORM/mean": 0.5849999785423279, "rewards/MMFormatORM/std": 0.1737115800380707, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2672485947608948, "step": 1255, "train_speed(iter/s)": 0.083549 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.8, "completions/mean_length": 221.9875, "completions/min_length": 144.6, "epoch": 0.6048967834853577, "frac_reward_zero_std": 0.75, "grad_norm": 0.1299554705619812, "kl": 0.01610107421875, "learning_rate": 8.358024558989606e-06, "loss": 0.0006435022689402104, "memory(GiB)": 27.09, "reward": 0.49649999141693113, "reward_std": 0.0552957511274144, "rewards/MMContentORM/mean": 0.6199999928474427, "rewards/MMContentORM/std": 0.6000278711318969, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1260, "train_speed(iter/s)": 0.083545 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 459.4, "completions/mean_length": 225.875, "completions/min_length": 150.6, "epoch": 0.6072971675468075, "frac_reward_zero_std": 0.675, "grad_norm": 0.12043029069900513, "kl": 0.014471435546875, "learning_rate": 8.34329232848449e-06, "loss": 0.0005782137159258127, "memory(GiB)": 27.09, "reward": 0.4554999887943268, "reward_std": 0.1602303996682167, "rewards/MMContentORM/mean": 0.5749999940395355, "rewards/MMContentORM/std": 0.6556944012641907, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.14990466833114624, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23062257766723632, "step": 1265, "train_speed(iter/s)": 0.083458 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 459.0, "completions/mean_length": 224.525, "completions/min_length": 138.0, "epoch": 0.6096975516082573, "frac_reward_zero_std": 0.7, "grad_norm": 0.15134023129940033, "kl": 0.017626953125, "learning_rate": 8.328507413652569e-06, "loss": 0.0007050371263176203, "memory(GiB)": 27.09, "reward": 0.45614997744560243, "reward_std": 0.14799745231866837, "rewards/MMContentORM/mean": 0.5910000026226043, "rewards/MMContentORM/std": 0.6682988286018372, "rewards/MMFormatORM/mean": 0.5931249976158142, "rewards/MMFormatORM/std": 0.18240466713905334, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2806225776672363, "step": 1270, "train_speed(iter/s)": 0.083379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.4, "completions/mean_length": 216.9125, "completions/min_length": 131.2, "epoch": 0.6120979356697072, "frac_reward_zero_std": 0.675, "grad_norm": 0.1947038173675537, "kl": 0.017083740234375, "learning_rate": 8.313670047477751e-06, "loss": 0.0006824467331171036, "memory(GiB)": 27.09, "reward": 0.43594998121261597, "reward_std": 0.1567655718419701, "rewards/MMContentORM/mean": 0.5405000030994416, "rewards/MMContentORM/std": 0.6641241073608398, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.13730934262275696, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.21124515533447266, "step": 1275, "train_speed(iter/s)": 0.083394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 455.8, "completions/mean_length": 234.675, "completions/min_length": 138.6, "epoch": 0.614498319731157, "frac_reward_zero_std": 0.65, "grad_norm": 0.14940251410007477, "kl": 0.0174072265625, "learning_rate": 8.29878046377047e-06, "loss": 0.0006969640962779522, "memory(GiB)": 27.09, "reward": 0.5129499852657318, "reward_std": 0.10443966835737228, "rewards/MMContentORM/mean": 0.6755000114440918, "rewards/MMContentORM/std": 0.5826462268829345, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1280, "train_speed(iter/s)": 0.083311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.0, "completions/mean_length": 224.5375, "completions/min_length": 146.6, "epoch": 0.6168987037926068, "frac_reward_zero_std": 0.75, "grad_norm": 0.23286378383636475, "kl": 0.01444091796875, "learning_rate": 8.283838897164022e-06, "loss": 0.0005787207745015621, "memory(GiB)": 27.09, "reward": 0.5347499787807465, "reward_std": 0.0849235224770382, "rewards/MMContentORM/mean": 0.7300000071525574, "rewards/MMContentORM/std": 0.5398510098457336, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1285, "train_speed(iter/s)": 0.08333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 361.8, "completions/mean_length": 215.95, "completions/min_length": 139.2, "epoch": 0.6192990878540566, "frac_reward_zero_std": 0.75, "grad_norm": 0.16865944862365723, "kl": 0.01298828125, "learning_rate": 8.268845583110863e-06, "loss": 0.0005195950157940388, "memory(GiB)": 27.09, "reward": 0.5143999695777893, "reward_std": 0.09899494871497154, "rewards/MMContentORM/mean": 0.693500018119812, "rewards/MMContentORM/std": 0.5922718286514282, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 1290, "train_speed(iter/s)": 0.083314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.6, "completions/mean_length": 209.5, "completions/min_length": 135.2, "epoch": 0.6216994719155065, "frac_reward_zero_std": 0.6, "grad_norm": 0.12279748171567917, "kl": 0.016485595703125, "learning_rate": 8.253800757878886e-06, "loss": 0.0006598389707505703, "memory(GiB)": 27.09, "reward": 0.41129997968673704, "reward_std": 0.16164461448788642, "rewards/MMContentORM/mean": 0.46449999809265136, "rewards/MMContentORM/std": 0.6922466158866882, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 1295, "train_speed(iter/s)": 0.083344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.8, "completions/mean_length": 217.825, "completions/min_length": 136.2, "epoch": 0.6240998559769563, "frac_reward_zero_std": 0.8, "grad_norm": 0.1909555196762085, "kl": 0.02734375, "learning_rate": 8.238704658547722e-06, "loss": 0.0010941483080387116, "memory(GiB)": 27.09, "reward": 0.5067499876022339, "reward_std": 0.09284311935771257, "rewards/MMContentORM/mean": 0.6600000023841858, "rewards/MMContentORM/std": 0.5655688047409058, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1300, "train_speed(iter/s)": 0.083366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.6, "completions/mean_length": 210.3875, "completions/min_length": 144.0, "epoch": 0.6265002400384061, "frac_reward_zero_std": 0.75, "grad_norm": 0.0646437555551529, "kl": 0.015008544921875, "learning_rate": 8.223557523004982e-06, "loss": 0.0006002359557896852, "memory(GiB)": 27.09, "reward": 0.431849992275238, "reward_std": 0.11052078779321164, "rewards/MMContentORM/mean": 0.5015000164508819, "rewards/MMContentORM/std": 0.6876445889472962, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1305, "train_speed(iter/s)": 0.083302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 339.0, "completions/mean_length": 222.225, "completions/min_length": 157.6, "epoch": 0.628900624099856, "frac_reward_zero_std": 0.7, "grad_norm": 0.06356562674045563, "kl": 0.014605712890625, "learning_rate": 8.208359589942515e-06, "loss": 0.000583806075155735, "memory(GiB)": 27.09, "reward": 0.46749998927116393, "reward_std": 0.10931870595086365, "rewards/MMContentORM/mean": 0.5799999982118607, "rewards/MMContentORM/std": 0.5833412051200867, "rewards/MMFormatORM/mean": 0.6137499809265137, "rewards/MMFormatORM/std": 0.08727944791316986, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 1310, "train_speed(iter/s)": 0.083321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 344.2, "completions/mean_length": 220.925, "completions/min_length": 134.4, "epoch": 0.6313010081613059, "frac_reward_zero_std": 0.7, "grad_norm": 0.18615718185901642, "kl": 0.016546630859375, "learning_rate": 8.193111098852654e-06, "loss": 0.0006628592498600483, "memory(GiB)": 27.09, "reward": 0.4991499900817871, "reward_std": 0.06738727213814855, "rewards/MMContentORM/mean": 0.6410000026226044, "rewards/MMContentORM/std": 0.49751891270279885, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1315, "train_speed(iter/s)": 0.083324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.6, "completions/mean_length": 214.4125, "completions/min_length": 132.6, "epoch": 0.6337013922227557, "frac_reward_zero_std": 0.6, "grad_norm": 0.1614212691783905, "kl": 0.012420654296875, "learning_rate": 8.177812290024438e-06, "loss": 0.000497491005808115, "memory(GiB)": 27.09, "reward": 0.4735999882221222, "reward_std": 0.10012631714344025, "rewards/MMContentORM/mean": 0.5914999902248382, "rewards/MMContentORM/std": 0.6162684261798859, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 1320, "train_speed(iter/s)": 0.083354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 373.0, "completions/mean_length": 214.65, "completions/min_length": 152.4, "epoch": 0.6361017762842055, "frac_reward_zero_std": 0.675, "grad_norm": 0.08849076926708221, "kl": 0.014666748046875, "learning_rate": 8.162463404539812e-06, "loss": 0.0005868059583008289, "memory(GiB)": 27.09, "reward": 0.48304998874664307, "reward_std": 0.12183449864387512, "rewards/MMContentORM/mean": 0.6295000195503235, "rewards/MMContentORM/std": 0.6145186185836792, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1325, "train_speed(iter/s)": 0.083338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.6, "completions/mean_length": 221.7125, "completions/min_length": 138.0, "epoch": 0.6385021603456553, "frac_reward_zero_std": 0.675, "grad_norm": 0.29121577739715576, "kl": 0.018963623046875, "learning_rate": 8.147064684269854e-06, "loss": 0.0007598603144288063, "memory(GiB)": 27.09, "reward": 0.4774999737739563, "reward_std": 0.13194613000378014, "rewards/MMContentORM/mean": 0.6175000011920929, "rewards/MMContentORM/std": 0.5826177567243576, "rewards/MMFormatORM/mean": 0.6074999928474426, "rewards/MMFormatORM/std": 0.12490466833114625, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 1330, "train_speed(iter/s)": 0.083346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 368.2, "completions/mean_length": 220.975, "completions/min_length": 138.8, "epoch": 0.6409025444071051, "frac_reward_zero_std": 0.7, "grad_norm": 0.1789148896932602, "kl": 0.01461181640625, "learning_rate": 8.131616371870941e-06, "loss": 0.0005845078732818366, "memory(GiB)": 27.09, "reward": 0.48419997096061707, "reward_std": 0.09079250679351389, "rewards/MMContentORM/mean": 0.6180000066757202, "rewards/MMContentORM/std": 0.5878942906856537, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 1335, "train_speed(iter/s)": 0.083336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.0, "completions/mean_length": 215.825, "completions/min_length": 145.6, "epoch": 0.643302928468555, "frac_reward_zero_std": 0.625, "grad_norm": 0.12938667833805084, "kl": 0.01439208984375, "learning_rate": 8.116118710780936e-06, "loss": 0.0005751181393861771, "memory(GiB)": 27.09, "reward": 0.45569998025894165, "reward_std": 0.15994756268337368, "rewards/MMContentORM/mean": 0.5754999995231629, "rewards/MMContentORM/std": 0.6518269121646881, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 1340, "train_speed(iter/s)": 0.083378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.0, "completions/mean_length": 214.35, "completions/min_length": 154.2, "epoch": 0.6457033125300048, "frac_reward_zero_std": 0.675, "grad_norm": 0.1686517745256424, "kl": 0.013623046875, "learning_rate": 8.100571945215349e-06, "loss": 0.0005452525801956654, "memory(GiB)": 27.09, "reward": 0.4809499800205231, "reward_std": 0.06936717408243567, "rewards/MMContentORM/mean": 0.5954999804496766, "rewards/MMContentORM/std": 0.5270328655838966, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1345, "train_speed(iter/s)": 0.083404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 346.6, "completions/mean_length": 224.625, "completions/min_length": 151.4, "epoch": 0.6481036965914546, "frac_reward_zero_std": 0.65, "grad_norm": 0.06384813040494919, "kl": 0.014715576171875, "learning_rate": 8.08497632016349e-06, "loss": 0.0005893761292099953, "memory(GiB)": 27.09, "reward": 0.4773499846458435, "reward_std": 0.12423865795135498, "rewards/MMContentORM/mean": 0.6190000057220459, "rewards/MMContentORM/std": 0.6496911168098449, "rewards/MMFormatORM/mean": 0.6056249856948852, "rewards/MMFormatORM/std": 0.15690345019102098, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 1350, "train_speed(iter/s)": 0.083396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.2, "completions/mean_length": 216.75, "completions/min_length": 128.2, "epoch": 0.6505040806529044, "frac_reward_zero_std": 0.65, "grad_norm": 0.16834478080272675, "kl": 0.018731689453125, "learning_rate": 8.069332081384604e-06, "loss": 0.0007483120542019605, "memory(GiB)": 27.09, "reward": 0.4377999842166901, "reward_std": 0.0825900660827756, "rewards/MMContentORM/mean": 0.559500002861023, "rewards/MMContentORM/std": 0.7009612798690796, "rewards/MMFormatORM/mean": 0.5849999845027923, "rewards/MMFormatORM/std": 0.16754122078418732, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2577557325363159, "step": 1355, "train_speed(iter/s)": 0.0834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.8, "completions/mean_length": 222.05, "completions/min_length": 152.6, "epoch": 0.6529044647143543, "frac_reward_zero_std": 0.625, "grad_norm": 0.17165519297122955, "kl": 0.02119140625, "learning_rate": 8.053639475404008e-06, "loss": 0.0008492187596857547, "memory(GiB)": 27.09, "reward": 0.45319998264312744, "reward_std": 0.17027131617069244, "rewards/MMContentORM/mean": 0.5979999959468841, "rewards/MMContentORM/std": 0.6957221150398254, "rewards/MMFormatORM/mean": 0.5849999904632568, "rewards/MMFormatORM/std": 0.19430812299251557, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2989355862140656, "step": 1360, "train_speed(iter/s)": 0.083404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.6, "completions/mean_length": 220.6125, "completions/min_length": 154.6, "epoch": 0.6553048487758041, "frac_reward_zero_std": 0.7, "grad_norm": 0.16907748579978943, "kl": 0.0153076171875, "learning_rate": 8.037898749509193e-06, "loss": 0.0006130572408437728, "memory(GiB)": 27.09, "reward": 0.4759499728679657, "reward_std": 0.06738727379124612, "rewards/MMContentORM/mean": 0.5830000102519989, "rewards/MMContentORM/std": 0.5839648485183716, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1365, "train_speed(iter/s)": 0.083414 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.6, "completions/mean_length": 207.2625, "completions/min_length": 122.0, "epoch": 0.6577052328372539, "frac_reward_zero_std": 0.65, "grad_norm": 0.16428223252296448, "kl": 0.019390869140625, "learning_rate": 8.022110151745939e-06, "loss": 0.0007753587327897548, "memory(GiB)": 27.09, "reward": 0.39994998574256896, "reward_std": 0.12126881405711173, "rewards/MMContentORM/mean": 0.45050000548362734, "rewards/MMContentORM/std": 0.7288543343544006, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 1370, "train_speed(iter/s)": 0.083439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.4, "completions/mean_length": 215.875, "completions/min_length": 144.8, "epoch": 0.6601056168987038, "frac_reward_zero_std": 0.7, "grad_norm": 0.0659169927239418, "kl": 0.01590576171875, "learning_rate": 8.006273930914397e-06, "loss": 0.0006364564411342144, "memory(GiB)": 27.09, "reward": 0.5269499838352203, "reward_std": 0.06936717077624052, "rewards/MMContentORM/mean": 0.7104999959468842, "rewards/MMContentORM/std": 0.42932928130030634, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1375, "train_speed(iter/s)": 0.083445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.6, "completions/mean_length": 223.675, "completions/min_length": 133.8, "epoch": 0.6625060009601537, "frac_reward_zero_std": 0.775, "grad_norm": 0.06429751962423325, "kl": 0.018243408203125, "learning_rate": 7.990390336565179e-06, "loss": 0.0007286330219358206, "memory(GiB)": 27.09, "reward": 0.48864997625350953, "reward_std": 0.09581296914257109, "rewards/MMContentORM/mean": 0.643500006198883, "rewards/MMContentORM/std": 0.5242509357631207, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1380, "train_speed(iter/s)": 0.083473 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.8, "completions/mean_length": 216.2125, "completions/min_length": 117.8, "epoch": 0.6649063850216035, "frac_reward_zero_std": 0.725, "grad_norm": 0.11517506837844849, "kl": 0.02274169921875, "learning_rate": 7.97445961899541e-06, "loss": 0.0009097927249968052, "memory(GiB)": 27.09, "reward": 0.5096500039100647, "reward_std": 0.1130663748132065, "rewards/MMContentORM/mean": 0.6960000216960907, "rewards/MMContentORM/std": 0.44011374935507774, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 1385, "train_speed(iter/s)": 0.083494 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.2, "completions/mean_length": 206.375, "completions/min_length": 116.0, "epoch": 0.6673067690830533, "frac_reward_zero_std": 0.5, "grad_norm": 0.27704310417175293, "kl": 0.021282958984375, "learning_rate": 7.958482029244803e-06, "loss": 0.0008504557423293591, "memory(GiB)": 27.09, "reward": 0.37329998016357424, "reward_std": 0.16235171258449554, "rewards/MMContentORM/mean": 0.42700001001358034, "rewards/MMContentORM/std": 0.7586719036102295, "rewards/MMFormatORM/mean": 0.5687499940395355, "rewards/MMFormatORM/std": 0.20804243683815002, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.32006530165672303, "step": 1390, "train_speed(iter/s)": 0.083518 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.4, "completions/mean_length": 225.675, "completions/min_length": 130.6, "epoch": 0.6697071531445031, "frac_reward_zero_std": 0.675, "grad_norm": 0.09009167551994324, "kl": 0.013970947265625, "learning_rate": 7.942457819091686e-06, "loss": 0.0005581377539783716, "memory(GiB)": 27.09, "reward": 0.46669996380805967, "reward_std": 0.07254915833473205, "rewards/MMContentORM/mean": 0.5455000042915344, "rewards/MMContentORM/std": 0.6198044538497924, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1395, "train_speed(iter/s)": 0.083434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.8, "completions/mean_length": 206.9875, "completions/min_length": 137.2, "epoch": 0.672107537205953, "frac_reward_zero_std": 0.675, "grad_norm": 0.22825519740581512, "kl": 0.014642333984375, "learning_rate": 7.926387241049045e-06, "loss": 0.0005855937954038382, "memory(GiB)": 27.09, "reward": 0.48020000457763673, "reward_std": 0.11455129608511924, "rewards/MMContentORM/mean": 0.6080000162124634, "rewards/MMContentORM/std": 0.5722609221935272, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 1400, "train_speed(iter/s)": 0.083454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 357.8, "completions/mean_length": 216.625, "completions/min_length": 137.0, "epoch": 0.6745079212674028, "frac_reward_zero_std": 0.65, "grad_norm": 0.19864213466644287, "kl": 0.014227294921875, "learning_rate": 7.910270548360537e-06, "loss": 0.0005694822408258915, "memory(GiB)": 27.09, "reward": 0.3801499783992767, "reward_std": 0.15648273127153517, "rewards/MMContentORM/mean": 0.40100000500679017, "rewards/MMContentORM/std": 0.7135852456092835, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 1405, "train_speed(iter/s)": 0.083378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.6, "completions/mean_length": 213.2375, "completions/min_length": 123.8, "epoch": 0.6769083053288526, "frac_reward_zero_std": 0.75, "grad_norm": 0.13676653802394867, "kl": 0.015936279296875, "learning_rate": 7.89410799499651e-06, "loss": 0.0006383438128978014, "memory(GiB)": 27.09, "reward": 0.4782499849796295, "reward_std": 0.08407499901950359, "rewards/MMContentORM/mean": 0.6174999952316285, "rewards/MMContentORM/std": 0.6308860540390014, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1410, "train_speed(iter/s)": 0.083388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.6, "completions/mean_length": 219.2125, "completions/min_length": 160.4, "epoch": 0.6793086893903024, "frac_reward_zero_std": 0.65, "grad_norm": 0.21563780307769775, "kl": 0.01568603515625, "learning_rate": 7.877899835649988e-06, "loss": 0.0006277403328567744, "memory(GiB)": 27.09, "reward": 0.49304999113082887, "reward_std": 0.13088545948266983, "rewards/MMContentORM/mean": 0.654500013589859, "rewards/MMContentORM/std": 0.5817828834056854, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 1415, "train_speed(iter/s)": 0.083413 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.2, "completions/mean_length": 214.4375, "completions/min_length": 154.2, "epoch": 0.6817090734517522, "frac_reward_zero_std": 0.675, "grad_norm": 0.08068116009235382, "kl": 0.015985107421875, "learning_rate": 7.86164632573267e-06, "loss": 0.0006394727155566215, "memory(GiB)": 27.09, "reward": 0.5065499901771545, "reward_std": 0.08464068165048957, "rewards/MMContentORM/mean": 0.659500002861023, "rewards/MMContentORM/std": 0.6008779644966126, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1420, "train_speed(iter/s)": 0.083412 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.8, "completions/mean_length": 211.925, "completions/min_length": 142.2, "epoch": 0.6841094575132021, "frac_reward_zero_std": 0.625, "grad_norm": 0.46003836393356323, "kl": 0.015850830078125, "learning_rate": 7.845347721370894e-06, "loss": 0.0006344554014503956, "memory(GiB)": 27.09, "reward": 0.47069998979568484, "reward_std": 0.0646295606624335, "rewards/MMContentORM/mean": 0.5554999947547913, "rewards/MMContentORM/std": 0.6328672289848327, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1425, "train_speed(iter/s)": 0.083437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.0, "completions/mean_length": 209.475, "completions/min_length": 115.6, "epoch": 0.6865098415746519, "frac_reward_zero_std": 0.75, "grad_norm": 0.11785315722227097, "kl": 0.020574951171875, "learning_rate": 7.82900427940161e-06, "loss": 0.0008225988596677781, "memory(GiB)": 27.09, "reward": 0.45914997458457946, "reward_std": 0.1380979523062706, "rewards/MMContentORM/mean": 0.5985000252723693, "rewards/MMContentORM/std": 0.6026765942573548, "rewards/MMFormatORM/mean": 0.5931249976158142, "rewards/MMFormatORM/std": 0.18240466713905334, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2806225776672363, "step": 1430, "train_speed(iter/s)": 0.083462 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.4, "completions/mean_length": 205.8375, "completions/min_length": 148.4, "epoch": 0.6889102256361018, "frac_reward_zero_std": 0.7, "grad_norm": 0.16498683393001556, "kl": 0.016790771484375, "learning_rate": 7.812616257368324e-06, "loss": 0.0006715003866702319, "memory(GiB)": 27.09, "reward": 0.4596499800682068, "reward_std": 0.13795653358101845, "rewards/MMContentORM/mean": 0.5710000157356262, "rewards/MMContentORM/std": 0.6740511536598206, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1435, "train_speed(iter/s)": 0.083488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 359.0, "completions/mean_length": 225.9875, "completions/min_length": 151.2, "epoch": 0.6913106096975516, "frac_reward_zero_std": 0.55, "grad_norm": 0.11214323341846466, "kl": 0.02467041015625, "learning_rate": 7.79618391351705e-06, "loss": 0.0009878157638013364, "memory(GiB)": 27.09, "reward": 0.48644998073577883, "reward_std": 0.15662415251135825, "rewards/MMContentORM/mean": 0.6380000293254853, "rewards/MMContentORM/std": 0.5628958165645599, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 1440, "train_speed(iter/s)": 0.083475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.6, "completions/mean_length": 220.3, "completions/min_length": 129.6, "epoch": 0.6937109937590015, "frac_reward_zero_std": 0.675, "grad_norm": 0.10853149741888046, "kl": 0.0424072265625, "learning_rate": 7.779707506792232e-06, "loss": 0.001695425808429718, "memory(GiB)": 27.09, "reward": 0.5110499858856201, "reward_std": 0.12240018071606755, "rewards/MMContentORM/mean": 0.6995000064373016, "rewards/MMContentORM/std": 0.4983797550201416, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1445, "train_speed(iter/s)": 0.083475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 565.4, "completions/mean_length": 235.8375, "completions/min_length": 151.4, "epoch": 0.6961113778204513, "frac_reward_zero_std": 0.75, "grad_norm": 0.06722735613584518, "kl": 0.01473388671875, "learning_rate": 7.763187296832664e-06, "loss": 0.0005892225075513124, "memory(GiB)": 27.09, "reward": 0.48484996557235716, "reward_std": 0.11080363169312477, "rewards/MMContentORM/mean": 0.634000015258789, "rewards/MMContentORM/std": 0.5153150960803032, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1450, "train_speed(iter/s)": 0.083401 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 451.2, "completions/mean_length": 219.5625, "completions/min_length": 131.4, "epoch": 0.6985117618819011, "frac_reward_zero_std": 0.75, "grad_norm": 0.14000947773456573, "kl": 0.0172607421875, "learning_rate": 7.746623543967406e-06, "loss": 0.0006907809525728226, "memory(GiB)": 27.09, "reward": 0.5349499821662903, "reward_std": 0.06767011939082294, "rewards/MMContentORM/mean": 0.7305000185966491, "rewards/MMContentORM/std": 0.5024080984294415, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1455, "train_speed(iter/s)": 0.083335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.6, "completions/mean_length": 214.1, "completions/min_length": 147.0, "epoch": 0.7009121459433509, "frac_reward_zero_std": 0.7, "grad_norm": 0.1414344757795334, "kl": 0.019146728515625, "learning_rate": 7.730016509211672e-06, "loss": 0.0007662178948521614, "memory(GiB)": 27.09, "reward": 0.5028499722480774, "reward_std": 0.11080363541841506, "rewards/MMContentORM/mean": 0.6790000140666962, "rewards/MMContentORM/std": 0.5045298062264919, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1460, "train_speed(iter/s)": 0.083355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.4, "completions/mean_length": 209.0125, "completions/min_length": 142.8, "epoch": 0.7033125300048008, "frac_reward_zero_std": 0.675, "grad_norm": 0.10340370237827301, "kl": 0.016058349609375, "learning_rate": 7.713366454262724e-06, "loss": 0.0006422744132578373, "memory(GiB)": 27.09, "reward": 0.4182499825954437, "reward_std": 0.10146982565056532, "rewards/MMContentORM/mean": 0.4675000041723251, "rewards/MMContentORM/std": 0.6833849430084229, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1465, "train_speed(iter/s)": 0.083393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.2, "completions/mean_length": 210.225, "completions/min_length": 135.8, "epoch": 0.7057129140662506, "frac_reward_zero_std": 0.8, "grad_norm": 0.076369509100914, "kl": 0.016009521484375, "learning_rate": 7.696673641495747e-06, "loss": 0.0006410168949514628, "memory(GiB)": 27.09, "reward": 0.4791999697685242, "reward_std": 0.09729789346456527, "rewards/MMContentORM/mean": 0.6054999947547912, "rewards/MMContentORM/std": 0.6232686996459961, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 1470, "train_speed(iter/s)": 0.083415 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 327.4, "completions/mean_length": 211.225, "completions/min_length": 125.2, "epoch": 0.7081132981277004, "frac_reward_zero_std": 0.75, "grad_norm": 0.15498086810112, "kl": 0.016680908203125, "learning_rate": 7.679938333959709e-06, "loss": 0.0006680141203105449, "memory(GiB)": 27.09, "reward": 0.5133999764919281, "reward_std": 0.11455130055546761, "rewards/MMContentORM/mean": 0.6910000026226044, "rewards/MMContentORM/std": 0.5742799043655396, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 1475, "train_speed(iter/s)": 0.083418 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.6, "completions/mean_length": 209.6875, "completions/min_length": 144.0, "epoch": 0.7105136821891502, "frac_reward_zero_std": 0.775, "grad_norm": 0.16933397948741913, "kl": 0.018133544921875, "learning_rate": 7.663160795373221e-06, "loss": 0.0007249978370964527, "memory(GiB)": 27.09, "reward": 0.5215499997138977, "reward_std": 0.06682158990297467, "rewards/MMContentORM/mean": 0.697000014781952, "rewards/MMContentORM/std": 0.49735930785536764, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1480, "train_speed(iter/s)": 0.083447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.2, "completions/mean_length": 214.2625, "completions/min_length": 126.0, "epoch": 0.7129140662506, "frac_reward_zero_std": 0.625, "grad_norm": 0.37113457918167114, "kl": 0.031427001953125, "learning_rate": 7.64634129012038e-06, "loss": 0.0012564392760396003, "memory(GiB)": 27.09, "reward": 0.4060499906539917, "reward_std": 0.15563419908285142, "rewards/MMContentORM/mean": 0.4945000231266022, "rewards/MMContentORM/std": 0.71776123046875, "rewards/MMFormatORM/mean": 0.5768749713897705, "rewards/MMFormatORM/std": 0.1856150358915329, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.285561603307724, "step": 1485, "train_speed(iter/s)": 0.083439 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.2, "completions/mean_length": 209.775, "completions/min_length": 127.6, "epoch": 0.7153144503120499, "frac_reward_zero_std": 0.675, "grad_norm": 0.16112570464611053, "kl": 0.01470947265625, "learning_rate": 7.629480083246607e-06, "loss": 0.0005889590363949537, "memory(GiB)": 27.09, "reward": 0.4991499841213226, "reward_std": 0.07926666894927621, "rewards/MMContentORM/mean": 0.6409999907016755, "rewards/MMContentORM/std": 0.5213750995695591, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1490, "train_speed(iter/s)": 0.083445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 350.0, "completions/mean_length": 211.7375, "completions/min_length": 126.8, "epoch": 0.7177148343734998, "frac_reward_zero_std": 0.775, "grad_norm": 0.47146689891815186, "kl": 0.03209228515625, "learning_rate": 7.61257744045446e-06, "loss": 0.0012816525064408778, "memory(GiB)": 27.09, "reward": 0.4565499722957611, "reward_std": 0.04914391748607159, "rewards/MMContentORM/mean": 0.5345000147819519, "rewards/MMContentORM/std": 0.6424328684806824, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1495, "train_speed(iter/s)": 0.083444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.0, "completions/mean_length": 205.65, "completions/min_length": 124.8, "epoch": 0.7201152184349496, "frac_reward_zero_std": 0.7, "grad_norm": 0.1686468869447708, "kl": 0.016204833984375, "learning_rate": 7.595633628099459e-06, "loss": 0.0006481107324361801, "memory(GiB)": 27.09, "reward": 0.44354997873306273, "reward_std": 0.16298812627792358, "rewards/MMContentORM/mean": 0.5594999849796295, "rewards/MMContentORM/std": 0.6292815625667572, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 1500, "train_speed(iter/s)": 0.083468 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 491.6, "completions/mean_length": 228.2625, "completions/min_length": 140.2, "epoch": 0.7225156024963995, "frac_reward_zero_std": 0.675, "grad_norm": 0.16463321447372437, "kl": 0.014453125, "learning_rate": 7.578648913185877e-06, "loss": 0.0005780975334346294, "memory(GiB)": 27.09, "reward": 0.4644499897956848, "reward_std": 0.1550685167312622, "rewards/MMContentORM/mean": 0.640500009059906, "rewards/MMContentORM/std": 0.6791411757469177, "rewards/MMFormatORM/mean": 0.5768749713897705, "rewards/MMFormatORM/std": 0.2101138174533844, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.3232520341873169, "step": 1505, "train_speed(iter/s)": 0.083311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.6, "completions/mean_length": 198.4125, "completions/min_length": 133.2, "epoch": 0.7249159865578493, "frac_reward_zero_std": 0.7, "grad_norm": 0.10300061106681824, "kl": 0.01964111328125, "learning_rate": 7.561623563362541e-06, "loss": 0.0007859501987695694, "memory(GiB)": 27.09, "reward": 0.54544997215271, "reward_std": 0.05904341547284275, "rewards/MMContentORM/mean": 0.728000009059906, "rewards/MMContentORM/std": 0.4972465097904205, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 1510, "train_speed(iter/s)": 0.083355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 344.4, "completions/mean_length": 214.025, "completions/min_length": 122.0, "epoch": 0.7273163706192991, "frac_reward_zero_std": 0.725, "grad_norm": 0.004715959541499615, "kl": 0.0142822265625, "learning_rate": 7.5445578469186135e-06, "loss": 0.0005710616242140532, "memory(GiB)": 27.09, "reward": 0.4336499750614166, "reward_std": 0.10401540845632554, "rewards/MMContentORM/mean": 0.5060000061988831, "rewards/MMContentORM/std": 0.6872617721557617, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 1515, "train_speed(iter/s)": 0.083352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 456.6, "completions/mean_length": 219.925, "completions/min_length": 135.4, "epoch": 0.7297167546807489, "frac_reward_zero_std": 0.625, "grad_norm": 0.1058562770485878, "kl": 0.0159423828125, "learning_rate": 7.527452032779361e-06, "loss": 0.0006374444346874952, "memory(GiB)": 27.09, "reward": 0.5062999784946441, "reward_std": 0.12628927137702703, "rewards/MMContentORM/mean": 0.7019999921321869, "rewards/MMContentORM/std": 0.45146496072411535, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 1520, "train_speed(iter/s)": 0.083288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 456.0, "completions/mean_length": 212.125, "completions/min_length": 101.8, "epoch": 0.7321171387421987, "frac_reward_zero_std": 0.725, "grad_norm": 0.1445714682340622, "kl": 0.021337890625, "learning_rate": 7.510306390501919e-06, "loss": 0.000853828527033329, "memory(GiB)": 27.09, "reward": 0.4123999834060669, "reward_std": 0.15980613380670547, "rewards/MMContentORM/mean": 0.5535000085830688, "rewards/MMContentORM/std": 0.7459115505218505, "rewards/MMFormatORM/mean": 0.5524999856948852, "rewards/MMFormatORM/std": 0.23411746919155121, "rewards/MMRubricORM/mean": -0.15, "rewards/MMRubricORM/std": 0.3601807415485382, "step": 1525, "train_speed(iter/s)": 0.083215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.2, "completions/mean_length": 202.3625, "completions/min_length": 116.6, "epoch": 0.7345175228036486, "frac_reward_zero_std": 0.7, "grad_norm": 0.20122231543064117, "kl": 0.0188720703125, "learning_rate": 7.493121190271044e-06, "loss": 0.000754462881013751, "memory(GiB)": 27.09, "reward": 0.4821499824523926, "reward_std": 0.1300369380041957, "rewards/MMContentORM/mean": 0.6559999942779541, "rewards/MMContentORM/std": 0.6435462713241578, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.16571036279201506, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 1530, "train_speed(iter/s)": 0.083228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.2, "completions/mean_length": 214.9875, "completions/min_length": 138.0, "epoch": 0.7369179068650984, "frac_reward_zero_std": 0.55, "grad_norm": 0.22068944573402405, "kl": 0.03699951171875, "learning_rate": 7.475896702894854e-06, "loss": 0.0014766624197363853, "memory(GiB)": 27.09, "reward": 0.4285499930381775, "reward_std": 0.1915552258491516, "rewards/MMContentORM/mean": 0.5220000118017196, "rewards/MMContentORM/std": 0.6519321262836456, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.16571036279201506, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 1535, "train_speed(iter/s)": 0.083241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.8, "completions/mean_length": 199.75, "completions/min_length": 128.6, "epoch": 0.7393182909265482, "frac_reward_zero_std": 0.775, "grad_norm": 0.0786973237991333, "kl": 0.015472412109375, "learning_rate": 7.458633199800562e-06, "loss": 0.000618355255573988, "memory(GiB)": 27.09, "reward": 0.48274998664855956, "reward_std": 0.06569022093899549, "rewards/MMContentORM/mean": 0.6000000178813935, "rewards/MMContentORM/std": 0.5159270875155926, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1540, "train_speed(iter/s)": 0.083273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.2, "completions/mean_length": 209.1, "completions/min_length": 136.6, "epoch": 0.741718674987998, "frac_reward_zero_std": 0.625, "grad_norm": 0.16483508050441742, "kl": 0.019476318359375, "learning_rate": 7.4413309530302e-06, "loss": 0.0007791164331138134, "memory(GiB)": 27.09, "reward": 0.4971999883651733, "reward_std": 0.17324115931987763, "rewards/MMContentORM/mean": 0.6955000162124634, "rewards/MMContentORM/std": 0.601590758562088, "rewards/MMFormatORM/mean": 0.5912499845027923, "rewards/MMFormatORM/std": 0.15600111782550813, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.23944272398948668, "step": 1545, "train_speed(iter/s)": 0.083282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.8, "completions/mean_length": 209.175, "completions/min_length": 144.8, "epoch": 0.7441190590494479, "frac_reward_zero_std": 0.725, "grad_norm": 0.11053567379713058, "kl": 0.01842041015625, "learning_rate": 7.423990235236331e-06, "loss": 0.0007370706647634506, "memory(GiB)": 27.09, "reward": 0.48004999160766604, "reward_std": 0.13569379299879075, "rewards/MMContentORM/mean": 0.622000002861023, "rewards/MMContentORM/std": 0.6143165111541748, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 1550, "train_speed(iter/s)": 0.083297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.8, "completions/mean_length": 203.4375, "completions/min_length": 141.0, "epoch": 0.7465194431108978, "frac_reward_zero_std": 0.6, "grad_norm": 0.10629149526357651, "kl": 0.0184814453125, "learning_rate": 7.406611319677756e-06, "loss": 0.000739166047424078, "memory(GiB)": 27.09, "reward": 0.4108499825000763, "reward_std": 0.200747612118721, "rewards/MMContentORM/mean": 0.5065000057220459, "rewards/MMContentORM/std": 0.7548076272010803, "rewards/MMFormatORM/mean": 0.5768749833106994, "rewards/MMFormatORM/std": 0.2062115788459778, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.3172485947608948, "step": 1555, "train_speed(iter/s)": 0.083321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.2, "completions/mean_length": 197.025, "completions/min_length": 133.8, "epoch": 0.7489198271723476, "frac_reward_zero_std": 0.75, "grad_norm": 0.1347576528787613, "kl": 0.01641845703125, "learning_rate": 7.389194480215198e-06, "loss": 0.0006570426747202873, "memory(GiB)": 27.09, "reward": 0.44274998307228086, "reward_std": 0.11108647137880326, "rewards/MMContentORM/mean": 0.557500010728836, "rewards/MMContentORM/std": 0.6644225358963013, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.16571036279201506, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 1560, "train_speed(iter/s)": 0.083368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.2, "completions/mean_length": 200.275, "completions/min_length": 116.2, "epoch": 0.7513202112337974, "frac_reward_zero_std": 0.8, "grad_norm": 0.12396983802318573, "kl": 0.0153076171875, "learning_rate": 7.3717399913069995e-06, "loss": 0.0006124400533735752, "memory(GiB)": 27.09, "reward": 0.5360499680042267, "reward_std": 0.03330472994130105, "rewards/MMContentORM/mean": 0.7045000195503235, "rewards/MMContentORM/std": 0.44892730191349983, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 1565, "train_speed(iter/s)": 0.083404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 431.0, "completions/mean_length": 225.5, "completions/min_length": 150.2, "epoch": 0.7537205952952473, "frac_reward_zero_std": 0.7, "grad_norm": 0.1431017369031906, "kl": 0.017681884765625, "learning_rate": 7.354248128004788e-06, "loss": 0.000707083148881793, "memory(GiB)": 27.09, "reward": 0.45769999623298646, "reward_std": 0.09899494783021509, "rewards/MMContentORM/mean": 0.5805000126361847, "rewards/MMContentORM/std": 0.6738178968429566, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 1570, "train_speed(iter/s)": 0.083352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.4, "completions/mean_length": 211.7625, "completions/min_length": 108.4, "epoch": 0.7561209793566971, "frac_reward_zero_std": 0.75, "grad_norm": 0.11345090717077255, "kl": 0.015887451171875, "learning_rate": 7.336719165949144e-06, "loss": 0.0006354267243295908, "memory(GiB)": 27.09, "reward": 0.444299989938736, "reward_std": 0.14212846513837576, "rewards/MMContentORM/mean": 0.5470000028610229, "rewards/MMContentORM/std": 0.6608891606330871, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 1575, "train_speed(iter/s)": 0.083381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 331.6, "completions/mean_length": 209.275, "completions/min_length": 143.4, "epoch": 0.7585213634181469, "frac_reward_zero_std": 0.7, "grad_norm": 0.00587793905287981, "kl": 0.0219482421875, "learning_rate": 7.319153381365261e-06, "loss": 0.0008785548619925976, "memory(GiB)": 27.09, "reward": 0.5002999901771545, "reward_std": 0.13307749554514886, "rewards/MMContentORM/mean": 0.6869999945163727, "rewards/MMContentORM/std": 0.5800727725028991, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 1580, "train_speed(iter/s)": 0.083389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.6, "completions/mean_length": 203.225, "completions/min_length": 140.6, "epoch": 0.7609217474795967, "frac_reward_zero_std": 0.725, "grad_norm": 0.20501793920993805, "kl": 0.02083740234375, "learning_rate": 7.301551051058586e-06, "loss": 0.0008345272392034531, "memory(GiB)": 27.09, "reward": 0.4959499776363373, "reward_std": 0.06682158932089806, "rewards/MMContentORM/mean": 0.6330000102519989, "rewards/MMContentORM/std": 0.5427005112171173, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1585, "train_speed(iter/s)": 0.083423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.6, "completions/mean_length": 212.8375, "completions/min_length": 142.0, "epoch": 0.7633221315410466, "frac_reward_zero_std": 0.7, "grad_norm": 0.06863018125295639, "kl": 0.0158203125, "learning_rate": 7.283912452410468e-06, "loss": 0.0006327041424810887, "memory(GiB)": 27.09, "reward": 0.43779999017715454, "reward_std": 0.10097484942525625, "rewards/MMContentORM/mean": 0.5019999980926514, "rewards/MMContentORM/std": 0.6665767431259155, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 1590, "train_speed(iter/s)": 0.08342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.0, "completions/mean_length": 209.3875, "completions/min_length": 140.2, "epoch": 0.7657225156024964, "frac_reward_zero_std": 0.675, "grad_norm": 0.11942637711763382, "kl": 0.014971923828125, "learning_rate": 7.266237863373772e-06, "loss": 0.0005985536612570286, "memory(GiB)": 27.09, "reward": 0.48544998168945314, "reward_std": 0.10486393286846578, "rewards/MMContentORM/mean": 0.6105000019073487, "rewards/MMContentORM/std": 0.5260161735117436, "rewards/MMFormatORM/mean": 0.6218749761581421, "rewards/MMFormatORM/std": 0.09190345257520675, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1595, "train_speed(iter/s)": 0.083437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.2, "completions/mean_length": 201.3875, "completions/min_length": 132.4, "epoch": 0.7681228996639462, "frac_reward_zero_std": 0.8, "grad_norm": 0.07546590268611908, "kl": 0.01690673828125, "learning_rate": 7.248527562468513e-06, "loss": 0.0006768060848116875, "memory(GiB)": 27.09, "reward": 0.4923499941825867, "reward_std": 0.07417550361715257, "rewards/MMContentORM/mean": 0.6239999949932098, "rewards/MMContentORM/std": 0.5938864171504974, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1600, "train_speed(iter/s)": 0.083478 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.8, "completions/mean_length": 197.5375, "completions/min_length": 132.2, "epoch": 0.770523283725396, "frac_reward_zero_std": 0.775, "grad_norm": 0.007782844360917807, "kl": 0.015924072265625, "learning_rate": 7.230781828777462e-06, "loss": 0.0006374673917889595, "memory(GiB)": 27.09, "reward": 0.5460999727249145, "reward_std": 0.03719381578266621, "rewards/MMContentORM/mean": 0.7440000057220459, "rewards/MMContentORM/std": 0.44936863109469416, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1605, "train_speed(iter/s)": 0.083452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025, "completions/max_length": 578.0, "completions/mean_length": 221.8625, "completions/min_length": 121.6, "epoch": 0.7729236677868458, "frac_reward_zero_std": 0.75, "grad_norm": 0.005166975781321526, "kl": 0.016876220703125, "learning_rate": 7.213000941941743e-06, "loss": 0.0006743951700627804, "memory(GiB)": 27.09, "reward": 0.4998499691486359, "reward_std": 0.10373256290331483, "rewards/MMContentORM/mean": 0.6714999973773956, "rewards/MMContentORM/std": 0.600242418050766, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1610, "train_speed(iter/s)": 0.08332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.0, "completions/mean_length": 209.65, "completions/min_length": 140.8, "epoch": 0.7753240518482958, "frac_reward_zero_std": 0.675, "grad_norm": 0.07150600850582123, "kl": 0.01649169921875, "learning_rate": 7.195185182156437e-06, "loss": 0.0006602241192013025, "memory(GiB)": 27.09, "reward": 0.47224998474121094, "reward_std": 0.1037325656041503, "rewards/MMContentORM/mean": 0.6025000065565109, "rewards/MMContentORM/std": 0.5951342463493348, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 1615, "train_speed(iter/s)": 0.083336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.0, "completions/mean_length": 205.8625, "completions/min_length": 139.0, "epoch": 0.7777244359097456, "frac_reward_zero_std": 0.675, "grad_norm": 0.15874557197093964, "kl": 0.0163330078125, "learning_rate": 7.177334830166151e-06, "loss": 0.0006535663735121489, "memory(GiB)": 27.09, "reward": 0.4278499722480774, "reward_std": 0.11278353529050947, "rewards/MMContentORM/mean": 0.4915000081062317, "rewards/MMContentORM/std": 0.6723409533500672, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1620, "train_speed(iter/s)": 0.083371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.2, "completions/mean_length": 205.5875, "completions/min_length": 109.4, "epoch": 0.7801248199711954, "frac_reward_zero_std": 0.75, "grad_norm": 0.03448270633816719, "kl": 0.03670654296875, "learning_rate": 7.159450167260613e-06, "loss": 0.0014746349304914474, "memory(GiB)": 27.09, "reward": 0.4049499869346619, "reward_std": 0.1109450563788414, "rewards/MMContentORM/mean": 0.4629999935626984, "rewards/MMContentORM/std": 0.6945199608802796, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.12723276019096375, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.1957427144050598, "step": 1625, "train_speed(iter/s)": 0.083357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.0, "completions/mean_length": 200.1125, "completions/min_length": 129.6, "epoch": 0.7825252040326452, "frac_reward_zero_std": 0.775, "grad_norm": 0.007031524088233709, "kl": 0.0147705078125, "learning_rate": 7.141531475270227e-06, "loss": 0.0005904654040932656, "memory(GiB)": 27.09, "reward": 0.45404996871948244, "reward_std": 0.13682516813278198, "rewards/MMContentORM/mean": 0.5569999933242797, "rewards/MMContentORM/std": 0.6599100232124329, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 1630, "train_speed(iter/s)": 0.08337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.0, "completions/mean_length": 199.3, "completions/min_length": 117.0, "epoch": 0.7849255880940951, "frac_reward_zero_std": 0.7, "grad_norm": 0.28181007504463196, "kl": 0.027081298828125, "learning_rate": 7.123579036561634e-06, "loss": 0.0010821642354130745, "memory(GiB)": 27.09, "reward": 0.4140499770641327, "reward_std": 0.17317044883966445, "rewards/MMContentORM/mean": 0.5144999921321869, "rewards/MMContentORM/std": 0.6127165146172047, "rewards/MMFormatORM/mean": 0.5768749713897705, "rewards/MMFormatORM/std": 0.1856150358915329, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.285561603307724, "step": 1635, "train_speed(iter/s)": 0.083396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 412.6, "completions/mean_length": 211.725, "completions/min_length": 125.8, "epoch": 0.7873259721555449, "frac_reward_zero_std": 0.5, "grad_norm": 0.07750914245843887, "kl": 0.03267822265625, "learning_rate": 7.1055931340332605e-06, "loss": 0.0013033310882747174, "memory(GiB)": 27.09, "reward": 0.40509998202323916, "reward_std": 0.1790394376264885, "rewards/MMContentORM/mean": 0.5065000236034394, "rewards/MMContentORM/std": 0.7263549327850342, "rewards/MMFormatORM/mean": 0.568749976158142, "rewards/MMFormatORM/std": 0.1590408891439438, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.24467830061912538, "step": 1640, "train_speed(iter/s)": 0.083359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.0, "completions/mean_length": 206.0625, "completions/min_length": 132.2, "epoch": 0.7897263562169947, "frac_reward_zero_std": 0.65, "grad_norm": 0.21835249662399292, "kl": 0.015789794921875, "learning_rate": 7.0875740511108695e-06, "loss": 0.0006318403407931328, "memory(GiB)": 27.09, "reward": 0.39799998998641967, "reward_std": 0.1074802316725254, "rewards/MMContentORM/mean": 0.4025000035762787, "rewards/MMContentORM/std": 0.6922868967056275, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1645, "train_speed(iter/s)": 0.083353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.2, "completions/mean_length": 210.0875, "completions/min_length": 118.4, "epoch": 0.7921267402784445, "frac_reward_zero_std": 0.65, "grad_norm": 0.18537920713424683, "kl": 0.01591796875, "learning_rate": 7.06952207174308e-06, "loss": 0.0006365090608596802, "memory(GiB)": 27.09, "reward": 0.47984997630119325, "reward_std": 0.14333054379094393, "rewards/MMContentORM/mean": 0.6215000152587891, "rewards/MMContentORM/std": 0.5664967365562916, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1650, "train_speed(iter/s)": 0.083384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 365.2, "completions/mean_length": 217.1375, "completions/min_length": 146.6, "epoch": 0.7945271243398944, "frac_reward_zero_std": 0.65, "grad_norm": 0.15769225358963013, "kl": 0.020892333984375, "learning_rate": 7.051437480396907e-06, "loss": 0.0008358799852430821, "memory(GiB)": 27.09, "reward": 0.4181499779224396, "reward_std": 0.16383664608001708, "rewards/MMContentORM/mean": 0.4960000038146973, "rewards/MMContentORM/std": 0.7032395720481872, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.18630690574645997, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 1655, "train_speed(iter/s)": 0.083365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.6, "completions/mean_length": 208.675, "completions/min_length": 115.8, "epoch": 0.7969275084013442, "frac_reward_zero_std": 0.625, "grad_norm": 0.18478098511695862, "kl": 0.014483642578125, "learning_rate": 7.03332056205327e-06, "loss": 0.0005793534219264984, "memory(GiB)": 27.09, "reward": 0.4719999849796295, "reward_std": 0.09107535546645522, "rewards/MMContentORM/mean": 0.5875000059604645, "rewards/MMContentORM/std": 0.6219356417655945, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1660, "train_speed(iter/s)": 0.083375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 460.6, "completions/mean_length": 215.6625, "completions/min_length": 116.6, "epoch": 0.799327892462794, "frac_reward_zero_std": 0.65, "grad_norm": 0.006300389766693115, "kl": 0.0181884765625, "learning_rate": 7.015171602202502e-06, "loss": 0.0007266091182827949, "memory(GiB)": 27.09, "reward": 0.4193999707698822, "reward_std": 0.1954443134367466, "rewards/MMContentORM/mean": 0.5135000150650739, "rewards/MMContentORM/std": 0.5563328020274639, "rewards/MMFormatORM/mean": 0.5849999845027923, "rewards/MMFormatORM/std": 0.1430424392223358, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.22006530165672303, "step": 1665, "train_speed(iter/s)": 0.083317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.2, "completions/mean_length": 203.8, "completions/min_length": 130.6, "epoch": 0.8017282765242438, "frac_reward_zero_std": 0.55, "grad_norm": 0.11067415028810501, "kl": 0.01673583984375, "learning_rate": 6.996990886839856e-06, "loss": 0.0006691563874483108, "memory(GiB)": 27.09, "reward": 0.42014997601509096, "reward_std": 0.14545186161994933, "rewards/MMContentORM/mean": 0.546000012755394, "rewards/MMContentORM/std": 0.7207041382789612, "rewards/MMFormatORM/mean": 0.5668749928474426, "rewards/MMFormatORM/std": 0.2141141563653946, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.3295581638813019, "step": 1670, "train_speed(iter/s)": 0.083354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.6, "completions/mean_length": 203.7, "completions/min_length": 150.8, "epoch": 0.8041286605856938, "frac_reward_zero_std": 0.6, "grad_norm": 0.10222038626670837, "kl": 0.014013671875, "learning_rate": 6.978778702460994e-06, "loss": 0.0005606257822364568, "memory(GiB)": 27.09, "reward": 0.3979499816894531, "reward_std": 0.12621856052428485, "rewards/MMContentORM/mean": 0.38800000548362734, "rewards/MMContentORM/std": 0.6839123487472534, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1675, "train_speed(iter/s)": 0.083378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.8, "completions/mean_length": 196.65, "completions/min_length": 111.2, "epoch": 0.8065290446471436, "frac_reward_zero_std": 0.6, "grad_norm": 0.15431593358516693, "kl": 0.017578125, "learning_rate": 6.9605353360574745e-06, "loss": 0.0007030891254544258, "memory(GiB)": 27.09, "reward": 0.4542999804019928, "reward_std": 0.09135819002985954, "rewards/MMContentORM/mean": 0.5720000088214874, "rewards/MMContentORM/std": 0.6552067339420319, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 1680, "train_speed(iter/s)": 0.083423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 365.0, "completions/mean_length": 207.6375, "completions/min_length": 120.4, "epoch": 0.8089294287085934, "frac_reward_zero_std": 0.725, "grad_norm": 0.19521333277225494, "kl": 0.018505859375, "learning_rate": 6.9422610751122276e-06, "loss": 0.0007405009120702744, "memory(GiB)": 27.09, "reward": 0.4330499887466431, "reward_std": 0.10316687764134258, "rewards/MMContentORM/mean": 0.504500013589859, "rewards/MMContentORM/std": 0.6536614775657654, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 1685, "train_speed(iter/s)": 0.083404 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.2, "completions/mean_length": 188.7375, "completions/min_length": 112.4, "epoch": 0.8113298127700432, "frac_reward_zero_std": 0.75, "grad_norm": 0.12136948853731155, "kl": 0.01881103515625, "learning_rate": 6.923956207595028e-06, "loss": 0.000752145517617464, "memory(GiB)": 27.09, "reward": 0.47574997544288633, "reward_std": 0.06710443496704102, "rewards/MMContentORM/mean": 0.5825000107288361, "rewards/MMContentORM/std": 0.5429187417030334, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1690, "train_speed(iter/s)": 0.083447 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.6, "completions/mean_length": 198.775, "completions/min_length": 138.2, "epoch": 0.8137301968314931, "frac_reward_zero_std": 0.775, "grad_norm": 0.17351680994033813, "kl": 0.02083740234375, "learning_rate": 6.905621021957953e-06, "loss": 0.0008334385231137276, "memory(GiB)": 27.09, "reward": 0.4406499922275543, "reward_std": 0.07785245906561614, "rewards/MMContentORM/mean": 0.5235000073909759, "rewards/MMContentORM/std": 0.6393161118030548, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.09680812656879426, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.14893558621406555, "step": 1695, "train_speed(iter/s)": 0.083453 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.2, "completions/mean_length": 199.2375, "completions/min_length": 135.0, "epoch": 0.8161305808929429, "frac_reward_zero_std": 0.625, "grad_norm": 0.15735894441604614, "kl": 0.015130615234375, "learning_rate": 6.887255807130844e-06, "loss": 0.00060483543202281, "memory(GiB)": 27.09, "reward": 0.479749983549118, "reward_std": 0.07672108160331845, "rewards/MMContentORM/mean": 0.5925000190734864, "rewards/MMContentORM/std": 0.5261063687503338, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1700, "train_speed(iter/s)": 0.083481 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.8, "completions/mean_length": 207.0, "completions/min_length": 138.0, "epoch": 0.8185309649543927, "frac_reward_zero_std": 0.625, "grad_norm": 0.0804097130894661, "kl": 0.0136962890625, "learning_rate": 6.868860852516742e-06, "loss": 0.0005484659224748612, "memory(GiB)": 27.09, "reward": 0.42814998626708983, "reward_std": 0.09652007222175599, "rewards/MMContentORM/mean": 0.4634999930858612, "rewards/MMContentORM/std": 0.6531470894813538, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1705, "train_speed(iter/s)": 0.083426 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.2, "completions/mean_length": 209.5125, "completions/min_length": 132.2, "epoch": 0.8209313490158425, "frac_reward_zero_std": 0.625, "grad_norm": 0.0899352878332138, "kl": 0.017169189453125, "learning_rate": 6.85043644798734e-06, "loss": 0.0006862088106572628, "memory(GiB)": 27.09, "reward": 0.38989998400211334, "reward_std": 0.15853333994746208, "rewards/MMContentORM/mean": 0.41100001335144043, "rewards/MMContentORM/std": 0.6764008283615113, "rewards/MMFormatORM/mean": 0.6012499868869782, "rewards/MMFormatORM/std": 0.12313776612281799, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.1894427239894867, "step": 1710, "train_speed(iter/s)": 0.083445 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.2, "completions/mean_length": 208.825, "completions/min_length": 143.2, "epoch": 0.8233317330772923, "frac_reward_zero_std": 0.675, "grad_norm": 0.18190357089042664, "kl": 0.015631103515625, "learning_rate": 6.831982883878406e-06, "loss": 0.0006255049258470536, "memory(GiB)": 27.09, "reward": 0.4102999925613403, "reward_std": 0.12628926811739802, "rewards/MMContentORM/mean": 0.46200000643730166, "rewards/MMContentORM/std": 0.688849925994873, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 1715, "train_speed(iter/s)": 0.083444 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.4, "completions/mean_length": 202.925, "completions/min_length": 106.0, "epoch": 0.8257321171387422, "frac_reward_zero_std": 0.75, "grad_norm": 0.2049115151166916, "kl": 0.015179443359375, "learning_rate": 6.8135004509852135e-06, "loss": 0.0006078362464904785, "memory(GiB)": 27.09, "reward": 0.4344499886035919, "reward_std": 0.07756961137056351, "rewards/MMContentORM/mean": 0.5080000042915345, "rewards/MMContentORM/std": 0.6811013698577881, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 1720, "train_speed(iter/s)": 0.083479 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.4, "completions/mean_length": 215.35, "completions/min_length": 131.8, "epoch": 0.828132501200192, "frac_reward_zero_std": 0.65, "grad_norm": 0.1609988808631897, "kl": 0.01397705078125, "learning_rate": 6.794989440557954e-06, "loss": 0.0005596654955297709, "memory(GiB)": 27.09, "reward": 0.45239998698234557, "reward_std": 0.08994398396462203, "rewards/MMContentORM/mean": 0.5384999930858612, "rewards/MMContentORM/std": 0.6075566828250885, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1725, "train_speed(iter/s)": 0.083487 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 453.0, "completions/mean_length": 216.5125, "completions/min_length": 127.8, "epoch": 0.8305328852616418, "frac_reward_zero_std": 0.725, "grad_norm": 0.15101298689842224, "kl": 0.016229248046875, "learning_rate": 6.776450144297152e-06, "loss": 0.0006488990969955921, "memory(GiB)": 27.09, "reward": 0.4963999569416046, "reward_std": 0.10521748885512353, "rewards/MMContentORM/mean": 0.6484999895095825, "rewards/MMContentORM/std": 0.5765063345432282, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 1730, "train_speed(iter/s)": 0.08343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.2, "completions/mean_length": 203.075, "completions/min_length": 117.8, "epoch": 0.8329332693230916, "frac_reward_zero_std": 0.625, "grad_norm": 0.18309247493743896, "kl": 0.03756103515625, "learning_rate": 6.757882854349065e-06, "loss": 0.0015039796009659768, "memory(GiB)": 27.09, "reward": 0.47954997420310974, "reward_std": 0.07700393050909042, "rewards/MMContentORM/mean": 0.5920000076293945, "rewards/MMContentORM/std": 0.6195413947105408, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1735, "train_speed(iter/s)": 0.083452 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.4, "completions/mean_length": 209.7625, "completions/min_length": 123.6, "epoch": 0.8353336533845416, "frac_reward_zero_std": 0.5, "grad_norm": 0.16256070137023926, "kl": 0.0221435546875, "learning_rate": 6.739287863301082e-06, "loss": 0.0008845901116728783, "memory(GiB)": 27.09, "reward": 0.4170499801635742, "reward_std": 0.1987677127122879, "rewards/MMContentORM/mean": 0.5220000147819519, "rewards/MMContentORM/std": 0.6289721466600895, "rewards/MMFormatORM/mean": 0.576874977350235, "rewards/MMFormatORM/std": 0.17944467663764954, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.27606874108314516, "step": 1740, "train_speed(iter/s)": 0.083476 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.2, "completions/mean_length": 205.825, "completions/min_length": 140.0, "epoch": 0.8377340374459914, "frac_reward_zero_std": 0.65, "grad_norm": 0.152576744556427, "kl": 0.0164794921875, "learning_rate": 6.720665464177109e-06, "loss": 0.0006592854391783476, "memory(GiB)": 27.09, "reward": 0.4217999815940857, "reward_std": 0.1360473409295082, "rewards/MMContentORM/mean": 0.4944999933242798, "rewards/MMContentORM/std": 0.6864893198013305, "rewards/MMFormatORM/mean": 0.5974999785423278, "rewards/MMFormatORM/std": 0.16880690604448317, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 1745, "train_speed(iter/s)": 0.083496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.8, "completions/mean_length": 209.375, "completions/min_length": 137.4, "epoch": 0.8401344215074412, "frac_reward_zero_std": 0.6, "grad_norm": 0.20455506443977356, "kl": 0.017779541015625, "learning_rate": 6.702015950432958e-06, "loss": 0.0007104447111487388, "memory(GiB)": 27.09, "reward": 0.44764997959136965, "reward_std": 0.1215516522526741, "rewards/MMContentORM/mean": 0.5409999847412109, "rewards/MMContentORM/std": 0.6508532583713531, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 1750, "train_speed(iter/s)": 0.083506 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.4, "completions/mean_length": 218.05, "completions/min_length": 141.4, "epoch": 0.842534805568891, "frac_reward_zero_std": 0.75, "grad_norm": 0.12836699187755585, "kl": 0.01434326171875, "learning_rate": 6.6833396159517206e-06, "loss": 0.0005732546094805002, "memory(GiB)": 27.09, "reward": 0.49699997901916504, "reward_std": 0.08909545510541647, "rewards/MMContentORM/mean": 0.650000023841858, "rewards/MMContentORM/std": 0.5488846890628338, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 1755, "train_speed(iter/s)": 0.083526 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 370.4, "completions/mean_length": 215.375, "completions/min_length": 154.8, "epoch": 0.8449351896303409, "frac_reward_zero_std": 0.6, "grad_norm": 0.11277302354574203, "kl": 0.0169677734375, "learning_rate": 6.66463675503913e-06, "loss": 0.000678945379331708, "memory(GiB)": 27.09, "reward": 0.3402999937534332, "reward_std": 0.1948786199092865, "rewards/MMContentORM/mean": 0.3445000022649765, "rewards/MMContentORM/std": 0.7665389060974122, "rewards/MMFormatORM/mean": 0.5687499880790711, "rewards/MMFormatORM/std": 0.2142127960920334, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.3295581638813019, "step": 1760, "train_speed(iter/s)": 0.083508 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.0, "completions/mean_length": 209.9125, "completions/min_length": 116.8, "epoch": 0.8473355736917907, "frac_reward_zero_std": 0.65, "grad_norm": 0.1689959019422531, "kl": 0.0170654296875, "learning_rate": 6.645907662418933e-06, "loss": 0.0006823433097451926, "memory(GiB)": 27.09, "reward": 0.45354996919631957, "reward_std": 0.17373612970113755, "rewards/MMContentORM/mean": 0.5845000147819519, "rewards/MMContentORM/std": 0.6833672761917114, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.18630690574645997, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 1765, "train_speed(iter/s)": 0.083507 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.8, "completions/mean_length": 207.425, "completions/min_length": 135.0, "epoch": 0.8497359577532405, "frac_reward_zero_std": 0.7, "grad_norm": 0.1972796618938446, "kl": 0.015631103515625, "learning_rate": 6.627152633228238e-06, "loss": 0.0006257255561649799, "memory(GiB)": 27.09, "reward": 0.4673499882221222, "reward_std": 0.09425733387470245, "rewards/MMContentORM/mean": 0.5615000009536744, "rewards/MMContentORM/std": 0.6091739594936371, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1770, "train_speed(iter/s)": 0.08352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.2, "completions/mean_length": 213.6875, "completions/min_length": 140.4, "epoch": 0.8521363418146903, "frac_reward_zero_std": 0.45, "grad_norm": 0.12356596440076828, "kl": 0.020318603515625, "learning_rate": 6.608371963012872e-06, "loss": 0.00081367501989007, "memory(GiB)": 27.09, "reward": 0.3578499734401703, "reward_std": 0.21842527836561204, "rewards/MMContentORM/mean": 0.4065000042319298, "rewards/MMContentORM/std": 0.7466981053352356, "rewards/MMFormatORM/mean": 0.5568749845027924, "rewards/MMFormatORM/std": 0.19573256969451905, "rewards/MMRubricORM/mean": -0.1375, "rewards/MMRubricORM/std": 0.30068787932395935, "step": 1775, "train_speed(iter/s)": 0.083538 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.2, "completions/mean_length": 218.6375, "completions/min_length": 138.6, "epoch": 0.8545367258761402, "frac_reward_zero_std": 0.7, "grad_norm": 0.0733184739947319, "kl": 0.01422119140625, "learning_rate": 6.589565947722711e-06, "loss": 0.0005693596322089434, "memory(GiB)": 27.09, "reward": 0.5736999750137329, "reward_std": 0.06547808428294957, "rewards/MMContentORM/mean": 0.8130000233650208, "rewards/MMContentORM/std": 0.38130461126565934, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 1780, "train_speed(iter/s)": 0.08354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 469.0, "completions/mean_length": 224.125, "completions/min_length": 131.4, "epoch": 0.85693710993759, "frac_reward_zero_std": 0.475, "grad_norm": 0.12514179944992065, "kl": 0.017791748046875, "learning_rate": 6.570734883707036e-06, "loss": 0.0007113578729331493, "memory(GiB)": 27.09, "reward": 0.45974999070167544, "reward_std": 0.15931115644052624, "rewards/MMContentORM/mean": 0.6, "rewards/MMContentORM/std": 0.6618961155414581, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.16571036279201506, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 1785, "train_speed(iter/s)": 0.083463 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.0, "completions/mean_length": 212.8, "completions/min_length": 120.2, "epoch": 0.8593374939990398, "frac_reward_zero_std": 0.675, "grad_norm": 0.20723937451839447, "kl": 0.01754150390625, "learning_rate": 6.5518790677098385e-06, "loss": 0.0007023832760751248, "memory(GiB)": 27.09, "reward": 0.4733999729156494, "reward_std": 0.12190521762240678, "rewards/MMContentORM/mean": 0.5909999907016754, "rewards/MMContentORM/std": 0.6185662746429443, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1790, "train_speed(iter/s)": 0.083484 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 367.8, "completions/mean_length": 220.6125, "completions/min_length": 132.4, "epoch": 0.8617378780604896, "frac_reward_zero_std": 0.675, "grad_norm": 0.1408216804265976, "kl": 0.013970947265625, "learning_rate": 6.532998796865169e-06, "loss": 0.0005585259757936, "memory(GiB)": 27.09, "reward": 0.4389999687671661, "reward_std": 0.0987121019512415, "rewards/MMContentORM/mean": 0.5050000041723252, "rewards/MMContentORM/std": 0.6495143830776214, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1795, "train_speed(iter/s)": 0.083474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.2, "completions/mean_length": 207.2125, "completions/min_length": 116.4, "epoch": 0.8641382621219396, "frac_reward_zero_std": 0.55, "grad_norm": 0.26537343859672546, "kl": 0.01622314453125, "learning_rate": 6.5140943686924316e-06, "loss": 0.0006490823347121477, "memory(GiB)": 27.09, "reward": 0.4860499739646912, "reward_std": 0.1266428239643574, "rewards/MMContentORM/mean": 0.6245000183582305, "rewards/MMContentORM/std": 0.5797793388366699, "rewards/MMFormatORM/mean": 0.6156249880790711, "rewards/MMFormatORM/std": 0.13036334812641143, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 1800, "train_speed(iter/s)": 0.083505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.8, "completions/mean_length": 209.7875, "completions/min_length": 118.0, "epoch": 0.8665386461833894, "frac_reward_zero_std": 0.825, "grad_norm": 0.08358591049909592, "kl": 0.01490478515625, "learning_rate": 6.495166081091716e-06, "loss": 0.0005963623523712158, "memory(GiB)": 27.09, "reward": 0.4978999674320221, "reward_std": 0.05218447903171182, "rewards/MMContentORM/mean": 0.623499995470047, "rewards/MMContentORM/std": 0.543954461812973, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1805, "train_speed(iter/s)": 0.083454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.2, "completions/mean_length": 210.3375, "completions/min_length": 136.0, "epoch": 0.8689390302448392, "frac_reward_zero_std": 0.525, "grad_norm": 0.16976818442344666, "kl": 0.014532470703125, "learning_rate": 6.476214232339088e-06, "loss": 0.0005812739953398704, "memory(GiB)": 27.09, "reward": 0.3728999882936478, "reward_std": 0.1367544449865818, "rewards/MMContentORM/mean": 0.36850000321865084, "rewards/MMContentORM/std": 0.7042155861854553, "rewards/MMFormatORM/mean": 0.6012499868869782, "rewards/MMFormatORM/std": 0.12313776612281799, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.1894427239894867, "step": 1810, "train_speed(iter/s)": 0.083466 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.6, "completions/mean_length": 202.7375, "completions/min_length": 110.6, "epoch": 0.871339414306289, "frac_reward_zero_std": 0.725, "grad_norm": 0.13253819942474365, "kl": 0.02115478515625, "learning_rate": 6.457239121081898e-06, "loss": 0.0008474783971905708, "memory(GiB)": 27.09, "reward": 0.4685999810695648, "reward_std": 0.10832875426858664, "rewards/MMContentORM/mean": 0.5790000200271607, "rewards/MMContentORM/std": 0.600553035736084, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 1815, "train_speed(iter/s)": 0.083464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.0, "completions/mean_length": 201.4625, "completions/min_length": 92.6, "epoch": 0.8737397983677389, "frac_reward_zero_std": 0.475, "grad_norm": 0.22328393161296844, "kl": 0.03316650390625, "learning_rate": 6.43824104633407e-06, "loss": 0.0013257008045911788, "memory(GiB)": 27.09, "reward": 0.4241999924182892, "reward_std": 0.20619233280885965, "rewards/MMContentORM/mean": 0.5254999876022339, "rewards/MMContentORM/std": 0.6518503844738006, "rewards/MMFormatORM/mean": 0.5849999904632568, "rewards/MMFormatORM/std": 0.16980934143066406, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2612451553344727, "step": 1820, "train_speed(iter/s)": 0.083486 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.8, "completions/mean_length": 211.9125, "completions/min_length": 138.2, "epoch": 0.8761401824291887, "frac_reward_zero_std": 0.775, "grad_norm": 0.13617774844169617, "kl": 0.0123046875, "learning_rate": 6.419220307471395e-06, "loss": 0.0004924539476633072, "memory(GiB)": 27.09, "reward": 0.5035999953746796, "reward_std": 0.07198347002267838, "rewards/MMContentORM/mean": 0.6665000081062317, "rewards/MMContentORM/std": 0.6048071205615997, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1825, "train_speed(iter/s)": 0.083493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.6, "completions/mean_length": 208.5125, "completions/min_length": 145.6, "epoch": 0.8785405664906385, "frac_reward_zero_std": 0.775, "grad_norm": 0.10531944036483765, "kl": 0.013299560546875, "learning_rate": 6.400177204226809e-06, "loss": 0.0005324467085301877, "memory(GiB)": 27.09, "reward": 0.5384999752044678, "reward_std": 0.06321534756571054, "rewards/MMContentORM/mean": 0.725, "rewards/MMContentORM/std": 0.5201015174388885, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1830, "train_speed(iter/s)": 0.083512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.0, "completions/mean_length": 208.0375, "completions/min_length": 131.2, "epoch": 0.8809409505520883, "frac_reward_zero_std": 0.55, "grad_norm": 0.3434266448020935, "kl": 0.020574951171875, "learning_rate": 6.381112036685666e-06, "loss": 0.0008229421451687813, "memory(GiB)": 27.09, "reward": 0.42379998564720156, "reward_std": 0.10040915869176388, "rewards/MMContentORM/mean": 0.46700001060962676, "rewards/MMContentORM/std": 0.6455421566963195, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1835, "train_speed(iter/s)": 0.08351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 485.6, "completions/mean_length": 216.975, "completions/min_length": 137.4, "epoch": 0.8833413346135381, "frac_reward_zero_std": 0.65, "grad_norm": 0.17209282517433167, "kl": 0.0139007568359375, "learning_rate": 6.36202510528102e-06, "loss": 0.0005555123090744019, "memory(GiB)": 27.09, "reward": 0.475549989938736, "reward_std": 0.09567154424730688, "rewards/MMContentORM/mean": 0.5819999992847442, "rewards/MMContentORM/std": 0.6257418870925904, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1840, "train_speed(iter/s)": 0.08343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 367.4, "completions/mean_length": 212.0, "completions/min_length": 143.4, "epoch": 0.885741718674988, "frac_reward_zero_std": 0.725, "grad_norm": 0.20552751421928406, "kl": 0.0138427734375, "learning_rate": 6.342916710788882e-06, "loss": 0.0005536759272217751, "memory(GiB)": 27.09, "reward": 0.4509999692440033, "reward_std": 0.11624835580587387, "rewards/MMContentORM/mean": 0.5349999845027924, "rewards/MMContentORM/std": 0.6666475296020508, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 1845, "train_speed(iter/s)": 0.083409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.4, "completions/mean_length": 212.4875, "completions/min_length": 132.4, "epoch": 0.8881421027364378, "frac_reward_zero_std": 0.75, "grad_norm": 0.14356966316699982, "kl": 0.012799072265625, "learning_rate": 6.323787154323484e-06, "loss": 0.0005117998458445072, "memory(GiB)": 27.09, "reward": 0.5021499991416931, "reward_std": 0.08520636514294892, "rewards/MMContentORM/mean": 0.6485000073909759, "rewards/MMContentORM/std": 0.5814681231975556, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1850, "train_speed(iter/s)": 0.083424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.2, "completions/mean_length": 211.4875, "completions/min_length": 145.2, "epoch": 0.8905424867978876, "frac_reward_zero_std": 0.775, "grad_norm": 0.07189600169658661, "kl": 0.014239501953125, "learning_rate": 6.304636737332534e-06, "loss": 0.0005696343258023262, "memory(GiB)": 27.09, "reward": 0.42489999532699585, "reward_std": 0.13166328519582748, "rewards/MMContentORM/mean": 0.4985000014305115, "rewards/MMContentORM/std": 0.6740443706512451, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.14990466833114624, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23062257766723632, "step": 1855, "train_speed(iter/s)": 0.083428 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.0, "completions/mean_length": 200.425, "completions/min_length": 128.0, "epoch": 0.8929428708593375, "frac_reward_zero_std": 0.75, "grad_norm": 0.12281981855630875, "kl": 0.016815185546875, "learning_rate": 6.285465761592459e-06, "loss": 0.0006720408797264099, "memory(GiB)": 27.09, "reward": 0.485349977016449, "reward_std": 0.06371032111346722, "rewards/MMContentORM/mean": 0.5940000057220459, "rewards/MMContentORM/std": 0.5279915370047092, "rewards/MMFormatORM/mean": 0.6318749785423279, "rewards/MMFormatORM/std": 0.06536335051059723, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1860, "train_speed(iter/s)": 0.083449 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 387.2, "completions/mean_length": 224.4125, "completions/min_length": 148.8, "epoch": 0.8953432549207874, "frac_reward_zero_std": 0.7, "grad_norm": 0.09118235856294632, "kl": 0.0157958984375, "learning_rate": 6.266274529203663e-06, "loss": 0.0006318187341094017, "memory(GiB)": 27.09, "reward": 0.50444997549057, "reward_std": 0.06908432939089834, "rewards/MMContentORM/mean": 0.6829999923706055, "rewards/MMContentORM/std": 0.4913133792579174, "rewards/MMFormatORM/mean": 0.6093749821186065, "rewards/MMFormatORM/std": 0.09063776731491088, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.1394427239894867, "step": 1865, "train_speed(iter/s)": 0.083431 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 524.6, "completions/mean_length": 226.525, "completions/min_length": 128.0, "epoch": 0.8977436389822372, "frac_reward_zero_std": 0.625, "grad_norm": 0.10997837036848068, "kl": 0.01796875, "learning_rate": 6.247063342585753e-06, "loss": 0.0007188735064119101, "memory(GiB)": 27.09, "reward": 0.45734997391700744, "reward_std": 0.12932982593774794, "rewards/MMContentORM/mean": 0.5939999997615815, "rewards/MMContentORM/std": 0.6364952743053436, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.18630690574645997, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 1870, "train_speed(iter/s)": 0.083354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.0, "completions/mean_length": 196.575, "completions/min_length": 120.2, "epoch": 0.900144023043687, "frac_reward_zero_std": 0.775, "grad_norm": 0.15118958055973053, "kl": 0.0134521484375, "learning_rate": 6.227832504472782e-06, "loss": 0.0005381078924983739, "memory(GiB)": 27.09, "reward": 0.4734999716281891, "reward_std": 0.060104073002003135, "rewards/MMContentORM/mean": 0.5625000059604645, "rewards/MMContentORM/std": 0.6234762132167816, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1875, "train_speed(iter/s)": 0.083341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.6, "completions/mean_length": 204.7375, "completions/min_length": 124.2, "epoch": 0.9025444071051368, "frac_reward_zero_std": 0.85, "grad_norm": 0.07756619155406952, "kl": 0.014874267578125, "learning_rate": 6.208582317908473e-06, "loss": 0.000595169048756361, "memory(GiB)": 27.09, "reward": 0.49734997749328613, "reward_std": 0.06653874590992928, "rewards/MMContentORM/mean": 0.6364999890327454, "rewards/MMContentORM/std": 0.5963007152080536, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1880, "train_speed(iter/s)": 0.083348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.0, "completions/mean_length": 205.8, "completions/min_length": 96.0, "epoch": 0.9049447911665867, "frac_reward_zero_std": 0.775, "grad_norm": 0.17429836094379425, "kl": 0.018310546875, "learning_rate": 6.1893130862414506e-06, "loss": 0.0007323648314923048, "memory(GiB)": 27.09, "reward": 0.48079999089241027, "reward_std": 0.029981326917186378, "rewards/MMContentORM/mean": 0.551999980211258, "rewards/MMContentORM/std": 0.5579077005386353, "rewards/MMFormatORM/mean": 0.6499999761581421, "rewards/MMFormatORM/std": 0.0, "rewards/MMRubricORM/mean": 0.0, "rewards/MMRubricORM/std": 0.0, "step": 1885, "train_speed(iter/s)": 0.08336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 331.2, "completions/mean_length": 206.85, "completions/min_length": 128.4, "epoch": 0.9073451752280365, "frac_reward_zero_std": 0.65, "grad_norm": 0.011451794765889645, "kl": 0.0149658203125, "learning_rate": 6.1700251131204525e-06, "loss": 0.0005986175034195185, "memory(GiB)": 27.09, "reward": 0.5000999927520752, "reward_std": 0.08075158959254622, "rewards/MMContentORM/mean": 0.6290000021457672, "rewards/MMContentORM/std": 0.5125225283205509, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1890, "train_speed(iter/s)": 0.08337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.0, "completions/mean_length": 192.5375, "completions/min_length": 97.6, "epoch": 0.9097455592894863, "frac_reward_zero_std": 0.675, "grad_norm": 0.3059964179992676, "kl": 0.030767822265625, "learning_rate": 6.1507187024895475e-06, "loss": 0.0012321647256612778, "memory(GiB)": 27.09, "reward": 0.4602999806404114, "reward_std": 0.14212846592999995, "rewards/MMContentORM/mean": 0.5870000183582306, "rewards/MMContentORM/std": 0.6352852940559387, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 1895, "train_speed(iter/s)": 0.083396 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.6, "completions/mean_length": 197.9125, "completions/min_length": 130.6, "epoch": 0.9121459433509361, "frac_reward_zero_std": 0.625, "grad_norm": 0.16474473476409912, "kl": 0.014501953125, "learning_rate": 6.131394158583351e-06, "loss": 0.0005803803913295269, "memory(GiB)": 27.09, "reward": 0.421099978685379, "reward_std": 0.1438255153596401, "rewards/MMContentORM/mean": 0.489000004529953, "rewards/MMContentORM/std": 0.6867013454437256, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 1900, "train_speed(iter/s)": 0.083417 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.6, "completions/mean_length": 209.8875, "completions/min_length": 146.2, "epoch": 0.914546327412386, "frac_reward_zero_std": 0.725, "grad_norm": 0.12378682941198349, "kl": 0.01851806640625, "learning_rate": 6.112051785922221e-06, "loss": 0.0007428077049553394, "memory(GiB)": 27.09, "reward": 0.4398999661207199, "reward_std": 0.09984347894787789, "rewards/MMContentORM/mean": 0.5235000193119049, "rewards/MMContentORM/std": 0.6574730277061462, "rewards/MMFormatORM/mean": 0.6074999928474426, "rewards/MMFormatORM/std": 0.12490466833114625, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 1905, "train_speed(iter/s)": 0.083366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 474.0, "completions/mean_length": 222.4625, "completions/min_length": 128.4, "epoch": 0.9169467114738358, "frac_reward_zero_std": 0.625, "grad_norm": 0.1761566698551178, "kl": 0.013580322265625, "learning_rate": 6.092691889307469e-06, "loss": 0.0005431583616882562, "memory(GiB)": 27.09, "reward": 0.48289998769760134, "reward_std": 0.14580541402101516, "rewards/MMContentORM/mean": 0.631000018119812, "rewards/MMContentORM/std": 0.6453944146633148, "rewards/MMFormatORM/mean": 0.6074999928474426, "rewards/MMFormatORM/std": 0.14940344989299775, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 1910, "train_speed(iter/s)": 0.083304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.4, "completions/mean_length": 194.9, "completions/min_length": 118.0, "epoch": 0.9193470955352856, "frac_reward_zero_std": 0.75, "grad_norm": 0.09627247601747513, "kl": 0.017974853515625, "learning_rate": 6.073314773816553e-06, "loss": 0.0007188072893768549, "memory(GiB)": 27.09, "reward": 0.5188999831676483, "reward_std": 0.05529574886895716, "rewards/MMContentORM/mean": 0.6759999990463257, "rewards/MMContentORM/std": 0.5570424318313598, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1915, "train_speed(iter/s)": 0.083336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.4, "completions/mean_length": 201.7375, "completions/min_length": 143.8, "epoch": 0.9217474795967355, "frac_reward_zero_std": 0.75, "grad_norm": 0.1766253411769867, "kl": 0.016607666015625, "learning_rate": 6.053920744798267e-06, "loss": 0.0006644959561526775, "memory(GiB)": 27.09, "reward": 0.5365999698638916, "reward_std": 0.04567909436300397, "rewards/MMContentORM/mean": 0.7490000247955322, "rewards/MMContentORM/std": 0.5337904691696167, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 1920, "train_speed(iter/s)": 0.08337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.4, "completions/mean_length": 205.6875, "completions/min_length": 146.4, "epoch": 0.9241478636581854, "frac_reward_zero_std": 0.55, "grad_norm": 0.17899306118488312, "kl": 0.017431640625, "learning_rate": 6.034510107867933e-06, "loss": 0.0006970945280045271, "memory(GiB)": 27.09, "reward": 0.417499977350235, "reward_std": 0.1641901969909668, "rewards/MMContentORM/mean": 0.47999998927116394, "rewards/MMContentORM/std": 0.6940834045410156, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.14990466833114624, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23062257766723632, "step": 1925, "train_speed(iter/s)": 0.083381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 443.6, "completions/mean_length": 226.2625, "completions/min_length": 141.4, "epoch": 0.9265482477196352, "frac_reward_zero_std": 0.65, "grad_norm": 0.10172195732593536, "kl": 0.0140380859375, "learning_rate": 6.015083168902586e-06, "loss": 0.0005614136345684529, "memory(GiB)": 27.09, "reward": 0.4858999788761139, "reward_std": 0.08160012271255254, "rewards/MMContentORM/mean": 0.5935000061988831, "rewards/MMContentORM/std": 0.6041896402835846, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1930, "train_speed(iter/s)": 0.083321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.2, "completions/mean_length": 207.8375, "completions/min_length": 122.4, "epoch": 0.928948631781085, "frac_reward_zero_std": 0.65, "grad_norm": 0.11313877999782562, "kl": 0.015618896484375, "learning_rate": 5.995640234036149e-06, "loss": 0.0006248398683965206, "memory(GiB)": 27.09, "reward": 0.4472499847412109, "reward_std": 0.08761052712798119, "rewards/MMContentORM/mean": 0.5150000095367432, "rewards/MMContentORM/std": 0.6490139365196228, "rewards/MMFormatORM/mean": 0.6218749761581421, "rewards/MMFormatORM/std": 0.09190345257520675, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 1935, "train_speed(iter/s)": 0.083355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.0, "completions/mean_length": 210.6625, "completions/min_length": 144.2, "epoch": 0.9313490158425348, "frac_reward_zero_std": 0.75, "grad_norm": 0.16516950726509094, "kl": 0.0192626953125, "learning_rate": 5.9761816096546135e-06, "loss": 0.0007695911917835474, "memory(GiB)": 27.09, "reward": 0.5396999716758728, "reward_std": 0.05034599886275828, "rewards/MMContentORM/mean": 0.715500009059906, "rewards/MMContentORM/std": 0.4298131003975868, "rewards/MMFormatORM/mean": 0.6399999856948853, "rewards/MMFormatORM/std": 0.03999999761581421, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 1940, "train_speed(iter/s)": 0.08336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.6, "completions/mean_length": 206.6875, "completions/min_length": 145.4, "epoch": 0.9337493999039846, "frac_reward_zero_std": 0.75, "grad_norm": 0.07769346237182617, "kl": 0.01458740234375, "learning_rate": 5.956707602391209e-06, "loss": 0.0005837498232722282, "memory(GiB)": 27.09, "reward": 0.5033499836921692, "reward_std": 0.04334564357995987, "rewards/MMContentORM/mean": 0.6265000075101852, "rewards/MMContentORM/std": 0.4830021485686302, "rewards/MMFormatORM/mean": 0.6381249785423279, "rewards/MMFormatORM/std": 0.04749999791383743, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 1945, "train_speed(iter/s)": 0.083377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.2, "completions/mean_length": 206.6125, "completions/min_length": 136.2, "epoch": 0.9361497839654345, "frac_reward_zero_std": 0.7, "grad_norm": 0.2008439302444458, "kl": 0.015899658203125, "learning_rate": 5.937218519121575e-06, "loss": 0.0006357332691550255, "memory(GiB)": 27.09, "reward": 0.5125499844551087, "reward_std": 0.06936717720236629, "rewards/MMContentORM/mean": 0.6745000123977661, "rewards/MMContentORM/std": 0.5375838339328766, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1950, "train_speed(iter/s)": 0.083384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.6, "completions/mean_length": 209.15, "completions/min_length": 121.4, "epoch": 0.9385501680268843, "frac_reward_zero_std": 0.8, "grad_norm": 0.006351741962134838, "kl": 0.013763427734375, "learning_rate": 5.917714666958917e-06, "loss": 0.0005507726222276687, "memory(GiB)": 27.09, "reward": 0.5436999797821045, "reward_std": 0.04624478132463992, "rewards/MMContentORM/mean": 0.7380000114440918, "rewards/MMContentORM/std": 0.4596146807074547, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 1955, "train_speed(iter/s)": 0.083393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.4, "completions/mean_length": 200.225, "completions/min_length": 137.2, "epoch": 0.9409505520883341, "frac_reward_zero_std": 0.7, "grad_norm": 0.11096161603927612, "kl": 0.01678466796875, "learning_rate": 5.8981963532491746e-06, "loss": 0.000671281386166811, "memory(GiB)": 27.09, "reward": 0.47979997396469115, "reward_std": 0.06363960476592183, "rewards/MMContentORM/mean": 0.6070000171661377, "rewards/MMContentORM/std": 0.6155083239078522, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 1960, "train_speed(iter/s)": 0.083427 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 344.4, "completions/mean_length": 217.85, "completions/min_length": 153.4, "epoch": 0.9433509361497839, "frac_reward_zero_std": 0.7, "grad_norm": 0.1260797083377838, "kl": 0.01507568359375, "learning_rate": 5.878663885566178e-06, "loss": 0.0006023185327649116, "memory(GiB)": 27.09, "reward": 0.49534996747970583, "reward_std": 0.09369164705276489, "rewards/MMContentORM/mean": 0.6315000057220459, "rewards/MMContentORM/std": 0.604107654094696, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1965, "train_speed(iter/s)": 0.083409 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.8, "completions/mean_length": 201.225, "completions/min_length": 153.8, "epoch": 0.9457513202112338, "frac_reward_zero_std": 0.725, "grad_norm": 0.15319658815860748, "kl": 0.017413330078125, "learning_rate": 5.859117571706791e-06, "loss": 0.0006967600900679826, "memory(GiB)": 27.09, "reward": 0.5093499839305877, "reward_std": 0.07728676870465279, "rewards/MMContentORM/mean": 0.6664999902248383, "rewards/MMContentORM/std": 0.5927111029624939, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 1970, "train_speed(iter/s)": 0.083437 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.0, "completions/mean_length": 212.5125, "completions/min_length": 143.6, "epoch": 0.9481517042726836, "frac_reward_zero_std": 0.7, "grad_norm": 0.21331176161766052, "kl": 0.014532470703125, "learning_rate": 5.83955771968608e-06, "loss": 0.0005808803252875805, "memory(GiB)": 27.09, "reward": 0.5360999882221222, "reward_std": 0.05642712083645165, "rewards/MMContentORM/mean": 0.7190000295639039, "rewards/MMContentORM/std": 0.4764695011079311, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 1975, "train_speed(iter/s)": 0.083446 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.4, "completions/mean_length": 208.9375, "completions/min_length": 147.0, "epoch": 0.9505520883341335, "frac_reward_zero_std": 0.725, "grad_norm": 0.12605807185173035, "kl": 0.015679931640625, "learning_rate": 5.819984637732436e-06, "loss": 0.0006278078071773052, "memory(GiB)": 27.09, "reward": 0.5486499905586243, "reward_std": 0.03471893714740872, "rewards/MMContentORM/mean": 0.7360000014305115, "rewards/MMContentORM/std": 0.49632705450057985, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 1980, "train_speed(iter/s)": 0.083456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.0, "completions/mean_length": 218.3625, "completions/min_length": 130.4, "epoch": 0.9529524723955833, "frac_reward_zero_std": 0.7, "grad_norm": 0.10039184242486954, "kl": 0.015283203125, "learning_rate": 5.80039863428274e-06, "loss": 0.0006117623299360276, "memory(GiB)": 27.09, "reward": 0.548499995470047, "reward_std": 0.08810550635680556, "rewards/MMContentORM/mean": 0.7500000119209289, "rewards/MMContentORM/std": 0.4995552241802216, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 1985, "train_speed(iter/s)": 0.083456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.6, "completions/mean_length": 206.6, "completions/min_length": 143.0, "epoch": 0.9553528564570332, "frac_reward_zero_std": 0.825, "grad_norm": 0.16859173774719238, "kl": 0.015777587890625, "learning_rate": 5.780800017977491e-06, "loss": 0.0006312967278063297, "memory(GiB)": 27.09, "reward": 0.5292499780654907, "reward_std": 0.04065863774158061, "rewards/MMContentORM/mean": 0.6875, "rewards/MMContentORM/std": 0.5327123403549194, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 1990, "train_speed(iter/s)": 0.083474 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.6, "completions/mean_length": 211.175, "completions/min_length": 141.4, "epoch": 0.957753240518483, "frac_reward_zero_std": 0.775, "grad_norm": 0.06966093927621841, "kl": 0.020062255859375, "learning_rate": 5.761189097655937e-06, "loss": 0.0008020093664526939, "memory(GiB)": 27.09, "reward": 0.43149998784065247, "reward_std": 0.10479322522878647, "rewards/MMContentORM/mean": 0.5149999976158142, "rewards/MMContentORM/std": 0.6905157566070557, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.17440344989299775, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.2683130085468292, "step": 1995, "train_speed(iter/s)": 0.083493 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.2, "completions/mean_length": 213.875, "completions/min_length": 141.2, "epoch": 0.9601536245799328, "frac_reward_zero_std": 0.65, "grad_norm": 0.13645286858081818, "kl": 0.019488525390625, "learning_rate": 5.7415661823512245e-06, "loss": 0.0007798057049512863, "memory(GiB)": 27.09, "reward": 0.46374998688697816, "reward_std": 0.17232191623188556, "rewards/MMContentORM/mean": 0.6100000143051147, "rewards/MMContentORM/std": 0.59096859395504, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 2000, "train_speed(iter/s)": 0.083497 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.4, "completions/mean_length": 209.8375, "completions/min_length": 144.2, "epoch": 0.9625540086413826, "frac_reward_zero_std": 0.675, "grad_norm": 0.10858377814292908, "kl": 0.017437744140625, "learning_rate": 5.721931581285514e-06, "loss": 0.000697833951562643, "memory(GiB)": 27.09, "reward": 0.4571499764919281, "reward_std": 0.1461589643266052, "rewards/MMContentORM/mean": 0.5935000061988831, "rewards/MMContentORM/std": 0.6896682381629944, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 2005, "train_speed(iter/s)": 0.08346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.2, "completions/mean_length": 212.15, "completions/min_length": 127.2, "epoch": 0.9649543927028325, "frac_reward_zero_std": 0.7, "grad_norm": 0.22721628844738007, "kl": 0.016845703125, "learning_rate": 5.702285603865115e-06, "loss": 0.0006736557465046644, "memory(GiB)": 27.09, "reward": 0.5219999849796295, "reward_std": 0.10691454559564591, "rewards/MMContentORM/mean": 0.7125000119209289, "rewards/MMContentORM/std": 0.590934443473816, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2010, "train_speed(iter/s)": 0.083488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.0, "completions/mean_length": 216.8, "completions/min_length": 152.2, "epoch": 0.9673547767642823, "frac_reward_zero_std": 0.65, "grad_norm": 0.15032155811786652, "kl": 0.01512451171875, "learning_rate": 5.682628559675609e-06, "loss": 0.0006046965718269348, "memory(GiB)": 27.09, "reward": 0.4427499771118164, "reward_std": 0.07700392529368401, "rewards/MMContentORM/mean": 0.4999999850988388, "rewards/MMContentORM/std": 0.6438470005989074, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2015, "train_speed(iter/s)": 0.083505 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 361.4, "completions/mean_length": 219.5875, "completions/min_length": 122.4, "epoch": 0.9697551608257321, "frac_reward_zero_std": 0.55, "grad_norm": 0.17557469010353088, "kl": 0.015576171875, "learning_rate": 5.662960758476965e-06, "loss": 0.0006231794133782387, "memory(GiB)": 27.09, "reward": 0.5015999853610993, "reward_std": 0.08145869905129074, "rewards/MMContentORM/mean": 0.6490000009536743, "rewards/MMContentORM/std": 0.5806757628917694, "rewards/MMFormatORM/mean": 0.6237499833106994, "rewards/MMFormatORM/std": 0.08440345227718353, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2020, "train_speed(iter/s)": 0.083499 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.0, "completions/mean_length": 211.525, "completions/min_length": 150.0, "epoch": 0.9721555448871819, "frac_reward_zero_std": 0.675, "grad_norm": 0.1798633337020874, "kl": 0.016705322265625, "learning_rate": 5.6432825101986725e-06, "loss": 0.0006679143756628036, "memory(GiB)": 27.09, "reward": 0.49559998512268066, "reward_std": 0.12614785209298135, "rewards/MMContentORM/mean": 0.6465000152587891, "rewards/MMContentORM/std": 0.632487416267395, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2025, "train_speed(iter/s)": 0.083521 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.6, "completions/mean_length": 212.1125, "completions/min_length": 137.8, "epoch": 0.9745559289486317, "frac_reward_zero_std": 0.7, "grad_norm": 0.09048443287611008, "kl": 0.01593017578125, "learning_rate": 5.623594124934836e-06, "loss": 0.0006376095581799746, "memory(GiB)": 27.09, "reward": 0.5258999943733216, "reward_std": 0.03973939623683691, "rewards/MMContentORM/mean": 0.6935000121593475, "rewards/MMContentORM/std": 0.48069806694984435, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2030, "train_speed(iter/s)": 0.083527 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 439.4, "completions/mean_length": 219.675, "completions/min_length": 147.2, "epoch": 0.9769563130100816, "frac_reward_zero_std": 0.8, "grad_norm": 0.13792569935321808, "kl": 0.01651611328125, "learning_rate": 5.603895912939312e-06, "loss": 0.0006604710128158331, "memory(GiB)": 27.09, "reward": 0.41819998621940613, "reward_std": 0.08061017030850053, "rewards/MMContentORM/mean": 0.453000009059906, "rewards/MMContentORM/std": 0.6981132864952088, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2035, "train_speed(iter/s)": 0.083488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 390.4, "completions/mean_length": 225.675, "completions/min_length": 144.2, "epoch": 0.9793566970715314, "frac_reward_zero_std": 0.6, "grad_norm": 0.1497989296913147, "kl": 0.01591796875, "learning_rate": 5.584188184620803e-06, "loss": 0.0006368092261254787, "memory(GiB)": 27.09, "reward": 0.43914997577667236, "reward_std": 0.12904698103666307, "rewards/MMContentORM/mean": 0.5485000073909759, "rewards/MMContentORM/std": 0.6871413588523865, "rewards/MMFormatORM/mean": 0.5931249976158142, "rewards/MMFormatORM/std": 0.18240466713905334, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2806225776672363, "step": 2040, "train_speed(iter/s)": 0.083459 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.4, "completions/mean_length": 220.75, "completions/min_length": 157.8, "epoch": 0.9817570811329813, "frac_reward_zero_std": 0.575, "grad_norm": 0.13917264342308044, "kl": 0.01572265625, "learning_rate": 5.564471250537974e-06, "loss": 0.0006287385243922472, "memory(GiB)": 27.09, "reward": 0.4864999830722809, "reward_std": 0.07113494109362364, "rewards/MMContentORM/mean": 0.5950000047683716, "rewards/MMContentORM/std": 0.5641628682613373, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 2045, "train_speed(iter/s)": 0.083475 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.2, "completions/mean_length": 207.475, "completions/min_length": 135.4, "epoch": 0.9841574651944311, "frac_reward_zero_std": 0.6, "grad_norm": 0.14633502066135406, "kl": 0.01990966796875, "learning_rate": 5.544745421394554e-06, "loss": 0.0007959839887917042, "memory(GiB)": 27.09, "reward": 0.5193999826908111, "reward_std": 0.10493464283645153, "rewards/MMContentORM/mean": 0.7060000181198121, "rewards/MMContentORM/std": 0.5711513638496399, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 2050, "train_speed(iter/s)": 0.083485 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 350.4, "completions/mean_length": 219.575, "completions/min_length": 128.2, "epoch": 0.986557849255881, "frac_reward_zero_std": 0.65, "grad_norm": 0.14857056736946106, "kl": 0.01473388671875, "learning_rate": 5.525011008034444e-06, "loss": 0.00058915582485497, "memory(GiB)": 27.09, "reward": 0.44309998750686647, "reward_std": 0.08216580655425787, "rewards/MMContentORM/mean": 0.4865000069141388, "rewards/MMContentORM/std": 0.6088324308395385, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 2055, "train_speed(iter/s)": 0.083477 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.8, "completions/mean_length": 202.225, "completions/min_length": 140.0, "epoch": 0.9889582333173308, "frac_reward_zero_std": 0.575, "grad_norm": 0.2547079622745514, "kl": 0.01441650390625, "learning_rate": 5.505268321436819e-06, "loss": 0.0005766792222857475, "memory(GiB)": 27.09, "reward": 0.4755499839782715, "reward_std": 0.08888332126662135, "rewards/MMContentORM/mean": 0.5820000112056732, "rewards/MMContentORM/std": 0.5622067280113697, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2060, "train_speed(iter/s)": 0.083503 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.4, "completions/mean_length": 209.1, "completions/min_length": 127.8, "epoch": 0.9913586173787806, "frac_reward_zero_std": 0.675, "grad_norm": 0.08104149997234344, "kl": 0.018072509765625, "learning_rate": 5.485517672711221e-06, "loss": 0.0007230919785797596, "memory(GiB)": 27.09, "reward": 0.47184998393058775, "reward_std": 0.10203550313599408, "rewards/MMContentORM/mean": 0.6015000164508819, "rewards/MMContentORM/std": 0.5722574293613434, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2065, "train_speed(iter/s)": 0.083517 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 330.0, "completions/mean_length": 208.3, "completions/min_length": 137.4, "epoch": 0.9937590014402304, "frac_reward_zero_std": 0.875, "grad_norm": 0.12099120765924454, "kl": 0.0142578125, "learning_rate": 5.465759373092664e-06, "loss": 0.0005701714660972356, "memory(GiB)": 27.09, "reward": 0.5222999691963196, "reward_std": 0.03521391893737018, "rewards/MMContentORM/mean": 0.6845000147819519, "rewards/MMContentORM/std": 0.5714206457138061, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 2070, "train_speed(iter/s)": 0.083524 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.6, "completions/mean_length": 210.7875, "completions/min_length": 146.0, "epoch": 0.9961593855016803, "frac_reward_zero_std": 0.6, "grad_norm": 0.1514410525560379, "kl": 0.017779541015625, "learning_rate": 5.445993733936725e-06, "loss": 0.0007106260396540165, "memory(GiB)": 27.09, "reward": 0.48714996576309205, "reward_std": 0.09906565884593874, "rewards/MMContentORM/mean": 0.611000019311905, "rewards/MMContentORM/std": 0.5427431344985962, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2075, "train_speed(iter/s)": 0.083534 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.0, "completions/mean_length": 209.0375, "completions/min_length": 130.2, "epoch": 0.9985597695631301, "frac_reward_zero_std": 0.625, "grad_norm": 0.12781541049480438, "kl": 0.018701171875, "learning_rate": 5.426221066714641e-06, "loss": 0.0007481152191758156, "memory(GiB)": 27.09, "reward": 0.511499959230423, "reward_std": 0.06349818790331482, "rewards/MMContentORM/mean": 0.6575000166893006, "rewards/MMContentORM/std": 0.5641934812068939, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 2080, "train_speed(iter/s)": 0.083547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.0, "completions/mean_length": 215.65, "completions/min_length": 134.6, "epoch": 1.00096015362458, "frac_reward_zero_std": 0.675, "grad_norm": 0.004740873351693153, "kl": 0.0146728515625, "learning_rate": 5.406441683008395e-06, "loss": 0.000586447911337018, "memory(GiB)": 27.09, "reward": 0.42514997720718384, "reward_std": 0.11504627112299204, "rewards/MMContentORM/mean": 0.5135000109672546, "rewards/MMContentORM/std": 0.6854106187820435, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.14121158123016359, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.21724859476089478, "step": 2085, "train_speed(iter/s)": 0.083547 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.8, "completions/mean_length": 222.7, "completions/min_length": 157.2, "epoch": 1.0033605376860297, "frac_reward_zero_std": 0.675, "grad_norm": 0.11715701222419739, "kl": 0.01546630859375, "learning_rate": 5.386655894505816e-06, "loss": 0.0006185109727084636, "memory(GiB)": 27.09, "reward": 0.467849987745285, "reward_std": 0.11221784176304936, "rewards/MMContentORM/mean": 0.5915000081062317, "rewards/MMContentORM/std": 0.6085148751735687, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.09680812656879426, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.14893558621406555, "step": 2090, "train_speed(iter/s)": 0.083558 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.0, "completions/mean_length": 205.6875, "completions/min_length": 148.0, "epoch": 1.0057609217474797, "frac_reward_zero_std": 0.725, "grad_norm": 0.1382003277540207, "kl": 0.014483642578125, "learning_rate": 5.366864012995654e-06, "loss": 0.0005779881961643696, "memory(GiB)": 27.09, "reward": 0.5312999904155731, "reward_std": 0.06321534309536218, "rewards/MMContentORM/mean": 0.7070000171661377, "rewards/MMContentORM/std": 0.48905040323734283, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2095, "train_speed(iter/s)": 0.083579 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025, "completions/max_length": 616.0, "completions/mean_length": 230.3375, "completions/min_length": 131.6, "epoch": 1.0081613058089294, "frac_reward_zero_std": 0.5, "grad_norm": 0.23635810613632202, "kl": 0.0135009765625, "learning_rate": 5.347066350362678e-06, "loss": 0.0005402253940701484, "memory(GiB)": 27.09, "reward": 0.4407999932765961, "reward_std": 0.11045007631182671, "rewards/MMContentORM/mean": 0.5095000088214874, "rewards/MMContentORM/std": 0.6070198595523835, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2100, "train_speed(iter/s)": 0.083454 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.4, "completions/mean_length": 209.8375, "completions/min_length": 126.0, "epoch": 1.0105616898703793, "frac_reward_zero_std": 0.6, "grad_norm": 0.1306273341178894, "kl": 0.014874267578125, "learning_rate": 5.327263218582758e-06, "loss": 0.0005949225276708602, "memory(GiB)": 27.09, "reward": 0.42854997515678406, "reward_std": 0.12537002861499785, "rewards/MMContentORM/mean": 0.496999990940094, "rewards/MMContentORM/std": 0.6504930973052978, "rewards/MMFormatORM/mean": 0.6056249976158142, "rewards/MMFormatORM/std": 0.16487477123737335, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.25, "step": 2105, "train_speed(iter/s)": 0.08341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 330.0, "completions/mean_length": 221.3875, "completions/min_length": 156.0, "epoch": 1.012962073931829, "frac_reward_zero_std": 0.625, "grad_norm": 0.18639177083969116, "kl": 0.02562255859375, "learning_rate": 5.307454929717944e-06, "loss": 0.0010251142084598541, "memory(GiB)": 27.09, "reward": 0.43019998669624326, "reward_std": 0.19318157732486724, "rewards/MMContentORM/mean": 0.540500009059906, "rewards/MMContentORM/std": 0.7170865178108216, "rewards/MMFormatORM/mean": 0.5849999904632568, "rewards/MMFormatORM/std": 0.19430812299251557, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2989355862140656, "step": 2110, "train_speed(iter/s)": 0.08342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.8, "completions/mean_length": 222.8375, "completions/min_length": 147.4, "epoch": 1.015362457993279, "frac_reward_zero_std": 0.575, "grad_norm": 0.10656420886516571, "kl": 0.01639404296875, "learning_rate": 5.28764179591156e-06, "loss": 0.0006556062027812005, "memory(GiB)": 27.09, "reward": 0.4640499770641327, "reward_std": 0.1481388673186302, "rewards/MMContentORM/mean": 0.5820000290870666, "rewards/MMContentORM/std": 0.6475385546684265, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 2115, "train_speed(iter/s)": 0.083433 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.8, "completions/mean_length": 216.5875, "completions/min_length": 150.4, "epoch": 1.0177628420547287, "frac_reward_zero_std": 0.675, "grad_norm": 0.11035939306020737, "kl": 0.017095947265625, "learning_rate": 5.267824129383267e-06, "loss": 0.000684003159403801, "memory(GiB)": 27.09, "reward": 0.5336999654769897, "reward_std": 0.05642711999826133, "rewards/MMContentORM/mean": 0.7130000233650208, "rewards/MMContentORM/std": 0.5324123561382293, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2120, "train_speed(iter/s)": 0.083443 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.8, "completions/mean_length": 222.8125, "completions/min_length": 165.8, "epoch": 1.0201632261161786, "frac_reward_zero_std": 0.575, "grad_norm": 0.12353075295686722, "kl": 0.01419677734375, "learning_rate": 5.248002242424164e-06, "loss": 0.0005672593601047992, "memory(GiB)": 27.09, "reward": 0.5149999797344208, "reward_std": 0.11851109731942415, "rewards/MMContentORM/mean": 0.6949999928474426, "rewards/MMContentORM/std": 0.583082401752472, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2125, "train_speed(iter/s)": 0.08345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 358.8, "completions/mean_length": 225.7, "completions/min_length": 148.6, "epoch": 1.0225636101776283, "frac_reward_zero_std": 0.7, "grad_norm": 0.1546584665775299, "kl": 0.01431884765625, "learning_rate": 5.228176447391848e-06, "loss": 0.0005729184485971928, "memory(GiB)": 27.09, "reward": 0.4828499734401703, "reward_std": 0.1130663676187396, "rewards/MMContentORM/mean": 0.6290000081062317, "rewards/MMContentORM/std": 0.6190735220909118, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.09680812656879426, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.14893558621406555, "step": 2130, "train_speed(iter/s)": 0.083434 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.6, "completions/mean_length": 212.1125, "completions/min_length": 127.6, "epoch": 1.0249639942390782, "frac_reward_zero_std": 0.55, "grad_norm": 0.15606635808944702, "kl": 0.012115478515625, "learning_rate": 5.208347056705506e-06, "loss": 0.0004839696455746889, "memory(GiB)": 27.09, "reward": 0.4859499931335449, "reward_std": 0.10585388457402586, "rewards/MMContentORM/mean": 0.6080000042915344, "rewards/MMContentORM/std": 0.5991616785526276, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2135, "train_speed(iter/s)": 0.083448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 400.6, "completions/mean_length": 233.0375, "completions/min_length": 124.6, "epoch": 1.0273643783005282, "frac_reward_zero_std": 0.725, "grad_norm": 0.10234855115413666, "kl": 0.017791748046875, "learning_rate": 5.188514382840984e-06, "loss": 0.0007121129892766476, "memory(GiB)": 27.09, "reward": 0.42114998698234557, "reward_std": 0.16525085866451264, "rewards/MMContentORM/mean": 0.5035000085830689, "rewards/MMContentORM/std": 0.714464795589447, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.18630690574645997, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 2140, "train_speed(iter/s)": 0.08342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 463.4, "completions/mean_length": 223.875, "completions/min_length": 127.6, "epoch": 1.029764762361978, "frac_reward_zero_std": 0.475, "grad_norm": 0.2525361478328705, "kl": 0.02169189453125, "learning_rate": 5.168678738325863e-06, "loss": 0.0008673015981912613, "memory(GiB)": 27.09, "reward": 0.4111499905586243, "reward_std": 0.17882730215787887, "rewards/MMContentORM/mean": 0.47850002646446227, "rewards/MMContentORM/std": 0.7203184485435485, "rewards/MMFormatORM/mean": 0.5931249976158142, "rewards/MMFormatORM/std": 0.18240466713905334, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2806225776672363, "step": 2145, "train_speed(iter/s)": 0.083368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.6, "completions/mean_length": 216.4375, "completions/min_length": 143.0, "epoch": 1.0321651464234278, "frac_reward_zero_std": 0.6, "grad_norm": 0.11111954599618912, "kl": 0.020556640625, "learning_rate": 5.148840435734542e-06, "loss": 0.0008218312636017799, "memory(GiB)": 27.09, "reward": 0.41229996979236605, "reward_std": 0.22641559094190597, "rewards/MMContentORM/mean": 0.5245000123977661, "rewards/MMContentORM/std": 0.6923137307167053, "rewards/MMFormatORM/mean": 0.5687499940395355, "rewards/MMFormatORM/std": 0.20804243683815002, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.32006530165672303, "step": 2150, "train_speed(iter/s)": 0.083382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.6, "completions/mean_length": 219.275, "completions/min_length": 136.8, "epoch": 1.0345655304848775, "frac_reward_zero_std": 0.675, "grad_norm": 0.20036152005195618, "kl": 0.01781005859375, "learning_rate": 5.128999787683301e-06, "loss": 0.0007116260938346386, "memory(GiB)": 27.09, "reward": 0.46249998211860655, "reward_std": 0.11525840454269201, "rewards/MMContentORM/mean": 0.5799999892711639, "rewards/MMContentORM/std": 0.6871018171310425, "rewards/MMFormatORM/mean": 0.6074999809265137, "rewards/MMFormatORM/std": 0.12120826840400696, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2155, "train_speed(iter/s)": 0.083386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.6, "completions/mean_length": 213.925, "completions/min_length": 129.0, "epoch": 1.0369659145463275, "frac_reward_zero_std": 0.6, "grad_norm": 0.22052158415317535, "kl": 0.021417236328125, "learning_rate": 5.109157106825382e-06, "loss": 0.0008573445491492748, "memory(GiB)": 27.09, "reward": 0.4300999820232391, "reward_std": 0.1565534472465515, "rewards/MMContentORM/mean": 0.5115000009536743, "rewards/MMContentORM/std": 0.7336692571640014, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 2160, "train_speed(iter/s)": 0.083405 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.6, "completions/mean_length": 213.975, "completions/min_length": 139.0, "epoch": 1.0393662986077772, "frac_reward_zero_std": 0.575, "grad_norm": 0.1396639049053192, "kl": 0.014166259765625, "learning_rate": 5.089312705846059e-06, "loss": 0.0005671509075909853, "memory(GiB)": 27.09, "reward": 0.48359997272491456, "reward_std": 0.11030865609645843, "rewards/MMContentORM/mean": 0.6164999902248383, "rewards/MMContentORM/std": 0.6159408092498779, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2165, "train_speed(iter/s)": 0.083423 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025, "completions/max_length": 455.0, "completions/mean_length": 234.1125, "completions/min_length": 146.0, "epoch": 1.0417666826692271, "frac_reward_zero_std": 0.65, "grad_norm": 0.1445954293012619, "kl": 0.015972900390625, "learning_rate": 5.069466897457716e-06, "loss": 0.0006388931069523096, "memory(GiB)": 27.09, "reward": 0.4729499876499176, "reward_std": 0.08181225277949125, "rewards/MMContentORM/mean": 0.5754999935626983, "rewards/MMContentORM/std": 0.6227695643901825, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.05240467190742493, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.08062257766723632, "step": 2170, "train_speed(iter/s)": 0.083381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.8, "completions/mean_length": 215.3875, "completions/min_length": 138.2, "epoch": 1.0441670667306768, "frac_reward_zero_std": 0.725, "grad_norm": 0.1612766534090042, "kl": 0.01553955078125, "learning_rate": 5.049619994394913e-06, "loss": 0.0006216233130544424, "memory(GiB)": 27.09, "reward": 0.5203999698162078, "reward_std": 0.09107535094954074, "rewards/MMContentORM/mean": 0.7085000276565552, "rewards/MMContentORM/std": 0.5656350731849671, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 2175, "train_speed(iter/s)": 0.083399 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 361.6, "completions/mean_length": 226.025, "completions/min_length": 151.8, "epoch": 1.0465674507921268, "frac_reward_zero_std": 0.725, "grad_norm": 0.08171720802783966, "kl": 0.016748046875, "learning_rate": 5.029772309409458e-06, "loss": 0.000670594209805131, "memory(GiB)": 27.09, "reward": 0.48629997968673705, "reward_std": 0.04709330874029547, "rewards/MMContentORM/mean": 0.5945000052452087, "rewards/MMContentORM/std": 0.6320461511611939, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2180, "train_speed(iter/s)": 0.083388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.8, "completions/mean_length": 226.75, "completions/min_length": 144.4, "epoch": 1.0489678348535765, "frac_reward_zero_std": 0.725, "grad_norm": 0.06735818833112717, "kl": 0.01427001953125, "learning_rate": 5.009924155265484e-06, "loss": 0.0005706480704247951, "memory(GiB)": 27.09, "reward": 0.4983999729156494, "reward_std": 0.08202438042499124, "rewards/MMContentORM/mean": 0.653499984741211, "rewards/MMContentORM/std": 0.600805139541626, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2185, "train_speed(iter/s)": 0.083397 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 522.6, "completions/mean_length": 237.8, "completions/min_length": 159.2, "epoch": 1.0513682189150264, "frac_reward_zero_std": 0.65, "grad_norm": 0.2018902599811554, "kl": 0.0145751953125, "learning_rate": 4.9900758447345156e-06, "loss": 0.0005828267894685268, "memory(GiB)": 27.09, "reward": 0.5095999836921692, "reward_std": 0.09220672026276588, "rewards/MMContentORM/mean": 0.6815000116825104, "rewards/MMContentORM/std": 0.4883635245263577, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 2190, "train_speed(iter/s)": 0.083317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.6, "completions/mean_length": 220.4125, "completions/min_length": 160.6, "epoch": 1.0537686029764763, "frac_reward_zero_std": 0.65, "grad_norm": 0.11942193657159805, "kl": 0.01512451171875, "learning_rate": 4.9702276905905435e-06, "loss": 0.0006050709635019302, "memory(GiB)": 27.09, "reward": 0.5128499686717987, "reward_std": 0.1390879033599049, "rewards/MMContentORM/mean": 0.7040000319480896, "rewards/MMContentORM/std": 0.5877701699733734, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2195, "train_speed(iter/s)": 0.083333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.6, "completions/mean_length": 221.0, "completions/min_length": 124.4, "epoch": 1.056168987037926, "frac_reward_zero_std": 0.65, "grad_norm": 0.13161082565784454, "kl": 0.014984130859375, "learning_rate": 4.950380005605088e-06, "loss": 0.0005988968070596457, "memory(GiB)": 27.09, "reward": 0.4813499927520752, "reward_std": 0.07785245627164841, "rewards/MMContentORM/mean": 0.5965000092983246, "rewards/MMContentORM/std": 0.6116379499435425, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2200, "train_speed(iter/s)": 0.083343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 454.6, "completions/mean_length": 228.3125, "completions/min_length": 129.4, "epoch": 1.058569371099376, "frac_reward_zero_std": 0.6, "grad_norm": 0.1304662823677063, "kl": 0.024444580078125, "learning_rate": 4.9305331025422845e-06, "loss": 0.0009780921041965484, "memory(GiB)": 27.09, "reward": 0.47104998826980593, "reward_std": 0.08350930837914347, "rewards/MMContentORM/mean": 0.5995000153779984, "rewards/MMContentORM/std": 0.6014656841754913, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.09680812656879426, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.14893558621406555, "step": 2205, "train_speed(iter/s)": 0.083246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 476.8, "completions/mean_length": 229.775, "completions/min_length": 156.0, "epoch": 1.0609697551608257, "frac_reward_zero_std": 0.6, "grad_norm": 0.06856456398963928, "kl": 0.017633056640625, "learning_rate": 4.9106872941539424e-06, "loss": 0.0007049092557281256, "memory(GiB)": 27.09, "reward": 0.42249998450279236, "reward_std": 0.13392602608073503, "rewards/MMContentORM/mean": 0.49250001311302183, "rewards/MMContentORM/std": 0.633867347240448, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 2210, "train_speed(iter/s)": 0.08319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.4, "completions/mean_length": 217.775, "completions/min_length": 120.0, "epoch": 1.0633701392222756, "frac_reward_zero_std": 0.525, "grad_norm": 0.1842622011899948, "kl": 0.022515869140625, "learning_rate": 4.8908428931746195e-06, "loss": 0.0009016531519591808, "memory(GiB)": 27.09, "reward": 0.38274996876716616, "reward_std": 0.20951574475038798, "rewards/MMContentORM/mean": 0.4525000035762787, "rewards/MMContentORM/std": 0.7619948863983155, "rewards/MMFormatORM/mean": 0.5668749690055848, "rewards/MMFormatORM/std": 0.19322119355201722, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.2978711724281311, "step": 2215, "train_speed(iter/s)": 0.083199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.0, "completions/mean_length": 217.6875, "completions/min_length": 138.0, "epoch": 1.0657705232837253, "frac_reward_zero_std": 0.825, "grad_norm": 0.1208593100309372, "kl": 0.014208984375, "learning_rate": 4.871000212316701e-06, "loss": 0.0005678186193108558, "memory(GiB)": 27.09, "reward": 0.514799976348877, "reward_std": 0.011879390012472868, "rewards/MMContentORM/mean": 0.6370000064373016, "rewards/MMContentORM/std": 0.5178160190582275, "rewards/MMFormatORM/mean": 0.6499999761581421, "rewards/MMFormatORM/std": 0.0, "rewards/MMRubricORM/mean": 0.0, "rewards/MMRubricORM/std": 0.0, "step": 2220, "train_speed(iter/s)": 0.083209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.4, "completions/mean_length": 216.4875, "completions/min_length": 151.2, "epoch": 1.0681709073451753, "frac_reward_zero_std": 0.725, "grad_norm": 0.1443309485912323, "kl": 0.013623046875, "learning_rate": 4.851159564265459e-06, "loss": 0.0005446367897093296, "memory(GiB)": 27.09, "reward": 0.41879999041557314, "reward_std": 0.1261478431522846, "rewards/MMContentORM/mean": 0.45449999570846555, "rewards/MMContentORM/std": 0.6680951356887818, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 2225, "train_speed(iter/s)": 0.083226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.0, "completions/mean_length": 227.0125, "completions/min_length": 129.6, "epoch": 1.070571291406625, "frac_reward_zero_std": 0.575, "grad_norm": 0.10511992871761322, "kl": 0.01990966796875, "learning_rate": 4.831321261674138e-06, "loss": 0.0007973327301442623, "memory(GiB)": 27.09, "reward": 0.4790499657392502, "reward_std": 0.20272751227021218, "rewards/MMContentORM/mean": 0.6770000040531159, "rewards/MMContentORM/std": 0.5581120260059833, "rewards/MMFormatORM/mean": 0.5768749833106994, "rewards/MMFormatORM/std": 0.17163621485233307, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.26405572295188906, "step": 2230, "train_speed(iter/s)": 0.08321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.2, "completions/mean_length": 217.4875, "completions/min_length": 141.2, "epoch": 1.072971675468075, "frac_reward_zero_std": 0.55, "grad_norm": 0.23232097923755646, "kl": 0.01639404296875, "learning_rate": 4.811485617159018e-06, "loss": 0.0006561274174600839, "memory(GiB)": 27.09, "reward": 0.4388999938964844, "reward_std": 0.14424977898597718, "rewards/MMContentORM/mean": 0.5335000038146973, "rewards/MMContentORM/std": 0.7047018647193909, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 2235, "train_speed(iter/s)": 0.083209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 357.8, "completions/mean_length": 219.5125, "completions/min_length": 151.4, "epoch": 1.0753720595295246, "frac_reward_zero_std": 0.725, "grad_norm": 0.08917021751403809, "kl": 0.015106201171875, "learning_rate": 4.791652943294496e-06, "loss": 0.0006044380366802216, "memory(GiB)": 27.09, "reward": 0.5175999701023102, "reward_std": 0.11653119549155236, "rewards/MMContentORM/mean": 0.7015000104904174, "rewards/MMContentORM/std": 0.5376629948616027, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2240, "train_speed(iter/s)": 0.0832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.2, "completions/mean_length": 214.7125, "completions/min_length": 126.2, "epoch": 1.0777724435909746, "frac_reward_zero_std": 0.675, "grad_norm": 0.06148146465420723, "kl": 0.0146240234375, "learning_rate": 4.771823552608153e-06, "loss": 0.0005852002650499344, "memory(GiB)": 27.09, "reward": 0.461699965596199, "reward_std": 0.12600642547477037, "rewards/MMContentORM/mean": 0.5905000075697899, "rewards/MMContentORM/std": 0.5648605763912201, "rewards/MMFormatORM/mean": 0.6012499868869782, "rewards/MMFormatORM/std": 0.12313776612281799, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.1894427239894867, "step": 2245, "train_speed(iter/s)": 0.083202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.8, "completions/mean_length": 219.975, "completions/min_length": 158.6, "epoch": 1.0801728276524245, "frac_reward_zero_std": 0.675, "grad_norm": 0.1749095767736435, "kl": 0.019097900390625, "learning_rate": 4.751997757575837e-06, "loss": 0.0007640034891664982, "memory(GiB)": 27.09, "reward": 0.4941999793052673, "reward_std": 0.10550032928586006, "rewards/MMContentORM/mean": 0.6430000066757202, "rewards/MMContentORM/std": 0.5990769028663635, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2250, "train_speed(iter/s)": 0.083217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.4, "completions/mean_length": 215.775, "completions/min_length": 141.8, "epoch": 1.0825732117138742, "frac_reward_zero_std": 0.775, "grad_norm": 0.12527073919773102, "kl": 0.013714599609375, "learning_rate": 4.732175870616734e-06, "loss": 0.0005480511114001274, "memory(GiB)": 27.09, "reward": 0.5476999878883362, "reward_std": 0.054164378554560244, "rewards/MMContentORM/mean": 0.7479999840259552, "rewards/MMContentORM/std": 0.419530663639307, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2255, "train_speed(iter/s)": 0.083218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.8, "completions/mean_length": 209.775, "completions/min_length": 131.2, "epoch": 1.0849735957753242, "frac_reward_zero_std": 0.75, "grad_norm": 0.00807945616543293, "kl": 0.015020751953125, "learning_rate": 4.71235820408844e-06, "loss": 0.0006013516336679459, "memory(GiB)": 27.09, "reward": 0.5140499889850616, "reward_std": 0.05989194584544748, "rewards/MMContentORM/mean": 0.6495000064373017, "rewards/MMContentORM/std": 0.4968143880367279, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 2260, "train_speed(iter/s)": 0.08322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.4, "completions/mean_length": 217.525, "completions/min_length": 136.4, "epoch": 1.0873739798367739, "frac_reward_zero_std": 0.575, "grad_norm": 0.1432449072599411, "kl": 0.01522216796875, "learning_rate": 4.692545070282057e-06, "loss": 0.0006086730398237705, "memory(GiB)": 27.09, "reward": 0.4586499869823456, "reward_std": 0.15577562851831317, "rewards/MMContentORM/mean": 0.5684999942779541, "rewards/MMContentORM/std": 0.6647186577320099, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2265, "train_speed(iter/s)": 0.083209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.4, "completions/mean_length": 214.075, "completions/min_length": 138.8, "epoch": 1.0897743638982238, "frac_reward_zero_std": 0.6, "grad_norm": 0.16632573306560516, "kl": 0.013531494140625, "learning_rate": 4.672736781417244e-06, "loss": 0.000541134737432003, "memory(GiB)": 27.09, "reward": 0.521150004863739, "reward_std": 0.1035911375656724, "rewards/MMContentORM/mean": 0.6959999978542328, "rewards/MMContentORM/std": 0.4113076165318489, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2270, "train_speed(iter/s)": 0.083217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.4, "completions/mean_length": 212.2875, "completions/min_length": 131.8, "epoch": 1.0921747479596735, "frac_reward_zero_std": 0.6, "grad_norm": 0.10571814328432083, "kl": 0.017926025390625, "learning_rate": 4.652933649637323e-06, "loss": 0.0007169050164520741, "memory(GiB)": 27.09, "reward": 0.48269999623298643, "reward_std": 0.07311483474913985, "rewards/MMContentORM/mean": 0.5855000197887421, "rewards/MMContentORM/std": 0.5871885895729065, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2275, "train_speed(iter/s)": 0.08323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.4, "completions/mean_length": 211.2625, "completions/min_length": 140.2, "epoch": 1.0945751320211234, "frac_reward_zero_std": 0.675, "grad_norm": 0.21430432796478271, "kl": 0.015948486328125, "learning_rate": 4.633135987004349e-06, "loss": 0.0006380814127624034, "memory(GiB)": 27.09, "reward": 0.5692499935626983, "reward_std": 0.02199101869482547, "rewards/MMContentORM/mean": 0.7749999940395356, "rewards/MMContentORM/std": 0.3264094144105911, "rewards/MMFormatORM/mean": 0.6481249809265137, "rewards/MMFormatORM/std": 0.007499998807907105, "rewards/MMRubricORM/mean": 0.0, "rewards/MMRubricORM/std": 0.0, "step": 2280, "train_speed(iter/s)": 0.083226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.8, "completions/mean_length": 211.85, "completions/min_length": 151.2, "epoch": 1.0969755160825732, "frac_reward_zero_std": 0.55, "grad_norm": 0.18268615007400513, "kl": 0.01839599609375, "learning_rate": 4.613344105494186e-06, "loss": 0.0007361322641372681, "memory(GiB)": 27.09, "reward": 0.5117499828338623, "reward_std": 0.12593571692705155, "rewards/MMContentORM/mean": 0.7049999952316284, "rewards/MMContentORM/std": 0.5719310343265533, "rewards/MMFormatORM/mean": 0.6056249856948852, "rewards/MMFormatORM/std": 0.15690345019102098, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 2285, "train_speed(iter/s)": 0.083239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.2, "completions/mean_length": 219.5375, "completions/min_length": 148.2, "epoch": 1.099375900144023, "frac_reward_zero_std": 0.6, "grad_norm": 0.16588640213012695, "kl": 0.017315673828125, "learning_rate": 4.593558316991606e-06, "loss": 0.000692180311307311, "memory(GiB)": 27.09, "reward": 0.4636499762535095, "reward_std": 0.1040154074318707, "rewards/MMContentORM/mean": 0.581000006198883, "rewards/MMContentORM/std": 0.6416810989379883, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.09680812656879426, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.14893558621406555, "step": 2290, "train_speed(iter/s)": 0.083227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 475.2, "completions/mean_length": 220.275, "completions/min_length": 109.0, "epoch": 1.1017762842054728, "frac_reward_zero_std": 0.625, "grad_norm": 0.1836349368095398, "kl": 0.024261474609375, "learning_rate": 4.57377893328536e-06, "loss": 0.0009730796329677105, "memory(GiB)": 27.09, "reward": 0.4206999778747559, "reward_std": 0.19699994921684266, "rewards/MMContentORM/mean": 0.5455000042915344, "rewards/MMContentORM/std": 0.7305709242820739, "rewards/MMFormatORM/mean": 0.568749976158142, "rewards/MMFormatORM/std": 0.21811503469944, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.335561603307724, "step": 2295, "train_speed(iter/s)": 0.083164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.0, "completions/mean_length": 216.825, "completions/min_length": 143.8, "epoch": 1.1041766682669227, "frac_reward_zero_std": 0.725, "grad_norm": 0.13757243752479553, "kl": 0.01383056640625, "learning_rate": 4.554006266063276e-06, "loss": 0.0005533020943403244, "memory(GiB)": 27.09, "reward": 0.49374998807907106, "reward_std": 0.0761553971329704, "rewards/MMContentORM/mean": 0.6274999976158142, "rewards/MMContentORM/std": 0.6148638248443603, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2300, "train_speed(iter/s)": 0.083171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.2, "completions/mean_length": 205.6125, "completions/min_length": 133.4, "epoch": 1.1065770523283724, "frac_reward_zero_std": 0.7, "grad_norm": 0.11595190316438675, "kl": 0.017413330078125, "learning_rate": 4.534240626907338e-06, "loss": 0.0006965134758502245, "memory(GiB)": 27.09, "reward": 0.520749980211258, "reward_std": 0.062296105083078146, "rewards/MMContentORM/mean": 0.6950000166893006, "rewards/MMContentORM/std": 0.579815822839737, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2305, "train_speed(iter/s)": 0.083141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.0, "completions/mean_length": 216.1375, "completions/min_length": 147.8, "epoch": 1.1089774363898224, "frac_reward_zero_std": 0.65, "grad_norm": 0.14087656140327454, "kl": 0.0158203125, "learning_rate": 4.51448232728878e-06, "loss": 0.0006328361108899116, "memory(GiB)": 27.09, "reward": 0.4798499882221222, "reward_std": 0.06993285585194826, "rewards/MMContentORM/mean": 0.6215000033378602, "rewards/MMContentORM/std": 0.5286586560308933, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2310, "train_speed(iter/s)": 0.083141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.0, "completions/mean_length": 211.375, "completions/min_length": 140.0, "epoch": 1.1113778204512723, "frac_reward_zero_std": 0.725, "grad_norm": 0.1471603959798813, "kl": 0.0154541015625, "learning_rate": 4.494731678563182e-06, "loss": 0.0006185553036630153, "memory(GiB)": 27.09, "reward": 0.482699978351593, "reward_std": 0.03804234203416854, "rewards/MMContentORM/mean": 0.5855000197887421, "rewards/MMContentORM/std": 0.5835969924926758, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2315, "train_speed(iter/s)": 0.083155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.8, "completions/mean_length": 208.625, "completions/min_length": 132.4, "epoch": 1.113778204512722, "frac_reward_zero_std": 0.65, "grad_norm": 0.15146136283874512, "kl": 0.016741943359375, "learning_rate": 4.474988991965556e-06, "loss": 0.0006692257709801197, "memory(GiB)": 27.09, "reward": 0.47709997892379763, "reward_std": 0.11455130190588533, "rewards/MMContentORM/mean": 0.6165000081062317, "rewards/MMContentORM/std": 0.6526979386806488, "rewards/MMFormatORM/mean": 0.6074999928474426, "rewards/MMFormatORM/std": 0.12490466833114625, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 2320, "train_speed(iter/s)": 0.083179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.4, "completions/mean_length": 208.6375, "completions/min_length": 136.4, "epoch": 1.116178588574172, "frac_reward_zero_std": 0.6, "grad_norm": 0.11009304225444794, "kl": 0.01671142578125, "learning_rate": 4.455254578605447e-06, "loss": 0.0006676350720226764, "memory(GiB)": 27.09, "reward": 0.499949985742569, "reward_std": 0.053386559477075934, "rewards/MMContentORM/mean": 0.6429999947547913, "rewards/MMContentORM/std": 0.557970917224884, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2325, "train_speed(iter/s)": 0.0832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.6, "completions/mean_length": 215.6625, "completions/min_length": 134.6, "epoch": 1.1185789726356217, "frac_reward_zero_std": 0.6, "grad_norm": 0.15660890936851501, "kl": 0.016387939453125, "learning_rate": 4.435528749462026e-06, "loss": 0.0006562436930835247, "memory(GiB)": 27.09, "reward": 0.4931999921798706, "reward_std": 0.11030865758657456, "rewards/MMContentORM/mean": 0.6404999971389771, "rewards/MMContentORM/std": 0.6184715509414673, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2330, "train_speed(iter/s)": 0.083202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 372.6, "completions/mean_length": 209.925, "completions/min_length": 132.2, "epoch": 1.1209793566970716, "frac_reward_zero_std": 0.65, "grad_norm": 0.005811932031065226, "kl": 0.0166015625, "learning_rate": 4.415811815379198e-06, "loss": 0.000663516204804182, "memory(GiB)": 27.09, "reward": 0.5019499778747558, "reward_std": 0.10755094066262245, "rewards/MMContentORM/mean": 0.6479999959468842, "rewards/MMContentORM/std": 0.5736066222190856, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2335, "train_speed(iter/s)": 0.083186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.2, "completions/mean_length": 204.1875, "completions/min_length": 136.4, "epoch": 1.1233797407585213, "frac_reward_zero_std": 0.725, "grad_norm": 0.159584641456604, "kl": 0.018511962890625, "learning_rate": 4.396104087060689e-06, "loss": 0.0007406437769532203, "memory(GiB)": 27.09, "reward": 0.45104997754096987, "reward_std": 0.08619631510227918, "rewards/MMContentORM/mean": 0.5495000213384629, "rewards/MMContentORM/std": 0.5832443118095398, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2340, "train_speed(iter/s)": 0.083202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 366.2, "completions/mean_length": 221.7875, "completions/min_length": 137.0, "epoch": 1.1257801248199713, "frac_reward_zero_std": 0.6, "grad_norm": 0.13437344133853912, "kl": 0.017962646484375, "learning_rate": 4.376405875065165e-06, "loss": 0.0007189226802438497, "memory(GiB)": 27.09, "reward": 0.4833999931812286, "reward_std": 0.12416795073077083, "rewards/MMContentORM/mean": 0.6160000026226043, "rewards/MMContentORM/std": 0.6286175012588501, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2345, "train_speed(iter/s)": 0.083187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.0, "completions/mean_length": 213.275, "completions/min_length": 141.4, "epoch": 1.128180508881421, "frac_reward_zero_std": 0.675, "grad_norm": 0.14463329315185547, "kl": 0.0177001953125, "learning_rate": 4.35671748980133e-06, "loss": 0.0007080785930156708, "memory(GiB)": 27.09, "reward": 0.5173499882221222, "reward_std": 0.06088189166039228, "rewards/MMContentORM/mean": 0.6864999771118164, "rewards/MMContentORM/std": 0.5560923993587494, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2350, "train_speed(iter/s)": 0.083196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.6, "completions/mean_length": 217.05, "completions/min_length": 154.4, "epoch": 1.130580892942871, "frac_reward_zero_std": 0.725, "grad_norm": 0.0970078557729721, "kl": 0.013482666015625, "learning_rate": 4.337039241523034e-06, "loss": 0.0005393566098064184, "memory(GiB)": 27.09, "reward": 0.5178999722003936, "reward_std": 0.04709331314079464, "rewards/MMContentORM/mean": 0.6734999895095826, "rewards/MMContentORM/std": 0.4659044176340103, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2355, "train_speed(iter/s)": 0.08321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.2, "completions/mean_length": 209.6875, "completions/min_length": 142.0, "epoch": 1.1329812770043206, "frac_reward_zero_std": 0.575, "grad_norm": 0.16535647213459015, "kl": 0.02020263671875, "learning_rate": 4.3173714403243926e-06, "loss": 0.0008074231445789337, "memory(GiB)": 27.09, "reward": 0.4483999729156494, "reward_std": 0.1086115974234417, "rewards/MMContentORM/mean": 0.5285000026226043, "rewards/MMContentORM/std": 0.6377276480197906, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 2360, "train_speed(iter/s)": 0.08323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.4, "completions/mean_length": 208.6125, "completions/min_length": 157.8, "epoch": 1.1353816610657705, "frac_reward_zero_std": 0.55, "grad_norm": 0.1501249223947525, "kl": 0.01739501953125, "learning_rate": 4.2977143961348846e-06, "loss": 0.0006959887687116861, "memory(GiB)": 27.09, "reward": 0.3825499892234802, "reward_std": 0.1649680064059794, "rewards/MMContentORM/mean": 0.4069999992847443, "rewards/MMContentORM/std": 0.696259343624115, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 2365, "train_speed(iter/s)": 0.083254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.4, "completions/mean_length": 211.1875, "completions/min_length": 141.6, "epoch": 1.1377820451272203, "frac_reward_zero_std": 0.525, "grad_norm": 0.12787005305290222, "kl": 0.016790771484375, "learning_rate": 4.278068418714488e-06, "loss": 0.000671594263985753, "memory(GiB)": 27.09, "reward": 0.4533999919891357, "reward_std": 0.10776307452470064, "rewards/MMContentORM/mean": 0.5409999907016754, "rewards/MMContentORM/std": 0.589427363872528, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2370, "train_speed(iter/s)": 0.083268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.6, "completions/mean_length": 211.275, "completions/min_length": 157.4, "epoch": 1.1401824291886702, "frac_reward_zero_std": 0.7, "grad_norm": 0.08513356745243073, "kl": 0.01527099609375, "learning_rate": 4.258433817648778e-06, "loss": 0.0006108290050178766, "memory(GiB)": 27.09, "reward": 0.5273999691009521, "reward_std": 0.08343859422020614, "rewards/MMContentORM/mean": 0.7134999930858612, "rewards/MMContentORM/std": 0.4296311870217323, "rewards/MMFormatORM/mean": 0.6237499833106994, "rewards/MMFormatORM/std": 0.08440345227718353, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2375, "train_speed(iter/s)": 0.08327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.0, "completions/mean_length": 219.425, "completions/min_length": 137.0, "epoch": 1.1425828132501201, "frac_reward_zero_std": 0.55, "grad_norm": 0.14258858561515808, "kl": 0.0161865234375, "learning_rate": 4.238810902344065e-06, "loss": 0.0006477432791143656, "memory(GiB)": 27.09, "reward": 0.45419996976852417, "reward_std": 0.11737972022965551, "rewards/MMContentORM/mean": 0.5430000007152558, "rewards/MMContentORM/std": 0.6464896261692047, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2380, "train_speed(iter/s)": 0.083263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.0, "completions/mean_length": 213.075, "completions/min_length": 133.4, "epoch": 1.1449831973115698, "frac_reward_zero_std": 0.675, "grad_norm": 0.1284060925245285, "kl": 0.01661376953125, "learning_rate": 4.219199982022512e-06, "loss": 0.0006643535569310188, "memory(GiB)": 27.09, "reward": 0.5382999777793884, "reward_std": 0.0634981878567487, "rewards/MMContentORM/mean": 0.7245000004768372, "rewards/MMContentORM/std": 0.5121561586856842, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2385, "train_speed(iter/s)": 0.083274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.8, "completions/mean_length": 217.0625, "completions/min_length": 140.4, "epoch": 1.1473835813730198, "frac_reward_zero_std": 0.725, "grad_norm": 0.14643016457557678, "kl": 0.014398193359375, "learning_rate": 4.199601365717259e-06, "loss": 0.0005763438530266285, "memory(GiB)": 27.09, "reward": 0.506199985742569, "reward_std": 0.033658286277204755, "rewards/MMContentORM/mean": 0.6154999971389771, "rewards/MMContentORM/std": 0.5775948464870453, "rewards/MMFormatORM/mean": 0.6499999761581421, "rewards/MMFormatORM/std": 0.0, "rewards/MMRubricORM/mean": 0.0, "rewards/MMRubricORM/std": 0.0, "step": 2390, "train_speed(iter/s)": 0.083288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 458.2, "completions/mean_length": 222.6375, "completions/min_length": 136.6, "epoch": 1.1497839654344695, "frac_reward_zero_std": 0.625, "grad_norm": 0.16194605827331543, "kl": 0.019708251953125, "learning_rate": 4.180015362267564e-06, "loss": 0.0007893730886280537, "memory(GiB)": 27.09, "reward": 0.4298999786376953, "reward_std": 0.14891668558120727, "rewards/MMContentORM/mean": 0.510999983549118, "rewards/MMContentORM/std": 0.6927963614463806, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 2395, "train_speed(iter/s)": 0.083235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 335.2, "completions/mean_length": 219.5625, "completions/min_length": 139.8, "epoch": 1.1521843494959194, "frac_reward_zero_std": 0.6, "grad_norm": 0.16206054389476776, "kl": 0.016741943359375, "learning_rate": 4.160442280313923e-06, "loss": 0.0006699077785015106, "memory(GiB)": 27.09, "reward": 0.45619996786117556, "reward_std": 0.11398561298847198, "rewards/MMContentORM/mean": 0.5354999899864197, "rewards/MMContentORM/std": 0.6260799109935761, "rewards/MMFormatORM/mean": 0.6237499833106994, "rewards/MMFormatORM/std": 0.07680481374263763, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2400, "train_speed(iter/s)": 0.08323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.8, "completions/mean_length": 203.65, "completions/min_length": 100.8, "epoch": 1.1545847335573691, "frac_reward_zero_std": 0.625, "grad_norm": 0.10279816389083862, "kl": 0.021893310546875, "learning_rate": 4.14088242829321e-06, "loss": 0.0008754994720220566, "memory(GiB)": 27.09, "reward": 0.4607499837875366, "reward_std": 0.09623723030090332, "rewards/MMContentORM/mean": 0.5450000107288361, "rewards/MMContentORM/std": 0.5940271973609924, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2405, "train_speed(iter/s)": 0.083192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.8, "completions/mean_length": 207.1875, "completions/min_length": 132.4, "epoch": 1.156985117618819, "frac_reward_zero_std": 0.625, "grad_norm": 0.17465510964393616, "kl": 0.015850830078125, "learning_rate": 4.121336114433825e-06, "loss": 0.0006340592168271542, "memory(GiB)": 27.09, "reward": 0.39219998121261596, "reward_std": 0.09814641983248293, "rewards/MMContentORM/mean": 0.38800000250339506, "rewards/MMContentORM/std": 0.6825608611106873, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2410, "train_speed(iter/s)": 0.0832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.6, "completions/mean_length": 209.975, "completions/min_length": 150.6, "epoch": 1.1593855016802688, "frac_reward_zero_std": 0.675, "grad_norm": 0.07339280098676682, "kl": 0.021881103515625, "learning_rate": 4.101803646750826e-06, "loss": 0.0008749545551836491, "memory(GiB)": 27.09, "reward": 0.45749999284744264, "reward_std": 0.13420886893291026, "rewards/MMContentORM/mean": 0.5799999952316284, "rewards/MMContentORM/std": 0.5645999349653721, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.10480934381484985, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.16124515533447265, "step": 2415, "train_speed(iter/s)": 0.083212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 379.0, "completions/mean_length": 220.1, "completions/min_length": 119.6, "epoch": 1.1617858857417187, "frac_reward_zero_std": 0.675, "grad_norm": 0.17658497393131256, "kl": 0.021868896484375, "learning_rate": 4.082285333041085e-06, "loss": 0.0008741414174437522, "memory(GiB)": 27.09, "reward": 0.5191999733448028, "reward_std": 0.061801125714555386, "rewards/MMContentORM/mean": 0.7055000066757202, "rewards/MMContentORM/std": 0.5510667979717254, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 2420, "train_speed(iter/s)": 0.083195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.6, "completions/mean_length": 217.0625, "completions/min_length": 155.4, "epoch": 1.1641862698031684, "frac_reward_zero_std": 0.6, "grad_norm": 0.23948155343532562, "kl": 0.022308349609375, "learning_rate": 4.062781480878426e-06, "loss": 0.0008926920592784882, "memory(GiB)": 27.09, "reward": 0.42619999051094054, "reward_std": 0.1429769902024418, "rewards/MMContentORM/mean": 0.5305000066757202, "rewards/MMContentORM/std": 0.6429137587547302, "rewards/MMFormatORM/mean": 0.5849999785423279, "rewards/MMFormatORM/std": 0.1737115800380707, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2672485947608948, "step": 2425, "train_speed(iter/s)": 0.083198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 353.2, "completions/mean_length": 212.85, "completions/min_length": 139.4, "epoch": 1.1665866538646184, "frac_reward_zero_std": 0.65, "grad_norm": 0.22126245498657227, "kl": 0.015814208984375, "learning_rate": 4.0432923976087915e-06, "loss": 0.00063277967274189, "memory(GiB)": 27.09, "reward": 0.4893499791622162, "reward_std": 0.07785245273262262, "rewards/MMContentORM/mean": 0.6165000140666962, "rewards/MMContentORM/std": 0.594619619846344, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2430, "train_speed(iter/s)": 0.083199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.6, "completions/mean_length": 210.85, "completions/min_length": 119.6, "epoch": 1.168987037926068, "frac_reward_zero_std": 0.625, "grad_norm": 0.1325448900461197, "kl": 0.027337646484375, "learning_rate": 4.02381839034539e-06, "loss": 0.0010928992182016374, "memory(GiB)": 27.09, "reward": 0.4188999950885773, "reward_std": 0.14410835653543472, "rewards/MMContentORM/mean": 0.4834999918937683, "rewards/MMContentORM/std": 0.6920648813247681, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.17440344989299775, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.2683130085468292, "step": 2435, "train_speed(iter/s)": 0.083209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.8, "completions/mean_length": 218.55, "completions/min_length": 134.8, "epoch": 1.171387421987518, "frac_reward_zero_std": 0.675, "grad_norm": 0.10885204374790192, "kl": 0.0170654296875, "learning_rate": 4.004359765963852e-06, "loss": 0.0006822014227509498, "memory(GiB)": 27.09, "reward": 0.4416999638080597, "reward_std": 0.15089658349752427, "rewards/MMContentORM/mean": 0.540499997138977, "rewards/MMContentORM/std": 0.6798429071903229, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.14990466833114624, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23062257766723632, "step": 2440, "train_speed(iter/s)": 0.083214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.6, "completions/mean_length": 212.0625, "completions/min_length": 143.8, "epoch": 1.173787806048968, "frac_reward_zero_std": 0.675, "grad_norm": 0.10752154886722565, "kl": 0.0148681640625, "learning_rate": 3.984916831097416e-06, "loss": 0.0005946123506873846, "memory(GiB)": 27.09, "reward": 0.46099998354911803, "reward_std": 0.11511698234826326, "rewards/MMContentORM/mean": 0.5599999904632569, "rewards/MMContentORM/std": 0.6491201877593994, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2445, "train_speed(iter/s)": 0.083234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025, "completions/max_length": 588.4, "completions/mean_length": 236.825, "completions/min_length": 135.4, "epoch": 1.1761881901104176, "frac_reward_zero_std": 0.5, "grad_norm": 0.200127974152565, "kl": 0.017510986328125, "learning_rate": 3.965489892132067e-06, "loss": 0.0007008564192801714, "memory(GiB)": 27.09, "reward": 0.419449982047081, "reward_std": 0.19212091341614723, "rewards/MMContentORM/mean": 0.5280000180006027, "rewards/MMContentORM/std": 0.7196203231811523, "rewards/MMFormatORM/mean": 0.5768749952316284, "rewards/MMFormatORM/std": 0.16249999403953552, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.25, "step": 2450, "train_speed(iter/s)": 0.083144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.8, "completions/mean_length": 215.9375, "completions/min_length": 165.4, "epoch": 1.1785885741718676, "frac_reward_zero_std": 0.55, "grad_norm": 0.1706075668334961, "kl": 0.0157958984375, "learning_rate": 3.9460792552017345e-06, "loss": 0.0006313313730061054, "memory(GiB)": 27.09, "reward": 0.4721999764442444, "reward_std": 0.0885297678411007, "rewards/MMContentORM/mean": 0.5880000114440918, "rewards/MMContentORM/std": 0.6373610079288483, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2455, "train_speed(iter/s)": 0.083165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.8, "completions/mean_length": 211.5625, "completions/min_length": 119.2, "epoch": 1.1809889582333173, "frac_reward_zero_std": 0.725, "grad_norm": 0.13533374667167664, "kl": 0.015020751953125, "learning_rate": 3.9266852261834474e-06, "loss": 0.0006013016682118177, "memory(GiB)": 27.09, "reward": 0.49929999113082885, "reward_std": 0.055861435388214885, "rewards/MMContentORM/mean": 0.6270000100135803, "rewards/MMContentORM/std": 0.5022149316966533, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2460, "train_speed(iter/s)": 0.083172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.0, "completions/mean_length": 219.2375, "completions/min_length": 145.2, "epoch": 1.1833893422947672, "frac_reward_zero_std": 0.6, "grad_norm": 0.10274124890565872, "kl": 0.01768798828125, "learning_rate": 3.9073081106925314e-06, "loss": 0.0007067923899739981, "memory(GiB)": 27.09, "reward": 0.4602999687194824, "reward_std": 0.14382552150636913, "rewards/MMContentORM/mean": 0.5870000004768372, "rewards/MMContentORM/std": 0.646770179271698, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 2465, "train_speed(iter/s)": 0.083177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 393.4, "completions/mean_length": 222.5875, "completions/min_length": 159.4, "epoch": 1.185789726356217, "frac_reward_zero_std": 0.625, "grad_norm": 0.14314013719558716, "kl": 0.013629150390625, "learning_rate": 3.887948214077782e-06, "loss": 0.0005453084595501423, "memory(GiB)": 27.09, "reward": 0.43144997358322146, "reward_std": 0.12409724295139313, "rewards/MMContentORM/mean": 0.47549999356269834, "rewards/MMContentORM/std": 0.6934274673461914, "rewards/MMFormatORM/mean": 0.621874988079071, "rewards/MMFormatORM/std": 0.11249999552965165, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2470, "train_speed(iter/s)": 0.083159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 349.8, "completions/mean_length": 223.725, "completions/min_length": 145.8, "epoch": 1.1881901104176669, "frac_reward_zero_std": 0.75, "grad_norm": 0.004896758124232292, "kl": 0.01409912109375, "learning_rate": 3.8686058414166504e-06, "loss": 0.0005641079042106867, "memory(GiB)": 27.09, "reward": 0.4801999807357788, "reward_std": 0.04567909836769104, "rewards/MMContentORM/mean": 0.6080000057816506, "rewards/MMContentORM/std": 0.5569998919963837, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2475, "train_speed(iter/s)": 0.083132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 483.2, "completions/mean_length": 228.875, "completions/min_length": 141.6, "epoch": 1.1905904944791166, "frac_reward_zero_std": 0.6, "grad_norm": 0.18111135065555573, "kl": 0.018408203125, "learning_rate": 3.849281297510454e-06, "loss": 0.0007354037370532751, "memory(GiB)": 27.09, "reward": 0.41744997948408125, "reward_std": 0.17444324921816587, "rewards/MMContentORM/mean": 0.4980000019073486, "rewards/MMContentORM/std": 0.6551562428474427, "rewards/MMFormatORM/mean": 0.5893749833106995, "rewards/MMFormatORM/std": 0.14223275929689408, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.1957427144050598, "step": 2480, "train_speed(iter/s)": 0.083083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.0, "completions/mean_length": 217.55, "completions/min_length": 147.0, "epoch": 1.1929908785405665, "frac_reward_zero_std": 0.4, "grad_norm": 0.2514013350009918, "kl": 0.01690673828125, "learning_rate": 3.829974886879549e-06, "loss": 0.0006760565564036369, "memory(GiB)": 27.09, "reward": 0.3862499862909317, "reward_std": 0.19424223005771638, "rewards/MMContentORM/mean": 0.4199999988079071, "rewards/MMContentORM/std": 0.7334108114242553, "rewards/MMFormatORM/mean": 0.5893749713897705, "rewards/MMFormatORM/std": 0.1641829013824463, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 2485, "train_speed(iter/s)": 0.083096 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.2, "completions/mean_length": 218.5625, "completions/min_length": 153.8, "epoch": 1.1953912626020164, "frac_reward_zero_std": 0.65, "grad_norm": 0.1668197512626648, "kl": 0.014886474609375, "learning_rate": 3.8106869137585507e-06, "loss": 0.0005956954322755337, "memory(GiB)": 27.09, "reward": 0.48804997801780703, "reward_std": 0.10062129367142916, "rewards/MMContentORM/mean": 0.6295000076293945, "rewards/MMContentORM/std": 0.5976639330387116, "rewards/MMFormatORM/mean": 0.6156249880790711, "rewards/MMFormatORM/std": 0.11690345108509063, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2490, "train_speed(iter/s)": 0.0831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.0, "completions/mean_length": 209.3625, "completions/min_length": 132.4, "epoch": 1.1977916466634662, "frac_reward_zero_std": 0.575, "grad_norm": 0.127728670835495, "kl": 0.02237548828125, "learning_rate": 3.791417682091527e-06, "loss": 0.0008956640027463436, "memory(GiB)": 27.09, "reward": 0.4219499886035919, "reward_std": 0.1320168349891901, "rewards/MMContentORM/mean": 0.505500003695488, "rewards/MMContentORM/std": 0.6720031678676606, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 2495, "train_speed(iter/s)": 0.083119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 453.2, "completions/mean_length": 226.9875, "completions/min_length": 155.8, "epoch": 1.2001920307249159, "frac_reward_zero_std": 0.6, "grad_norm": 0.05174791067838669, "kl": 0.0158203125, "learning_rate": 3.7721674955272204e-06, "loss": 0.0006332498509436846, "memory(GiB)": 27.09, "reward": 0.41349998116493225, "reward_std": 0.19813132584095, "rewards/MMContentORM/mean": 0.5149999976158142, "rewards/MMContentORM/std": 0.6243453428149224, "rewards/MMFormatORM/mean": 0.574999988079071, "rewards/MMFormatORM/std": 0.17490466833114623, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.27006530165672304, "step": 2500, "train_speed(iter/s)": 0.083078 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.8, "completions/mean_length": 216.775, "completions/min_length": 137.4, "epoch": 1.2025924147863658, "frac_reward_zero_std": 0.7, "grad_norm": 1.2531236410140991, "kl": 0.06446533203125, "learning_rate": 3.75293665741425e-06, "loss": 0.002578136883676052, "memory(GiB)": 27.09, "reward": 0.36609998643398284, "reward_std": 0.14919953048229218, "rewards/MMContentORM/mean": 0.351500004529953, "rewards/MMContentORM/std": 0.6848280310630799, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 2505, "train_speed(iter/s)": 0.083051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.2, "completions/mean_length": 205.3, "completions/min_length": 144.8, "epoch": 1.2049927988478157, "frac_reward_zero_std": 0.575, "grad_norm": 0.17887341976165771, "kl": 0.05023193359375, "learning_rate": 3.7337254707963382e-06, "loss": 0.0020127676427364348, "memory(GiB)": 27.09, "reward": 0.47439998388290405, "reward_std": 0.07014498831704259, "rewards/MMContentORM/mean": 0.593500018119812, "rewards/MMContentORM/std": 0.6442030429840088, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2510, "train_speed(iter/s)": 0.083075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.2, "completions/mean_length": 218.0875, "completions/min_length": 142.6, "epoch": 1.2073931829092654, "frac_reward_zero_std": 0.8, "grad_norm": 0.14247167110443115, "kl": 0.014263916015625, "learning_rate": 3.714534238407543e-06, "loss": 0.0005710631608963013, "memory(GiB)": 27.09, "reward": 0.5075999617576599, "reward_std": 0.08768124505877495, "rewards/MMContentORM/mean": 0.6765000104904175, "rewards/MMContentORM/std": 0.5244777373969555, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2515, "train_speed(iter/s)": 0.083085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.6, "completions/mean_length": 210.3625, "completions/min_length": 126.8, "epoch": 1.2097935669707154, "frac_reward_zero_std": 0.625, "grad_norm": 0.09162472188472748, "kl": 0.01552734375, "learning_rate": 3.695363262667468e-06, "loss": 0.0006212275475263596, "memory(GiB)": 27.09, "reward": 0.4254499673843384, "reward_std": 0.13145115275401623, "rewards/MMContentORM/mean": 0.4855000078678131, "rewards/MMContentORM/std": 0.6825006127357482, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2520, "train_speed(iter/s)": 0.083091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.6, "completions/mean_length": 209.475, "completions/min_length": 122.4, "epoch": 1.212193951032165, "frac_reward_zero_std": 0.625, "grad_norm": 0.1547505259513855, "kl": 0.016241455078125, "learning_rate": 3.6762128456765167e-06, "loss": 0.0006503340788185597, "memory(GiB)": 27.09, "reward": 0.4827499806880951, "reward_std": 0.0962372301146388, "rewards/MMContentORM/mean": 0.6000000059604644, "rewards/MMContentORM/std": 0.6012049317359924, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2525, "train_speed(iter/s)": 0.083092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 361.6, "completions/mean_length": 210.95, "completions/min_length": 120.6, "epoch": 1.214594335093615, "frac_reward_zero_std": 0.6, "grad_norm": 0.13495810329914093, "kl": 0.01824951171875, "learning_rate": 3.657083289211119e-06, "loss": 0.0007304124068468809, "memory(GiB)": 27.09, "reward": 0.5092499613761902, "reward_std": 0.1385222177952528, "rewards/MMContentORM/mean": 0.6825000166893005, "rewards/MMContentORM/std": 0.595128345489502, "rewards/MMFormatORM/mean": 0.6156249880790711, "rewards/MMFormatORM/std": 0.13036334812641143, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 2530, "train_speed(iter/s)": 0.083084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.6, "completions/mean_length": 217.025, "completions/min_length": 131.6, "epoch": 1.2169947191550647, "frac_reward_zero_std": 0.675, "grad_norm": 0.2071637213230133, "kl": 0.014971923828125, "learning_rate": 3.637974894718981e-06, "loss": 0.0005985705181956291, "memory(GiB)": 27.09, "reward": 0.5091499626636505, "reward_std": 0.0951058566570282, "rewards/MMContentORM/mean": 0.6659999966621399, "rewards/MMContentORM/std": 0.5581172168254852, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2535, "train_speed(iter/s)": 0.083093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 380.0, "completions/mean_length": 217.8125, "completions/min_length": 125.4, "epoch": 1.2193951032165147, "frac_reward_zero_std": 0.65, "grad_norm": 0.13724660873413086, "kl": 0.017254638671875, "learning_rate": 3.6188879633143363e-06, "loss": 0.0006901083514094352, "memory(GiB)": 27.09, "reward": 0.4133999764919281, "reward_std": 0.16942277988418936, "rewards/MMContentORM/mean": 0.4985000193119049, "rewards/MMContentORM/std": 0.6871892631053924, "rewards/MMFormatORM/mean": 0.5849999904632568, "rewards/MMFormatORM/std": 0.16980934143066406, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2612451553344727, "step": 2540, "train_speed(iter/s)": 0.083084 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 459.0, "completions/mean_length": 222.625, "completions/min_length": 129.4, "epoch": 1.2217954872779644, "frac_reward_zero_std": 0.725, "grad_norm": 0.004516011103987694, "kl": 0.015435791015625, "learning_rate": 3.5998227957731925e-06, "loss": 0.0006180405616760254, "memory(GiB)": 27.09, "reward": 0.4870499849319458, "reward_std": 0.10429824888706207, "rewards/MMContentORM/mean": 0.6394999861717224, "rewards/MMContentORM/std": 0.6266542971134186, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 2545, "train_speed(iter/s)": 0.083042 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 365.0, "completions/mean_length": 215.125, "completions/min_length": 152.6, "epoch": 1.2241958713394143, "frac_reward_zero_std": 0.6, "grad_norm": 0.1654830425977707, "kl": 0.014935302734375, "learning_rate": 3.580779692528606e-06, "loss": 0.0005975381471216678, "memory(GiB)": 27.09, "reward": 0.45944997668266296, "reward_std": 0.12353154704906047, "rewards/MMContentORM/mean": 0.5705000042915345, "rewards/MMContentORM/std": 0.6434941411018371, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 2550, "train_speed(iter/s)": 0.083034 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.4, "completions/mean_length": 214.4125, "completions/min_length": 145.4, "epoch": 1.2265962554008643, "frac_reward_zero_std": 0.65, "grad_norm": 0.13121533393859863, "kl": 0.015008544921875, "learning_rate": 3.56175895366593e-06, "loss": 0.0006003345362842083, "memory(GiB)": 27.09, "reward": 0.4941499769687653, "reward_std": 0.09482301429379732, "rewards/MMContentORM/mean": 0.6285000085830689, "rewards/MMContentORM/std": 0.6090345978736877, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2555, "train_speed(iter/s)": 0.083055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 361.8, "completions/mean_length": 217.95, "completions/min_length": 139.8, "epoch": 1.228996639462314, "frac_reward_zero_std": 0.65, "grad_norm": 0.15669004619121552, "kl": 0.017437744140625, "learning_rate": 3.542760878918104e-06, "loss": 0.0006977845449000597, "memory(GiB)": 27.09, "reward": 0.4942499816417694, "reward_std": 0.11334921540692448, "rewards/MMContentORM/mean": 0.657500022649765, "rewards/MMContentORM/std": 0.5437608852982521, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 2560, "train_speed(iter/s)": 0.083051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.8, "completions/mean_length": 208.3375, "completions/min_length": 120.2, "epoch": 1.2313970235237637, "frac_reward_zero_std": 0.75, "grad_norm": 0.151866614818573, "kl": 0.021160888671875, "learning_rate": 3.5237857676609146e-06, "loss": 0.0008449718356132507, "memory(GiB)": 27.09, "reward": 0.49779998064041137, "reward_std": 0.08174154479056597, "rewards/MMContentORM/mean": 0.6519999980926514, "rewards/MMContentORM/std": 0.4489804258570075, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2565, "train_speed(iter/s)": 0.083065 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 457.8, "completions/mean_length": 224.7875, "completions/min_length": 153.8, "epoch": 1.2337974075852136, "frac_reward_zero_std": 0.725, "grad_norm": 0.10642041265964508, "kl": 0.019122314453125, "learning_rate": 3.504833918908285e-06, "loss": 0.0007648383732885122, "memory(GiB)": 27.09, "reward": 0.4542999804019928, "reward_std": 0.15117942336946727, "rewards/MMContentORM/mean": 0.5719999969005585, "rewards/MMContentORM/std": 0.6259439706802368, "rewards/MMFormatORM/mean": 0.6012499749660491, "rewards/MMFormatORM/std": 0.10254122316837311, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.15775573253631592, "step": 2570, "train_speed(iter/s)": 0.083017 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 367.0, "completions/mean_length": 212.3125, "completions/min_length": 137.8, "epoch": 1.2361977916466635, "frac_reward_zero_std": 0.625, "grad_norm": 0.14486433565616608, "kl": 0.015093994140625, "learning_rate": 3.485905631307569e-06, "loss": 0.0006040884181857109, "memory(GiB)": 27.09, "reward": 0.5060999631881714, "reward_std": 0.14354267725721, "rewards/MMContentORM/mean": 0.7014999866485596, "rewards/MMContentORM/std": 0.598384690284729, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 2575, "train_speed(iter/s)": 0.083015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.2, "completions/mean_length": 209.325, "completions/min_length": 137.4, "epoch": 1.2385981757081133, "frac_reward_zero_std": 0.65, "grad_norm": 0.15318423509597778, "kl": 0.01575927734375, "learning_rate": 3.4670012031348322e-06, "loss": 0.0006300311535596848, "memory(GiB)": 27.09, "reward": 0.47869997620582583, "reward_std": 0.1590990237891674, "rewards/MMContentORM/mean": 0.6330000102519989, "rewards/MMContentORM/std": 0.6443586707115173, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 2580, "train_speed(iter/s)": 0.083024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 444.8, "completions/mean_length": 217.175, "completions/min_length": 122.4, "epoch": 1.2409985597695632, "frac_reward_zero_std": 0.625, "grad_norm": 0.21531488001346588, "kl": 0.01881103515625, "learning_rate": 3.448120932290162e-06, "loss": 0.0007515028119087219, "memory(GiB)": 27.09, "reward": 0.5039499998092651, "reward_std": 0.09963134194258601, "rewards/MMContentORM/mean": 0.653000009059906, "rewards/MMContentORM/std": 0.6097694873809815, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2585, "train_speed(iter/s)": 0.082982 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.8, "completions/mean_length": 211.8375, "completions/min_length": 133.4, "epoch": 1.243398943831013, "frac_reward_zero_std": 0.725, "grad_norm": 0.09009666740894318, "kl": 0.064111328125, "learning_rate": 3.4292651162929646e-06, "loss": 0.0025743709877133368, "memory(GiB)": 27.09, "reward": 0.4833499789237976, "reward_std": 0.08520637114997953, "rewards/MMContentORM/mean": 0.6015000224113465, "rewards/MMContentORM/std": 0.5234865859150887, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2590, "train_speed(iter/s)": 0.082999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.2, "completions/mean_length": 213.6375, "completions/min_length": 128.0, "epoch": 1.2457993278924628, "frac_reward_zero_std": 0.725, "grad_norm": 0.11567575484514236, "kl": 0.02685546875, "learning_rate": 3.4104340522772892e-06, "loss": 0.0010737581178545952, "memory(GiB)": 27.09, "reward": 0.4908499777317047, "reward_std": 0.128339883685112, "rewards/MMContentORM/mean": 0.6490000128746033, "rewards/MMContentORM/std": 0.6057178854942322, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 2595, "train_speed(iter/s)": 0.083011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 487.2, "completions/mean_length": 232.225, "completions/min_length": 159.8, "epoch": 1.2481997119539125, "frac_reward_zero_std": 0.575, "grad_norm": 0.19209064543247223, "kl": 0.01651611328125, "learning_rate": 3.391628036987131e-06, "loss": 0.0006616008933633566, "memory(GiB)": 27.09, "reward": 0.49179998636245725, "reward_std": 0.10097484849393368, "rewards/MMContentORM/mean": 0.6370000183582306, "rewards/MMContentORM/std": 0.5937303185462952, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2600, "train_speed(iter/s)": 0.082964 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.4, "completions/mean_length": 206.225, "completions/min_length": 122.6, "epoch": 1.2506000960153625, "frac_reward_zero_std": 0.65, "grad_norm": 0.15095524489879608, "kl": 0.03800048828125, "learning_rate": 3.3728473667717624e-06, "loss": 0.0015181325376033782, "memory(GiB)": 27.09, "reward": 0.47339999079704287, "reward_std": 0.12077383659780025, "rewards/MMContentORM/mean": 0.5910000026226043, "rewards/MMContentORM/std": 0.6444690108299256, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2605, "train_speed(iter/s)": 0.08294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.0, "completions/mean_length": 215.8875, "completions/min_length": 140.6, "epoch": 1.2530004800768122, "frac_reward_zero_std": 0.75, "grad_norm": 0.1393103450536728, "kl": 0.013934326171875, "learning_rate": 3.3540923375810687e-06, "loss": 0.0005574138835072517, "memory(GiB)": 27.09, "reward": 0.5067499876022339, "reward_std": 0.07643824107944966, "rewards/MMContentORM/mean": 0.6599999904632569, "rewards/MMContentORM/std": 0.5702946484088898, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2610, "train_speed(iter/s)": 0.082956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.0, "completions/mean_length": 205.475, "completions/min_length": 120.6, "epoch": 1.2554008641382621, "frac_reward_zero_std": 0.55, "grad_norm": 0.166556254029274, "kl": 0.01988525390625, "learning_rate": 3.3353632449608703e-06, "loss": 0.0007944651879370213, "memory(GiB)": 27.09, "reward": 0.3788499802350998, "reward_std": 0.20470740869641305, "rewards/MMContentORM/mean": 0.4265000134706497, "rewards/MMContentORM/std": 0.7319641351699829, "rewards/MMFormatORM/mean": 0.576874977350235, "rewards/MMFormatORM/std": 0.17944467663764954, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.27606874108314516, "step": 2615, "train_speed(iter/s)": 0.082975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.4, "completions/mean_length": 211.975, "completions/min_length": 123.8, "epoch": 1.257801248199712, "frac_reward_zero_std": 0.5, "grad_norm": 0.17242266237735748, "kl": 0.03348388671875, "learning_rate": 3.3166603840482815e-06, "loss": 0.0013363593257963657, "memory(GiB)": 27.09, "reward": 0.4672499895095825, "reward_std": 0.14361337879672648, "rewards/MMContentORM/mean": 0.5900000095367431, "rewards/MMContentORM/std": 0.5261604383587837, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2620, "train_speed(iter/s)": 0.08299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.8, "completions/mean_length": 214.225, "completions/min_length": 145.2, "epoch": 1.2602016322611618, "frac_reward_zero_std": 0.675, "grad_norm": 0.12951968610286713, "kl": 0.016705322265625, "learning_rate": 3.297984049567041e-06, "loss": 0.0006685989443212748, "memory(GiB)": 27.09, "reward": 0.4681499719619751, "reward_std": 0.12480434402823448, "rewards/MMContentORM/mean": 0.6210000038146972, "rewards/MMContentORM/std": 0.6814538955688476, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 2625, "train_speed(iter/s)": 0.083002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.8, "completions/mean_length": 208.0875, "completions/min_length": 115.2, "epoch": 1.2626020163226115, "frac_reward_zero_std": 0.7, "grad_norm": 0.0806797593832016, "kl": 0.017120361328125, "learning_rate": 3.2793345358228935e-06, "loss": 0.000685088150203228, "memory(GiB)": 27.09, "reward": 0.4675999879837036, "reward_std": 0.09291383468080312, "rewards/MMContentORM/mean": 0.6089999973773956, "rewards/MMContentORM/std": 0.6673885464668274, "rewards/MMFormatORM/mean": 0.5974999785423278, "rewards/MMFormatORM/std": 0.1443081244826317, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 2630, "train_speed(iter/s)": 0.083009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.4, "completions/mean_length": 225.5125, "completions/min_length": 153.2, "epoch": 1.2650024003840614, "frac_reward_zero_std": 0.65, "grad_norm": 0.1779165416955948, "kl": 0.016461181640625, "learning_rate": 3.2607121366989216e-06, "loss": 0.0006584211252629757, "memory(GiB)": 27.09, "reward": 0.4672499716281891, "reward_std": 0.10175266489386559, "rewards/MMContentORM/mean": 0.5775000095367432, "rewards/MMContentORM/std": 0.6095366299152374, "rewards/MMFormatORM/mean": 0.6156249880790711, "rewards/MMFormatORM/std": 0.11690345108509063, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2635, "train_speed(iter/s)": 0.083015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 346.8, "completions/mean_length": 221.1875, "completions/min_length": 122.8, "epoch": 1.2674027844455114, "frac_reward_zero_std": 0.7, "grad_norm": 0.17611438035964966, "kl": 0.022784423828125, "learning_rate": 3.242117145650936e-06, "loss": 0.0009104921482503414, "memory(GiB)": 27.09, "reward": 0.4905499994754791, "reward_std": 0.13272394693922251, "rewards/MMContentORM/mean": 0.6645000100135803, "rewards/MMContentORM/std": 0.5864324927330017, "rewards/MMFormatORM/mean": 0.5993749856948852, "rewards/MMFormatORM/std": 0.12920948565006257, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 2640, "train_speed(iter/s)": 0.083009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.0, "completions/mean_length": 212.8875, "completions/min_length": 133.4, "epoch": 1.269803168506961, "frac_reward_zero_std": 0.625, "grad_norm": 0.1824599802494049, "kl": 0.016510009765625, "learning_rate": 3.2235498557028487e-06, "loss": 0.0006610351148992777, "memory(GiB)": 27.09, "reward": 0.3902499735355377, "reward_std": 0.1405021134763956, "rewards/MMContentORM/mean": 0.3975000023841858, "rewards/MMContentORM/std": 0.6953428506851196, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2645, "train_speed(iter/s)": 0.083028 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 534.6, "completions/mean_length": 226.7, "completions/min_length": 137.4, "epoch": 1.272203552568411, "frac_reward_zero_std": 0.575, "grad_norm": 0.17883527278900146, "kl": 0.01495361328125, "learning_rate": 3.2050105594420463e-06, "loss": 0.0005979948677122593, "memory(GiB)": 27.09, "reward": 0.4455499768257141, "reward_std": 0.17939299046993257, "rewards/MMContentORM/mean": 0.5645000159740448, "rewards/MMContentORM/std": 0.68505859375, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.18630690574645997, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 2650, "train_speed(iter/s)": 0.082961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 457.2, "completions/mean_length": 217.825, "completions/min_length": 132.0, "epoch": 1.2746039366298607, "frac_reward_zero_std": 0.55, "grad_norm": 0.15864062309265137, "kl": 0.01883544921875, "learning_rate": 3.186499549014788e-06, "loss": 0.0007538811769336462, "memory(GiB)": 27.09, "reward": 0.3846499860286713, "reward_std": 0.18024151772260666, "rewards/MMContentORM/mean": 0.4284999966621399, "rewards/MMContentORM/std": 0.7736505270004272, "rewards/MMFormatORM/mean": 0.5831249833106995, "rewards/MMFormatORM/std": 0.19467147588729858, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2989355862140656, "step": 2655, "train_speed(iter/s)": 0.08292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.2, "completions/mean_length": 220.7875, "completions/min_length": 154.4, "epoch": 1.2770043206913106, "frac_reward_zero_std": 0.65, "grad_norm": 0.20636998116970062, "kl": 0.0220458984375, "learning_rate": 3.168017116121594e-06, "loss": 0.0008822778239846229, "memory(GiB)": 27.09, "reward": 0.5081999719142913, "reward_std": 0.09531799275428057, "rewards/MMContentORM/mean": 0.6780000150203704, "rewards/MMContentORM/std": 0.5256944436579942, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2660, "train_speed(iter/s)": 0.082931 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.8, "completions/mean_length": 215.725, "completions/min_length": 121.8, "epoch": 1.2794047047527606, "frac_reward_zero_std": 0.575, "grad_norm": 0.10092300921678543, "kl": 0.023101806640625, "learning_rate": 3.149563552012662e-06, "loss": 0.0009249597787857056, "memory(GiB)": 27.09, "reward": 0.47155000567436217, "reward_std": 0.15959399938583374, "rewards/MMContentORM/mean": 0.6170000016689301, "rewards/MMContentORM/std": 0.6519209682941437, "rewards/MMFormatORM/mean": 0.5993749856948852, "rewards/MMFormatORM/std": 0.17476680278778076, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.2683130085468292, "step": 2665, "train_speed(iter/s)": 0.08294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 444.0, "completions/mean_length": 218.575, "completions/min_length": 132.6, "epoch": 1.2818050888142103, "frac_reward_zero_std": 0.65, "grad_norm": 0.16917936503887177, "kl": 0.01629638671875, "learning_rate": 3.1311391474832596e-06, "loss": 0.0006522711366415024, "memory(GiB)": 27.09, "reward": 0.4537999749183655, "reward_std": 0.0890954568516463, "rewards/MMContentORM/mean": 0.542000013589859, "rewards/MMContentORM/std": 0.6675113797187805, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2670, "train_speed(iter/s)": 0.082909 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.2, "completions/mean_length": 205.0625, "completions/min_length": 136.0, "epoch": 1.28420547287566, "frac_reward_zero_std": 0.675, "grad_norm": 0.13570167124271393, "kl": 0.0154052734375, "learning_rate": 3.1127441928691575e-06, "loss": 0.0006159848999232054, "memory(GiB)": 27.09, "reward": 0.4889999747276306, "reward_std": 0.09192388076335192, "rewards/MMContentORM/mean": 0.6299999952316284, "rewards/MMContentORM/std": 0.5938115835189819, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2675, "train_speed(iter/s)": 0.082919 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.6, "completions/mean_length": 207.45, "completions/min_length": 135.2, "epoch": 1.28660585693711, "frac_reward_zero_std": 0.675, "grad_norm": 0.11782620847225189, "kl": 0.018231201171875, "learning_rate": 3.0943789780420473e-06, "loss": 0.0007292300462722779, "memory(GiB)": 27.09, "reward": 0.41379998326301576, "reward_std": 0.06477098376490176, "rewards/MMContentORM/mean": 0.4419999986886978, "rewards/MMContentORM/std": 0.6502050161361694, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 2680, "train_speed(iter/s)": 0.08293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.2, "completions/mean_length": 218.4, "completions/min_length": 146.4, "epoch": 1.2890062409985599, "frac_reward_zero_std": 0.675, "grad_norm": 0.1357801854610443, "kl": 0.01868896484375, "learning_rate": 3.0760437924049723e-06, "loss": 0.0007480094209313393, "memory(GiB)": 27.09, "reward": 0.4612999796867371, "reward_std": 0.14127993900328875, "rewards/MMContentORM/mean": 0.5895000040531159, "rewards/MMContentORM/std": 0.5788779146969318, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 2685, "train_speed(iter/s)": 0.082928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.4, "completions/mean_length": 213.9875, "completions/min_length": 152.0, "epoch": 1.2914066250600096, "frac_reward_zero_std": 0.575, "grad_norm": 0.18375363945960999, "kl": 0.015374755859375, "learning_rate": 3.0577389248877737e-06, "loss": 0.0006146729923784733, "memory(GiB)": 27.09, "reward": 0.42199998497962954, "reward_std": 0.130673336237669, "rewards/MMContentORM/mean": 0.4625000059604645, "rewards/MMContentORM/std": 0.6796015799045563, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 2690, "train_speed(iter/s)": 0.082933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.8, "completions/mean_length": 197.325, "completions/min_length": 133.8, "epoch": 1.2938070091214593, "frac_reward_zero_std": 0.675, "grad_norm": 0.15211942791938782, "kl": 0.0177978515625, "learning_rate": 3.0394646639425276e-06, "loss": 0.0007108286954462528, "memory(GiB)": 27.09, "reward": 0.4626499831676483, "reward_std": 0.11787469983100891, "rewards/MMContentORM/mean": 0.5785000056028367, "rewards/MMContentORM/std": 0.6342697024345398, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 2695, "train_speed(iter/s)": 0.082962 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.8, "completions/mean_length": 215.3875, "completions/min_length": 126.2, "epoch": 1.2962073931829092, "frac_reward_zero_std": 0.65, "grad_norm": 0.1444334089756012, "kl": 0.0158203125, "learning_rate": 3.021221297539007e-06, "loss": 0.000632589589804411, "memory(GiB)": 27.09, "reward": 0.49159998297691343, "reward_std": 0.10861160224303604, "rewards/MMContentORM/mean": 0.6365000009536743, "rewards/MMContentORM/std": 0.5993415236473083, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 2700, "train_speed(iter/s)": 0.08297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.4, "completions/mean_length": 217.225, "completions/min_length": 136.2, "epoch": 1.2986077772443592, "frac_reward_zero_std": 0.5, "grad_norm": 0.1874951273202896, "kl": 0.01707763671875, "learning_rate": 3.0030091131601458e-06, "loss": 0.0006825667340308427, "memory(GiB)": 27.09, "reward": 0.43964999318122866, "reward_std": 0.22167797833681108, "rewards/MMContentORM/mean": 0.5785000145435333, "rewards/MMContentORM/std": 0.7030053317546845, "rewards/MMFormatORM/mean": 0.576874989271164, "rewards/MMFormatORM/std": 0.20004121959209442, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.30775573253631594, "step": 2705, "train_speed(iter/s)": 0.08295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.4, "completions/mean_length": 214.0625, "completions/min_length": 147.0, "epoch": 1.3010081613058089, "frac_reward_zero_std": 0.675, "grad_norm": 0.13891303539276123, "kl": 0.015338134765625, "learning_rate": 2.984828397797499e-06, "loss": 0.0006134298164397478, "memory(GiB)": 27.09, "reward": 0.4906999826431274, "reward_std": 0.15061374455690385, "rewards/MMContentORM/mean": 0.6630000233650207, "rewards/MMContentORM/std": 0.5227236907929182, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 2710, "train_speed(iter/s)": 0.082958 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.2, "completions/mean_length": 212.925, "completions/min_length": 128.6, "epoch": 1.3034085453672588, "frac_reward_zero_std": 0.5, "grad_norm": 0.22613218426704407, "kl": 0.018865966796875, "learning_rate": 2.966679437946732e-06, "loss": 0.0007538828998804092, "memory(GiB)": 27.09, "reward": 0.4224499940872192, "reward_std": 0.13795652985572815, "rewards/MMContentORM/mean": 0.47800001204013826, "rewards/MMContentORM/std": 0.6876972198486329, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 2715, "train_speed(iter/s)": 0.08295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.6, "completions/mean_length": 211.9875, "completions/min_length": 144.6, "epoch": 1.3058089294287085, "frac_reward_zero_std": 0.725, "grad_norm": 0.1565045714378357, "kl": 0.018109130859375, "learning_rate": 2.948562519603093e-06, "loss": 0.0007246591150760651, "memory(GiB)": 27.09, "reward": 0.41704997420310974, "reward_std": 0.11052079051733017, "rewards/MMContentORM/mean": 0.4644999921321869, "rewards/MMContentORM/std": 0.6703303098678589, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2720, "train_speed(iter/s)": 0.082952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.0, "completions/mean_length": 219.45, "completions/min_length": 151.0, "epoch": 1.3082093134901585, "frac_reward_zero_std": 0.75, "grad_norm": 0.13537226617336273, "kl": 0.015643310546875, "learning_rate": 2.930477928256921e-06, "loss": 0.0006261279806494713, "memory(GiB)": 27.09, "reward": 0.4939499914646149, "reward_std": 0.11151073649525642, "rewards/MMContentORM/mean": 0.628000009059906, "rewards/MMContentORM/std": 0.6397946953773499, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2725, "train_speed(iter/s)": 0.082952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.2, "completions/mean_length": 209.675, "completions/min_length": 131.4, "epoch": 1.3106096975516084, "frac_reward_zero_std": 0.6, "grad_norm": 0.148203507065773, "kl": 0.014532470703125, "learning_rate": 2.912425948889134e-06, "loss": 0.000581054575741291, "memory(GiB)": 27.09, "reward": 0.4759999871253967, "reward_std": 0.11653119549155236, "rewards/MMContentORM/mean": 0.5975000083446502, "rewards/MMContentORM/std": 0.6301550388336181, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2730, "train_speed(iter/s)": 0.082965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.6, "completions/mean_length": 213.9, "completions/min_length": 133.6, "epoch": 1.313010081613058, "frac_reward_zero_std": 0.575, "grad_norm": 0.2361445128917694, "kl": 0.024627685546875, "learning_rate": 2.894406865966739e-06, "loss": 0.0009845934808254242, "memory(GiB)": 27.09, "reward": 0.4808999836444855, "reward_std": 0.1356230785138905, "rewards/MMContentORM/mean": 0.6385000109672546, "rewards/MMContentORM/std": 0.6562785744667053, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 2735, "train_speed(iter/s)": 0.082973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.6, "completions/mean_length": 215.15, "completions/min_length": 137.4, "epoch": 1.3154104656745078, "frac_reward_zero_std": 0.625, "grad_norm": 0.13266721367835999, "kl": 0.013671875, "learning_rate": 2.876420963438369e-06, "loss": 0.000547263352200389, "memory(GiB)": 27.09, "reward": 0.4798999786376953, "reward_std": 0.12119809612631798, "rewards/MMContentORM/mean": 0.6360000133514404, "rewards/MMContentORM/std": 0.6224928319454193, "rewards/MMFormatORM/mean": 0.6012499690055847, "rewards/MMFormatORM/std": 0.13321036398410796, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.20493902564048766, "step": 2740, "train_speed(iter/s)": 0.08299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.4, "completions/mean_length": 208.075, "completions/min_length": 129.4, "epoch": 1.3178108497359577, "frac_reward_zero_std": 0.725, "grad_norm": 0.14808304607868195, "kl": 0.015924072265625, "learning_rate": 2.8584685247297735e-06, "loss": 0.0006361417472362518, "memory(GiB)": 27.09, "reward": 0.455049991607666, "reward_std": 0.09468159638345242, "rewards/MMContentORM/mean": 0.559500002861023, "rewards/MMContentORM/std": 0.5647126242518425, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2745, "train_speed(iter/s)": 0.083004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 370.0, "completions/mean_length": 217.9125, "completions/min_length": 148.2, "epoch": 1.3202112337974077, "frac_reward_zero_std": 0.65, "grad_norm": 0.20360088348388672, "kl": 0.015631103515625, "learning_rate": 2.840549832739388e-06, "loss": 0.0006251013837754726, "memory(GiB)": 27.09, "reward": 0.4665999889373779, "reward_std": 0.091923877899535, "rewards/MMContentORM/mean": 0.5739999890327454, "rewards/MMContentORM/std": 0.6347232937812806, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2750, "train_speed(iter/s)": 0.082996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.0, "completions/mean_length": 214.2875, "completions/min_length": 142.2, "epoch": 1.3226116178588574, "frac_reward_zero_std": 0.75, "grad_norm": 0.14281363785266876, "kl": 0.01690673828125, "learning_rate": 2.822665169833851e-06, "loss": 0.000676287803798914, "memory(GiB)": 27.09, "reward": 0.4568999886512756, "reward_std": 0.047376152616925536, "rewards/MMContentORM/mean": 0.5210000097751617, "rewards/MMContentORM/std": 0.6158773601055145, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 2755, "train_speed(iter/s)": 0.083002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 486.8, "completions/mean_length": 232.45, "completions/min_length": 141.2, "epoch": 1.3250120019203073, "frac_reward_zero_std": 0.625, "grad_norm": 0.2492820769548416, "kl": 0.020391845703125, "learning_rate": 2.8048148178435666e-06, "loss": 0.0008158944547176361, "memory(GiB)": 27.09, "reward": 0.3429499715566635, "reward_std": 0.19226232618093492, "rewards/MMContentORM/mean": 0.36550000309944153, "rewards/MMContentORM/std": 0.7640251278877258, "rewards/MMFormatORM/mean": 0.5606249749660492, "rewards/MMFormatORM/std": 0.22384813129901887, "rewards/MMRubricORM/mean": -0.1375, "rewards/MMRubricORM/std": 0.34438174962997437, "step": 2760, "train_speed(iter/s)": 0.082952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.6, "completions/mean_length": 214.4625, "completions/min_length": 140.2, "epoch": 1.327412385981757, "frac_reward_zero_std": 0.725, "grad_norm": 0.1096256673336029, "kl": 0.01571044921875, "learning_rate": 2.7869990580582584e-06, "loss": 0.0006275205872952938, "memory(GiB)": 27.09, "reward": 0.5268999934196472, "reward_std": 0.06434671822935342, "rewards/MMContentORM/mean": 0.6960000038146973, "rewards/MMContentORM/std": 0.5165394425392151, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2765, "train_speed(iter/s)": 0.082961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.6, "completions/mean_length": 212.7, "completions/min_length": 152.4, "epoch": 1.329812770043207, "frac_reward_zero_std": 0.625, "grad_norm": 0.16367603838443756, "kl": 0.01417236328125, "learning_rate": 2.769218171222538e-06, "loss": 0.0005671579390764236, "memory(GiB)": 27.09, "reward": 0.4968499898910522, "reward_std": 0.1402192786335945, "rewards/MMContentORM/mean": 0.6639999985694885, "rewards/MMContentORM/std": 0.5903096914291381, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 2770, "train_speed(iter/s)": 0.08298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.6, "completions/mean_length": 211.125, "completions/min_length": 140.4, "epoch": 1.3322131541046567, "frac_reward_zero_std": 0.7, "grad_norm": 0.200842946767807, "kl": 0.03021240234375, "learning_rate": 2.7514724375314866e-06, "loss": 0.0012100426480174064, "memory(GiB)": 27.09, "reward": 0.5299499869346619, "reward_std": 0.06965001099742948, "rewards/MMContentORM/mean": 0.7180000185966492, "rewards/MMContentORM/std": 0.4512696675956249, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2775, "train_speed(iter/s)": 0.082989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 428.0, "completions/mean_length": 215.7125, "completions/min_length": 139.8, "epoch": 1.3346135381661066, "frac_reward_zero_std": 0.65, "grad_norm": 0.1629835069179535, "kl": 0.01583251953125, "learning_rate": 2.733762136626229e-06, "loss": 0.0006339491344988346, "memory(GiB)": 27.09, "reward": 0.46864998936653135, "reward_std": 0.1297540941275656, "rewards/MMContentORM/mean": 0.5935000061988831, "rewards/MMContentORM/std": 0.6146703898906708, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2780, "train_speed(iter/s)": 0.082955 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.2, "completions/mean_length": 217.0375, "completions/min_length": 124.8, "epoch": 1.3370139222275563, "frac_reward_zero_std": 0.575, "grad_norm": 0.20240968465805054, "kl": 0.0284912109375, "learning_rate": 2.7160875475895336e-06, "loss": 0.0011421437375247478, "memory(GiB)": 27.09, "reward": 0.4283499836921692, "reward_std": 0.19070670306682586, "rewards/MMContentORM/mean": 0.5214999914169312, "rewards/MMContentORM/std": 0.6971070170402527, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.16571036279201506, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 2785, "train_speed(iter/s)": 0.082956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.0, "completions/mean_length": 211.175, "completions/min_length": 94.6, "epoch": 1.3394143062890063, "frac_reward_zero_std": 0.65, "grad_norm": 0.21979370713233948, "kl": 0.018133544921875, "learning_rate": 2.6984489489414123e-06, "loss": 0.0007254761178046465, "memory(GiB)": 27.09, "reward": 0.46289997100830077, "reward_std": 0.1593818672001362, "rewards/MMContentORM/mean": 0.5935000061988831, "rewards/MMContentORM/std": 0.6262759625911712, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.14990466833114624, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23062257766723632, "step": 2790, "train_speed(iter/s)": 0.082967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.6, "completions/mean_length": 213.75, "completions/min_length": 154.8, "epoch": 1.3418146903504562, "frac_reward_zero_std": 0.75, "grad_norm": 0.07046330720186234, "kl": 0.018560791015625, "learning_rate": 2.680846618634741e-06, "loss": 0.0007423401810228824, "memory(GiB)": 27.09, "reward": 0.4933999955654144, "reward_std": 0.07155919813085347, "rewards/MMContentORM/mean": 0.6410000085830688, "rewards/MMContentORM/std": 0.557767578959465, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 2795, "train_speed(iter/s)": 0.082976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 333.8, "completions/mean_length": 221.375, "completions/min_length": 156.6, "epoch": 1.344215074411906, "frac_reward_zero_std": 0.65, "grad_norm": 0.08354189991950989, "kl": 0.01629638671875, "learning_rate": 2.6632808340508577e-06, "loss": 0.0006520752795040607, "memory(GiB)": 27.09, "reward": 0.4364499866962433, "reward_std": 0.12155165586154908, "rewards/MMContentORM/mean": 0.5130000054836273, "rewards/MMContentORM/std": 0.6738922476768494, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2800, "train_speed(iter/s)": 0.08298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.8, "completions/mean_length": 212.125, "completions/min_length": 136.2, "epoch": 1.3466154584733556, "frac_reward_zero_std": 0.65, "grad_norm": 0.07269278913736343, "kl": 0.013916015625, "learning_rate": 2.6457518719952126e-06, "loss": 0.0005559300072491169, "memory(GiB)": 27.09, "reward": 0.4854999780654907, "reward_std": 0.08329717591404914, "rewards/MMContentORM/mean": 0.5925000071525574, "rewards/MMContentORM/std": 0.5689398109912872, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2805, "train_speed(iter/s)": 0.082961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.8, "completions/mean_length": 207.2375, "completions/min_length": 144.4, "epoch": 1.3490158425348056, "frac_reward_zero_std": 0.675, "grad_norm": 0.21120710670948029, "kl": 0.016229248046875, "learning_rate": 2.628260008693e-06, "loss": 0.0006484090350568295, "memory(GiB)": 27.09, "reward": 0.4767999827861786, "reward_std": 0.12105667740106582, "rewards/MMContentORM/mean": 0.5995000123977661, "rewards/MMContentORM/std": 0.6707588911056519, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 2810, "train_speed(iter/s)": 0.082981 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.2, "completions/mean_length": 215.175, "completions/min_length": 147.6, "epoch": 1.3514162265962555, "frac_reward_zero_std": 0.6, "grad_norm": 0.17420639097690582, "kl": 0.013153076171875, "learning_rate": 2.6108055197848013e-06, "loss": 0.0005263995379209518, "memory(GiB)": 27.09, "reward": 0.49479998350143434, "reward_std": 0.1250164821743965, "rewards/MMContentORM/mean": 0.6445000052452088, "rewards/MMContentORM/std": 0.6272825956344604, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 2815, "train_speed(iter/s)": 0.082983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.0, "completions/mean_length": 211.6875, "completions/min_length": 148.4, "epoch": 1.3538166106577052, "frac_reward_zero_std": 0.675, "grad_norm": 0.13600178062915802, "kl": 0.015380859375, "learning_rate": 2.5933886803222453e-06, "loss": 0.0006146400235593319, "memory(GiB)": 27.09, "reward": 0.5410999894142151, "reward_std": 0.07990306429564953, "rewards/MMContentORM/mean": 0.7315000176429749, "rewards/MMContentORM/std": 0.4983065962791443, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2820, "train_speed(iter/s)": 0.082998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.6, "completions/mean_length": 209.3125, "completions/min_length": 116.4, "epoch": 1.3562169947191551, "frac_reward_zero_std": 0.625, "grad_norm": 0.11442702263593674, "kl": 0.017779541015625, "learning_rate": 2.5760097647636695e-06, "loss": 0.0007101839408278465, "memory(GiB)": 27.09, "reward": 0.48674996495246886, "reward_std": 0.1194303346797824, "rewards/MMContentORM/mean": 0.6099999904632568, "rewards/MMContentORM/std": 0.6200049042701721, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 2825, "train_speed(iter/s)": 0.083002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.4, "completions/mean_length": 210.4875, "completions/min_length": 157.0, "epoch": 1.3586173787806048, "frac_reward_zero_std": 0.575, "grad_norm": 0.1676364243030548, "kl": 0.02005615234375, "learning_rate": 2.558669046969802e-06, "loss": 0.0008024025708436966, "memory(GiB)": 27.09, "reward": 0.4595999836921692, "reward_std": 0.1962928393855691, "rewards/MMContentORM/mean": 0.614000004529953, "rewards/MMContentORM/std": 0.6589715838432312, "rewards/MMFormatORM/mean": 0.5849999785423279, "rewards/MMFormatORM/std": 0.1737115800380707, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2672485947608948, "step": 2830, "train_speed(iter/s)": 0.083023 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.6, "completions/mean_length": 207.925, "completions/min_length": 124.2, "epoch": 1.3610177628420548, "frac_reward_zero_std": 0.65, "grad_norm": 0.11557711660861969, "kl": 0.016168212890625, "learning_rate": 2.541366800199441e-06, "loss": 0.0006470034830272197, "memory(GiB)": 27.09, "reward": 0.4768499732017517, "reward_std": 0.1373908487148583, "rewards/MMContentORM/mean": 0.6140000224113464, "rewards/MMContentORM/std": 0.6126730859279632, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 2835, "train_speed(iter/s)": 0.083045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 455.6, "completions/mean_length": 221.3125, "completions/min_length": 119.6, "epoch": 1.3634181469035045, "frac_reward_zero_std": 0.525, "grad_norm": 0.17624257504940033, "kl": 0.021038818359375, "learning_rate": 2.524103297105147e-06, "loss": 0.0008411226794123649, "memory(GiB)": 27.09, "reward": 0.4879499852657318, "reward_std": 0.17599886879324914, "rewards/MMContentORM/mean": 0.6705000042915344, "rewards/MMContentORM/std": 0.5964474260807038, "rewards/MMFormatORM/mean": 0.5931249916553497, "rewards/MMFormatORM/std": 0.1556377649307251, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.23944272398948668, "step": 2840, "train_speed(iter/s)": 0.083004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 330.4, "completions/mean_length": 212.0625, "completions/min_length": 137.4, "epoch": 1.3658185309649544, "frac_reward_zero_std": 0.625, "grad_norm": 0.11209463328123093, "kl": 0.015325927734375, "learning_rate": 2.5068788097289563e-06, "loss": 0.0006131676957011223, "memory(GiB)": 27.09, "reward": 0.508549964427948, "reward_std": 0.09256027387455106, "rewards/MMContentORM/mean": 0.6645000219345093, "rewards/MMContentORM/std": 0.49717583805322646, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2845, "train_speed(iter/s)": 0.083015 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.0, "completions/mean_length": 207.475, "completions/min_length": 144.2, "epoch": 1.3682189150264041, "frac_reward_zero_std": 0.675, "grad_norm": 0.14943933486938477, "kl": 0.015374755859375, "learning_rate": 2.4896936094980813e-06, "loss": 0.0006145826540887356, "memory(GiB)": 27.09, "reward": 0.51869997382164, "reward_std": 0.07311484031379223, "rewards/MMContentORM/mean": 0.6755000114440918, "rewards/MMContentORM/std": 0.5316406607627868, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2850, "train_speed(iter/s)": 0.083019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.6, "completions/mean_length": 212.0875, "completions/min_length": 142.2, "epoch": 1.370619299087854, "frac_reward_zero_std": 0.675, "grad_norm": 0.13996316492557526, "kl": 0.016644287109375, "learning_rate": 2.47254796722064e-06, "loss": 0.000665975920855999, "memory(GiB)": 27.09, "reward": 0.477649986743927, "reward_std": 0.11419774293899536, "rewards/MMContentORM/mean": 0.6160000085830688, "rewards/MMContentORM/std": 0.6154688060283661, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 2855, "train_speed(iter/s)": 0.083038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.8, "completions/mean_length": 202.125, "completions/min_length": 120.2, "epoch": 1.373019683149304, "frac_reward_zero_std": 0.75, "grad_norm": 0.09467165917158127, "kl": 0.019873046875, "learning_rate": 2.455442153081388e-06, "loss": 0.0007954918779432774, "memory(GiB)": 27.09, "reward": 0.4916999876499176, "reward_std": 0.07297342019155621, "rewards/MMContentORM/mean": 0.6655000030994416, "rewards/MMContentORM/std": 0.5675058551132679, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.14990466833114624, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23062257766723632, "step": 2860, "train_speed(iter/s)": 0.083051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.4, "completions/mean_length": 204.1, "completions/min_length": 111.6, "epoch": 1.3754200672107537, "frac_reward_zero_std": 0.75, "grad_norm": 0.1891569048166275, "kl": 0.01641845703125, "learning_rate": 2.4383764366374608e-06, "loss": 0.0006566672120243311, "memory(GiB)": 27.09, "reward": 0.5054999768733979, "reward_std": 0.05897270615678281, "rewards/MMContentORM/mean": 0.6424999952316284, "rewards/MMContentORM/std": 0.5048037022352219, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2865, "train_speed(iter/s)": 0.083069 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.0, "completions/mean_length": 197.075, "completions/min_length": 130.0, "epoch": 1.3778204512722034, "frac_reward_zero_std": 0.65, "grad_norm": 0.1894003450870514, "kl": 0.016998291015625, "learning_rate": 2.4213510868141253e-06, "loss": 0.0006809456273913384, "memory(GiB)": 27.09, "reward": 0.46944997906684877, "reward_std": 0.14615897387266158, "rewards/MMContentORM/mean": 0.5954999923706055, "rewards/MMContentORM/std": 0.6364932656288147, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2870, "train_speed(iter/s)": 0.083076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 437.6, "completions/mean_length": 216.4625, "completions/min_length": 132.4, "epoch": 1.3802208353336534, "frac_reward_zero_std": 0.775, "grad_norm": 0.16716767847537994, "kl": 2.116851806640625, "learning_rate": 2.404366371900541e-06, "loss": 0.08460500240325927, "memory(GiB)": 27.09, "reward": 0.47039997577667236, "reward_std": 0.08881261080969124, "rewards/MMContentORM/mean": 0.5835000097751617, "rewards/MMContentORM/std": 0.5921829402446747, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2875, "train_speed(iter/s)": 0.083038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 434.2, "completions/mean_length": 214.5625, "completions/min_length": 143.2, "epoch": 1.3826212193951033, "frac_reward_zero_std": 0.6, "grad_norm": 0.22403933107852936, "kl": 0.01729736328125, "learning_rate": 2.387422559545539e-06, "loss": 0.0006910515949130058, "memory(GiB)": 27.09, "reward": 0.4853999733924866, "reward_std": 0.1501894833520055, "rewards/MMContentORM/mean": 0.6535000264644623, "rewards/MMContentORM/std": 0.5772003047168255, "rewards/MMFormatORM/mean": 0.5974999785423278, "rewards/MMFormatORM/std": 0.13168290257453918, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 2880, "train_speed(iter/s)": 0.082999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 366.0, "completions/mean_length": 218.0125, "completions/min_length": 145.6, "epoch": 1.385021603456553, "frac_reward_zero_std": 0.65, "grad_norm": 0.1463712602853775, "kl": 0.01533203125, "learning_rate": 2.3705199167533933e-06, "loss": 0.0006130510475486517, "memory(GiB)": 27.09, "reward": 0.5021499812602996, "reward_std": 0.08464068132452666, "rewards/MMContentORM/mean": 0.6485000193119049, "rewards/MMContentORM/std": 0.5746123373508454, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2885, "train_speed(iter/s)": 0.082987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.2, "completions/mean_length": 221.3625, "completions/min_length": 130.8, "epoch": 1.387421987518003, "frac_reward_zero_std": 0.65, "grad_norm": 0.14541618525981903, "kl": 0.022015380859375, "learning_rate": 2.35365870987962e-06, "loss": 0.000881551206111908, "memory(GiB)": 27.09, "reward": 0.4464499831199646, "reward_std": 0.12112738967407495, "rewards/MMContentORM/mean": 0.5380000054836274, "rewards/MMContentORM/std": 0.587418507039547, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 2890, "train_speed(iter/s)": 0.082996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.8, "completions/mean_length": 207.3625, "completions/min_length": 114.0, "epoch": 1.3898223715794527, "frac_reward_zero_std": 0.675, "grad_norm": 0.006435598712414503, "kl": 0.112371826171875, "learning_rate": 2.336839204626781e-06, "loss": 0.00448373295366764, "memory(GiB)": 27.09, "reward": 0.48819997906684875, "reward_std": 0.0975807286798954, "rewards/MMContentORM/mean": 0.6280000001192093, "rewards/MMContentORM/std": 0.5371371787041426, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2895, "train_speed(iter/s)": 0.083007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.8, "completions/mean_length": 216.225, "completions/min_length": 128.0, "epoch": 1.3922227556409026, "frac_reward_zero_std": 0.6, "grad_norm": 0.12930913269519806, "kl": 0.031195068359375, "learning_rate": 2.320061666040293e-06, "loss": 0.0012484462931752205, "memory(GiB)": 27.09, "reward": 0.4482999861240387, "reward_std": 0.13816866455599666, "rewards/MMContentORM/mean": 0.5569999933242797, "rewards/MMContentORM/std": 0.6566318869590759, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 2900, "train_speed(iter/s)": 0.083008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 381.6, "completions/mean_length": 225.3125, "completions/min_length": 159.4, "epoch": 1.3946231397023523, "frac_reward_zero_std": 0.725, "grad_norm": 0.11358582973480225, "kl": 0.016644287109375, "learning_rate": 2.303326358504254e-06, "loss": 0.0006654250435531139, "memory(GiB)": 27.09, "reward": 0.5058999717235565, "reward_std": 0.08046875060535967, "rewards/MMContentORM/mean": 0.643500006198883, "rewards/MMContentORM/std": 0.5736395001411438, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2905, "train_speed(iter/s)": 0.082965 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.8, "completions/mean_length": 208.175, "completions/min_length": 143.8, "epoch": 1.3970235237638022, "frac_reward_zero_std": 0.75, "grad_norm": 0.11047236621379852, "kl": 0.017657470703125, "learning_rate": 2.286633545737275e-06, "loss": 0.0007063564844429493, "memory(GiB)": 27.09, "reward": 0.444299989938736, "reward_std": 0.12515790089964868, "rewards/MMContentORM/mean": 0.5469999969005584, "rewards/MMContentORM/std": 0.6875294208526611, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 2910, "train_speed(iter/s)": 0.08298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.0, "completions/mean_length": 213.2125, "completions/min_length": 118.8, "epoch": 1.399423907825252, "frac_reward_zero_std": 0.6, "grad_norm": 0.18031013011932373, "kl": 0.017578125, "learning_rate": 2.2699834907883284e-06, "loss": 0.0007039817050099373, "memory(GiB)": 27.09, "reward": 0.4786999821662903, "reward_std": 0.13420886383391917, "rewards/MMContentORM/mean": 0.6330000162124634, "rewards/MMContentORM/std": 0.6391359031200409, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 2915, "train_speed(iter/s)": 0.082991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 333.8, "completions/mean_length": 220.05, "completions/min_length": 153.6, "epoch": 1.4018242918867019, "frac_reward_zero_std": 0.625, "grad_norm": 0.18326731026172638, "kl": 0.019744873046875, "learning_rate": 2.2533764560325956e-06, "loss": 0.0007898284122347832, "memory(GiB)": 27.09, "reward": 0.4050499856472015, "reward_std": 0.2015961468219757, "rewards/MMContentORM/mean": 0.49200000166893004, "rewards/MMContentORM/std": 0.7459113121032714, "rewards/MMFormatORM/mean": 0.5768749833106994, "rewards/MMFormatORM/std": 0.2062115788459778, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.3172485947608948, "step": 2920, "train_speed(iter/s)": 0.082989 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 363.4, "completions/mean_length": 210.2875, "completions/min_length": 125.4, "epoch": 1.4042246759481518, "frac_reward_zero_std": 0.6, "grad_norm": 0.12701493501663208, "kl": 0.0131103515625, "learning_rate": 2.236812703167337e-06, "loss": 0.0005245218984782696, "memory(GiB)": 27.09, "reward": 0.4417999804019928, "reward_std": 0.15414927080273627, "rewards/MMContentORM/mean": 0.5445000112056733, "rewards/MMContentORM/std": 0.6644897401332855, "rewards/MMFormatORM/mean": 0.5974999785423278, "rewards/MMFormatORM/std": 0.11980934292078019, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.16124515533447265, "step": 2925, "train_speed(iter/s)": 0.082983 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.8, "completions/mean_length": 211.0, "completions/min_length": 129.4, "epoch": 1.4066250600096015, "frac_reward_zero_std": 0.575, "grad_norm": 0.14272558689117432, "kl": 0.0160888671875, "learning_rate": 2.2202924932077703e-06, "loss": 0.0006431899964809418, "memory(GiB)": 27.09, "reward": 0.44374998211860656, "reward_std": 0.1390879049897194, "rewards/MMContentORM/mean": 0.5599999845027923, "rewards/MMContentORM/std": 0.6930801510810852, "rewards/MMFormatORM/mean": 0.5931249976158142, "rewards/MMFormatORM/std": 0.18240466713905334, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2806225776672363, "step": 2930, "train_speed(iter/s)": 0.082994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.0, "completions/mean_length": 213.025, "completions/min_length": 138.4, "epoch": 1.4090254440710512, "frac_reward_zero_std": 0.65, "grad_norm": 0.1295127421617508, "kl": 0.01728515625, "learning_rate": 2.2038160864829516e-06, "loss": 0.0006916459649801254, "memory(GiB)": 27.09, "reward": 0.4630999803543091, "reward_std": 0.1404314052313566, "rewards/MMContentORM/mean": 0.5940000176429748, "rewards/MMContentORM/std": 0.6665144979953765, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 2935, "train_speed(iter/s)": 0.083004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 353.8, "completions/mean_length": 216.4, "completions/min_length": 145.4, "epoch": 1.4114258281325012, "frac_reward_zero_std": 0.675, "grad_norm": 0.1919974982738495, "kl": 0.013800048828125, "learning_rate": 2.1873837426316775e-06, "loss": 0.0005520004779100418, "memory(GiB)": 27.09, "reward": 0.4706499934196472, "reward_std": 0.1478560283780098, "rewards/MMContentORM/mean": 0.5984999895095825, "rewards/MMContentORM/std": 0.6424413204193116, "rewards/MMFormatORM/mean": 0.609375, "rewards/MMFormatORM/std": 0.16249999403953552, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.25, "step": 2940, "train_speed(iter/s)": 0.082999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.8, "completions/mean_length": 210.3, "completions/min_length": 140.8, "epoch": 1.413826212193951, "frac_reward_zero_std": 0.65, "grad_norm": 0.1101124957203865, "kl": 0.01229248046875, "learning_rate": 2.1709957205983904e-06, "loss": 0.0004918764345347882, "memory(GiB)": 27.09, "reward": 0.5490499854087829, "reward_std": 0.0635689014568925, "rewards/MMContentORM/mean": 0.7369999945163727, "rewards/MMContentORM/std": 0.4961404323577881, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 2945, "train_speed(iter/s)": 0.082996 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.8, "completions/mean_length": 212.0375, "completions/min_length": 128.2, "epoch": 1.4162265962554008, "frac_reward_zero_std": 0.75, "grad_norm": 0.15772999823093414, "kl": 0.01820068359375, "learning_rate": 2.1546522786291055e-06, "loss": 0.000728009082376957, "memory(GiB)": 27.09, "reward": 0.5127999722957611, "reward_std": 0.09956063730642199, "rewards/MMContentORM/mean": 0.6895000219345093, "rewards/MMContentORM/std": 0.5332017622888088, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2950, "train_speed(iter/s)": 0.082998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.8, "completions/mean_length": 197.4125, "completions/min_length": 136.6, "epoch": 1.4186269803168507, "frac_reward_zero_std": 0.825, "grad_norm": 0.005250279791653156, "kl": 0.0145751953125, "learning_rate": 2.138353674267332e-06, "loss": 0.0005831093527376652, "memory(GiB)": 27.09, "reward": 0.4632999837398529, "reward_std": 0.05529574602842331, "rewards/MMContentORM/mean": 0.5370000183582306, "rewards/MMContentORM/std": 0.5351093679666519, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 2955, "train_speed(iter/s)": 0.083011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.4, "completions/mean_length": 206.375, "completions/min_length": 134.0, "epoch": 1.4210273643783005, "frac_reward_zero_std": 0.675, "grad_norm": 0.15277594327926636, "kl": 0.015472412109375, "learning_rate": 2.1221001643500124e-06, "loss": 0.0006176586262881756, "memory(GiB)": 27.09, "reward": 0.5094499945640564, "reward_std": 0.05168950129300356, "rewards/MMContentORM/mean": 0.6379999995231629, "rewards/MMContentORM/std": 0.5326842725276947, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 2960, "train_speed(iter/s)": 0.083029 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.0, "completions/mean_length": 210.15, "completions/min_length": 145.8, "epoch": 1.4234277484397504, "frac_reward_zero_std": 0.65, "grad_norm": 0.11770177632570267, "kl": 0.0151611328125, "learning_rate": 2.1058920050034916e-06, "loss": 0.0006076030433177948, "memory(GiB)": 27.09, "reward": 0.45579997897148133, "reward_std": 0.13378460630774497, "rewards/MMContentORM/mean": 0.546999990940094, "rewards/MMContentORM/std": 0.6460906147956849, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 2965, "train_speed(iter/s)": 0.08303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.0, "completions/mean_length": 208.5375, "completions/min_length": 137.4, "epoch": 1.4258281325012003, "frac_reward_zero_std": 0.575, "grad_norm": 0.18440692126750946, "kl": 0.019158935546875, "learning_rate": 2.089729451639464e-06, "loss": 0.0007669050246477127, "memory(GiB)": 27.09, "reward": 0.4253999888896942, "reward_std": 0.18809040188789367, "rewards/MMContentORM/mean": 0.5285000085830689, "rewards/MMContentORM/std": 0.6971115171909332, "rewards/MMFormatORM/mean": 0.5849999845027923, "rewards/MMFormatORM/std": 0.16754122078418732, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2577557325363159, "step": 2970, "train_speed(iter/s)": 0.083041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.0, "completions/mean_length": 207.1125, "completions/min_length": 120.8, "epoch": 1.42822851656265, "frac_reward_zero_std": 0.725, "grad_norm": 0.08875837922096252, "kl": 0.020159912109375, "learning_rate": 2.0736127589509574e-06, "loss": 0.0008051252923905849, "memory(GiB)": 27.09, "reward": 0.4783999741077423, "reward_std": 0.09956063062418252, "rewards/MMContentORM/mean": 0.6034999847412109, "rewards/MMContentORM/std": 0.5641872756183147, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 2975, "train_speed(iter/s)": 0.083048 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.2, "completions/mean_length": 213.2375, "completions/min_length": 142.4, "epoch": 1.4306289006240998, "frac_reward_zero_std": 0.75, "grad_norm": 0.005619046278297901, "kl": 0.016546630859375, "learning_rate": 2.057542180908314e-06, "loss": 0.0006620488129556179, "memory(GiB)": 27.09, "reward": 0.4491499841213226, "reward_std": 0.0645588494837284, "rewards/MMContentORM/mean": 0.5159999966621399, "rewards/MMContentORM/std": 0.6301424145698548, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2980, "train_speed(iter/s)": 0.083063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.8, "completions/mean_length": 204.6625, "completions/min_length": 116.0, "epoch": 1.4330292846855497, "frac_reward_zero_std": 0.675, "grad_norm": 0.2883993685245514, "kl": 0.045916748046875, "learning_rate": 2.0415179707551972e-06, "loss": 0.0018356535583734512, "memory(GiB)": 27.09, "reward": 0.48004997372627256, "reward_std": 0.11250068647786975, "rewards/MMContentORM/mean": 0.6220000118017197, "rewards/MMContentORM/std": 0.5794163227081299, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 2985, "train_speed(iter/s)": 0.083059 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.8, "completions/mean_length": 212.075, "completions/min_length": 142.8, "epoch": 1.4354296687469996, "frac_reward_zero_std": 0.8, "grad_norm": 0.005611935164779425, "kl": 0.01571044921875, "learning_rate": 2.025540381004592e-06, "loss": 0.0006283918395638466, "memory(GiB)": 27.09, "reward": 0.5766499638557434, "reward_std": 0.03358757034875452, "rewards/MMContentORM/mean": 0.806000006198883, "rewards/MMContentORM/std": 0.41324327513575554, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 2990, "train_speed(iter/s)": 0.083062 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.2, "completions/mean_length": 210.15, "completions/min_length": 140.6, "epoch": 1.4378300528084493, "frac_reward_zero_std": 0.7, "grad_norm": 0.16042460501194, "kl": 0.019024658203125, "learning_rate": 2.009609663434823e-06, "loss": 0.0007612261921167374, "memory(GiB)": 27.09, "reward": 0.42854997515678406, "reward_std": 0.059184834850020705, "rewards/MMContentORM/mean": 0.46450000554323195, "rewards/MMContentORM/std": 0.6028416275978088, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 2995, "train_speed(iter/s)": 0.083066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.2, "completions/mean_length": 213.075, "completions/min_length": 123.8, "epoch": 1.4402304368698993, "frac_reward_zero_std": 0.55, "grad_norm": 0.17296624183654785, "kl": 0.014404296875, "learning_rate": 1.9937260690856038e-06, "loss": 0.0005766497924923897, "memory(GiB)": 27.09, "reward": 0.5021999835968017, "reward_std": 0.1513208493590355, "rewards/MMContentORM/mean": 0.6630000114440918, "rewards/MMContentORM/std": 0.6147877216339112, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 3000, "train_speed(iter/s)": 0.083074 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 356.2, "completions/mean_length": 213.15, "completions/min_length": 137.2, "epoch": 1.442630820931349, "frac_reward_zero_std": 0.6, "grad_norm": 0.16241255402565002, "kl": 0.01876220703125, "learning_rate": 1.977889848254063e-06, "loss": 0.0007500813342630863, "memory(GiB)": 27.09, "reward": 0.4236499905586243, "reward_std": 0.1413506418466568, "rewards/MMContentORM/mean": 0.4810000091791153, "rewards/MMContentORM/std": 0.6403470635414124, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 3005, "train_speed(iter/s)": 0.08304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.4, "completions/mean_length": 212.275, "completions/min_length": 142.6, "epoch": 1.445031204992799, "frac_reward_zero_std": 0.625, "grad_norm": 0.06250451505184174, "kl": 0.01522216796875, "learning_rate": 1.962101250490809e-06, "loss": 0.0006091888062655926, "memory(GiB)": 27.09, "reward": 0.4913999855518341, "reward_std": 0.1162483523832634, "rewards/MMContentORM/mean": 0.6360000133514404, "rewards/MMContentORM/std": 0.5707429587841034, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3010, "train_speed(iter/s)": 0.08305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.4, "completions/mean_length": 214.075, "completions/min_length": 128.6, "epoch": 1.4474315890542486, "frac_reward_zero_std": 0.7, "grad_norm": 0.16592533886432648, "kl": 0.014422607421875, "learning_rate": 1.946360524595992e-06, "loss": 0.0005768738687038421, "memory(GiB)": 27.09, "reward": 0.4861999869346619, "reward_std": 0.09022682073991746, "rewards/MMContentORM/mean": 0.6230000138282776, "rewards/MMContentORM/std": 0.49670754447579385, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 3015, "train_speed(iter/s)": 0.083063 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.0, "completions/mean_length": 208.425, "completions/min_length": 125.8, "epoch": 1.4498319731156986, "frac_reward_zero_std": 0.725, "grad_norm": 0.11563282459974289, "kl": 0.017535400390625, "learning_rate": 1.930667918615396e-06, "loss": 0.0007019482553005218, "memory(GiB)": 27.09, "reward": 0.5023999869823456, "reward_std": 0.08259006794542074, "rewards/MMContentORM/mean": 0.6634999990463257, "rewards/MMContentORM/std": 0.5144065268337726, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3020, "train_speed(iter/s)": 0.083066 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.2, "completions/mean_length": 217.95, "completions/min_length": 129.4, "epoch": 1.4522323571771483, "frac_reward_zero_std": 0.625, "grad_norm": 0.1004854291677475, "kl": 0.016015625, "learning_rate": 1.915023679836513e-06, "loss": 0.0006412723101675511, "memory(GiB)": 27.09, "reward": 0.44114998579025266, "reward_std": 0.08266078755259514, "rewards/MMContentORM/mean": 0.4959999889135361, "rewards/MMContentORM/std": 0.6346112012863159, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3025, "train_speed(iter/s)": 0.083076 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.2, "completions/mean_length": 208.8, "completions/min_length": 140.2, "epoch": 1.4546327412385982, "frac_reward_zero_std": 0.7, "grad_norm": 0.07408447563648224, "kl": 0.017010498046875, "learning_rate": 1.8994280547846516e-06, "loss": 0.0006803128868341446, "memory(GiB)": 27.09, "reward": 0.48314996957778933, "reward_std": 0.07813529700506479, "rewards/MMContentORM/mean": 0.6009999990463257, "rewards/MMContentORM/std": 0.6228325486183166, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3030, "train_speed(iter/s)": 0.083097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.0, "completions/mean_length": 204.5, "completions/min_length": 130.6, "epoch": 1.4570331253000481, "frac_reward_zero_std": 0.675, "grad_norm": 0.12042010575532913, "kl": 0.015240478515625, "learning_rate": 1.8838812892190655e-06, "loss": 0.000609145499765873, "memory(GiB)": 27.09, "reward": 0.5214499771595001, "reward_std": 0.053386559383943676, "rewards/MMContentORM/mean": 0.6680000066757202, "rewards/MMContentORM/std": 0.47634573876857755, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 3035, "train_speed(iter/s)": 0.083108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 371.4, "completions/mean_length": 220.7875, "completions/min_length": 149.4, "epoch": 1.4594335093614978, "frac_reward_zero_std": 0.725, "grad_norm": 0.14619703590869904, "kl": 0.01563720703125, "learning_rate": 1.8683836281290608e-06, "loss": 0.0006260167807340622, "memory(GiB)": 27.09, "reward": 0.5266999781131745, "reward_std": 0.08329718094319105, "rewards/MMContentORM/mean": 0.728000009059906, "rewards/MMContentORM/std": 0.5765750944614411, "rewards/MMFormatORM/mean": 0.6137499928474426, "rewards/MMFormatORM/std": 0.14499999433755875, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 3040, "train_speed(iter/s)": 0.083102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.8, "completions/mean_length": 221.7625, "completions/min_length": 155.8, "epoch": 1.4618338934229476, "frac_reward_zero_std": 0.7, "grad_norm": 0.12591391801834106, "kl": 0.013836669921875, "learning_rate": 1.8529353157301477e-06, "loss": 0.0005534658208489418, "memory(GiB)": 27.09, "reward": 0.4464499831199646, "reward_std": 0.11136931926012039, "rewards/MMContentORM/mean": 0.5380000054836274, "rewards/MMContentORM/std": 0.6489728450775146, "rewards/MMFormatORM/mean": 0.609375, "rewards/MMFormatORM/std": 0.16249999403953552, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.25, "step": 3045, "train_speed(iter/s)": 0.083104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.8, "completions/mean_length": 219.4625, "completions/min_length": 134.8, "epoch": 1.4642342774843975, "frac_reward_zero_std": 0.625, "grad_norm": 0.24321743845939636, "kl": 0.030645751953125, "learning_rate": 1.8375365954601882e-06, "loss": 0.001224792841821909, "memory(GiB)": 27.09, "reward": 0.4084999799728394, "reward_std": 0.11978388726711273, "rewards/MMContentORM/mean": 0.45750000774860383, "rewards/MMContentORM/std": 0.7140669703483582, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.17440344989299775, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.2683130085468292, "step": 3050, "train_speed(iter/s)": 0.08311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.0, "completions/mean_length": 207.55, "completions/min_length": 138.4, "epoch": 1.4666346615458474, "frac_reward_zero_std": 0.75, "grad_norm": 0.17153875529766083, "kl": 0.018304443359375, "learning_rate": 1.8221877099755635e-06, "loss": 0.0007329397834837436, "memory(GiB)": 27.09, "reward": 0.48879997730255126, "reward_std": 0.09842926461715251, "rewards/MMContentORM/mean": 0.6295000195503235, "rewards/MMContentORM/std": 0.6123105943202972, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3055, "train_speed(iter/s)": 0.083119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 339.4, "completions/mean_length": 212.4125, "completions/min_length": 142.8, "epoch": 1.4690350456072971, "frac_reward_zero_std": 0.6, "grad_norm": 0.18333663046360016, "kl": 0.01534423828125, "learning_rate": 1.8068889011473472e-06, "loss": 0.0006133603863418102, "memory(GiB)": 27.09, "reward": 0.4755499720573425, "reward_std": 0.10585388541221619, "rewards/MMContentORM/mean": 0.5819999992847442, "rewards/MMContentORM/std": 0.6341395020484925, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3060, "train_speed(iter/s)": 0.083124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.0, "completions/mean_length": 203.5125, "completions/min_length": 121.0, "epoch": 1.471435429668747, "frac_reward_zero_std": 0.675, "grad_norm": 0.26711151003837585, "kl": 0.0223388671875, "learning_rate": 1.7916404100574858e-06, "loss": 0.0008931753225624561, "memory(GiB)": 27.09, "reward": 0.4269499838352203, "reward_std": 0.13951216414570808, "rewards/MMContentORM/mean": 0.5180000185966491, "rewards/MMContentORM/std": 0.7072037339210511, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.16571036279201506, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 3065, "train_speed(iter/s)": 0.083143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.8, "completions/mean_length": 204.775, "completions/min_length": 127.8, "epoch": 1.4738358137301968, "frac_reward_zero_std": 0.725, "grad_norm": 0.11704003810882568, "kl": 0.015966796875, "learning_rate": 1.77644247699502e-06, "loss": 0.0006390390917658806, "memory(GiB)": 27.09, "reward": 0.5196999788284302, "reward_std": 0.038890871894545855, "rewards/MMContentORM/mean": 0.678000009059906, "rewards/MMContentORM/std": 0.5452348232269287, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 3070, "train_speed(iter/s)": 0.083156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.2, "completions/mean_length": 212.1375, "completions/min_length": 147.4, "epoch": 1.4762361977916467, "frac_reward_zero_std": 0.775, "grad_norm": 0.11178287863731384, "kl": 0.015142822265625, "learning_rate": 1.7612953414522787e-06, "loss": 0.0006058240309357643, "memory(GiB)": 27.09, "reward": 0.5580999732017518, "reward_std": 0.07170062698423862, "rewards/MMContentORM/mean": 0.7740000009536743, "rewards/MMContentORM/std": 0.47003708481788636, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3075, "train_speed(iter/s)": 0.08317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.2, "completions/mean_length": 215.875, "completions/min_length": 129.0, "epoch": 1.4786365818530964, "frac_reward_zero_std": 0.65, "grad_norm": 0.1516324132680893, "kl": 0.014453125, "learning_rate": 1.7461992421211144e-06, "loss": 0.0005788389593362808, "memory(GiB)": 27.09, "reward": 0.4489999830722809, "reward_std": 0.09079250320792198, "rewards/MMContentORM/mean": 0.5300000131130218, "rewards/MMContentORM/std": 0.6716732859611512, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 3080, "train_speed(iter/s)": 0.083181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.0, "completions/mean_length": 215.9125, "completions/min_length": 125.6, "epoch": 1.4810369659145464, "frac_reward_zero_std": 0.75, "grad_norm": 0.1026253029704094, "kl": 0.01588134765625, "learning_rate": 1.7311544168891397e-06, "loss": 0.0006352938711643219, "memory(GiB)": 27.09, "reward": 0.49314998388290404, "reward_std": 0.09171175360679626, "rewards/MMContentORM/mean": 0.6260000169277191, "rewards/MMContentORM/std": 0.5888873279094696, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3085, "train_speed(iter/s)": 0.083182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.0, "completions/mean_length": 222.55, "completions/min_length": 147.4, "epoch": 1.483437349975996, "frac_reward_zero_std": 0.6, "grad_norm": 0.19577515125274658, "kl": 0.46920166015625, "learning_rate": 1.7161611028359776e-06, "loss": 0.01879151463508606, "memory(GiB)": 27.09, "reward": 0.4272999823093414, "reward_std": 0.1520279485033825, "rewards/MMContentORM/mean": 0.504500013589859, "rewards/MMContentORM/std": 0.6994093418121338, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 3090, "train_speed(iter/s)": 0.08317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 476.0, "completions/mean_length": 229.6, "completions/min_length": 144.4, "epoch": 1.485837734037446, "frac_reward_zero_std": 0.65, "grad_norm": 0.17094756662845612, "kl": 0.022589111328125, "learning_rate": 1.701219536229531e-06, "loss": 0.0009042560122907162, "memory(GiB)": 27.09, "reward": 0.41979997158050536, "reward_std": 0.1699884652160108, "rewards/MMContentORM/mean": 0.5145000159740448, "rewards/MMContentORM/std": 0.6915717840194702, "rewards/MMFormatORM/mean": 0.5849999845027923, "rewards/MMFormatORM/std": 0.1430424392223358, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.22006530165672303, "step": 3095, "train_speed(iter/s)": 0.083129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 444.0, "completions/mean_length": 228.7625, "completions/min_length": 142.6, "epoch": 1.488238118098896, "frac_reward_zero_std": 0.65, "grad_norm": 0.06433889269828796, "kl": 0.019317626953125, "learning_rate": 1.686329952522251e-06, "loss": 0.0007725300267338753, "memory(GiB)": 27.09, "reward": 0.4120999813079834, "reward_std": 0.23122391402721404, "rewards/MMContentORM/mean": 0.5240000247955322, "rewards/MMContentORM/std": 0.7343219518661499, "rewards/MMFormatORM/mean": 0.5687499880790711, "rewards/MMFormatORM/std": 0.2142127960920334, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.3295581638813019, "step": 3100, "train_speed(iter/s)": 0.083097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.4, "completions/mean_length": 215.2625, "completions/min_length": 146.6, "epoch": 1.4906385021603457, "frac_reward_zero_std": 0.625, "grad_norm": 0.07017713785171509, "kl": 0.014739990234375, "learning_rate": 1.6714925863474317e-06, "loss": 0.000588908651843667, "memory(GiB)": 27.09, "reward": 0.43324996829032897, "reward_std": 0.1584626256953925, "rewards/MMContentORM/mean": 0.5625000119209289, "rewards/MMContentORM/std": 0.7162049651145935, "rewards/MMFormatORM/mean": 0.5768749833106994, "rewards/MMFormatORM/std": 0.2062115788459778, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.3172485947608948, "step": 3105, "train_speed(iter/s)": 0.083075 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.6, "completions/mean_length": 209.9375, "completions/min_length": 151.6, "epoch": 1.4930388862217954, "frac_reward_zero_std": 0.625, "grad_norm": 0.1349526196718216, "kl": 0.015740966796875, "learning_rate": 1.6567076715155118e-06, "loss": 0.0006291633006185293, "memory(GiB)": 27.09, "reward": 0.4481499850749969, "reward_std": 0.08690342083573341, "rewards/MMContentORM/mean": 0.5135000020265579, "rewards/MMContentORM/std": 0.6392456710338592, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3110, "train_speed(iter/s)": 0.083088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.2, "completions/mean_length": 206.9, "completions/min_length": 143.6, "epoch": 1.4954392702832453, "frac_reward_zero_std": 0.675, "grad_norm": 0.15179786086082458, "kl": 0.021075439453125, "learning_rate": 1.6419754410103949e-06, "loss": 0.0008435728028416634, "memory(GiB)": 27.09, "reward": 0.42634997963905336, "reward_std": 0.1286227189935744, "rewards/MMContentORM/mean": 0.5165000140666962, "rewards/MMContentORM/std": 0.6922470927238464, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 3115, "train_speed(iter/s)": 0.083093 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.2, "completions/mean_length": 218.2875, "completions/min_length": 119.0, "epoch": 1.4978396543446952, "frac_reward_zero_std": 0.625, "grad_norm": 0.21460042893886566, "kl": 0.032769775390625, "learning_rate": 1.6272961269857657e-06, "loss": 0.0013143711723387242, "memory(GiB)": 27.09, "reward": 0.43154999017715456, "reward_std": 0.14884596914052964, "rewards/MMContentORM/mean": 0.5295000314712525, "rewards/MMContentORM/std": 0.7493190169334412, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.18630690574645997, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 3120, "train_speed(iter/s)": 0.083091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.2, "completions/mean_length": 208.25, "completions/min_length": 130.8, "epoch": 1.500240038406145, "frac_reward_zero_std": 0.7, "grad_norm": 0.12087953835725784, "kl": 0.01927490234375, "learning_rate": 1.6126699607614427e-06, "loss": 0.0007707193493843078, "memory(GiB)": 27.09, "reward": 0.5104999840259552, "reward_std": 0.06943788453936577, "rewards/MMContentORM/mean": 0.6549999952316284, "rewards/MMContentORM/std": 0.5868445634841919, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3125, "train_speed(iter/s)": 0.083097 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.8, "completions/mean_length": 209.8625, "completions/min_length": 131.4, "epoch": 1.5026404224675947, "frac_reward_zero_std": 0.725, "grad_norm": 0.0041847084648907185, "kl": 0.01480712890625, "learning_rate": 1.5980971728197342e-06, "loss": 0.0005915745161473752, "memory(GiB)": 27.09, "reward": 0.5453999817371369, "reward_std": 0.10776307303458452, "rewards/MMContentORM/mean": 0.7710000157356263, "rewards/MMContentORM/std": 0.4581117108464241, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3130, "train_speed(iter/s)": 0.083106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 330.4, "completions/mean_length": 219.2625, "completions/min_length": 140.8, "epoch": 1.5050408065290446, "frac_reward_zero_std": 0.7, "grad_norm": 0.10535841435194016, "kl": 0.013946533203125, "learning_rate": 1.583577992801797e-06, "loss": 0.0005582999438047409, "memory(GiB)": 27.09, "reward": 0.5231499969959259, "reward_std": 0.07700392559636385, "rewards/MMContentORM/mean": 0.7010000109672546, "rewards/MMContentORM/std": 0.5700598895549774, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3135, "train_speed(iter/s)": 0.083109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.6, "completions/mean_length": 214.6, "completions/min_length": 149.8, "epoch": 1.5074411905904945, "frac_reward_zero_std": 0.575, "grad_norm": 0.0759124755859375, "kl": 0.015350341796875, "learning_rate": 1.5691126495040238e-06, "loss": 0.000614680303260684, "memory(GiB)": 27.09, "reward": 0.47774999141693114, "reward_std": 0.1819385740207508, "rewards/MMContentORM/mean": 0.6450000107288361, "rewards/MMContentORM/std": 0.619084757566452, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.14121158123016359, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.21724859476089478, "step": 3140, "train_speed(iter/s)": 0.083108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.2, "completions/mean_length": 209.8875, "completions/min_length": 139.0, "epoch": 1.5098415746519445, "frac_reward_zero_std": 0.625, "grad_norm": 0.12398523837327957, "kl": 0.015985107421875, "learning_rate": 1.5547013708744347e-06, "loss": 0.0006400375626981258, "memory(GiB)": 27.09, "reward": 0.39759998619556425, "reward_std": 0.1776252317475155, "rewards/MMContentORM/mean": 0.45900002419948577, "rewards/MMContentORM/std": 0.7010067760944366, "rewards/MMFormatORM/mean": 0.5849999725818634, "rewards/MMFormatORM/std": 0.14694467782974244, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.22606874108314515, "step": 3145, "train_speed(iter/s)": 0.083126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 365.6, "completions/mean_length": 219.8125, "completions/min_length": 149.6, "epoch": 1.5122419587133942, "frac_reward_zero_std": 0.7, "grad_norm": 0.13619284331798553, "kl": 0.0173095703125, "learning_rate": 1.5403443840090943e-06, "loss": 0.0006929846480488777, "memory(GiB)": 27.09, "reward": 0.5145999729633332, "reward_std": 0.0552957494975999, "rewards/MMContentORM/mean": 0.6940000057220459, "rewards/MMContentORM/std": 0.4436347268521786, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 3150, "train_speed(iter/s)": 0.083117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.4, "completions/mean_length": 210.0, "completions/min_length": 124.0, "epoch": 1.5146423427748439, "frac_reward_zero_std": 0.7, "grad_norm": 0.1929241418838501, "kl": 0.015277099609375, "learning_rate": 1.5260419151485151e-06, "loss": 0.0006110362242907286, "memory(GiB)": 27.09, "reward": 0.4635499775409698, "reward_std": 0.12282444722950459, "rewards/MMContentORM/mean": 0.584500002861023, "rewards/MMContentORM/std": 0.5167646646499634, "rewards/MMFormatORM/mean": 0.6056249856948852, "rewards/MMFormatORM/std": 0.13240466862916947, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 3155, "train_speed(iter/s)": 0.083126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 382.4, "completions/mean_length": 225.3375, "completions/min_length": 132.8, "epoch": 1.5170427268362938, "frac_reward_zero_std": 0.55, "grad_norm": 0.18700656294822693, "kl": 0.017669677734375, "learning_rate": 1.511794189674109e-06, "loss": 0.0007067018188536168, "memory(GiB)": 27.09, "reward": 0.4364499807357788, "reward_std": 0.20866721048951148, "rewards/MMContentORM/mean": 0.5704999923706054, "rewards/MMContentORM/std": 0.605513896048069, "rewards/MMFormatORM/mean": 0.5768749833106994, "rewards/MMFormatORM/std": 0.14713743329048157, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.22636529207229614, "step": 3160, "train_speed(iter/s)": 0.083113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.8, "completions/mean_length": 209.6875, "completions/min_length": 121.6, "epoch": 1.5194431108977438, "frac_reward_zero_std": 0.675, "grad_norm": 0.15477411448955536, "kl": 0.014892578125, "learning_rate": 1.4976014321046323e-06, "loss": 0.000595424510538578, "memory(GiB)": 27.09, "reward": 0.49574996829032897, "reward_std": 0.09482301846146583, "rewards/MMContentORM/mean": 0.6325000166893006, "rewards/MMContentORM/std": 0.5926263153553009, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3165, "train_speed(iter/s)": 0.083112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.0, "completions/mean_length": 209.625, "completions/min_length": 135.4, "epoch": 1.5218434949591935, "frac_reward_zero_std": 0.7, "grad_norm": 0.14993643760681152, "kl": 0.013580322265625, "learning_rate": 1.4834638660926403e-06, "loss": 0.0005426953546702861, "memory(GiB)": 27.09, "reward": 0.44014999866485593, "reward_std": 0.0772867701947689, "rewards/MMContentORM/mean": 0.4934999912977219, "rewards/MMContentORM/std": 0.6292442440986633, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3170, "train_speed(iter/s)": 0.083121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 346.6, "completions/mean_length": 217.0125, "completions/min_length": 132.2, "epoch": 1.5242438790206432, "frac_reward_zero_std": 0.575, "grad_norm": 0.181192085146904, "kl": 0.0142333984375, "learning_rate": 1.4693817144209699e-06, "loss": 0.0005688410252332688, "memory(GiB)": 27.09, "reward": 0.48604997992515564, "reward_std": 0.11419774182140827, "rewards/MMContentORM/mean": 0.6370000183582306, "rewards/MMContentORM/std": 0.6059823155403137, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 3175, "train_speed(iter/s)": 0.083117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025, "completions/max_length": 462.0, "completions/mean_length": 227.6375, "completions/min_length": 130.4, "epoch": 1.5266442630820931, "frac_reward_zero_std": 0.75, "grad_norm": 0.12335384637117386, "kl": 0.0125244140625, "learning_rate": 1.4553551989992238e-06, "loss": 0.0005013378337025643, "memory(GiB)": 27.09, "reward": 0.5401999652385712, "reward_std": 0.10210621654987335, "rewards/MMContentORM/mean": 0.7580000162124634, "rewards/MMContentORM/std": 0.5495529055595398, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3180, "train_speed(iter/s)": 0.083082 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.8, "completions/mean_length": 212.6125, "completions/min_length": 139.2, "epoch": 1.529044647143543, "frac_reward_zero_std": 0.7, "grad_norm": 0.14260004460811615, "kl": 0.02462158203125, "learning_rate": 1.4413845408602838e-06, "loss": 0.0009833592921495438, "memory(GiB)": 27.09, "reward": 0.4681999832391739, "reward_std": 0.07283199802041054, "rewards/MMContentORM/mean": 0.5780000060796737, "rewards/MMContentORM/std": 0.587296724319458, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 3185, "train_speed(iter/s)": 0.083092 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.6, "completions/mean_length": 221.975, "completions/min_length": 147.8, "epoch": 1.5314450312049928, "frac_reward_zero_std": 0.7, "grad_norm": 0.1370711326599121, "kl": 0.015081787109375, "learning_rate": 1.427469960156812e-06, "loss": 0.0006033728364855051, "memory(GiB)": 27.09, "reward": 0.4931999921798706, "reward_std": 0.07085209367796778, "rewards/MMContentORM/mean": 0.6405000150203705, "rewards/MMContentORM/std": 0.6245416283607483, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 3190, "train_speed(iter/s)": 0.083091 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.8, "completions/mean_length": 206.65, "completions/min_length": 127.6, "epoch": 1.5338454152664425, "frac_reward_zero_std": 0.525, "grad_norm": 0.17985497415065765, "kl": 0.018963623046875, "learning_rate": 1.4136116761577935e-06, "loss": 0.0007579845376312732, "memory(GiB)": 27.09, "reward": 0.5177499890327454, "reward_std": 0.09425732623785735, "rewards/MMContentORM/mean": 0.6875000119209289, "rewards/MMContentORM/std": 0.5862172305583954, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3195, "train_speed(iter/s)": 0.0831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 380.2, "completions/mean_length": 223.8, "completions/min_length": 136.0, "epoch": 1.5362457993278924, "frac_reward_zero_std": 0.725, "grad_norm": 0.11441948264837265, "kl": 0.01436767578125, "learning_rate": 1.3998099072450811e-06, "loss": 0.0005743363872170448, "memory(GiB)": 27.09, "reward": 0.4908499836921692, "reward_std": 0.11193500682711602, "rewards/MMContentORM/mean": 0.6490000128746033, "rewards/MMContentORM/std": 0.5852620244026184, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 3200, "train_speed(iter/s)": 0.083089 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 501.4, "completions/mean_length": 236.55, "completions/min_length": 149.6, "epoch": 1.5386461833893423, "frac_reward_zero_std": 0.625, "grad_norm": 0.11317011713981628, "kl": 0.0179931640625, "learning_rate": 1.386064870909946e-06, "loss": 0.0007195640355348587, "memory(GiB)": 27.09, "reward": 0.4382499873638153, "reward_std": 0.12918841242790222, "rewards/MMContentORM/mean": 0.5174999952316284, "rewards/MMContentORM/std": 0.6612499058246613, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 3205, "train_speed(iter/s)": 0.083004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.2, "completions/mean_length": 218.575, "completions/min_length": 139.2, "epoch": 1.5410465674507923, "frac_reward_zero_std": 0.55, "grad_norm": 0.14002180099487305, "kl": 0.014068603515625, "learning_rate": 1.3723767837496571e-06, "loss": 0.0005637550726532936, "memory(GiB)": 27.09, "reward": 0.43539997935295105, "reward_std": 0.12756206155754626, "rewards/MMContentORM/mean": 0.49599999785423277, "rewards/MMContentORM/std": 0.6566171884536743, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 3210, "train_speed(iter/s)": 0.083004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.8, "completions/mean_length": 207.0125, "completions/min_length": 145.0, "epoch": 1.543446951512242, "frac_reward_zero_std": 0.725, "grad_norm": 0.17006883025169373, "kl": 0.016265869140625, "learning_rate": 1.3587458614640648e-06, "loss": 0.0006507603451609611, "memory(GiB)": 27.09, "reward": 0.5275500059127808, "reward_std": 0.07700392962433397, "rewards/MMContentORM/mean": 0.7120000004768372, "rewards/MMContentORM/std": 0.5522327601909638, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3215, "train_speed(iter/s)": 0.08302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 454.8, "completions/mean_length": 222.8125, "completions/min_length": 118.6, "epoch": 1.5458473355736917, "frac_reward_zero_std": 0.625, "grad_norm": 0.19454053044319153, "kl": 0.02646484375, "learning_rate": 1.3451723188522043e-06, "loss": 0.0010566259734332561, "memory(GiB)": 27.09, "reward": 0.3924499750137329, "reward_std": 0.19169664829969407, "rewards/MMContentORM/mean": 0.46050000190734863, "rewards/MMContentORM/std": 0.7452011108398438, "rewards/MMFormatORM/mean": 0.5768749833106994, "rewards/MMFormatORM/std": 0.2062115788459778, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.3172485947608948, "step": 3220, "train_speed(iter/s)": 0.082975 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 472.6, "completions/mean_length": 222.15, "completions/min_length": 142.2, "epoch": 1.5482477196351416, "frac_reward_zero_std": 0.475, "grad_norm": 0.19158095121383667, "kl": 0.0150146484375, "learning_rate": 1.331656369808908e-06, "loss": 0.0006003158167004585, "memory(GiB)": 27.09, "reward": 0.4775499701499939, "reward_std": 0.1092479906976223, "rewards/MMContentORM/mean": 0.6195000171661377, "rewards/MMContentORM/std": 0.5317566640675068, "rewards/MMFormatORM/mean": 0.6056249737739563, "rewards/MMFormatORM/std": 0.11180812567472458, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.14893558621406555, "step": 3225, "train_speed(iter/s)": 0.082941 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.6, "completions/mean_length": 211.1875, "completions/min_length": 118.8, "epoch": 1.5506481036965916, "frac_reward_zero_std": 0.75, "grad_norm": 0.1315494179725647, "kl": 0.01778564453125, "learning_rate": 1.318198227321436e-06, "loss": 0.000711181340739131, "memory(GiB)": 27.09, "reward": 0.5287999749183655, "reward_std": 0.011313705006614327, "rewards/MMContentORM/mean": 0.6720000028610229, "rewards/MMContentORM/std": 0.5012042224407196, "rewards/MMFormatORM/mean": 0.6499999761581421, "rewards/MMFormatORM/std": 0.0, "rewards/MMRubricORM/mean": 0.0, "rewards/MMRubricORM/std": 0.0, "step": 3230, "train_speed(iter/s)": 0.082949 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.6, "completions/mean_length": 216.4625, "completions/min_length": 109.6, "epoch": 1.5530484877580413, "frac_reward_zero_std": 0.575, "grad_norm": 0.15574294328689575, "kl": 0.020379638671875, "learning_rate": 1.3047981034661245e-06, "loss": 0.0008146503940224647, "memory(GiB)": 27.09, "reward": 0.46579996943473817, "reward_std": 0.16772572547197342, "rewards/MMContentORM/mean": 0.62950000166893, "rewards/MMContentORM/std": 0.683431351184845, "rewards/MMFormatORM/mean": 0.5849999785423279, "rewards/MMFormatORM/std": 0.19821036159992217, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.3049390256404877, "step": 3235, "train_speed(iter/s)": 0.082953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.2, "completions/mean_length": 217.525, "completions/min_length": 132.6, "epoch": 1.555448871819491, "frac_reward_zero_std": 0.65, "grad_norm": 0.20693257451057434, "kl": 0.02076416015625, "learning_rate": 1.2914562094050343e-06, "loss": 0.0008313735015690327, "memory(GiB)": 27.09, "reward": 0.47934995889663695, "reward_std": 0.09482302069664002, "rewards/MMContentORM/mean": 0.5915000021457673, "rewards/MMContentORM/std": 0.5874766707420349, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3240, "train_speed(iter/s)": 0.082959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.8, "completions/mean_length": 214.3125, "completions/min_length": 146.0, "epoch": 1.557849255880941, "frac_reward_zero_std": 0.75, "grad_norm": 0.17259366810321808, "kl": 0.01396484375, "learning_rate": 1.2781727553826307e-06, "loss": 0.0005578281357884407, "memory(GiB)": 27.09, "reward": 0.5059499919414521, "reward_std": 0.06173042135778815, "rewards/MMContentORM/mean": 0.658000010251999, "rewards/MMContentORM/std": 0.44256684333086016, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.05240467190742493, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.08062257766723632, "step": 3245, "train_speed(iter/s)": 0.082967 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.2, "completions/mean_length": 201.55, "completions/min_length": 137.4, "epoch": 1.5602496399423909, "frac_reward_zero_std": 0.7, "grad_norm": 0.066941037774086, "kl": 0.016973876953125, "learning_rate": 1.264947950722467e-06, "loss": 0.0006793485023081303, "memory(GiB)": 27.09, "reward": 0.535649973154068, "reward_std": 0.12777419239282609, "rewards/MMContentORM/mean": 0.7610000252723694, "rewards/MMContentORM/std": 0.5375386297702789, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 3250, "train_speed(iter/s)": 0.082973 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.0, "completions/mean_length": 228.2, "completions/min_length": 154.6, "epoch": 1.5626500240038406, "frac_reward_zero_std": 0.7, "grad_norm": 0.09117994457483292, "kl": 0.012646484375, "learning_rate": 1.2517820038238893e-06, "loss": 0.0005060765892267227, "memory(GiB)": 27.09, "reward": 0.4888499915599823, "reward_std": 0.09454017840325832, "rewards/MMContentORM/mean": 0.6439999967813492, "rewards/MMContentORM/std": 0.5739769160747528, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 3255, "train_speed(iter/s)": 0.082976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.6, "completions/mean_length": 197.9125, "completions/min_length": 121.4, "epoch": 1.5650504080652905, "frac_reward_zero_std": 0.55, "grad_norm": 0.2288537472486496, "kl": 0.017327880859375, "learning_rate": 1.2386751221587478e-06, "loss": 0.0006930924020707608, "memory(GiB)": 27.09, "reward": 0.3989499807357788, "reward_std": 0.1745846627280116, "rewards/MMContentORM/mean": 0.4479999840259552, "rewards/MMContentORM/std": 0.7100707769393921, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 3260, "train_speed(iter/s)": 0.082991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 465.4, "completions/mean_length": 225.6875, "completions/min_length": 145.0, "epoch": 1.5674507921267402, "frac_reward_zero_std": 0.75, "grad_norm": 0.004609475843608379, "kl": 0.01778564453125, "learning_rate": 1.2256275122681304e-06, "loss": 0.0007108909543603658, "memory(GiB)": 27.09, "reward": 0.4348500072956085, "reward_std": 0.10231834650039673, "rewards/MMContentORM/mean": 0.5090000003576278, "rewards/MMContentORM/std": 0.6001178443431854, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 3265, "train_speed(iter/s)": 0.082956 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.8, "completions/mean_length": 213.2625, "completions/min_length": 151.2, "epoch": 1.5698511761881901, "frac_reward_zero_std": 0.575, "grad_norm": 0.18112541735172272, "kl": 0.01341552734375, "learning_rate": 1.2126393797591112e-06, "loss": 0.0005367286503314972, "memory(GiB)": 27.09, "reward": 0.4611499786376953, "reward_std": 0.07771103186532854, "rewards/MMContentORM/mean": 0.5460000038146973, "rewards/MMContentORM/std": 0.6368870377540589, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3270, "train_speed(iter/s)": 0.082961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.6, "completions/mean_length": 203.625, "completions/min_length": 121.0, "epoch": 1.57225156024964, "frac_reward_zero_std": 0.675, "grad_norm": 0.1886938065290451, "kl": 0.016790771484375, "learning_rate": 1.1997109293015015e-06, "loss": 0.0006717256270349026, "memory(GiB)": 27.09, "reward": 0.4238499701023102, "reward_std": 0.12975409450009465, "rewards/MMContentORM/mean": 0.48149998784065245, "rewards/MMContentORM/std": 0.6749303579330445, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 3275, "train_speed(iter/s)": 0.082972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.4, "completions/mean_length": 217.0375, "completions/min_length": 127.0, "epoch": 1.5746519443110898, "frac_reward_zero_std": 0.7, "grad_norm": 0.1641152799129486, "kl": 0.017828369140625, "learning_rate": 1.1868423646246323e-06, "loss": 0.0007128944620490074, "memory(GiB)": 27.09, "reward": 0.5149999856948853, "reward_std": 0.09135819021612405, "rewards/MMContentORM/mean": 0.695000022649765, "rewards/MMContentORM/std": 0.49438799545168877, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3280, "train_speed(iter/s)": 0.082969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.0, "completions/mean_length": 208.25, "completions/min_length": 136.2, "epoch": 1.5770523283725395, "frac_reward_zero_std": 0.725, "grad_norm": 0.13112139701843262, "kl": 0.016351318359375, "learning_rate": 1.1740338885141422e-06, "loss": 0.0006545517593622207, "memory(GiB)": 27.09, "reward": 0.4730999946594238, "reward_std": 0.11455129862297327, "rewards/MMContentORM/mean": 0.6190000176429749, "rewards/MMContentORM/std": 0.6247550487518311, "rewards/MMFormatORM/mean": 0.6012499868869782, "rewards/MMFormatORM/std": 0.12313776612281799, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.1894427239894867, "step": 3285, "train_speed(iter/s)": 0.082985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.2, "completions/mean_length": 207.8, "completions/min_length": 140.6, "epoch": 1.5794527124339894, "frac_reward_zero_std": 0.7, "grad_norm": 0.18867741525173187, "kl": 0.01796875, "learning_rate": 1.16128570280878e-06, "loss": 0.0007181556895375252, "memory(GiB)": 27.09, "reward": 0.45814998745918273, "reward_std": 0.1034497192595154, "rewards/MMContentORM/mean": 0.5959999918937683, "rewards/MMContentORM/std": 0.6685267508029937, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 3290, "train_speed(iter/s)": 0.082978 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.6, "completions/mean_length": 207.7875, "completions/min_length": 142.6, "epoch": 1.5818530964954394, "frac_reward_zero_std": 0.65, "grad_norm": 0.2017498016357422, "kl": 0.017095947265625, "learning_rate": 1.1485980083972242e-06, "loss": 0.0006845677271485329, "memory(GiB)": 27.09, "reward": 0.4696999967098236, "reward_std": 0.09899494857527316, "rewards/MMContentORM/mean": 0.6105000078678131, "rewards/MMContentORM/std": 0.6631593823432922, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 3295, "train_speed(iter/s)": 0.082991 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.0, "completions/mean_length": 209.1, "completions/min_length": 145.0, "epoch": 1.584253480556889, "frac_reward_zero_std": 0.725, "grad_norm": 0.1147226095199585, "kl": 0.01207275390625, "learning_rate": 1.1359710052149191e-06, "loss": 0.00048305182717740537, "memory(GiB)": 27.09, "reward": 0.536549985408783, "reward_std": 0.05296229436062276, "rewards/MMContentORM/mean": 0.7220000147819519, "rewards/MMContentORM/std": 0.5010363392531871, "rewards/MMFormatORM/mean": 0.6318749904632568, "rewards/MMFormatORM/std": 0.07249999642372132, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3300, "train_speed(iter/s)": 0.083011 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.2, "completions/mean_length": 204.5875, "completions/min_length": 114.2, "epoch": 1.5866538646183388, "frac_reward_zero_std": 0.6, "grad_norm": 0.22295618057250977, "kl": 0.020770263671875, "learning_rate": 1.123404892240927e-06, "loss": 0.0008308948017656803, "memory(GiB)": 27.09, "reward": 0.4197499752044678, "reward_std": 0.14799744696356357, "rewards/MMContentORM/mean": 0.5, "rewards/MMContentORM/std": 0.672368848323822, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.16571036279201506, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 3305, "train_speed(iter/s)": 0.082994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 389.0, "completions/mean_length": 214.7875, "completions/min_length": 132.8, "epoch": 1.5890542486797887, "frac_reward_zero_std": 0.75, "grad_norm": 0.08081990480422974, "kl": 0.016156005859375, "learning_rate": 1.110899867494784e-06, "loss": 0.0006469148676842452, "memory(GiB)": 27.09, "reward": 0.4746500015258789, "reward_std": 0.11108647771179676, "rewards/MMContentORM/mean": 0.6085000157356262, "rewards/MMContentORM/std": 0.6200405597686768, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.09680812656879426, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.14893558621406555, "step": 3310, "train_speed(iter/s)": 0.082984 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 339.8, "completions/mean_length": 217.85, "completions/min_length": 144.8, "epoch": 1.5914546327412387, "frac_reward_zero_std": 0.575, "grad_norm": 0.20784571766853333, "kl": 0.018511962890625, "learning_rate": 1.0984561280333867e-06, "loss": 0.0007408755365759135, "memory(GiB)": 27.09, "reward": 0.48464998602867126, "reward_std": 0.1252286109374836, "rewards/MMContentORM/mean": 0.6210000038146972, "rewards/MMContentORM/std": 0.5666950985789299, "rewards/MMFormatORM/mean": 0.615624976158142, "rewards/MMFormatORM/std": 0.10976680517196655, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3315, "train_speed(iter/s)": 0.082985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.4, "completions/mean_length": 200.8625, "completions/min_length": 88.8, "epoch": 1.5938550168026886, "frac_reward_zero_std": 0.625, "grad_norm": 0.11155827343463898, "kl": 0.035028076171875, "learning_rate": 1.0860738699478852e-06, "loss": 0.001404472440481186, "memory(GiB)": 27.09, "reward": 0.4386999785900116, "reward_std": 0.1506137415766716, "rewards/MMContentORM/mean": 0.5330000042915344, "rewards/MMContentORM/std": 0.6952720165252686, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.17440344989299775, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.2683130085468292, "step": 3320, "train_speed(iter/s)": 0.083002 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 521.0, "completions/mean_length": 233.6, "completions/min_length": 137.4, "epoch": 1.5962554008641383, "frac_reward_zero_std": 0.6, "grad_norm": 0.1469486504793167, "kl": 0.017987060546875, "learning_rate": 1.0737532883605916e-06, "loss": 0.0007191974669694901, "memory(GiB)": 27.09, "reward": 0.4517999827861786, "reward_std": 0.19148451760411261, "rewards/MMContentORM/mean": 0.5945000112056732, "rewards/MMContentORM/std": 0.6807171523571014, "rewards/MMFormatORM/mean": 0.5849999845027923, "rewards/MMFormatORM/std": 0.16754122078418732, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2577557325363159, "step": 3325, "train_speed(iter/s)": 0.082945 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.0, "completions/mean_length": 215.8875, "completions/min_length": 132.0, "epoch": 1.598655784925588, "frac_reward_zero_std": 0.7, "grad_norm": 0.13840670883655548, "kl": 0.015753173828125, "learning_rate": 1.0614945774219082e-06, "loss": 0.0006300761830061674, "memory(GiB)": 27.09, "reward": 0.501199996471405, "reward_std": 0.11200571209192275, "rewards/MMContentORM/mean": 0.6605000019073486, "rewards/MMContentORM/std": 0.5745877206325531, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3330, "train_speed(iter/s)": 0.082953 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 363.6, "completions/mean_length": 220.6125, "completions/min_length": 135.4, "epoch": 1.601056168987038, "frac_reward_zero_std": 0.625, "grad_norm": 0.1523619294166565, "kl": 0.014703369140625, "learning_rate": 1.049297930307262e-06, "loss": 0.0005889554508030414, "memory(GiB)": 27.09, "reward": 0.39344998002052306, "reward_std": 0.12918840944767, "rewards/MMContentORM/mean": 0.4055000126361847, "rewards/MMContentORM/std": 0.696004319190979, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 3335, "train_speed(iter/s)": 0.082948 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.4, "completions/mean_length": 211.5125, "completions/min_length": 128.0, "epoch": 1.6034565530484879, "frac_reward_zero_std": 0.775, "grad_norm": 0.09966878592967987, "kl": 0.017742919921875, "learning_rate": 1.037163539214072e-06, "loss": 0.0007098756264895201, "memory(GiB)": 27.09, "reward": 0.4519999802112579, "reward_std": 0.08032733157742769, "rewards/MMContentORM/mean": 0.537500011920929, "rewards/MMContentORM/std": 0.5419799767434597, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3340, "train_speed(iter/s)": 0.082952 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.4, "completions/mean_length": 207.275, "completions/min_length": 120.8, "epoch": 1.6058569371099376, "frac_reward_zero_std": 0.625, "grad_norm": 0.15431775152683258, "kl": 0.018145751953125, "learning_rate": 1.0250915953587088e-06, "loss": 0.0007263108156621457, "memory(GiB)": 27.09, "reward": 0.4955999791622162, "reward_std": 0.07580183688551187, "rewards/MMContentORM/mean": 0.6215000033378602, "rewards/MMContentORM/std": 0.5888689577579498, "rewards/MMFormatORM/mean": 0.6299999833106995, "rewards/MMFormatORM/std": 0.06737477481365203, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3345, "train_speed(iter/s)": 0.082961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.4, "completions/mean_length": 223.1125, "completions/min_length": 131.8, "epoch": 1.6082573211713873, "frac_reward_zero_std": 0.675, "grad_norm": 0.08975112438201904, "kl": 0.01865234375, "learning_rate": 1.013082288973481e-06, "loss": 0.0007464576978236437, "memory(GiB)": 27.09, "reward": 0.4766999840736389, "reward_std": 0.15117942318320274, "rewards/MMContentORM/mean": 0.628000009059906, "rewards/MMContentORM/std": 0.6414366006851197, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.14990466833114624, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23062257766723632, "step": 3350, "train_speed(iter/s)": 0.082961 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.2, "completions/mean_length": 213.5625, "completions/min_length": 143.2, "epoch": 1.6106577052328372, "frac_reward_zero_std": 0.7, "grad_norm": 0.1311890333890915, "kl": 0.01627197265625, "learning_rate": 1.0011358093036527e-06, "loss": 0.0006509319879114628, "memory(GiB)": 27.09, "reward": 0.46369996666908264, "reward_std": 0.08089300859719514, "rewards/MMContentORM/mean": 0.595499986410141, "rewards/MMContentORM/std": 0.6696452021598815, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 3355, "train_speed(iter/s)": 0.082963 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.4, "completions/mean_length": 210.25, "completions/min_length": 101.8, "epoch": 1.6130580892942872, "frac_reward_zero_std": 0.725, "grad_norm": 0.12645810842514038, "kl": 0.0197021484375, "learning_rate": 9.89252344604444e-07, "loss": 0.000787085946649313, "memory(GiB)": 27.09, "reward": 0.4845999777317047, "reward_std": 0.11115718111395836, "rewards/MMContentORM/mean": 0.6190000116825104, "rewards/MMContentORM/std": 0.6094013214111328, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3360, "train_speed(iter/s)": 0.082972 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.0, "completions/mean_length": 212.8875, "completions/min_length": 153.4, "epoch": 1.615458473355737, "frac_reward_zero_std": 0.75, "grad_norm": 0.07932203263044357, "kl": 0.014056396484375, "learning_rate": 9.774320821380734e-07, "loss": 0.0005630974192172289, "memory(GiB)": 27.09, "reward": 0.43234997391700747, "reward_std": 0.0758725541876629, "rewards/MMContentORM/mean": 0.47400000095367434, "rewards/MMContentORM/std": 0.6691441416740418, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3365, "train_speed(iter/s)": 0.08298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.0, "completions/mean_length": 212.225, "completions/min_length": 151.6, "epoch": 1.6178588574171866, "frac_reward_zero_std": 0.7, "grad_norm": 0.1142314150929451, "kl": 0.014459228515625, "learning_rate": 9.656752081708031e-07, "loss": 0.0005782804451882839, "memory(GiB)": 27.09, "reward": 0.43564997911453246, "reward_std": 0.10175266563892364, "rewards/MMContentORM/mean": 0.510999995470047, "rewards/MMContentORM/std": 0.6983252167701721, "rewards/MMFormatORM/mean": 0.609375, "rewards/MMFormatORM/std": 0.16249999403953552, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.25, "step": 3370, "train_speed(iter/s)": 0.082987 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 461.6, "completions/mean_length": 228.4, "completions/min_length": 124.0, "epoch": 1.6202592414786365, "frac_reward_zero_std": 0.6, "grad_norm": 0.15813876688480377, "kl": 0.02144775390625, "learning_rate": 9.539819079700096e-07, "loss": 0.0008579882793128491, "memory(GiB)": 27.09, "reward": 0.4178499698638916, "reward_std": 0.11221784348599613, "rewards/MMContentORM/mean": 0.4665000081062317, "rewards/MMContentORM/std": 0.6567980706691742, "rewards/MMFormatORM/mean": 0.6093749821186065, "rewards/MMFormatORM/std": 0.09063776731491088, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.1394427239894867, "step": 3375, "train_speed(iter/s)": 0.082951 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.4, "completions/mean_length": 212.875, "completions/min_length": 125.2, "epoch": 1.6226596255400865, "frac_reward_zero_std": 0.625, "grad_norm": 0.28574997186660767, "kl": 0.0211181640625, "learning_rate": 9.423523658012568e-07, "loss": 0.0008457589894533158, "memory(GiB)": 27.09, "reward": 0.4936999797821045, "reward_std": 0.13618875967804342, "rewards/MMContentORM/mean": 0.6705000221729278, "rewards/MMContentORM/std": 0.5642464995384217, "rewards/MMFormatORM/mean": 0.6012499749660491, "rewards/MMFormatORM/std": 0.10254122316837311, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.15775573253631592, "step": 3380, "train_speed(iter/s)": 0.082959 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.6, "completions/mean_length": 209.95, "completions/min_length": 126.0, "epoch": 1.6250600096015364, "frac_reward_zero_std": 0.725, "grad_norm": 0.11802355945110321, "kl": 0.012750244140625, "learning_rate": 9.30786764925396e-07, "loss": 0.000509438058361411, "memory(GiB)": 27.09, "reward": 0.5361499786376953, "reward_std": 0.05635640830732882, "rewards/MMContentORM/mean": 0.7210000157356262, "rewards/MMContentORM/std": 0.5421726107597351, "rewards/MMFormatORM/mean": 0.6318749904632568, "rewards/MMFormatORM/std": 0.07249999642372132, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3385, "train_speed(iter/s)": 0.082966 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.6, "completions/mean_length": 208.0, "completions/min_length": 138.8, "epoch": 1.6274603936629861, "frac_reward_zero_std": 0.7, "grad_norm": 0.006833571009337902, "kl": 0.015826416015625, "learning_rate": 9.192852875956787e-07, "loss": 0.0006330645643174649, "memory(GiB)": 27.09, "reward": 0.4655999720096588, "reward_std": 0.1080459140241146, "rewards/MMContentORM/mean": 0.571500027179718, "rewards/MMContentORM/std": 0.6459370970726013, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3390, "train_speed(iter/s)": 0.082977 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 362.0, "completions/mean_length": 229.1375, "completions/min_length": 148.4, "epoch": 1.6298607777244358, "frac_reward_zero_std": 0.725, "grad_norm": 0.005902289412915707, "kl": 0.0166748046875, "learning_rate": 9.078481150548824e-07, "loss": 0.0006678791251033545, "memory(GiB)": 27.09, "reward": 0.42405000030994416, "reward_std": 0.11476342976093293, "rewards/MMContentORM/mean": 0.4819999933242798, "rewards/MMContentORM/std": 0.671118414402008, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 3395, "train_speed(iter/s)": 0.08297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.2, "completions/mean_length": 217.4, "completions/min_length": 134.8, "epoch": 1.6322611617858858, "frac_reward_zero_std": 0.675, "grad_norm": 0.15501824021339417, "kl": 0.016632080078125, "learning_rate": 8.964754275324589e-07, "loss": 0.0006651143543422222, "memory(GiB)": 27.09, "reward": 0.4716499984264374, "reward_std": 0.07870098501443863, "rewards/MMContentORM/mean": 0.6010000109672546, "rewards/MMContentORM/std": 0.6552097082138062, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 3400, "train_speed(iter/s)": 0.082979 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 459.4, "completions/mean_length": 236.1, "completions/min_length": 152.0, "epoch": 1.6346615458473357, "frac_reward_zero_std": 0.625, "grad_norm": 0.07511002570390701, "kl": 0.01353759765625, "learning_rate": 8.851674042416852e-07, "loss": 0.0005421666894108057, "memory(GiB)": 27.09, "reward": 0.4685499846935272, "reward_std": 0.10839946605265141, "rewards/MMContentORM/mean": 0.5644999861717224, "rewards/MMContentORM/std": 0.5619328938424587, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3405, "train_speed(iter/s)": 0.082918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.8, "completions/mean_length": 204.75, "completions/min_length": 130.6, "epoch": 1.6370619299087854, "frac_reward_zero_std": 0.625, "grad_norm": 0.1756453961133957, "kl": 0.0164794921875, "learning_rate": 8.739242233768519e-07, "loss": 0.0006595761980861426, "memory(GiB)": 27.09, "reward": 0.4861499845981598, "reward_std": 0.14361338005401195, "rewards/MMContentORM/mean": 0.6535000085830689, "rewards/MMContentORM/std": 0.6205046653747559, "rewards/MMFormatORM/mean": 0.5993749856948852, "rewards/MMFormatORM/std": 0.13680812418460847, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 3410, "train_speed(iter/s)": 0.082932 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.8, "completions/mean_length": 216.9875, "completions/min_length": 124.0, "epoch": 1.6394623139702351, "frac_reward_zero_std": 0.575, "grad_norm": 0.06857043504714966, "kl": 0.031121826171875, "learning_rate": 8.627460621104444e-07, "loss": 0.001243231911212206, "memory(GiB)": 27.09, "reward": 0.46214998364448545, "reward_std": 0.17797877669800072, "rewards/MMContentORM/mean": 0.6060000151395798, "rewards/MMContentORM/std": 0.5733154647052288, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 3415, "train_speed(iter/s)": 0.082938 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.0, "completions/mean_length": 213.0625, "completions/min_length": 142.6, "epoch": 1.641862698031685, "frac_reward_zero_std": 0.575, "grad_norm": 0.1507033109664917, "kl": 0.01561279296875, "learning_rate": 8.516330965903564e-07, "loss": 0.0006242851726710796, "memory(GiB)": 27.09, "reward": 0.4748999834060669, "reward_std": 0.14523972067981958, "rewards/MMContentORM/mean": 0.6235000133514405, "rewards/MMContentORM/std": 0.6498092889785767, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 3420, "train_speed(iter/s)": 0.08295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.2, "completions/mean_length": 220.0, "completions/min_length": 117.2, "epoch": 1.644263082093135, "frac_reward_zero_std": 0.65, "grad_norm": 0.1359294056892395, "kl": 0.0308837890625, "learning_rate": 8.405855019371123e-07, "loss": 0.0012361595407128334, "memory(GiB)": 27.09, "reward": 0.3967999845743179, "reward_std": 0.17083699703216554, "rewards/MMContentORM/mean": 0.4570000171661377, "rewards/MMContentORM/std": 0.6885712265968322, "rewards/MMFormatORM/mean": 0.5849999964237214, "rewards/MMFormatORM/std": 0.1881377637386322, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.28944272398948667, "step": 3425, "train_speed(iter/s)": 0.082954 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.6, "completions/mean_length": 209.8, "completions/min_length": 152.6, "epoch": 1.6466634661545847, "frac_reward_zero_std": 0.75, "grad_norm": 0.08136511594057083, "kl": 0.397613525390625, "learning_rate": 8.296034522411078e-07, "loss": 0.01587701141834259, "memory(GiB)": 27.09, "reward": 0.555299985408783, "reward_std": 0.0552957494975999, "rewards/MMContentORM/mean": 0.7670000076293946, "rewards/MMContentORM/std": 0.4387725330889225, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3430, "train_speed(iter/s)": 0.082969 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.6, "completions/mean_length": 210.5875, "completions/min_length": 129.6, "epoch": 1.6490638502160344, "frac_reward_zero_std": 0.45, "grad_norm": 0.18326374888420105, "kl": 0.018804931640625, "learning_rate": 8.186871205598712e-07, "loss": 0.0007523265201598405, "memory(GiB)": 27.09, "reward": 0.4282499849796295, "reward_std": 0.175716033577919, "rewards/MMContentORM/mean": 0.5500000059604645, "rewards/MMContentORM/std": 0.6840834498405457, "rewards/MMFormatORM/mean": 0.5768749833106994, "rewards/MMFormatORM/std": 0.17163621485233307, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.26405572295188906, "step": 3435, "train_speed(iter/s)": 0.082985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.8, "completions/mean_length": 220.225, "completions/min_length": 150.8, "epoch": 1.6514642342774843, "frac_reward_zero_std": 0.625, "grad_norm": 0.11703846603631973, "kl": 0.0169677734375, "learning_rate": 8.078366789153241e-07, "loss": 0.0006778911687433719, "memory(GiB)": 27.09, "reward": 0.4566999852657318, "reward_std": 0.10988439926877618, "rewards/MMContentORM/mean": 0.5529999971389771, "rewards/MMContentORM/std": 0.635290002822876, "rewards/MMFormatORM/mean": 0.6137499809265137, "rewards/MMFormatORM/std": 0.11046060025691987, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3440, "train_speed(iter/s)": 0.082986 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.8, "completions/mean_length": 221.65, "completions/min_length": 146.4, "epoch": 1.6538646183389343, "frac_reward_zero_std": 0.575, "grad_norm": 0.17084509134292603, "kl": 0.015997314453125, "learning_rate": 7.970522982910856e-07, "loss": 0.0006411694921553135, "memory(GiB)": 27.09, "reward": 0.4217999815940857, "reward_std": 0.1796051269862801, "rewards/MMContentORM/mean": 0.5195000052452088, "rewards/MMContentORM/std": 0.6755177021026612, "rewards/MMFormatORM/mean": 0.5849999845027923, "rewards/MMFormatORM/std": 0.16754122078418732, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2577557325363159, "step": 3445, "train_speed(iter/s)": 0.082994 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.0, "completions/mean_length": 222.2875, "completions/min_length": 165.8, "epoch": 1.6562650024003842, "frac_reward_zero_std": 0.7, "grad_norm": 0.17295852303504944, "kl": 0.01490478515625, "learning_rate": 7.863341486297682e-07, "loss": 0.0005966905970126391, "memory(GiB)": 27.09, "reward": 0.4945999622344971, "reward_std": 0.09814641983248293, "rewards/MMContentORM/mean": 0.6440000176429749, "rewards/MMContentORM/std": 0.5497657291591167, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3450, "train_speed(iter/s)": 0.082998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 370.6, "completions/mean_length": 221.95, "completions/min_length": 137.8, "epoch": 1.658665386461834, "frac_reward_zero_std": 0.65, "grad_norm": 0.14504937827587128, "kl": 0.01641845703125, "learning_rate": 7.756823988303025e-07, "loss": 0.0006571163889020681, "memory(GiB)": 27.09, "reward": 0.4818499743938446, "reward_std": 0.08124656807631254, "rewards/MMContentORM/mean": 0.6264999985694886, "rewards/MMContentORM/std": 0.6504538416862488, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 3455, "train_speed(iter/s)": 0.08299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.0, "completions/mean_length": 209.3625, "completions/min_length": 125.2, "epoch": 1.6610657705232836, "frac_reward_zero_std": 0.625, "grad_norm": 0.14114868640899658, "kl": 0.017041015625, "learning_rate": 7.650972167452752e-07, "loss": 0.0006805134937167168, "memory(GiB)": 27.09, "reward": 0.476099956035614, "reward_std": 0.13675445076078177, "rewards/MMContentORM/mean": 0.626500004529953, "rewards/MMContentORM/std": 0.6218415260314941, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 3460, "train_speed(iter/s)": 0.082999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.6, "completions/mean_length": 210.4875, "completions/min_length": 126.6, "epoch": 1.6634661545847336, "frac_reward_zero_std": 0.625, "grad_norm": 0.21840178966522217, "kl": 0.016424560546875, "learning_rate": 7.545787691782847e-07, "loss": 0.0006578662432730198, "memory(GiB)": 27.09, "reward": 0.3567999839782715, "reward_std": 0.17606958658434452, "rewards/MMContentORM/mean": 0.38949999660253526, "rewards/MMContentORM/std": 0.7360579133033752, "rewards/MMFormatORM/mean": 0.5649999618530274, "rewards/MMFormatORM/std": 0.18591444790363312, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.2823687314987183, "step": 3465, "train_speed(iter/s)": 0.083009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 335.6, "completions/mean_length": 217.7875, "completions/min_length": 152.2, "epoch": 1.6658665386461835, "frac_reward_zero_std": 0.825, "grad_norm": 0.13330751657485962, "kl": 0.011822509765625, "learning_rate": 7.441272218813156e-07, "loss": 0.0004730843007564545, "memory(GiB)": 27.09, "reward": 0.5238499701023102, "reward_std": 0.03981010988354683, "rewards/MMContentORM/mean": 0.6740000247955322, "rewards/MMContentORM/std": 0.5537904977798462, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 3470, "train_speed(iter/s)": 0.083019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 356.4, "completions/mean_length": 227.1375, "completions/min_length": 125.0, "epoch": 1.6682669227076332, "frac_reward_zero_std": 0.725, "grad_norm": 0.19898498058319092, "kl": 0.01395263671875, "learning_rate": 7.337427395521173e-07, "loss": 0.0005571233108639717, "memory(GiB)": 27.09, "reward": 0.5003499805927276, "reward_std": 0.08888331830967218, "rewards/MMContentORM/mean": 0.6440000176429749, "rewards/MMContentORM/std": 0.6199796617031097, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3475, "train_speed(iter/s)": 0.083012 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.0, "completions/mean_length": 221.5875, "completions/min_length": 139.8, "epoch": 1.670667306769083, "frac_reward_zero_std": 0.65, "grad_norm": 0.15592429041862488, "kl": 0.01651611328125, "learning_rate": 7.234254858316187e-07, "loss": 0.0006610705517232418, "memory(GiB)": 27.09, "reward": 0.4636499762535095, "reward_std": 0.10514677353203297, "rewards/MMContentORM/mean": 0.581000006198883, "rewards/MMContentORM/std": 0.6593972444534302, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 3480, "train_speed(iter/s)": 0.083008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.8, "completions/mean_length": 206.2125, "completions/min_length": 124.2, "epoch": 1.6730676908305329, "frac_reward_zero_std": 0.675, "grad_norm": 0.10853126645088196, "kl": 0.013946533203125, "learning_rate": 7.13175623301347e-07, "loss": 0.0005579915829002857, "memory(GiB)": 27.09, "reward": 0.5001499652862549, "reward_std": 0.08407499315217137, "rewards/MMContentORM/mean": 0.6435000181198121, "rewards/MMContentORM/std": 0.5942914664745331, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3485, "train_speed(iter/s)": 0.083022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.2, "completions/mean_length": 208.8, "completions/min_length": 114.4, "epoch": 1.6754680748919828, "frac_reward_zero_std": 0.65, "grad_norm": 0.26080384850502014, "kl": 0.019244384765625, "learning_rate": 7.02993313480862e-07, "loss": 0.0007686344906687737, "memory(GiB)": 27.09, "reward": 0.5148499727249145, "reward_std": 0.1034497192595154, "rewards/MMContentORM/mean": 0.7090000152587891, "rewards/MMContentORM/std": 0.5670624554157258, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 3490, "train_speed(iter/s)": 0.08303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.4, "completions/mean_length": 218.8125, "completions/min_length": 144.0, "epoch": 1.6778684589534325, "frac_reward_zero_std": 0.7, "grad_norm": 0.16066478192806244, "kl": 0.015631103515625, "learning_rate": 6.928787168252132e-07, "loss": 0.0006245138123631477, "memory(GiB)": 27.09, "reward": 0.45314998030662534, "reward_std": 0.11943033039569854, "rewards/MMContentORM/mean": 0.5835000038146972, "rewards/MMContentORM/std": 0.6094723448157311, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.16571036279201506, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 3495, "train_speed(iter/s)": 0.083039 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 344.4, "completions/mean_length": 216.3875, "completions/min_length": 134.0, "epoch": 1.6802688430148822, "frac_reward_zero_std": 0.825, "grad_norm": 0.00513013731688261, "kl": 0.015216064453125, "learning_rate": 6.828319927224114e-07, "loss": 0.0006088857538998127, "memory(GiB)": 27.09, "reward": 0.5228999733924866, "reward_std": 0.05359869406092912, "rewards/MMContentORM/mean": 0.6860000133514405, "rewards/MMContentORM/std": 0.5610509395599366, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3500, "train_speed(iter/s)": 0.083035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 363.6, "completions/mean_length": 215.85, "completions/min_length": 95.2, "epoch": 1.6826692270763322, "frac_reward_zero_std": 0.675, "grad_norm": 0.17233267426490784, "kl": 0.021417236328125, "learning_rate": 6.728532994909203e-07, "loss": 0.0008568591438233852, "memory(GiB)": 27.09, "reward": 0.401749986410141, "reward_std": 0.15421998733654618, "rewards/MMContentORM/mean": 0.4550000071525574, "rewards/MMContentORM/std": 0.6894314765930176, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 3505, "train_speed(iter/s)": 0.082993 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.2, "completions/mean_length": 211.375, "completions/min_length": 151.2, "epoch": 1.685069611137782, "frac_reward_zero_std": 0.625, "grad_norm": 0.14501921832561493, "kl": 0.01634521484375, "learning_rate": 6.629427943771532e-07, "loss": 0.0006534026004374027, "memory(GiB)": 27.09, "reward": 0.49959996342658997, "reward_std": 0.10917728263884782, "rewards/MMContentORM/mean": 0.6565000057220459, "rewards/MMContentORM/std": 0.628342616558075, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3510, "train_speed(iter/s)": 0.082999 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.2, "completions/mean_length": 209.375, "completions/min_length": 115.8, "epoch": 1.687469995199232, "frac_reward_zero_std": 0.75, "grad_norm": 0.15510372817516327, "kl": 0.0161865234375, "learning_rate": 6.531006335530016e-07, "loss": 0.0006463156081736088, "memory(GiB)": 27.09, "reward": 0.4707499802112579, "reward_std": 0.06851864596828819, "rewards/MMContentORM/mean": 0.5699999809265137, "rewards/MMContentORM/std": 0.5937826454639434, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3515, "train_speed(iter/s)": 0.083008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.2, "completions/mean_length": 223.975, "completions/min_length": 132.6, "epoch": 1.6898703792606817, "frac_reward_zero_std": 0.65, "grad_norm": 0.18824124336242676, "kl": 0.013726806640625, "learning_rate": 6.433269721133767e-07, "loss": 0.0005491763353347778, "memory(GiB)": 27.09, "reward": 0.4896999835968018, "reward_std": 0.08810550197958947, "rewards/MMContentORM/mean": 0.602999997138977, "rewards/MMContentORM/std": 0.617827194929123, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 3520, "train_speed(iter/s)": 0.083 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.0, "completions/mean_length": 209.875, "completions/min_length": 147.0, "epoch": 1.6922707633221314, "frac_reward_zero_std": 0.75, "grad_norm": 0.18868878483772278, "kl": 0.018115234375, "learning_rate": 6.336219640737568e-07, "loss": 0.0007253088988363743, "memory(GiB)": 27.09, "reward": 0.4806499779224396, "reward_std": 0.12635998169425874, "rewards/MMContentORM/mean": 0.6235000014305114, "rewards/MMContentORM/std": 0.6232763528823853, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.09680812656879426, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.14893558621406555, "step": 3525, "train_speed(iter/s)": 0.083009 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.8, "completions/mean_length": 211.3375, "completions/min_length": 137.4, "epoch": 1.6946711473835814, "frac_reward_zero_std": 0.65, "grad_norm": 0.15571913123130798, "kl": 0.01719970703125, "learning_rate": 6.23985762367768e-07, "loss": 0.0006875310558825731, "memory(GiB)": 27.09, "reward": 0.47434998154640196, "reward_std": 0.08322646701708436, "rewards/MMContentORM/mean": 0.5790000081062316, "rewards/MMContentORM/std": 0.6246312737464905, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3530, "train_speed(iter/s)": 0.083019 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.8, "completions/mean_length": 213.8125, "completions/min_length": 136.2, "epoch": 1.6970715314450313, "frac_reward_zero_std": 0.65, "grad_norm": 0.14880269765853882, "kl": 0.0153564453125, "learning_rate": 6.144185188447682e-07, "loss": 0.000614521512761712, "memory(GiB)": 27.09, "reward": 0.5258000135421753, "reward_std": 0.08343859082087875, "rewards/MMContentORM/mean": 0.7220000326633453, "rewards/MMContentORM/std": 0.4758839137852192, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 3535, "train_speed(iter/s)": 0.083024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.8, "completions/mean_length": 220.625, "completions/min_length": 151.2, "epoch": 1.699471915506481, "frac_reward_zero_std": 0.675, "grad_norm": 0.17675665020942688, "kl": 0.01483154296875, "learning_rate": 6.049203842674628e-07, "loss": 0.0005933211185038089, "memory(GiB)": 27.09, "reward": 0.4919499814510345, "reward_std": 0.08209509402513504, "rewards/MMContentORM/mean": 0.6230000138282776, "rewards/MMContentORM/std": 0.6088876247406005, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3540, "train_speed(iter/s)": 0.083022 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.2, "completions/mean_length": 214.075, "completions/min_length": 130.6, "epoch": 1.7018722995679307, "frac_reward_zero_std": 0.5, "grad_norm": 0.17425678670406342, "kl": 0.015087890625, "learning_rate": 5.954915083095164e-07, "loss": 0.0006034282967448234, "memory(GiB)": 27.09, "reward": 0.42609999179840086, "reward_std": 0.1848377011716366, "rewards/MMContentORM/mean": 0.5015000104904175, "rewards/MMContentORM/std": 0.7074744701385498, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 3545, "train_speed(iter/s)": 0.083032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.2, "completions/mean_length": 213.4125, "completions/min_length": 126.6, "epoch": 1.7042726836293807, "frac_reward_zero_std": 0.575, "grad_norm": 0.19085994362831116, "kl": 0.01416015625, "learning_rate": 5.86132039553205e-07, "loss": 0.000567510724067688, "memory(GiB)": 27.09, "reward": 0.41705000400543213, "reward_std": 0.14219917133450508, "rewards/MMContentORM/mean": 0.4645000100135803, "rewards/MMContentORM/std": 0.6884559154510498, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 3550, "train_speed(iter/s)": 0.083032 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.2, "completions/mean_length": 222.175, "completions/min_length": 140.8, "epoch": 1.7066730676908306, "frac_reward_zero_std": 0.625, "grad_norm": 0.14451654255390167, "kl": 0.016253662109375, "learning_rate": 5.768421254870721e-07, "loss": 0.0006507723592221737, "memory(GiB)": 27.09, "reward": 0.4542999863624573, "reward_std": 0.08343860041350126, "rewards/MMContentORM/mean": 0.5719999969005585, "rewards/MMContentORM/std": 0.46381150707602503, "rewards/MMFormatORM/mean": 0.6012499749660491, "rewards/MMFormatORM/std": 0.10254122316837311, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.15775573253631592, "step": 3555, "train_speed(iter/s)": 0.083035 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.2, "completions/mean_length": 211.3375, "completions/min_length": 101.4, "epoch": 1.7090734517522803, "frac_reward_zero_std": 0.625, "grad_norm": 0.10909511148929596, "kl": 0.018408203125, "learning_rate": 5.676219125036008e-07, "loss": 0.0007358456961810589, "memory(GiB)": 27.09, "reward": 0.4575499951839447, "reward_std": 0.17769593372941017, "rewards/MMContentORM/mean": 0.5944999873638153, "rewards/MMContentORM/std": 0.6650677740573883, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.14121158123016359, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.21724859476089478, "step": 3560, "train_speed(iter/s)": 0.083043 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.6, "completions/mean_length": 219.425, "completions/min_length": 124.2, "epoch": 1.7114738358137302, "frac_reward_zero_std": 0.7, "grad_norm": 0.20709584653377533, "kl": 0.017572021484375, "learning_rate": 5.584715458969103e-07, "loss": 0.0007023838814347982, "memory(GiB)": 27.09, "reward": 0.4612499952316284, "reward_std": 0.10458109080791474, "rewards/MMContentORM/mean": 0.575000011920929, "rewards/MMContentORM/std": 0.5526260115206242, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 3565, "train_speed(iter/s)": 0.083037 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.8, "completions/mean_length": 209.3125, "completions/min_length": 125.0, "epoch": 1.71387421987518, "frac_reward_zero_std": 0.65, "grad_norm": 0.07817840576171875, "kl": 0.021575927734375, "learning_rate": 5.493911698604648e-07, "loss": 0.0008630914613604546, "memory(GiB)": 27.09, "reward": 0.4372999846935272, "reward_std": 0.12600643069017678, "rewards/MMContentORM/mean": 0.5294999957084656, "rewards/MMContentORM/std": 0.6551137328147888, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 3570, "train_speed(iter/s)": 0.083038 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.2, "completions/mean_length": 208.3125, "completions/min_length": 128.4, "epoch": 1.71627460393663, "frac_reward_zero_std": 0.8, "grad_norm": 0.1356513947248459, "kl": 0.015533447265625, "learning_rate": 5.403809274848048e-07, "loss": 0.0006216357462108136, "memory(GiB)": 27.09, "reward": 0.5266999721527099, "reward_std": 0.05444721775129437, "rewards/MMContentORM/mean": 0.6955000042915345, "rewards/MMContentORM/std": 0.5412708878517151, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 3575, "train_speed(iter/s)": 0.083049 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.0, "completions/mean_length": 205.125, "completions/min_length": 124.2, "epoch": 1.7186749879980798, "frac_reward_zero_std": 0.65, "grad_norm": 0.19772173464298248, "kl": 0.0181396484375, "learning_rate": 5.314409607552845e-07, "loss": 0.0007258410565555096, "memory(GiB)": 27.09, "reward": 0.4547999739646912, "reward_std": 0.18611050322651862, "rewards/MMContentORM/mean": 0.6020000219345093, "rewards/MMContentORM/std": 0.6907771944999694, "rewards/MMFormatORM/mean": 0.5849999785423279, "rewards/MMFormatORM/std": 0.1737115800380707, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2672485947608948, "step": 3580, "train_speed(iter/s)": 0.083061 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.0, "completions/mean_length": 212.6, "completions/min_length": 129.2, "epoch": 1.7210753720595295, "frac_reward_zero_std": 0.775, "grad_norm": 0.19165228307247162, "kl": 0.016009521484375, "learning_rate": 5.225714105498414e-07, "loss": 0.0006398680619895458, "memory(GiB)": 27.09, "reward": 0.4763499915599823, "reward_std": 0.07304412834346294, "rewards/MMContentORM/mean": 0.5839999854564667, "rewards/MMContentORM/std": 0.5775633066892624, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3585, "train_speed(iter/s)": 0.083072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.8, "completions/mean_length": 220.3, "completions/min_length": 126.0, "epoch": 1.7234757561209793, "frac_reward_zero_std": 0.55, "grad_norm": 0.29149454832077026, "kl": 0.015185546875, "learning_rate": 5.137724166367763e-07, "loss": 0.0006067929789423943, "memory(GiB)": 27.09, "reward": 0.3497999906539917, "reward_std": 0.2576697215437889, "rewards/MMContentORM/mean": 0.39700001031160354, "rewards/MMContentORM/std": 0.7802090883255005, "rewards/MMFormatORM/mean": 0.5524999856948852, "rewards/MMFormatORM/std": 0.224040886759758, "rewards/MMRubricORM/mean": -0.15, "rewards/MMRubricORM/std": 0.34467830061912536, "step": 3590, "train_speed(iter/s)": 0.083072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.6, "completions/mean_length": 212.625, "completions/min_length": 145.8, "epoch": 1.7258761401824292, "frac_reward_zero_std": 0.775, "grad_norm": 0.12896399199962616, "kl": 0.014410400390625, "learning_rate": 5.050441176725468e-07, "loss": 0.0005758726038038731, "memory(GiB)": 27.09, "reward": 0.46919997930526736, "reward_std": 0.05953839020803571, "rewards/MMContentORM/mean": 0.5805000185966491, "rewards/MMContentORM/std": 0.6441609025001526, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3595, "train_speed(iter/s)": 0.083085 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.2, "completions/mean_length": 220.2375, "completions/min_length": 122.2, "epoch": 1.7282765242438791, "frac_reward_zero_std": 0.6, "grad_norm": 0.4579542875289917, "kl": 0.030523681640625, "learning_rate": 4.96386651199583e-07, "loss": 0.0012195698916912078, "memory(GiB)": 27.09, "reward": 0.4777999818325043, "reward_std": 0.1202081507537514, "rewards/MMContentORM/mean": 0.6020000159740448, "rewards/MMContentORM/std": 0.638184130191803, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.08490467071533203, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13062257766723634, "step": 3600, "train_speed(iter/s)": 0.08308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.0, "completions/mean_length": 222.575, "completions/min_length": 138.0, "epoch": 1.7306769083053288, "frac_reward_zero_std": 0.725, "grad_norm": 0.005367351695895195, "kl": 0.0135009765625, "learning_rate": 4.878001536441213e-07, "loss": 0.0005398348905146122, "memory(GiB)": 27.09, "reward": 0.49624998569488527, "reward_std": 0.043204221641644835, "rewards/MMContentORM/mean": 0.6049999952316284, "rewards/MMContentORM/std": 0.5939936757087707, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 3605, "train_speed(iter/s)": 0.083045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.0, "completions/mean_length": 211.625, "completions/min_length": 125.4, "epoch": 1.7330772923667785, "frac_reward_zero_std": 0.725, "grad_norm": 0.13431750237941742, "kl": 0.014501953125, "learning_rate": 4.792847603140587e-07, "loss": 0.000580282649025321, "memory(GiB)": 27.09, "reward": 0.48459998369216917, "reward_std": 0.09192387647926807, "rewards/MMContentORM/mean": 0.6190000116825104, "rewards/MMContentORM/std": 0.6144611597061157, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3610, "train_speed(iter/s)": 0.083051 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.0, "completions/mean_length": 216.1125, "completions/min_length": 127.0, "epoch": 1.7354776764282285, "frac_reward_zero_std": 0.575, "grad_norm": 0.14910034835338593, "kl": 0.03046875, "learning_rate": 4.7084060539681066e-07, "loss": 0.0012202151119709015, "memory(GiB)": 27.09, "reward": 0.37774998843669894, "reward_std": 0.15733125656843186, "rewards/MMContentORM/mean": 0.4525000035762787, "rewards/MMContentORM/std": 0.7233627915382386, "rewards/MMFormatORM/mean": 0.5606249868869781, "rewards/MMFormatORM/std": 0.21994589269161224, "rewards/MMRubricORM/mean": -0.1375, "rewards/MMRubricORM/std": 0.33837831020355225, "step": 3615, "train_speed(iter/s)": 0.08305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.2, "completions/mean_length": 217.2375, "completions/min_length": 139.0, "epoch": 1.7378780604896784, "frac_reward_zero_std": 0.55, "grad_norm": 0.12053893506526947, "kl": 0.017779541015625, "learning_rate": 4.624678219572043e-07, "loss": 0.0007117808330804109, "memory(GiB)": 27.09, "reward": 0.4127999782562256, "reward_std": 0.17083699852228165, "rewards/MMContentORM/mean": 0.49699999690055846, "rewards/MMContentORM/std": 0.7366461873054504, "rewards/MMFormatORM/mean": 0.5849999785423279, "rewards/MMFormatORM/std": 0.19821036159992217, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.3049390256404877, "step": 3620, "train_speed(iter/s)": 0.083055 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.2, "completions/mean_length": 223.6625, "completions/min_length": 124.0, "epoch": 1.7402784445511283, "frac_reward_zero_std": 0.725, "grad_norm": 0.17625129222869873, "kl": 0.0965087890625, "learning_rate": 4.5416654193538245e-07, "loss": 0.003856099024415016, "memory(GiB)": 27.09, "reward": 0.45974999070167544, "reward_std": 0.15648272663820534, "rewards/MMContentORM/mean": 0.6000000178813935, "rewards/MMContentORM/std": 0.6247810423374176, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 3625, "train_speed(iter/s)": 0.083045 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.8, "completions/mean_length": 218.475, "completions/min_length": 143.6, "epoch": 1.742678828612578, "frac_reward_zero_std": 0.675, "grad_norm": 0.15371958911418915, "kl": 0.064898681640625, "learning_rate": 4.459368961447169e-07, "loss": 0.002591692842543125, "memory(GiB)": 27.09, "reward": 0.47674998044967654, "reward_std": 0.08266077786684037, "rewards/MMContentORM/mean": 0.5849999964237214, "rewards/MMContentORM/std": 0.6398021399974823, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3630, "train_speed(iter/s)": 0.083041 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.4, "completions/mean_length": 218.6625, "completions/min_length": 125.0, "epoch": 1.7450792126740278, "frac_reward_zero_std": 0.525, "grad_norm": 0.1884879767894745, "kl": 0.022528076171875, "learning_rate": 4.3777901426975465e-07, "loss": 0.0009016599506139755, "memory(GiB)": 27.09, "reward": 0.4047499895095825, "reward_std": 0.13470384031534194, "rewards/MMContentORM/mean": 0.4875000238418579, "rewards/MMContentORM/std": 0.7453335165977478, "rewards/MMFormatORM/mean": 0.576874977350235, "rewards/MMFormatORM/std": 0.17944467663764954, "rewards/MMRubricORM/mean": -0.10500000119209289, "rewards/MMRubricORM/std": 0.28511459827423097, "step": 3635, "train_speed(iter/s)": 0.083021 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 442.8, "completions/mean_length": 224.3125, "completions/min_length": 135.2, "epoch": 1.7474795967354777, "frac_reward_zero_std": 0.725, "grad_norm": 0.13846170902252197, "kl": 0.021331787109375, "learning_rate": 4.2969302486417064e-07, "loss": 0.0008540621027350425, "memory(GiB)": 27.09, "reward": 0.41774998903274535, "reward_std": 0.09086322523653508, "rewards/MMContentORM/mean": 0.4375, "rewards/MMContentORM/std": 0.6664343476295471, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3640, "train_speed(iter/s)": 0.082997 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.4, "completions/mean_length": 217.0375, "completions/min_length": 117.4, "epoch": 1.7498799807969276, "frac_reward_zero_std": 0.75, "grad_norm": 0.18284378945827484, "kl": 0.0134521484375, "learning_rate": 4.2167905534874153e-07, "loss": 0.0005382131785154343, "memory(GiB)": 27.09, "reward": 0.5565499782562255, "reward_std": 0.07672108276747167, "rewards/MMContentORM/mean": 0.7845000267028809, "rewards/MMContentORM/std": 0.42741707861423495, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3645, "train_speed(iter/s)": 0.082998 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.0, "completions/mean_length": 223.3625, "completions/min_length": 120.6, "epoch": 1.7522803648583773, "frac_reward_zero_std": 0.75, "grad_norm": 0.08318620175123215, "kl": 0.019390869140625, "learning_rate": 4.1373723200934136e-07, "loss": 0.0007759532425552606, "memory(GiB)": 27.09, "reward": 0.5463499784469604, "reward_std": 0.0758725541876629, "rewards/MMContentORM/mean": 0.7590000092983246, "rewards/MMContentORM/std": 0.4582708589732647, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3650, "train_speed(iter/s)": 0.083003 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.8, "completions/mean_length": 210.5, "completions/min_length": 139.2, "epoch": 1.754680748919827, "frac_reward_zero_std": 0.775, "grad_norm": 0.1796911358833313, "kl": 0.01278076171875, "learning_rate": 4.0586767999494514e-07, "loss": 0.0005109596066176891, "memory(GiB)": 27.09, "reward": 0.5610999763011932, "reward_std": 0.055012908577919004, "rewards/MMContentORM/mean": 0.7815000295639039, "rewards/MMContentORM/std": 0.3631765726953745, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 3655, "train_speed(iter/s)": 0.083007 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.2, "completions/mean_length": 216.5625, "completions/min_length": 132.6, "epoch": 1.757081132981277, "frac_reward_zero_std": 0.6, "grad_norm": 0.08192643523216248, "kl": 0.014990234375, "learning_rate": 3.980705233156662e-07, "loss": 0.0005991185083985328, "memory(GiB)": 27.09, "reward": 0.5434999465942383, "reward_std": 0.05840701770503074, "rewards/MMContentORM/mean": 0.7375, "rewards/MMContentORM/std": 0.5282024204730987, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3660, "train_speed(iter/s)": 0.083004 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.8, "completions/mean_length": 211.125, "completions/min_length": 156.6, "epoch": 1.759481517042727, "frac_reward_zero_std": 0.7, "grad_norm": 0.11645707488059998, "kl": 0.0146240234375, "learning_rate": 3.903458848407915e-07, "loss": 0.0005853664129972458, "memory(GiB)": 27.09, "reward": 0.48039997220039365, "reward_std": 0.0814586978405714, "rewards/MMContentORM/mean": 0.6085000038146973, "rewards/MMContentORM/std": 0.6318390727043152, "rewards/MMFormatORM/mean": 0.6174999952316285, "rewards/MMFormatORM/std": 0.12999999523162842, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.2, "step": 3665, "train_speed(iter/s)": 0.083016 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 457.4, "completions/mean_length": 228.6125, "completions/min_length": 123.4, "epoch": 1.7618819011041766, "frac_reward_zero_std": 0.575, "grad_norm": 0.20300821959972382, "kl": 0.01519775390625, "learning_rate": 3.8269388629685266e-07, "loss": 0.0006076143123209477, "memory(GiB)": 27.09, "reward": 0.436849981546402, "reward_std": 0.10740951672196389, "rewards/MMContentORM/mean": 0.489000004529953, "rewards/MMContentORM/std": 0.6648125410079956, "rewards/MMFormatORM/mean": 0.6218749761581421, "rewards/MMFormatORM/std": 0.09190345257520675, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3670, "train_speed(iter/s)": 0.082985 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 451.8, "completions/mean_length": 222.0625, "completions/min_length": 150.2, "epoch": 1.7642822851656264, "frac_reward_zero_std": 0.675, "grad_norm": 0.07441962510347366, "kl": 0.0164794921875, "learning_rate": 3.7511464826570476e-07, "loss": 0.0006591953337192535, "memory(GiB)": 27.09, "reward": 0.43319997787475584, "reward_std": 0.14212846159934997, "rewards/MMContentORM/mean": 0.5480000078678131, "rewards/MMContentORM/std": 0.7250023484230042, "rewards/MMFormatORM/mean": 0.5849999904632568, "rewards/MMFormatORM/std": 0.19430812299251557, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2989355862140656, "step": 3675, "train_speed(iter/s)": 0.082957 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 519.4, "completions/mean_length": 235.7625, "completions/min_length": 150.4, "epoch": 1.7666826692270763, "frac_reward_zero_std": 0.8, "grad_norm": 0.07605559378862381, "kl": 0.019140625, "learning_rate": 3.676082901826267e-07, "loss": 0.0007654134184122086, "memory(GiB)": 27.09, "reward": 0.4716499924659729, "reward_std": 0.06908433209173381, "rewards/MMContentORM/mean": 0.6010000109672546, "rewards/MMContentORM/std": 0.6110691726207733, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.11740466952323914, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18062257766723633, "step": 3680, "train_speed(iter/s)": 0.082914 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.0, "completions/mean_length": 215.6375, "completions/min_length": 129.6, "epoch": 1.7690830532885262, "frac_reward_zero_std": 0.775, "grad_norm": 0.12616391479969025, "kl": 0.0119415283203125, "learning_rate": 3.601749303344415e-07, "loss": 0.000477463286370039, "memory(GiB)": 27.09, "reward": 0.530249971151352, "reward_std": 0.04150716739241034, "rewards/MMContentORM/mean": 0.6900000095367431, "rewards/MMContentORM/std": 0.5334938883781433, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 3685, "train_speed(iter/s)": 0.082921 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.4, "completions/mean_length": 211.1875, "completions/min_length": 137.8, "epoch": 1.7714834373499762, "frac_reward_zero_std": 0.725, "grad_norm": 0.14239180088043213, "kl": 1.172998046875, "learning_rate": 3.528146858576464e-07, "loss": 0.0469234973192215, "memory(GiB)": 27.09, "reward": 0.5906499743461608, "reward_std": 0.024536601221188902, "rewards/MMContentORM/mean": 0.8409999966621399, "rewards/MMContentORM/std": 0.2608364664018154, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 3690, "train_speed(iter/s)": 0.082933 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 444.0, "completions/mean_length": 225.4375, "completions/min_length": 155.2, "epoch": 1.7738838214114259, "frac_reward_zero_std": 0.75, "grad_norm": 0.1946728527545929, "kl": 0.014678955078125, "learning_rate": 3.4552767273657416e-07, "loss": 0.0005875344388186932, "memory(GiB)": 27.09, "reward": 0.4611999809741974, "reward_std": 0.07240773178637028, "rewards/MMContentORM/mean": 0.5605000138282776, "rewards/MMContentORM/std": 0.6589470744132996, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3695, "train_speed(iter/s)": 0.082904 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.2, "completions/mean_length": 204.9, "completions/min_length": 115.4, "epoch": 1.7762842054728756, "frac_reward_zero_std": 0.675, "grad_norm": 0.0957147628068924, "kl": 0.017193603515625, "learning_rate": 3.383140058015605e-07, "loss": 0.0006867312826216221, "memory(GiB)": 27.09, "reward": 0.4408499926328659, "reward_std": 0.15040160596836358, "rewards/MMContentORM/mean": 0.5564999967813492, "rewards/MMContentORM/std": 0.6231798827648163, "rewards/MMFormatORM/mean": 0.5893749713897705, "rewards/MMFormatORM/std": 0.1667675107717514, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.25493902564048765, "step": 3700, "train_speed(iter/s)": 0.082917 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.6, "completions/mean_length": 209.525, "completions/min_length": 106.4, "epoch": 1.7786845895343255, "frac_reward_zero_std": 0.65, "grad_norm": 0.16963821649551392, "kl": 0.039617919921875, "learning_rate": 3.3117379872713573e-07, "loss": 0.0015896432101726531, "memory(GiB)": 27.09, "reward": 0.49789999723434447, "reward_std": 0.1166726142168045, "rewards/MMContentORM/mean": 0.6810000121593476, "rewards/MMContentORM/std": 0.6037951707839966, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.17440344989299775, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.2683130085468292, "step": 3705, "train_speed(iter/s)": 0.082899 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.0, "completions/mean_length": 218.6375, "completions/min_length": 135.0, "epoch": 1.7810849735957754, "frac_reward_zero_std": 0.625, "grad_norm": 0.09570092707872391, "kl": 0.014276123046875, "learning_rate": 3.2410716403023404e-07, "loss": 0.0005716872867196799, "memory(GiB)": 27.09, "reward": 0.42004998922348025, "reward_std": 0.12551144529134034, "rewards/MMContentORM/mean": 0.471999990940094, "rewards/MMContentORM/std": 0.6835508227348328, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 3710, "train_speed(iter/s)": 0.082911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.2, "completions/mean_length": 211.375, "completions/min_length": 131.2, "epoch": 1.7834853576572252, "frac_reward_zero_std": 0.725, "grad_norm": 0.07856486737728119, "kl": 0.01624755859375, "learning_rate": 3.1711421306841903e-07, "loss": 0.0006491564214229584, "memory(GiB)": 27.09, "reward": 0.5439499855041504, "reward_std": 0.08831763297785074, "rewards/MMContentORM/mean": 0.753000020980835, "rewards/MMContentORM/std": 0.47651802077889444, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3715, "train_speed(iter/s)": 0.082911 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.4, "completions/mean_length": 217.9125, "completions/min_length": 134.4, "epoch": 1.7858857417186749, "frac_reward_zero_std": 0.725, "grad_norm": 0.11409315466880798, "kl": 0.01444091796875, "learning_rate": 3.101950560381339e-07, "loss": 0.0005774036049842835, "memory(GiB)": 27.09, "reward": 0.5369499802589417, "reward_std": 0.08577205196488649, "rewards/MMContentORM/mean": 0.7355000257492066, "rewards/MMContentORM/std": 0.44938567504286764, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3720, "train_speed(iter/s)": 0.082918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 339.0, "completions/mean_length": 220.0125, "completions/min_length": 132.8, "epoch": 1.7882861257801248, "frac_reward_zero_std": 0.65, "grad_norm": 0.13937775790691376, "kl": 0.015142822265625, "learning_rate": 3.033498019729553e-07, "loss": 0.0006057361606508493, "memory(GiB)": 27.09, "reward": 0.4632499754428864, "reward_std": 0.12211733981966973, "rewards/MMContentORM/mean": 0.5800000250339508, "rewards/MMContentORM/std": 0.6220081090927124, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 3725, "train_speed(iter/s)": 0.082918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.8, "completions/mean_length": 220.2125, "completions/min_length": 151.6, "epoch": 1.7906865098415747, "frac_reward_zero_std": 0.725, "grad_norm": 0.12366022914648056, "kl": 0.0125, "learning_rate": 2.965785587418857e-07, "loss": 0.0005001377779990434, "memory(GiB)": 27.09, "reward": 0.5290499746799469, "reward_std": 0.07488261461257935, "rewards/MMContentORM/mean": 0.6870000004768372, "rewards/MMContentORM/std": 0.5478822708129882, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 3730, "train_speed(iter/s)": 0.082923 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.6, "completions/mean_length": 220.45, "completions/min_length": 144.8, "epoch": 1.7930868939030244, "frac_reward_zero_std": 0.5, "grad_norm": 0.23606330156326294, "kl": 0.01500244140625, "learning_rate": 2.898814330476457e-07, "loss": 0.0006001268513500691, "memory(GiB)": 27.09, "reward": 0.42784997820854187, "reward_std": 0.18462557792663575, "rewards/MMContentORM/mean": 0.5490000188350678, "rewards/MMContentORM/std": 0.7097955226898194, "rewards/MMFormatORM/mean": 0.5768749833106994, "rewards/MMFormatORM/std": 0.2062115788459778, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.3172485947608948, "step": 3735, "train_speed(iter/s)": 0.082928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.4, "completions/mean_length": 210.825, "completions/min_length": 127.2, "epoch": 1.7954872779644742, "frac_reward_zero_std": 0.5, "grad_norm": 0.1833251416683197, "kl": 0.017486572265625, "learning_rate": 2.8325853042499796e-07, "loss": 0.000699461530894041, "memory(GiB)": 27.09, "reward": 0.42219996452331543, "reward_std": 0.21411193013191224, "rewards/MMContentORM/mean": 0.5205000072717667, "rewards/MMContentORM/std": 0.6509887754917145, "rewards/MMFormatORM/mean": 0.5849999845027923, "rewards/MMFormatORM/std": 0.1430424392223358, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.22006530165672303, "step": 3740, "train_speed(iter/s)": 0.082937 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 427.8, "completions/mean_length": 221.6375, "completions/min_length": 134.6, "epoch": 1.797887662025924, "frac_reward_zero_std": 0.65, "grad_norm": 0.09696277230978012, "kl": 0.02608642578125, "learning_rate": 2.7670995523908007e-07, "loss": 0.001044764183461666, "memory(GiB)": 27.09, "reward": 0.45574997663497924, "reward_std": 0.14347196728922426, "rewards/MMContentORM/mean": 0.5900000095367431, "rewards/MMContentORM/std": 0.580910587310791, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 3745, "train_speed(iter/s)": 0.082918 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 490.8, "completions/mean_length": 221.175, "completions/min_length": 134.0, "epoch": 1.800288046087374, "frac_reward_zero_std": 0.675, "grad_norm": 0.1732310652732849, "kl": 0.0142822265625, "learning_rate": 2.702358106837616e-07, "loss": 0.0005715820007026196, "memory(GiB)": 27.09, "reward": 0.48334997296333315, "reward_std": 0.07615540148690343, "rewards/MMContentORM/mean": 0.6015000104904175, "rewards/MMContentORM/std": 0.6337794065475464, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3750, "train_speed(iter/s)": 0.082881 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.2, "completions/mean_length": 212.7, "completions/min_length": 121.8, "epoch": 1.802688430148824, "frac_reward_zero_std": 0.575, "grad_norm": 0.15966768562793732, "kl": 0.017254638671875, "learning_rate": 2.63836198780022e-07, "loss": 0.0006905402522534132, "memory(GiB)": 27.09, "reward": 0.4441499710083008, "reward_std": 0.11745043210685253, "rewards/MMContentORM/mean": 0.5359999895095825, "rewards/MMContentORM/std": 0.6547886967658997, "rewards/MMFormatORM/mean": 0.6056249737739563, "rewards/MMFormatORM/std": 0.13630690723657607, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 3755, "train_speed(iter/s)": 0.082883 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.4, "completions/mean_length": 210.7375, "completions/min_length": 136.6, "epoch": 1.8050888142102737, "frac_reward_zero_std": 0.525, "grad_norm": 0.17423182725906372, "kl": 0.02149658203125, "learning_rate": 2.575112203743313e-07, "loss": 0.0008604388684034347, "memory(GiB)": 27.09, "reward": 0.44089998602867125, "reward_std": 0.18257496803998946, "rewards/MMContentORM/mean": 0.5385000109672546, "rewards/MMContentORM/std": 0.6908077597618103, "rewards/MMFormatORM/mean": 0.6012499928474426, "rewards/MMFormatORM/std": 0.17440344989299775, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.2683130085468292, "step": 3760, "train_speed(iter/s)": 0.082888 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.8, "completions/mean_length": 217.0125, "completions/min_length": 142.4, "epoch": 1.8074891982717234, "frac_reward_zero_std": 0.65, "grad_norm": 0.17498211562633514, "kl": 0.0125, "learning_rate": 2.51260975137077e-07, "loss": 0.000500024575740099, "memory(GiB)": 27.09, "reward": 0.5402999818325043, "reward_std": 0.08216580227017403, "rewards/MMContentORM/mean": 0.72950000166893, "rewards/MMContentORM/std": 0.4286257430911064, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 3765, "train_speed(iter/s)": 0.08289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.4, "completions/mean_length": 211.875, "completions/min_length": 135.8, "epoch": 1.8098895823331733, "frac_reward_zero_std": 0.75, "grad_norm": 0.1608082801103592, "kl": 0.014971923828125, "learning_rate": 2.4508556156097983e-07, "loss": 0.0005985355004668236, "memory(GiB)": 27.09, "reward": 0.49359997510910036, "reward_std": 0.05670996003318578, "rewards/MMContentORM/mean": 0.6414999842643738, "rewards/MMContentORM/std": 0.6027493834495544, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3770, "train_speed(iter/s)": 0.082893 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.4, "completions/mean_length": 214.9375, "completions/min_length": 139.6, "epoch": 1.8122899663946233, "frac_reward_zero_std": 0.725, "grad_norm": 0.08479262888431549, "kl": 0.022100830078125, "learning_rate": 2.3898507695954807e-07, "loss": 0.0008829880505800247, "memory(GiB)": 27.09, "reward": 0.5497499763965606, "reward_std": 0.06936717466451228, "rewards/MMContentORM/mean": 0.7675000071525574, "rewards/MMContentORM/std": 0.3865751329809427, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3775, "train_speed(iter/s)": 0.082901 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 458.8, "completions/mean_length": 231.45, "completions/min_length": 149.0, "epoch": 1.814690350456073, "frac_reward_zero_std": 0.625, "grad_norm": 0.20971421897411346, "kl": 0.014501953125, "learning_rate": 2.3295961746554464e-07, "loss": 0.0005800392478704453, "memory(GiB)": 27.09, "reward": 0.45644997954368594, "reward_std": 0.12833987697958946, "rewards/MMContentORM/mean": 0.5630000114440918, "rewards/MMContentORM/std": 0.6289644300937652, "rewards/MMFormatORM/mean": 0.6093749761581421, "rewards/MMFormatORM/std": 0.12130690813064575, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.18662601709365845, "step": 3780, "train_speed(iter/s)": 0.082871 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.0, "completions/mean_length": 209.8125, "completions/min_length": 124.8, "epoch": 1.8170907345175227, "frac_reward_zero_std": 0.725, "grad_norm": 0.1644957810640335, "kl": 0.022076416015625, "learning_rate": 2.2700927802946748e-07, "loss": 0.0008836163207888604, "memory(GiB)": 27.09, "reward": 0.5015999794006347, "reward_std": 0.09899494738783687, "rewards/MMContentORM/mean": 0.6615000128746032, "rewards/MMContentORM/std": 0.5929094016551971, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3785, "train_speed(iter/s)": 0.082885 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 370.2, "completions/mean_length": 221.825, "completions/min_length": 137.6, "epoch": 1.8194911185789726, "frac_reward_zero_std": 0.65, "grad_norm": 0.14464011788368225, "kl": 0.015411376953125, "learning_rate": 2.211341524180599e-07, "loss": 0.0006168725434690714, "memory(GiB)": 27.09, "reward": 0.3968499720096588, "reward_std": 0.16454374492168428, "rewards/MMContentORM/mean": 0.471500039100647, "rewards/MMContentORM/std": 0.7294471979141235, "rewards/MMFormatORM/mean": 0.576874989271164, "rewards/MMFormatORM/std": 0.20004121959209442, "rewards/MMRubricORM/mean": -0.1125, "rewards/MMRubricORM/std": 0.30775573253631594, "step": 3790, "train_speed(iter/s)": 0.08288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025, "completions/max_length": 570.2, "completions/mean_length": 231.825, "completions/min_length": 119.8, "epoch": 1.8218915026404225, "frac_reward_zero_std": 0.725, "grad_norm": 0.1182423084974289, "kl": 0.018719482421875, "learning_rate": 2.1533433321282548e-07, "loss": 0.0007486558984965086, "memory(GiB)": 27.09, "reward": 0.41229996681213377, "reward_std": 0.1336431846022606, "rewards/MMContentORM/mean": 0.46700000762939453, "rewards/MMContentORM/std": 0.6891016006469727, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 3795, "train_speed(iter/s)": 0.082825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.6, "completions/mean_length": 214.1, "completions/min_length": 126.0, "epoch": 1.8242918867018723, "frac_reward_zero_std": 0.6, "grad_norm": 0.13948172330856323, "kl": 0.019952392578125, "learning_rate": 2.096099118085776e-07, "loss": 0.0007983671501278877, "memory(GiB)": 27.09, "reward": 0.5301999688148499, "reward_std": 0.10040915980935097, "rewards/MMContentORM/mean": 0.7330000162124634, "rewards/MMContentORM/std": 0.554861056804657, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3800, "train_speed(iter/s)": 0.082827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.6, "completions/mean_length": 209.8625, "completions/min_length": 152.0, "epoch": 1.826692270763322, "frac_reward_zero_std": 0.525, "grad_norm": 0.18707101047039032, "kl": 0.013671875, "learning_rate": 2.039609784119906e-07, "loss": 0.0005472441203892231, "memory(GiB)": 27.09, "reward": 0.4373499691486359, "reward_std": 0.0874691043049097, "rewards/MMContentORM/mean": 0.4865000069141388, "rewards/MMContentORM/std": 0.6470930695533752, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3805, "train_speed(iter/s)": 0.082815 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.4, "completions/mean_length": 209.6125, "completions/min_length": 127.2, "epoch": 1.829092654824772, "frac_reward_zero_std": 0.85, "grad_norm": 0.07913219183683395, "kl": 0.01658935546875, "learning_rate": 1.983876220401848e-07, "loss": 0.0006637333892285824, "memory(GiB)": 27.09, "reward": 0.539849978685379, "reward_std": 0.032456200616434214, "rewards/MMContentORM/mean": 0.7139999866485596, "rewards/MMContentORM/std": 0.5111204564571381, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 3810, "train_speed(iter/s)": 0.082827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.0, "completions/mean_length": 218.7125, "completions/min_length": 125.2, "epoch": 1.8314930388862218, "frac_reward_zero_std": 0.725, "grad_norm": 0.10300786793231964, "kl": 0.033880615234375, "learning_rate": 1.9288993051932047e-07, "loss": 0.0013558823615312577, "memory(GiB)": 27.09, "reward": 0.518399977684021, "reward_std": 0.11200571432709694, "rewards/MMContentORM/mean": 0.7035000085830688, "rewards/MMContentORM/std": 0.5992733359336853, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3815, "train_speed(iter/s)": 0.082827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.6, "completions/mean_length": 216.15, "completions/min_length": 150.2, "epoch": 1.8338934229476718, "frac_reward_zero_std": 0.625, "grad_norm": 0.20239703357219696, "kl": 0.0139892578125, "learning_rate": 1.8746799048321386e-07, "loss": 0.0005595901049673558, "memory(GiB)": 27.09, "reward": 0.5116499781608581, "reward_std": 0.08534778701141477, "rewards/MMContentORM/mean": 0.6435000181198121, "rewards/MMContentORM/std": 0.5361906588077545, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 3820, "train_speed(iter/s)": 0.082836 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.0, "completions/mean_length": 215.5125, "completions/min_length": 115.8, "epoch": 1.8362938070091215, "frac_reward_zero_std": 0.5, "grad_norm": 0.1797892451286316, "kl": 0.015447998046875, "learning_rate": 1.8212188737197657e-07, "loss": 0.0006183533929288388, "memory(GiB)": 27.09, "reward": 0.4594499826431274, "reward_std": 0.16086678504943847, "rewards/MMContentORM/mean": 0.5705000162124634, "rewards/MMContentORM/std": 0.6871401906013489, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 3825, "train_speed(iter/s)": 0.082844 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 494.8, "completions/mean_length": 227.725, "completions/min_length": 144.0, "epoch": 1.8386941910705712, "frac_reward_zero_std": 0.775, "grad_norm": 0.10412738472223282, "kl": 0.016107177734375, "learning_rate": 1.7685170543065955e-07, "loss": 0.0006441749632358551, "memory(GiB)": 27.09, "reward": 0.4660999894142151, "reward_std": 0.0735391038004309, "rewards/MMContentORM/mean": 0.601500004529953, "rewards/MMContentORM/std": 0.6630040287971497, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 3830, "train_speed(iter/s)": 0.082814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.4, "completions/mean_length": 218.1375, "completions/min_length": 154.8, "epoch": 1.8410945751320211, "frac_reward_zero_std": 0.55, "grad_norm": 0.1359497755765915, "kl": 0.0163330078125, "learning_rate": 1.7165752770793742e-07, "loss": 0.0006539277732372284, "memory(GiB)": 27.09, "reward": 0.4121499717235565, "reward_std": 0.17345329225063325, "rewards/MMContentORM/mean": 0.4809999972581863, "rewards/MMContentORM/std": 0.6978591680526733, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 3835, "train_speed(iter/s)": 0.082817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.2, "completions/mean_length": 205.2, "completions/min_length": 118.4, "epoch": 1.843494959193471, "frac_reward_zero_std": 0.575, "grad_norm": 0.11098845303058624, "kl": 0.019683837890625, "learning_rate": 1.665394360547895e-07, "loss": 0.0007876243442296981, "memory(GiB)": 27.09, "reward": 0.4245999872684479, "reward_std": 0.1688570961356163, "rewards/MMContentORM/mean": 0.526499992609024, "rewards/MMContentORM/std": 0.7182626962661743, "rewards/MMFormatORM/mean": 0.5849999785423279, "rewards/MMFormatORM/std": 0.19821036159992217, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.3049390256404877, "step": 3840, "train_speed(iter/s)": 0.082826 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.2, "completions/mean_length": 214.1875, "completions/min_length": 125.2, "epoch": 1.8458953432549208, "frac_reward_zero_std": 0.675, "grad_norm": 0.2774093449115753, "kl": 0.01912841796875, "learning_rate": 1.6149751112321643e-07, "loss": 0.0007657586131244898, "memory(GiB)": 27.09, "reward": 0.43994998931884766, "reward_std": 0.11943033430725336, "rewards/MMContentORM/mean": 0.5505000114440918, "rewards/MMContentORM/std": 0.5636135444045067, "rewards/MMFormatORM/mean": 0.5931249737739563, "rewards/MMFormatORM/std": 0.14121158123016359, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.21724859476089478, "step": 3845, "train_speed(iter/s)": 0.082827 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 480.0, "completions/mean_length": 232.475, "completions/min_length": 149.8, "epoch": 1.8482957273163705, "frac_reward_zero_std": 0.675, "grad_norm": 0.09440695494413376, "kl": 0.0136474609375, "learning_rate": 1.565318323649667e-07, "loss": 0.0005458991043269634, "memory(GiB)": 27.09, "reward": 0.4410999774932861, "reward_std": 0.15004805505741386, "rewards/MMContentORM/mean": 0.539000004529953, "rewards/MMContentORM/std": 0.6789550423622132, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 3850, "train_speed(iter/s)": 0.082796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 389.2, "completions/mean_length": 222.3, "completions/min_length": 140.8, "epoch": 1.8506961113778204, "frac_reward_zero_std": 0.7, "grad_norm": 0.08402004837989807, "kl": 0.01473388671875, "learning_rate": 1.5164247803028443e-07, "loss": 0.0005890860687941313, "memory(GiB)": 27.09, "reward": 0.4956999808549881, "reward_std": 0.06378102500457317, "rewards/MMContentORM/mean": 0.6180000007152557, "rewards/MMContentORM/std": 0.4799440011382103, "rewards/MMFormatORM/mean": 0.6337499737739563, "rewards/MMFormatORM/std": 0.04440345466136932, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.06831300854682923, "step": 3855, "train_speed(iter/s)": 0.082787 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.2, "completions/mean_length": 215.3875, "completions/min_length": 143.4, "epoch": 1.8530964954392704, "frac_reward_zero_std": 0.775, "grad_norm": 0.10355502367019653, "kl": 0.012347412109375, "learning_rate": 1.4682952516667848e-07, "loss": 0.0004940344952046871, "memory(GiB)": 27.09, "reward": 0.5468499898910523, "reward_std": 0.03330472691450268, "rewards/MMContentORM/mean": 0.731499993801117, "rewards/MMContentORM/std": 0.5080301821231842, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 3860, "train_speed(iter/s)": 0.082793 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.6, "completions/mean_length": 211.8875, "completions/min_length": 146.2, "epoch": 1.85549687950072, "frac_reward_zero_std": 0.65, "grad_norm": 0.11793594062328339, "kl": 0.014239501953125, "learning_rate": 1.4209304961770364e-07, "loss": 0.0005696051754057408, "memory(GiB)": 27.09, "reward": 0.4437499940395355, "reward_std": 0.08237794116139412, "rewards/MMContentORM/mean": 0.5025000140070915, "rewards/MMContentORM/std": 0.5952349126338958, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3865, "train_speed(iter/s)": 0.082799 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.6, "completions/mean_length": 208.3625, "completions/min_length": 135.8, "epoch": 1.85789726356217, "frac_reward_zero_std": 0.75, "grad_norm": 0.14964303374290466, "kl": 0.01424560546875, "learning_rate": 1.374331260217726e-07, "loss": 0.0005695806816220283, "memory(GiB)": 27.09, "reward": 0.5307499766349792, "reward_std": 0.08266077996231616, "rewards/MMContentORM/mean": 0.7200000047683716, "rewards/MMContentORM/std": 0.48706189841032027, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3870, "train_speed(iter/s)": 0.082801 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.8, "completions/mean_length": 207.7375, "completions/min_length": 128.4, "epoch": 1.8602976476236197, "frac_reward_zero_std": 0.725, "grad_norm": 0.13720768690109253, "kl": 0.014617919921875, "learning_rate": 1.32849827810973e-07, "loss": 0.0005841460078954697, "memory(GiB)": 27.09, "reward": 0.46409997940063474, "reward_std": 0.13901719748973845, "rewards/MMContentORM/mean": 0.5965000092983246, "rewards/MMContentORM/std": 0.6498379826545715, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 3875, "train_speed(iter/s)": 0.08281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.2, "completions/mean_length": 214.3625, "completions/min_length": 151.8, "epoch": 1.8626980316850696, "frac_reward_zero_std": 0.6, "grad_norm": 0.20279008150100708, "kl": 0.015106201171875, "learning_rate": 1.2834322720991332e-07, "loss": 0.0006037722807377577, "memory(GiB)": 27.09, "reward": 0.5412999749183655, "reward_std": 0.049638888845220205, "rewards/MMContentORM/mean": 0.7320000052452087, "rewards/MMContentORM/std": 0.46277309134602546, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3880, "train_speed(iter/s)": 0.082805 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.2, "completions/mean_length": 211.4375, "completions/min_length": 141.4, "epoch": 1.8650984157465196, "frac_reward_zero_std": 0.625, "grad_norm": 0.08262795954942703, "kl": 0.0156005859375, "learning_rate": 1.2391339523458502e-07, "loss": 0.0006241547875106334, "memory(GiB)": 27.09, "reward": 0.46244998574256896, "reward_std": 0.11985459551215172, "rewards/MMContentORM/mean": 0.578000009059906, "rewards/MMContentORM/std": 0.6259812593460083, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 3885, "train_speed(iter/s)": 0.082824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.6, "completions/mean_length": 223.2, "completions/min_length": 161.6, "epoch": 1.8674987998079693, "frac_reward_zero_std": 0.675, "grad_norm": 0.18606510758399963, "kl": 0.015350341796875, "learning_rate": 1.1956040169124217e-07, "loss": 0.0006135111209005118, "memory(GiB)": 27.09, "reward": 0.4125999629497528, "reward_std": 0.13562307790853084, "rewards/MMContentORM/mean": 0.4965000033378601, "rewards/MMContentORM/std": 0.707841980457306, "rewards/MMFormatORM/mean": 0.5849999785423279, "rewards/MMFormatORM/std": 0.1737115800380707, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2672485947608948, "step": 3890, "train_speed(iter/s)": 0.082825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.2, "completions/mean_length": 217.7625, "completions/min_length": 150.2, "epoch": 1.869899183869419, "frac_reward_zero_std": 0.65, "grad_norm": 0.08665505051612854, "kl": 0.01368408203125, "learning_rate": 1.1528431517530414e-07, "loss": 0.0005479637067764998, "memory(GiB)": 27.09, "reward": 0.5135999739170074, "reward_std": 0.10069200224243105, "rewards/MMContentORM/mean": 0.6915000200271606, "rewards/MMContentORM/std": 0.5325116083025933, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3895, "train_speed(iter/s)": 0.082832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.0, "completions/mean_length": 208.8375, "completions/min_length": 129.4, "epoch": 1.872299567930869, "frac_reward_zero_std": 0.7, "grad_norm": 0.10476063936948776, "kl": 0.024139404296875, "learning_rate": 1.1108520307027026e-07, "loss": 0.0009668363258242607, "memory(GiB)": 27.09, "reward": 0.4625499784946442, "reward_std": 0.15648272782564163, "rewards/MMContentORM/mean": 0.6070000171661377, "rewards/MMContentORM/std": 0.6449923276901245, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.18630690574645997, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2866260170936584, "step": 3900, "train_speed(iter/s)": 0.082839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.8, "completions/mean_length": 210.8875, "completions/min_length": 110.4, "epoch": 1.8746999519923189, "frac_reward_zero_std": 0.775, "grad_norm": 0.15975040197372437, "kl": 0.014141845703125, "learning_rate": 1.0696313154666016e-07, "loss": 0.0005653574131429196, "memory(GiB)": 27.09, "reward": 0.5340999722480774, "reward_std": 0.060952600184828044, "rewards/MMContentORM/mean": 0.714000004529953, "rewards/MMContentORM/std": 0.5186110436916351, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3905, "train_speed(iter/s)": 0.082817 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.8, "completions/mean_length": 211.0375, "completions/min_length": 136.2, "epoch": 1.8771003360537686, "frac_reward_zero_std": 0.675, "grad_norm": 0.18092653155326843, "kl": 0.01676025390625, "learning_rate": 1.0291816556097455e-07, "loss": 0.000670450646430254, "memory(GiB)": 27.09, "reward": 0.5369499742984771, "reward_std": 0.08633773510809988, "rewards/MMContentORM/mean": 0.7355000138282776, "rewards/MMContentORM/std": 0.5291013896465302, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3910, "train_speed(iter/s)": 0.082831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.4, "completions/mean_length": 211.275, "completions/min_length": 114.2, "epoch": 1.8795007201152183, "frac_reward_zero_std": 0.65, "grad_norm": 0.2313622385263443, "kl": 0.01806640625, "learning_rate": 9.895036885466503e-08, "loss": 0.000722192507237196, "memory(GiB)": 27.09, "reward": 0.4598999798297882, "reward_std": 0.1432598352432251, "rewards/MMContentORM/mean": 0.5860000014305115, "rewards/MMContentORM/std": 0.6623120665550232, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 3915, "train_speed(iter/s)": 0.082841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.0, "completions/mean_length": 208.8875, "completions/min_length": 138.6, "epoch": 1.8819011041766682, "frac_reward_zero_std": 0.675, "grad_norm": 0.13324828445911407, "kl": 0.016552734375, "learning_rate": 9.505980395313364e-08, "loss": 0.0006618403363972903, "memory(GiB)": 27.09, "reward": 0.5237999677658081, "reward_std": 0.10606601641047746, "rewards/MMContentORM/mean": 0.717000025510788, "rewards/MMContentORM/std": 0.45795624777674676, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 3920, "train_speed(iter/s)": 0.08284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.2, "completions/mean_length": 211.4875, "completions/min_length": 127.4, "epoch": 1.8843014882381182, "frac_reward_zero_std": 0.5, "grad_norm": 0.2311268001794815, "kl": 0.02252197265625, "learning_rate": 9.124653216474766e-08, "loss": 0.0009013652801513672, "memory(GiB)": 27.09, "reward": 0.43879998922348024, "reward_std": 0.22344573587179184, "rewards/MMContentORM/mean": 0.5945000112056732, "rewards/MMContentORM/std": 0.6892549335956574, "rewards/MMFormatORM/mean": 0.5649999797344207, "rewards/MMFormatORM/std": 0.21041721403598784, "rewards/MMRubricORM/mean": -0.125, "rewards/MMRubricORM/std": 0.32006530165672303, "step": 3925, "train_speed(iter/s)": 0.082839 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.2, "completions/mean_length": 223.475, "completions/min_length": 154.2, "epoch": 1.886701872299568, "frac_reward_zero_std": 0.75, "grad_norm": 0.16043339669704437, "kl": 0.01510009765625, "learning_rate": 8.751061357987367e-08, "loss": 0.0006042405962944031, "memory(GiB)": 27.09, "reward": 0.5149499654769898, "reward_std": 0.08633773569017648, "rewards/MMContentORM/mean": 0.6805000185966492, "rewards/MMContentORM/std": 0.5845986545085907, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 3930, "train_speed(iter/s)": 0.082841 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.6, "completions/mean_length": 222.7875, "completions/min_length": 160.2, "epoch": 1.8891022563610178, "frac_reward_zero_std": 0.7, "grad_norm": 0.07241669297218323, "kl": 0.012701416015625, "learning_rate": 8.385210706992608e-08, "loss": 0.000508300494402647, "memory(GiB)": 27.09, "reward": 0.49414998292922974, "reward_std": 0.07785245187114924, "rewards/MMContentORM/mean": 0.6285000085830689, "rewards/MMContentORM/std": 0.6101788878440857, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3935, "train_speed(iter/s)": 0.082847 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.2, "completions/mean_length": 216.7625, "completions/min_length": 136.4, "epoch": 1.8915026404224675, "frac_reward_zero_std": 0.675, "grad_norm": 0.13122500479221344, "kl": 0.016265869140625, "learning_rate": 8.027107028644621e-08, "loss": 0.0006506592035293579, "memory(GiB)": 27.09, "reward": 0.4648999750614166, "reward_std": 0.1226123157190159, "rewards/MMContentORM/mean": 0.5860000073909759, "rewards/MMContentORM/std": 0.645108425617218, "rewards/MMFormatORM/mean": 0.6074999809265137, "rewards/MMFormatORM/std": 0.14226680397987365, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 3940, "train_speed(iter/s)": 0.082855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 344.2, "completions/mean_length": 215.7375, "completions/min_length": 136.6, "epoch": 1.8939030244839175, "frac_reward_zero_std": 0.675, "grad_norm": 0.10751134157180786, "kl": 0.015667724609375, "learning_rate": 7.676755966018967e-08, "loss": 0.0006263887509703637, "memory(GiB)": 27.09, "reward": 0.49424999952316284, "reward_std": 0.11278352783992887, "rewards/MMContentORM/mean": 0.6574999928474426, "rewards/MMContentORM/std": 0.5534313529729843, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 3945, "train_speed(iter/s)": 0.082853 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.0, "completions/mean_length": 221.6625, "completions/min_length": 154.8, "epoch": 1.8963034085453674, "frac_reward_zero_std": 0.65, "grad_norm": 0.20207758247852325, "kl": 0.014703369140625, "learning_rate": 7.334163040023823e-08, "loss": 0.000587776442989707, "memory(GiB)": 27.09, "reward": 0.5048999905586242, "reward_std": 0.05699280113913119, "rewards/MMContentORM/mean": 0.6285000026226044, "rewards/MMContentORM/std": 0.5722138583660126, "rewards/MMFormatORM/mean": 0.6399999856948853, "rewards/MMFormatORM/std": 0.03999999761581421, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 3950, "train_speed(iter/s)": 0.082857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 452.6, "completions/mean_length": 224.375, "completions/min_length": 155.0, "epoch": 1.898703792606817, "frac_reward_zero_std": 0.725, "grad_norm": 0.1518968939781189, "kl": 0.016015625, "learning_rate": 6.999333649312933e-08, "loss": 0.000639676209539175, "memory(GiB)": 27.09, "reward": 0.4759999752044678, "reward_std": 0.11539982631802559, "rewards/MMContentORM/mean": 0.5974999904632569, "rewards/MMContentORM/std": 0.6229348480701447, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3955, "train_speed(iter/s)": 0.082832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.4, "completions/mean_length": 207.525, "completions/min_length": 128.0, "epoch": 1.9011041766682668, "frac_reward_zero_std": 0.7, "grad_norm": 0.2073817253112793, "kl": 0.019140625, "learning_rate": 6.672273070200464e-08, "loss": 0.0007669827900826931, "memory(GiB)": 27.09, "reward": 0.5025999784469605, "reward_std": 0.08174153957515955, "rewards/MMContentORM/mean": 0.6640000104904175, "rewards/MMContentORM/std": 0.534474528580904, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 3960, "train_speed(iter/s)": 0.082842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 469.4, "completions/mean_length": 225.8875, "completions/min_length": 138.0, "epoch": 1.9035045607297167, "frac_reward_zero_std": 0.675, "grad_norm": 0.11310164630413055, "kl": 0.025762939453125, "learning_rate": 6.352986456578224e-08, "loss": 0.0010307587683200837, "memory(GiB)": 27.09, "reward": 0.4900999844074249, "reward_std": 0.08315575905144215, "rewards/MMContentORM/mean": 0.6615000009536743, "rewards/MMContentORM/std": 0.6141018033027649, "rewards/MMFormatORM/mean": 0.6012499868869782, "rewards/MMFormatORM/std": 0.12313776612281799, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.1894427239894867, "step": 3965, "train_speed(iter/s)": 0.082814 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.0, "completions/mean_length": 207.1125, "completions/min_length": 125.6, "epoch": 1.9059049447911667, "frac_reward_zero_std": 0.725, "grad_norm": 0.1447732150554657, "kl": 0.018310546875, "learning_rate": 6.041478839834025e-08, "loss": 0.0007323446683585644, "memory(GiB)": 27.09, "reward": 0.4383999824523926, "reward_std": 0.1336431846022606, "rewards/MMContentORM/mean": 0.5610000193119049, "rewards/MMContentORM/std": 0.6906715393066406, "rewards/MMFormatORM/mean": 0.5849999904632568, "rewards/MMFormatORM/std": 0.19430812299251557, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2989355862140656, "step": 3970, "train_speed(iter/s)": 0.082821 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.2, "completions/mean_length": 213.025, "completions/min_length": 144.0, "epoch": 1.9083053288526164, "frac_reward_zero_std": 0.7, "grad_norm": 0.19179855287075043, "kl": 0.013043212890625, "learning_rate": 5.7377551287724484e-08, "loss": 0.000521748187020421, "memory(GiB)": 27.09, "reward": 0.48369997143745425, "reward_std": 0.05642711967229843, "rewards/MMContentORM/mean": 0.5879999935626984, "rewards/MMContentORM/std": 0.5680493891239167, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3975, "train_speed(iter/s)": 0.082824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.4, "completions/mean_length": 207.3, "completions/min_length": 132.0, "epoch": 1.910705712914066, "frac_reward_zero_std": 0.725, "grad_norm": 0.11744437366724014, "kl": 0.0150634765625, "learning_rate": 5.4418201095377544e-08, "loss": 0.000602102093398571, "memory(GiB)": 27.09, "reward": 0.5488999843597412, "reward_std": 0.06491240309551358, "rewards/MMContentORM/mean": 0.7509999990463256, "rewards/MMContentORM/std": 0.4212790600955486, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 3980, "train_speed(iter/s)": 0.082831 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 346.2, "completions/mean_length": 211.7875, "completions/min_length": 119.4, "epoch": 1.913106096975516, "frac_reward_zero_std": 0.575, "grad_norm": 0.1685052067041397, "kl": 0.018121337890625, "learning_rate": 5.153678445538324e-08, "loss": 0.0007251160684973001, "memory(GiB)": 27.09, "reward": 0.3905999720096588, "reward_std": 0.2324967123568058, "rewards/MMContentORM/mean": 0.48649999499320984, "rewards/MMContentORM/std": 0.7621617078781128, "rewards/MMFormatORM/mean": 0.5587499678134918, "rewards/MMFormatORM/std": 0.19895429015159607, "rewards/MMRubricORM/mean": -0.1375, "rewards/MMRubricORM/std": 0.30669131875038147, "step": 3985, "train_speed(iter/s)": 0.08283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.0, "completions/mean_length": 230.875, "completions/min_length": 150.2, "epoch": 1.915506481036966, "frac_reward_zero_std": 0.675, "grad_norm": 0.17441634833812714, "kl": 0.015863037109375, "learning_rate": 4.873334677373054e-08, "loss": 0.0006344456225633622, "memory(GiB)": 27.09, "reward": 0.4771999716758728, "reward_std": 0.11483414098620415, "rewards/MMContentORM/mean": 0.6005000114440918, "rewards/MMContentORM/std": 0.6646744608879089, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 3990, "train_speed(iter/s)": 0.082834 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 340.0, "completions/mean_length": 215.3625, "completions/min_length": 106.6, "epoch": 1.917906865098416, "frac_reward_zero_std": 0.75, "grad_norm": 0.10349483042955399, "kl": 0.06673583984375, "learning_rate": 4.600793222759858e-08, "loss": 0.002681119553744793, "memory(GiB)": 27.09, "reward": 0.512749969959259, "reward_std": 0.08492352233733982, "rewards/MMContentORM/mean": 0.675000011920929, "rewards/MMContentORM/std": 0.5857814848423004, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 3995, "train_speed(iter/s)": 0.082835 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 349.8, "completions/mean_length": 219.95, "completions/min_length": 121.2, "epoch": 1.9203072491598656, "frac_reward_zero_std": 0.6, "grad_norm": 0.13771557807922363, "kl": 0.01771240234375, "learning_rate": 4.33605837646639e-08, "loss": 0.0007083784788846969, "memory(GiB)": 27.09, "reward": 0.4103999733924866, "reward_std": 0.1940301053225994, "rewards/MMContentORM/mean": 0.49100000262260435, "rewards/MMContentORM/std": 0.6967435419559479, "rewards/MMFormatORM/mean": 0.5849999785423279, "rewards/MMFormatORM/std": 0.1737115800380707, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2672485947608948, "step": 4000, "train_speed(iter/s)": 0.082832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.2, "completions/mean_length": 210.9625, "completions/min_length": 129.8, "epoch": 1.9227076332213153, "frac_reward_zero_std": 0.7, "grad_norm": 0.16067036986351013, "kl": 0.013800048828125, "learning_rate": 4.079134310241706e-08, "loss": 0.000552175985649228, "memory(GiB)": 27.09, "reward": 0.4725499749183655, "reward_std": 0.11066220700740814, "rewards/MMContentORM/mean": 0.5745000123977662, "rewards/MMContentORM/std": 0.6102168440818787, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 4005, "train_speed(iter/s)": 0.082769 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.8, "completions/mean_length": 220.4125, "completions/min_length": 142.4, "epoch": 1.9251080172827653, "frac_reward_zero_std": 0.6, "grad_norm": 0.18261726200580597, "kl": 0.01759033203125, "learning_rate": 3.8300250727510423e-08, "loss": 0.0007028756663203239, "memory(GiB)": 27.09, "reward": 0.43639997243881223, "reward_std": 0.1443912021815777, "rewards/MMContentORM/mean": 0.5559999942779541, "rewards/MMContentORM/std": 0.6905377149581909, "rewards/MMFormatORM/mean": 0.5849999845027923, "rewards/MMFormatORM/std": 0.16754122078418732, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2577557325363159, "step": 4010, "train_speed(iter/s)": 0.082768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.4, "completions/mean_length": 205.5875, "completions/min_length": 133.2, "epoch": 1.9275084013442152, "frac_reward_zero_std": 0.75, "grad_norm": 0.12302592396736145, "kl": 0.014111328125, "learning_rate": 3.588734589511977e-08, "loss": 0.0005644991528242826, "memory(GiB)": 27.09, "reward": 0.5111499905586243, "reward_std": 0.08379215330351145, "rewards/MMContentORM/mean": 0.6710000097751617, "rewards/MMContentORM/std": 0.5448502898216248, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 4015, "train_speed(iter/s)": 0.082782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.2, "completions/mean_length": 221.45, "completions/min_length": 142.6, "epoch": 1.929908785405665, "frac_reward_zero_std": 0.675, "grad_norm": 0.16786423325538635, "kl": 0.0176513671875, "learning_rate": 3.3552666628323126e-08, "loss": 0.0007057101465761662, "memory(GiB)": 27.09, "reward": 0.521749985218048, "reward_std": 0.14743175983894616, "rewards/MMContentORM/mean": 0.7550000071525573, "rewards/MMContentORM/std": 0.5161954037845135, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 4020, "train_speed(iter/s)": 0.082782 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.8, "completions/mean_length": 215.6625, "completions/min_length": 116.0, "epoch": 1.9323091694671146, "frac_reward_zero_std": 0.6, "grad_norm": 0.36218705773353577, "kl": 0.017657470703125, "learning_rate": 3.1296249717504e-08, "loss": 0.0007065317593514919, "memory(GiB)": 27.09, "reward": 0.45779996514320376, "reward_std": 0.11285423804074526, "rewards/MMContentORM/mean": 0.5720000088214874, "rewards/MMContentORM/std": 0.6419292092323303, "rewards/MMFormatORM/mean": 0.6037499904632568, "rewards/MMFormatORM/std": 0.14527987241744994, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 4025, "train_speed(iter/s)": 0.082785 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.4, "completions/mean_length": 222.45, "completions/min_length": 143.2, "epoch": 1.9347095535285646, "frac_reward_zero_std": 0.8, "grad_norm": 0.10553177446126938, "kl": 0.015179443359375, "learning_rate": 2.91181307197691e-08, "loss": 0.0006072814110666513, "memory(GiB)": 27.09, "reward": 0.49004998803138733, "reward_std": 0.09835855364799499, "rewards/MMContentORM/mean": 0.647000002861023, "rewards/MMContentORM/std": 0.6379193365573883, "rewards/MMFormatORM/mean": 0.609375, "rewards/MMFormatORM/std": 0.16249999403953552, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.25, "step": 4030, "train_speed(iter/s)": 0.082792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.8, "completions/mean_length": 208.3625, "completions/min_length": 128.0, "epoch": 1.9371099375900145, "frac_reward_zero_std": 0.625, "grad_norm": 0.12949110567569733, "kl": 0.022711181640625, "learning_rate": 2.7018343958392092e-08, "loss": 0.000908501259982586, "memory(GiB)": 27.09, "reward": 0.41979997158050536, "reward_std": 0.18243354400619866, "rewards/MMContentORM/mean": 0.5145000040531158, "rewards/MMContentORM/std": 0.7357254981994629, "rewards/MMFormatORM/mean": 0.5849999904632568, "rewards/MMFormatORM/std": 0.16980934143066406, "rewards/MMRubricORM/mean": -0.1, "rewards/MMRubricORM/std": 0.2612451553344727, "step": 4035, "train_speed(iter/s)": 0.082792 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.8, "completions/mean_length": 224.0875, "completions/min_length": 159.6, "epoch": 1.9395103216514642, "frac_reward_zero_std": 0.7, "grad_norm": 0.13331717252731323, "kl": 0.0154541015625, "learning_rate": 2.499692252226793e-08, "loss": 0.0006182675249874591, "memory(GiB)": 27.09, "reward": 0.4871499836444855, "reward_std": 0.06908432978671045, "rewards/MMContentORM/mean": 0.6109999895095826, "rewards/MMContentORM/std": 0.6157109498977661, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 4040, "train_speed(iter/s)": 0.082788 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.6, "completions/mean_length": 209.9125, "completions/min_length": 138.4, "epoch": 1.941910705712914, "frac_reward_zero_std": 0.7, "grad_norm": 0.19583547115325928, "kl": 0.014129638671875, "learning_rate": 2.3053898265395503e-08, "loss": 0.0005656382068991661, "memory(GiB)": 27.09, "reward": 0.48734999299049375, "reward_std": 0.07954950779676437, "rewards/MMContentORM/mean": 0.6115000009536743, "rewards/MMContentORM/std": 0.6199671626091003, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 4045, "train_speed(iter/s)": 0.082796 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.6, "completions/mean_length": 218.2625, "completions/min_length": 139.0, "epoch": 1.9443110897743638, "frac_reward_zero_std": 0.725, "grad_norm": 0.12564736604690552, "kl": 0.014019775390625, "learning_rate": 2.1189301806372463e-08, "loss": 0.0005606723949313164, "memory(GiB)": 27.09, "reward": 0.5056499779224396, "reward_std": 0.040092954062856734, "rewards/MMContentORM/mean": 0.6284999966621398, "rewards/MMContentORM/std": 0.495737274736166, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 4050, "train_speed(iter/s)": 0.082803 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.4, "completions/mean_length": 209.225, "completions/min_length": 111.8, "epoch": 1.9467114738358138, "frac_reward_zero_std": 0.7, "grad_norm": 0.09130167961120605, "kl": 0.019110107421875, "learning_rate": 1.940316252791563e-08, "loss": 0.0007643857039511204, "memory(GiB)": 27.09, "reward": 0.49664999842643737, "reward_std": 0.11900607645511627, "rewards/MMContentORM/mean": 0.6690000057220459, "rewards/MMContentORM/std": 0.6181300818920136, "rewards/MMFormatORM/mean": 0.6056249737739563, "rewards/MMFormatORM/std": 0.1592322736978531, "rewards/MMRubricORM/mean": -0.06599999964237213, "rewards/MMRubricORM/std": 0.2501555383205414, "step": 4055, "train_speed(iter/s)": 0.08281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.6, "completions/mean_length": 217.1625, "completions/min_length": 143.8, "epoch": 1.9491118578972637, "frac_reward_zero_std": 0.6, "grad_norm": 0.15874595940113068, "kl": 0.015081787109375, "learning_rate": 1.7695508576395237e-08, "loss": 0.0006026300135999918, "memory(GiB)": 27.09, "reward": 0.4948499917984009, "reward_std": 0.1311683064326644, "rewards/MMContentORM/mean": 0.6590000033378601, "rewards/MMContentORM/std": 0.5976063251495362, "rewards/MMFormatORM/mean": 0.6093749821186065, "rewards/MMFormatORM/std": 0.09063776731491088, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.1394427239894867, "step": 4060, "train_speed(iter/s)": 0.082818 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.0, "completions/mean_length": 215.8125, "completions/min_length": 141.2, "epoch": 1.9515122419587134, "frac_reward_zero_std": 0.675, "grad_norm": 0.14566867053508759, "kl": 0.012939453125, "learning_rate": 1.6066366861393068e-08, "loss": 0.000517718493938446, "memory(GiB)": 27.09, "reward": 0.5360999763011932, "reward_std": 0.08697413904592395, "rewards/MMContentORM/mean": 0.7190000057220459, "rewards/MMContentORM/std": 0.5173257470130921, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 4065, "train_speed(iter/s)": 0.082824 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.6, "completions/mean_length": 214.15, "completions/min_length": 143.2, "epoch": 1.9539126260201631, "frac_reward_zero_std": 0.575, "grad_norm": 0.17086957395076752, "kl": 0.018682861328125, "learning_rate": 1.4515763055278354e-08, "loss": 0.0007456324063241481, "memory(GiB)": 27.09, "reward": 0.4182999789714813, "reward_std": 0.10818733535706997, "rewards/MMContentORM/mean": 0.48199999928474424, "rewards/MMContentORM/std": 0.6929208874702454, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 4070, "train_speed(iter/s)": 0.082825 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.0, "completions/mean_length": 212.15, "completions/min_length": 144.8, "epoch": 1.956313010081613, "frac_reward_zero_std": 0.8, "grad_norm": 0.11877346783876419, "kl": 0.013311767578125, "learning_rate": 1.3043721592803093e-08, "loss": 0.0005328983068466186, "memory(GiB)": 27.09, "reward": 0.5734499812126159, "reward_std": 0.040941482339985666, "rewards/MMContentORM/mean": 0.7980000138282776, "rewards/MMContentORM/std": 0.42314670234918594, "rewards/MMFormatORM/mean": 0.6418749809265136, "rewards/MMFormatORM/std": 0.032499998807907104, "rewards/MMRubricORM/mean": -0.0125, "rewards/MMRubricORM/std": 0.05, "step": 4075, "train_speed(iter/s)": 0.082833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 331.8, "completions/mean_length": 224.875, "completions/min_length": 161.8, "epoch": 1.958713394143063, "frac_reward_zero_std": 0.675, "grad_norm": 0.13452792167663574, "kl": 0.0132568359375, "learning_rate": 1.1650265670716255e-08, "loss": 0.0005301388446241617, "memory(GiB)": 27.09, "reward": 0.5045499742031098, "reward_std": 0.0688014852348715, "rewards/MMContentORM/mean": 0.6545000076293945, "rewards/MMContentORM/std": 0.592427009344101, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 4080, "train_speed(iter/s)": 0.082833 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 327.6, "completions/mean_length": 217.6875, "completions/min_length": 136.8, "epoch": 1.9611137782045127, "frac_reward_zero_std": 0.625, "grad_norm": 0.12339378893375397, "kl": 0.014471435546875, "learning_rate": 1.0335417247398505e-08, "loss": 0.000578406685963273, "memory(GiB)": 27.09, "reward": 0.49459999799728394, "reward_std": 0.06547808232717216, "rewards/MMContentORM/mean": 0.6440000057220459, "rewards/MMContentORM/std": 0.5158215515315533, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 4085, "train_speed(iter/s)": 0.082838 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.8, "completions/mean_length": 208.3125, "completions/min_length": 130.6, "epoch": 1.9635141622659624, "frac_reward_zero_std": 0.75, "grad_norm": 0.12326997518539429, "kl": 0.015618896484375, "learning_rate": 9.099197042517493e-09, "loss": 0.0006246047094464302, "memory(GiB)": 27.09, "reward": 0.4959499776363373, "reward_std": 0.07756961362902075, "rewards/MMContentORM/mean": 0.6329999923706054, "rewards/MMContentORM/std": 0.6094204008579254, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 4090, "train_speed(iter/s)": 0.082846 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.6, "completions/mean_length": 207.375, "completions/min_length": 121.8, "epoch": 1.9659145463274124, "frac_reward_zero_std": 0.75, "grad_norm": 0.16281642019748688, "kl": 0.016046142578125, "learning_rate": 7.941624536699221e-09, "loss": 0.0006411905866116286, "memory(GiB)": 27.09, "reward": 0.5397499680519104, "reward_std": 0.06823580265045166, "rewards/MMContentORM/mean": 0.7425000071525574, "rewards/MMContentORM/std": 0.47391852661967276, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 4095, "train_speed(iter/s)": 0.082849 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.2, "completions/mean_length": 206.6, "completions/min_length": 125.0, "epoch": 1.9683149303888623, "frac_reward_zero_std": 0.55, "grad_norm": 0.1633000671863556, "kl": 0.0170166015625, "learning_rate": 6.8627179712232875e-09, "loss": 0.0006804309785366058, "memory(GiB)": 27.09, "reward": 0.47594999670982363, "reward_std": 0.10245976857841015, "rewards/MMContentORM/mean": 0.5830000042915344, "rewards/MMContentORM/std": 0.6251704752445221, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 4100, "train_speed(iter/s)": 0.082859 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.6, "completions/mean_length": 211.1, "completions/min_length": 149.2, "epoch": 1.970715314450312, "frac_reward_zero_std": 0.725, "grad_norm": 0.16619880497455597, "kl": 0.02249755859375, "learning_rate": 5.862494347733672e-09, "loss": 0.0008998697623610497, "memory(GiB)": 27.09, "reward": 0.5654999852180481, "reward_std": 0.047093309834599494, "rewards/MMContentORM/mean": 0.7925000190734863, "rewards/MMContentORM/std": 0.4002851232886314, "rewards/MMFormatORM/mean": 0.6337499856948853, "rewards/MMFormatORM/std": 0.06499999761581421, "rewards/MMRubricORM/mean": -0.025, "rewards/MMRubricORM/std": 0.1, "step": 4105, "train_speed(iter/s)": 0.082845 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.0, "completions/mean_length": 211.35, "completions/min_length": 129.2, "epoch": 1.9731156985117617, "frac_reward_zero_std": 0.775, "grad_norm": 0.11561845242977142, "kl": 0.026605224609375, "learning_rate": 4.9409694279711765e-09, "loss": 0.0010593479499220848, "memory(GiB)": 27.09, "reward": 0.5204499781131744, "reward_std": 0.026516501186415554, "rewards/MMContentORM/mean": 0.653000020980835, "rewards/MMContentORM/std": 0.4970328502357006, "rewards/MMFormatORM/mean": 0.6481249809265137, "rewards/MMFormatORM/std": 0.007499998807907105, "rewards/MMRubricORM/mean": 0.0, "rewards/MMRubricORM/std": 0.0, "step": 4110, "train_speed(iter/s)": 0.082852 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.0, "completions/mean_length": 213.1625, "completions/min_length": 146.6, "epoch": 1.9755160825732117, "frac_reward_zero_std": 0.625, "grad_norm": 0.18078495562076569, "kl": 0.017230224609375, "learning_rate": 4.098157733525842e-09, "loss": 0.0006890918128192424, "memory(GiB)": 27.09, "reward": 0.472899979352951, "reward_std": 0.1401485550450161, "rewards/MMContentORM/mean": 0.6185000002384186, "rewards/MMContentORM/std": 0.6067943811416626, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.12930812537670136, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.19893558621406554, "step": 4115, "train_speed(iter/s)": 0.082855 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 346.0, "completions/mean_length": 211.8375, "completions/min_length": 115.8, "epoch": 1.9779164666346616, "frac_reward_zero_std": 0.575, "grad_norm": 0.14201927185058594, "kl": 0.029730224609375, "learning_rate": 3.3340725456071364e-09, "loss": 0.0011919239535927773, "memory(GiB)": 27.09, "reward": 0.4554999828338623, "reward_std": 0.1658872556872666, "rewards/MMContentORM/mean": 0.5750000178813934, "rewards/MMContentORM/std": 0.6658959984779358, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 4120, "train_speed(iter/s)": 0.082848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.0, "completions/mean_length": 218.8875, "completions/min_length": 137.2, "epoch": 1.9803168506961115, "frac_reward_zero_std": 0.5, "grad_norm": 0.19586387276649475, "kl": 0.016632080078125, "learning_rate": 2.6487259048357803e-09, "loss": 0.000665505975484848, "memory(GiB)": 27.09, "reward": 0.47424999475479124, "reward_std": 0.15803836286067963, "rewards/MMContentORM/mean": 0.6074999988079071, "rewards/MMContentORM/std": 0.6695436835289001, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 4125, "train_speed(iter/s)": 0.08285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 356.2, "completions/mean_length": 221.2, "completions/min_length": 129.6, "epoch": 1.9827172347575612, "frac_reward_zero_std": 0.75, "grad_norm": 0.10946598649024963, "kl": 0.015472412109375, "learning_rate": 2.0421286110533513e-09, "loss": 0.0006184926256537438, "memory(GiB)": 27.09, "reward": 0.5523499727249146, "reward_std": 0.086054896004498, "rewards/MMContentORM/mean": 0.7740000247955322, "rewards/MMContentORM/std": 0.44600327536463735, "rewards/MMFormatORM/mean": 0.6256249904632568, "rewards/MMFormatORM/std": 0.09749999642372131, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.15, "step": 4130, "train_speed(iter/s)": 0.082848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.0, "completions/mean_length": 213.225, "completions/min_length": 137.4, "epoch": 1.985117618819011, "frac_reward_zero_std": 0.775, "grad_norm": 0.07553966343402863, "kl": 0.01331787109375, "learning_rate": 1.5142902231513045e-09, "loss": 0.0005324673838913441, "memory(GiB)": 27.09, "reward": 0.5357499957084656, "reward_std": 0.06823580311611295, "rewards/MMContentORM/mean": 0.732500022649765, "rewards/MMContentORM/std": 0.44778469279408456, "rewards/MMFormatORM/mean": 0.6256249785423279, "rewards/MMFormatORM/std": 0.07690345346927643, "rewards/MMRubricORM/mean": -0.0375, "rewards/MMRubricORM/std": 0.11831300854682922, "step": 4135, "train_speed(iter/s)": 0.082857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.0, "completions/mean_length": 214.9375, "completions/min_length": 135.4, "epoch": 1.9875180028804609, "frac_reward_zero_std": 0.725, "grad_norm": 0.09850002825260162, "kl": 0.016082763671875, "learning_rate": 1.0652190589210965e-09, "loss": 0.0006438469514250756, "memory(GiB)": 27.09, "reward": 0.44769997596740724, "reward_std": 0.1412799373269081, "rewards/MMContentORM/mean": 0.5554999947547913, "rewards/MMContentORM/std": 0.6557976067066192, "rewards/MMFormatORM/mean": 0.6012499809265137, "rewards/MMFormatORM/std": 0.15380690693855287, "rewards/MMRubricORM/mean": -0.075, "rewards/MMRubricORM/std": 0.23662601709365844, "step": 4140, "train_speed(iter/s)": 0.082857 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.8, "completions/mean_length": 210.8875, "completions/min_length": 134.0, "epoch": 1.9899183869419108, "frac_reward_zero_std": 0.625, "grad_norm": 0.15459950268268585, "kl": 0.020318603515625, "learning_rate": 6.949221949248408e-10, "loss": 0.0008131683804094792, "memory(GiB)": 27.09, "reward": 0.4830499768257141, "reward_std": 0.11561195463873446, "rewards/MMContentORM/mean": 0.6294999957084656, "rewards/MMContentORM/std": 0.6491626858711242, "rewards/MMFormatORM/mean": 0.6093749880790711, "rewards/MMFormatORM/std": 0.14190345108509064, "rewards/MMRubricORM/mean": -0.0625, "rewards/MMRubricORM/std": 0.21831300854682922, "step": 4145, "train_speed(iter/s)": 0.082861 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.4, "completions/mean_length": 214.8375, "completions/min_length": 132.2, "epoch": 1.9923187710033605, "frac_reward_zero_std": 0.625, "grad_norm": 0.1465650200843811, "kl": 0.01693115234375, "learning_rate": 4.0340546638040213e-10, "loss": 0.0006770275533199311, "memory(GiB)": 27.09, "reward": 0.436549985408783, "reward_std": 0.15535135762766003, "rewards/MMContentORM/mean": 0.54200000166893, "rewards/MMContentORM/std": 0.6834682941436767, "rewards/MMFormatORM/mean": 0.5931249797344208, "rewards/MMFormatORM/std": 0.1350412219762802, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.2077557325363159, "step": 4150, "train_speed(iter/s)": 0.082874 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0125, "completions/max_length": 492.8, "completions/mean_length": 222.3125, "completions/min_length": 123.2, "epoch": 1.9947191550648102, "frac_reward_zero_std": 0.675, "grad_norm": 0.08558174967765808, "kl": 0.0202880859375, "learning_rate": 1.9067346707202227e-10, "loss": 0.0008112492971122265, "memory(GiB)": 27.09, "reward": 0.4795499801635742, "reward_std": 0.16411948413588107, "rewards/MMContentORM/mean": 0.6495000183582306, "rewards/MMContentORM/std": 0.5548809096217155, "rewards/MMFormatORM/mean": 0.5931249856948853, "rewards/MMFormatORM/std": 0.16180812418460847, "rewards/MMRubricORM/mean": -0.0875, "rewards/MMRubricORM/std": 0.24893558621406556, "step": 4155, "train_speed(iter/s)": 0.082842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.4, "completions/mean_length": 214.2625, "completions/min_length": 151.0, "epoch": 1.9971195391262602, "frac_reward_zero_std": 0.7, "grad_norm": 0.1454610973596573, "kl": 0.015252685546875, "learning_rate": 5.672954927593566e-11, "loss": 0.0006094192154705525, "memory(GiB)": 27.09, "reward": 0.5129999816417694, "reward_std": 0.08400428430177272, "rewards/MMContentORM/mean": 0.6900000154972077, "rewards/MMContentORM/std": 0.49128730222582817, "rewards/MMFormatORM/mean": 0.6174999833106994, "rewards/MMFormatORM/std": 0.10940345227718354, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.16831300854682923, "step": 4160, "train_speed(iter/s)": 0.082842 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.2, "completions/mean_length": 208.1375, "completions/min_length": 121.0, "epoch": 1.99951992318771, "frac_reward_zero_std": 0.725, "grad_norm": 0.14134319126605988, "kl": 0.018817138671875, "learning_rate": 1.5758237104090968e-12, "loss": 0.0007528647780418396, "memory(GiB)": 27.09, "reward": 0.478799968957901, "reward_std": 0.0989949492039159, "rewards/MMContentORM/mean": 0.6045000076293945, "rewards/MMContentORM/std": 0.6013319611549377, "rewards/MMFormatORM/mean": 0.6174999713897705, "rewards/MMFormatORM/std": 0.08880690932273864, "rewards/MMRubricORM/mean": -0.05, "rewards/MMRubricORM/std": 0.13662601709365846, "step": 4165, "train_speed(iter/s)": 0.082852 } ], "logging_steps": 5, "max_steps": 4166, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }